From 2fa6eaf93bfe5b638b6824f25ad1ebde686bd7d4 Mon Sep 17 00:00:00 2001 From: xur-llvm <59886942+xur-llvm@users.noreply.github.com> Date: Thu, 13 Jun 2024 10:21:46 -0700 Subject: [PATCH 001/155] [llvm-profgen] Add support for Linux kenrel profile (#92831) Add the support to handle Linux kernel perf files. The functionality is under option -kernel. Note that currently only main kernel (in vmlinux) is handled: kernel modules are not handled. --------- Co-authored-by: Han Shen --- llvm/tools/llvm-profgen/PerfReader.cpp | 137 +++++++++++++-------- llvm/tools/llvm-profgen/PerfReader.h | 34 ++--- llvm/tools/llvm-profgen/ProfiledBinary.cpp | 7 ++ llvm/tools/llvm-profgen/ProfiledBinary.h | 11 ++ 4 files changed, 123 insertions(+), 66 deletions(-) diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp index e63c6d61b3bfcc..e1f5cc900cfd71 100644 --- a/llvm/tools/llvm-profgen/PerfReader.cpp +++ b/llvm/tools/llvm-profgen/PerfReader.cpp @@ -321,7 +321,7 @@ bool VirtualUnwinder::unwind(const PerfSample *Sample, uint64_t Repeat) { std::unique_ptr PerfReaderBase::create(ProfiledBinary *Binary, PerfInputFile &PerfInput, - std::optional PIDFilter) { + std::optional PIDFilter) { std::unique_ptr PerfReader; if (PerfInput.Format == PerfFormat::UnsymbolizedProfile) { @@ -331,9 +331,10 @@ PerfReaderBase::create(ProfiledBinary *Binary, PerfInputFile &PerfInput, } // For perf data input, we need to convert them into perf script first. + // If this is a kernel perf file, there is no need for retrieving PIDs. 
if (PerfInput.Format == PerfFormat::PerfData) - PerfInput = - PerfScriptReader::convertPerfDataToTrace(Binary, PerfInput, PIDFilter); + PerfInput = PerfScriptReader::convertPerfDataToTrace( + Binary, Binary->isKernel(), PerfInput, PIDFilter); assert((PerfInput.Format == PerfFormat::PerfScript) && "Should be a perfscript!"); @@ -353,9 +354,9 @@ PerfReaderBase::create(ProfiledBinary *Binary, PerfInputFile &PerfInput, } PerfInputFile -PerfScriptReader::convertPerfDataToTrace(ProfiledBinary *Binary, +PerfScriptReader::convertPerfDataToTrace(ProfiledBinary *Binary, bool SkipPID, PerfInputFile &File, - std::optional PIDFilter) { + std::optional PIDFilter) { StringRef PerfData = File.InputFile; // Run perf script to retrieve PIDs matching binary we're interested in. auto PerfExecutable = sys::Process::FindInEnvPath("PATH", "perf"); @@ -363,49 +364,59 @@ PerfScriptReader::convertPerfDataToTrace(ProfiledBinary *Binary, exitWithError("Perf not found."); } std::string PerfPath = *PerfExecutable; - SmallString<128> PerfTraceFile; sys::fs::createUniquePath("perf-script-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%.tmp", PerfTraceFile, /*MakeAbsolute=*/true); std::string ErrorFile = std::string(PerfTraceFile) + ".err"; - StringRef ScriptMMapArgs[] = {PerfPath, "script", "--show-mmap-events", - "-F", "comm,pid", "-i", - PerfData}; std::optional Redirects[] = {std::nullopt, // Stdin StringRef(PerfTraceFile), // Stdout StringRef(ErrorFile)}; // Stderr - sys::ExecuteAndWait(PerfPath, ScriptMMapArgs, std::nullopt, Redirects); - PerfScriptReader::TempFileCleanups.emplace_back(PerfTraceFile); PerfScriptReader::TempFileCleanups.emplace_back(ErrorFile); - // Collect the PIDs - TraceStream TraceIt(PerfTraceFile); std::string PIDs; - std::unordered_set PIDSet; - while (!TraceIt.isAtEoF()) { - MMapEvent MMap; - if (isMMap2Event(TraceIt.getCurrentLine()) && - extractMMap2EventForBinary(Binary, TraceIt.getCurrentLine(), MMap)) { - auto It = PIDSet.emplace(MMap.PID); - if (It.second && (!PIDFilter || 
MMap.PID == *PIDFilter)) { - if (!PIDs.empty()) { - PIDs.append(","); + if (!SkipPID) { + StringRef ScriptMMapArgs[] = {PerfPath, "script", "--show-mmap-events", + "-F", "comm,pid", "-i", + PerfData}; + sys::ExecuteAndWait(PerfPath, ScriptMMapArgs, std::nullopt, Redirects); + + // Collect the PIDs + TraceStream TraceIt(PerfTraceFile); + std::unordered_set PIDSet; + while (!TraceIt.isAtEoF()) { + MMapEvent MMap; + if (isMMapEvent(TraceIt.getCurrentLine()) && + extractMMapEventForBinary(Binary, TraceIt.getCurrentLine(), MMap)) { + auto It = PIDSet.emplace(MMap.PID); + if (It.second && (!PIDFilter || MMap.PID == *PIDFilter)) { + if (!PIDs.empty()) { + PIDs.append(","); + } + PIDs.append(utostr(MMap.PID)); } - PIDs.append(utostr(MMap.PID)); } + TraceIt.advance(); } - TraceIt.advance(); - } - if (PIDs.empty()) { - exitWithError("No relevant mmap event is found in perf data."); + if (PIDs.empty()) { + exitWithError("No relevant mmap event is found in perf data."); + } } // Run perf script again to retrieve events for PIDs collected above - StringRef ScriptSampleArgs[] = {PerfPath, "script", "--show-mmap-events", - "-F", "ip,brstack", "--pid", - PIDs, "-i", PerfData}; + SmallVector ScriptSampleArgs; + ScriptSampleArgs.push_back(PerfPath); + ScriptSampleArgs.push_back("script"); + ScriptSampleArgs.push_back("--show-mmap-events"); + ScriptSampleArgs.push_back("-F"); + ScriptSampleArgs.push_back("ip,brstack"); + ScriptSampleArgs.push_back("-i"); + ScriptSampleArgs.push_back(PerfData); + if (!PIDs.empty()) { + ScriptSampleArgs.push_back("--pid"); + ScriptSampleArgs.push_back(PIDs); + } sys::ExecuteAndWait(PerfPath, ScriptSampleArgs, std::nullopt, Redirects); return {std::string(PerfTraceFile), PerfFormat::PerfScript, @@ -428,7 +439,10 @@ static StringRef filename(StringRef Path, bool UseBackSlash) { void PerfScriptReader::updateBinaryAddress(const MMapEvent &Event) { // Drop the event which doesn't belong to user-provided binary StringRef BinaryName = 
filename(Event.BinaryPath, Binary->isCOFF()); - if (Binary->getName() != BinaryName) + bool IsKernel = Binary->isKernel(); + if (!IsKernel && Binary->getName() != BinaryName) + return; + if (IsKernel && !Binary->isKernelImageName(BinaryName)) return; // Drop the event if process does not match pid filter @@ -441,7 +455,7 @@ void PerfScriptReader::updateBinaryAddress(const MMapEvent &Event) { return; } - if (Event.Offset == Binary->getTextSegmentOffset()) { + if (IsKernel || Event.Offset == Binary->getTextSegmentOffset()) { // A binary image could be unloaded and then reloaded at different // place, so update binary load address. // Only update for the first executable segment and assume all other @@ -950,16 +964,23 @@ void PerfScriptReader::parseSample(TraceStream &TraceIt) { parseSample(TraceIt, Count); } -bool PerfScriptReader::extractMMap2EventForBinary(ProfiledBinary *Binary, - StringRef Line, - MMapEvent &MMap) { - // Parse a line like: +bool PerfScriptReader::extractMMapEventForBinary(ProfiledBinary *Binary, + StringRef Line, + MMapEvent &MMap) { + // Parse a MMap2 line like: // PERF_RECORD_MMAP2 2113428/2113428: [0x7fd4efb57000(0x204000) @ 0 // 08:04 19532229 3585508847]: r-xp /usr/lib64/libdl-2.17.so - constexpr static const char *const Pattern = - "PERF_RECORD_MMAP2 ([0-9]+)/[0-9]+: " + constexpr static const char *const MMap2Pattern = + "PERF_RECORD_MMAP2 (-?[0-9]+)/[0-9]+: " "\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ " "(0x[a-f0-9]+|0) .*\\]: [-a-z]+ (.*)"; + // Parse a MMap line like + // PERF_RECORD_MMAP -1/0: [0xffffffff81e00000(0x3e8fa000) @ \ + // 0xffffffff81e00000]: x [kernel.kallsyms]_text + constexpr static const char *const MMapPattern = + "PERF_RECORD_MMAP (-?[0-9]+)/[0-9]+: " + "\\[(0x[a-f0-9]+)\\((0x[a-f0-9]+)\\) @ " + "(0x[a-f0-9]+|0)\\]: [-a-z]+ (.*)"; // Field 0 - whole line // Field 1 - PID // Field 2 - base address @@ -975,14 +996,25 @@ bool PerfScriptReader::extractMMap2EventForBinary(ProfiledBinary *Binary, BINARY_PATH = 5 }; - Regex 
RegMmap2(Pattern); + bool R = false; SmallVector Fields; - bool R = RegMmap2.match(Line, &Fields); + if (Line.contains("PERF_RECORD_MMAP2 ")) { + Regex RegMmap2(MMap2Pattern); + R = RegMmap2.match(Line, &Fields); + } else if (Line.contains("PERF_RECORD_MMAP ")) { + Regex RegMmap(MMapPattern); + R = RegMmap.match(Line, &Fields); + } else + llvm_unreachable("unexpected MMAP event entry"); + if (!R) { std::string WarningMsg = "Cannot parse mmap event: " + Line.str() + " \n"; WithColor::warning() << WarningMsg; + return false; } - Fields[PID].getAsInteger(10, MMap.PID); + long long MMapPID = 0; + getAsSignedInteger(Fields[PID], 10, MMapPID); + MMap.PID = MMapPID; Fields[MMAPPED_ADDRESS].getAsInteger(0, MMap.Address); Fields[MMAPPED_SIZE].getAsInteger(0, MMap.Size); Fields[PAGE_OFFSET].getAsInteger(0, MMap.Offset); @@ -993,19 +1025,22 @@ bool PerfScriptReader::extractMMap2EventForBinary(ProfiledBinary *Binary, } StringRef BinaryName = filename(MMap.BinaryPath, Binary->isCOFF()); + if (Binary->isKernel()) { + return Binary->isKernelImageName(BinaryName); + } return Binary->getName() == BinaryName; } -void PerfScriptReader::parseMMap2Event(TraceStream &TraceIt) { +void PerfScriptReader::parseMMapEvent(TraceStream &TraceIt) { MMapEvent MMap; - if (extractMMap2EventForBinary(Binary, TraceIt.getCurrentLine(), MMap)) + if (extractMMapEventForBinary(Binary, TraceIt.getCurrentLine(), MMap)) updateBinaryAddress(MMap); TraceIt.advance(); } void PerfScriptReader::parseEventOrSample(TraceStream &TraceIt) { - if (isMMap2Event(TraceIt.getCurrentLine())) - parseMMap2Event(TraceIt); + if (isMMapEvent(TraceIt.getCurrentLine())) + parseMMapEvent(TraceIt); else parseSample(TraceIt); } @@ -1032,7 +1067,7 @@ bool PerfScriptReader::isLBRSample(StringRef Line) { return false; } -bool PerfScriptReader::isMMap2Event(StringRef Line) { +bool PerfScriptReader::isMMapEvent(StringRef Line) { // Short cut to avoid string find is possible. 
if (Line.empty() || Line.size() < 50) return false; @@ -1040,9 +1075,9 @@ bool PerfScriptReader::isMMap2Event(StringRef Line) { if (std::isdigit(Line[0])) return false; - // PERF_RECORD_MMAP2 does not appear at the beginning of the line - // for ` perf script --show-mmap-events -i ...` - return Line.contains("PERF_RECORD_MMAP2"); + // PERF_RECORD_MMAP2 or PERF_RECORD_MMAP does not appear at the beginning of + // the line for ` perf script --show-mmap-events -i ...` + return Line.contains("PERF_RECORD_MMAP"); } // The raw hybird sample is like @@ -1208,6 +1243,10 @@ void PerfScriptReader::warnInvalidRange() { void PerfScriptReader::parsePerfTraces() { // Parse perf traces and do aggregation. parseAndAggregateTrace(); + if (Binary->isKernel() && !Binary->getIsLoadedByMMap()) { + exitWithError( + "Kernel is requested, but no kernel is found in mmap events."); + } emitWarningSummary(NumLeafExternalFrame, NumTotalSample, "of samples have leaf external frame in call stack."); diff --git a/llvm/tools/llvm-profgen/PerfReader.h b/llvm/tools/llvm-profgen/PerfReader.h index b821cbe13efae6..a3bd7a0a6493e9 100644 --- a/llvm/tools/llvm-profgen/PerfReader.h +++ b/llvm/tools/llvm-profgen/PerfReader.h @@ -570,7 +570,7 @@ class PerfReaderBase { virtual ~PerfReaderBase() = default; static std::unique_ptr create(ProfiledBinary *Binary, PerfInputFile &PerfInput, - std::optional PIDFilter); + std::optional PIDFilter); // Entry of the reader to parse multiple perf traces virtual void parsePerfTraces() = 0; @@ -595,15 +595,15 @@ class PerfReaderBase { class PerfScriptReader : public PerfReaderBase { public: PerfScriptReader(ProfiledBinary *B, StringRef PerfTrace, - std::optional PID) - : PerfReaderBase(B, PerfTrace), PIDFilter(PID){}; + std::optional PID) + : PerfReaderBase(B, PerfTrace), PIDFilter(PID) {}; // Entry of the reader to parse multiple perf traces void parsePerfTraces() override; // Generate perf script from perf data - static PerfInputFile - 
convertPerfDataToTrace(ProfiledBinary *Binary, PerfInputFile &File, - std::optional PIDFilter); + static PerfInputFile convertPerfDataToTrace(ProfiledBinary *Binary, + bool SkipPID, PerfInputFile &File, + std::optional PIDFilter); // Extract perf script type by peaking at the input static PerfContent checkPerfScriptType(StringRef FileName); @@ -615,7 +615,7 @@ class PerfScriptReader : public PerfReaderBase { protected: // The parsed MMap event struct MMapEvent { - uint64_t PID = 0; + int64_t PID = 0; uint64_t Address = 0; uint64_t Size = 0; uint64_t Offset = 0; @@ -625,15 +625,15 @@ class PerfScriptReader : public PerfReaderBase { // Check whether a given line is LBR sample static bool isLBRSample(StringRef Line); // Check whether a given line is MMAP event - static bool isMMap2Event(StringRef Line); - // Parse a single line of a PERF_RECORD_MMAP2 event looking for a + static bool isMMapEvent(StringRef Line); + // Parse a single line of a PERF_RECORD_MMAP event looking for a // mapping between the binary name and its memory layout. 
- static bool extractMMap2EventForBinary(ProfiledBinary *Binary, StringRef Line, - MMapEvent &MMap); + static bool extractMMapEventForBinary(ProfiledBinary *Binary, StringRef Line, + MMapEvent &MMap); // Update base address based on mmap events void updateBinaryAddress(const MMapEvent &Event); // Parse mmap event and update binary address - void parseMMap2Event(TraceStream &TraceIt); + void parseMMapEvent(TraceStream &TraceIt); // Parse perf events/samples and do aggregation void parseAndAggregateTrace(); // Parse either an MMAP event or a perf sample @@ -669,7 +669,7 @@ class PerfScriptReader : public PerfReaderBase { // Keep track of all invalid return addresses std::set InvalidReturnAddresses; // PID for the process of interest - std::optional PIDFilter; + std::optional PIDFilter; }; /* @@ -681,8 +681,8 @@ class PerfScriptReader : public PerfReaderBase { class LBRPerfReader : public PerfScriptReader { public: LBRPerfReader(ProfiledBinary *Binary, StringRef PerfTrace, - std::optional PID) - : PerfScriptReader(Binary, PerfTrace, PID){}; + std::optional PID) + : PerfScriptReader(Binary, PerfTrace, PID) {}; // Parse the LBR only sample. 
void parseSample(TraceStream &TraceIt, uint64_t Count) override; }; @@ -699,8 +699,8 @@ class LBRPerfReader : public PerfScriptReader { class HybridPerfReader : public PerfScriptReader { public: HybridPerfReader(ProfiledBinary *Binary, StringRef PerfTrace, - std::optional PID) - : PerfScriptReader(Binary, PerfTrace, PID){}; + std::optional PID) + : PerfScriptReader(Binary, PerfTrace, PID) {}; // Parse the hybrid sample including the call and LBR line void parseSample(TraceStream &TraceIt, uint64_t Count) override; void generateUnsymbolizedProfile() override; diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index 1baf35820f97fa..a7e506d32ac2ee 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -56,6 +56,10 @@ static cl::list DisassembleFunctions( cl::desc("List of functions to print disassembly for. Accept demangled " "names only. Only work with show-disassembly-only")); +static cl::opt + KernelBinary("kernel", + cl::desc("Generate the profile for Linux kernel binary.")); + extern cl::opt ShowDetailedWarning; extern cl::opt InferMissingFrames; @@ -221,6 +225,9 @@ void ProfiledBinary::load() { LLVM_DEBUG(dbgs() << "Loading " << Path << "\n"); + // Mark the binary as a kernel image; + IsKernel = KernelBinary; + // Find the preferred load address for text sections. setPreferredTextSegmentAddresses(Obj); diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h index 5d2088ad7691c4..8d96d159895933 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.h +++ b/llvm/tools/llvm-profgen/ProfiledBinary.h @@ -291,6 +291,9 @@ class ProfiledBinary { // Whether we need to symbolize all instructions to get function context size. 
bool TrackFuncContextSize = false; + // Whether this is a kernel image; + bool IsKernel = false; + // Indicate if the base loading address is parsed from the mmap event or uses // the preferred address bool IsLoadedByMMap = false; @@ -428,6 +431,14 @@ class ProfiledBinary { bool usePseudoProbes() const { return UsePseudoProbes; } bool useFSDiscriminator() const { return UseFSDiscriminator; } + bool isKernel() const { return IsKernel; } + + static bool isKernelImageName(StringRef BinaryName) { + return BinaryName == "[kernel.kallsyms]" || + BinaryName == "[kernel.kallsyms]_stext" || + BinaryName == "[kernel.kallsyms]_text"; + } + // Get the index in CodeAddressVec for the address // As we might get an address which is not the code // here it would round to the next valid code address by From 3dd73dc1996940645620fd191110b57c49183531 Mon Sep 17 00:00:00 2001 From: Haowei Date: Thu, 13 Jun 2024 10:24:24 -0700 Subject: [PATCH 002/155] [Fuchsia] Add armv7m and armv8m runtimes to Fuchsia Clang toolchain (#95337) This patch adds armv7m and armv8m runtimes to Fuchsia Clang toolchain configuration. 
--- clang/cmake/caches/Fuchsia-stage2.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/cmake/caches/Fuchsia-stage2.cmake b/clang/cmake/caches/Fuchsia-stage2.cmake index aa07b04be65ccf..a573ec54732101 100644 --- a/clang/cmake/caches/Fuchsia-stage2.cmake +++ b/clang/cmake/caches/Fuchsia-stage2.cmake @@ -300,14 +300,14 @@ if(FUCHSIA_SDK) set(LLVM_RUNTIME_MULTILIB_hwasan+noexcept_TARGETS "aarch64-unknown-fuchsia;riscv64-unknown-fuchsia" CACHE STRING "") endif() -foreach(target armv6m-unknown-eabi) +foreach(target armv6m-unknown-eabi;armv7m-unknown-eabi;armv8m-unknown-eabi) list(APPEND BUILTIN_TARGETS "${target}") set(BUILTINS_${target}_CMAKE_SYSTEM_NAME Generic CACHE STRING "") set(BUILTINS_${target}_CMAKE_SYSTEM_PROCESSOR arm CACHE STRING "") set(BUILTINS_${target}_CMAKE_SYSROOT "" CACHE STRING "") set(BUILTINS_${target}_CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "") foreach(lang C;CXX;ASM) - set(BUILTINS_${target}_CMAKE_${lang}_FLAGS "--target=${target} -mcpu=cortex-m0plus -mthumb" CACHE STRING "") + set(BUILTINS_${target}_CMAKE_${lang}_FLAGS "--target=${target} -mthumb" CACHE STRING "") endforeach() foreach(type SHARED;MODULE;EXE) set(BUILTINS_${target}_CMAKE_${type}_LINKER_FLAGS "-fuse-ld=lld" CACHE STRING "") @@ -321,7 +321,7 @@ foreach(target armv6m-unknown-eabi) set(RUNTIMES_${target}_CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "") set(RUNTIMES_${target}_CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY CACHE STRING "") foreach(lang C;CXX;ASM) - set(RUNTIMES_${target}_CMAKE_${lang}_FLAGS "--target=${target} -mcpu=cortex-m0plus -mthumb" CACHE STRING "") + set(RUNTIMES_${target}_CMAKE_${lang}_FLAGS "--target=${target} -mthumb" CACHE STRING "") endforeach() foreach(type SHARED;MODULE;EXE) set(RUNTIMES_${target}_CMAKE_${type}_LINKER_FLAGS "-fuse-ld=lld" CACHE STRING "") From 010c55bf44144f6370a0c4995c30ec51b06e1efe Mon Sep 17 00:00:00 2001 From: Peter Klausler <35819229+klausler@users.noreply.github.com> Date: Thu, 13 Jun 2024 
10:46:09 -0700 Subject: [PATCH 003/155] [flang] Improve error recovery in tricky situation (#95168) When the very first statement of the executable part has syntax errors, it's not at all obvious whether the error messages that are reported to the user should be those from its failure to be the last statement of the specification part or its failure to be the first executable statement when both failures are at the same character in the cooked character stream. Fortran makes this problem more exciting by allowing statement function definitions look a lot like several executable statements. The current error recovery scheme for declaration constructs depends on a look-ahead test to see whether the failed construct is actually the first executable statement. This works fine when the first executable statement is not in error, but should also allow for some error cases that begin with the tokens of an executable statement. This can obviously still go wrong for declaration constructs that are unparseable and also have ambiguity in their leading tokens with executable statements, but that seems to be a less likely case. Also improves error recovery for parenthesized items. 
--- flang/lib/Parser/expr-parsers.cpp | 3 ++- flang/lib/Parser/program-parsers.cpp | 24 ++++++++++++++++-------- flang/lib/Parser/token-parsers.h | 28 +++++++++++++++++++++++++++- flang/test/Parser/recovery01.f90 | 10 ++++++++++ flang/test/Parser/recovery02.f90 | 8 ++++++++ 5 files changed, 63 insertions(+), 10 deletions(-) create mode 100644 flang/test/Parser/recovery01.f90 create mode 100644 flang/test/Parser/recovery02.f90 diff --git a/flang/lib/Parser/expr-parsers.cpp b/flang/lib/Parser/expr-parsers.cpp index a47aae166b5758..77a13de7fd02d8 100644 --- a/flang/lib/Parser/expr-parsers.cpp +++ b/flang/lib/Parser/expr-parsers.cpp @@ -70,7 +70,8 @@ TYPE_PARSER(construct( constexpr auto primary{instrumented("primary"_en_US, first(construct(indirect(Parser{})), construct(literalConstant), - construct(construct(parenthesized(expr))), + construct(construct("(" >> + expr / !","_tok / recovery(")"_tok, SkipPastNested<'(', ')'>{}))), construct(indirect(functionReference) / !"("_tok / !"%"_tok), construct(designator / !"("_tok / !"%"_tok), construct(indirect(Parser{})), // %LEN or %KIND diff --git a/flang/lib/Parser/program-parsers.cpp b/flang/lib/Parser/program-parsers.cpp index 6f25ba48272208..b51b60157f39c0 100644 --- a/flang/lib/Parser/program-parsers.cpp +++ b/flang/lib/Parser/program-parsers.cpp @@ -86,10 +86,15 @@ TYPE_CONTEXT_PARSER("specification part"_en_US, // are in contexts that impose constraints on the kinds of statements that // are allowed, and so we have a variant production for declaration-construct // that implements those constraints. 
-constexpr auto execPartLookAhead{first(actionStmt >> ok, openaccConstruct >> ok, - openmpConstruct >> ok, "ASSOCIATE ("_tok, "BLOCK"_tok, "SELECT"_tok, - "CHANGE TEAM"_sptok, "CRITICAL"_tok, "DO"_tok, "IF ("_tok, "WHERE ("_tok, - "FORALL ("_tok, "!$CUF"_tok)}; +constexpr auto actionStmtLookAhead{first(actionStmt >> ok, + // Also accept apparent action statements with errors if they might be + // first in the execution part + "ALLOCATE ("_tok, "CALL" >> name >> "("_tok, "GO TO"_tok, "OPEN ("_tok, + "PRINT"_tok / space / !"("_tok, "READ ("_tok, "WRITE ("_tok)}; +constexpr auto execPartLookAhead{first(actionStmtLookAhead >> ok, + openaccConstruct >> ok, openmpConstruct >> ok, "ASSOCIATE ("_tok, + "BLOCK"_tok, "SELECT"_tok, "CHANGE TEAM"_sptok, "CRITICAL"_tok, "DO"_tok, + "IF ("_tok, "WHERE ("_tok, "FORALL ("_tok, "!$CUF"_tok)}; constexpr auto declErrorRecovery{ stmtErrorRecoveryStart >> !execPartLookAhead >> skipStmtErrorRecovery}; constexpr auto misplacedSpecificationStmt{Parser{} >> @@ -446,10 +451,13 @@ TYPE_PARSER(extension( "<<<" >> construct(scalarExpr, "," >> scalarExpr, maybe("," >> scalarIntExpr), maybe("," >> scalarIntExpr)) / ">>>")) -TYPE_PARSER(construct( - sourced(construct("CALL" >> Parser{}, - maybe(Parser{}), - defaulted(parenthesized(optionalList(actualArgSpec))))))) +constexpr auto actualArgSpecList{optionalList(actualArgSpec)}; +TYPE_CONTEXT_PARSER("CALL statement"_en_US, + construct( + sourced(construct("CALL" >> Parser{}, + maybe(Parser{}) / space, + "(" >> actualArgSpecList / ")" || + lookAhead(endOfStmt) >> defaulted(actualArgSpecList))))) // R1522 procedure-designator -> // procedure-name | proc-component-ref | data-ref % binding-name diff --git a/flang/lib/Parser/token-parsers.h b/flang/lib/Parser/token-parsers.h index 2495017d19649d..fe6bc1f69f576b 100644 --- a/flang/lib/Parser/token-parsers.h +++ b/flang/lib/Parser/token-parsers.h @@ -560,6 +560,8 @@ template struct SkipPast { while (std::optional p{state.GetNextChar()}) { if (**p == goal) 
{ return {Success{}}; + } else if (**p == '\n') { + break; } } return std::nullopt; @@ -574,8 +576,32 @@ template struct SkipTo { while (std::optional p{state.PeekAtNextChar()}) { if (**p == goal) { return {Success{}}; + } else if (**p == '\n') { + break; + } else { + state.UncheckedAdvance(); + } + } + return std::nullopt; + } +}; + +template struct SkipPastNested { + using resultType = Success; + constexpr SkipPastNested() {} + constexpr SkipPastNested(const SkipPastNested &) {} + static std::optional Parse(ParseState &state) { + int nesting{1}; + while (std::optional p{state.GetNextChar()}) { + if (**p == right) { + if (!--nesting) { + return {Success{}}; + } + } else if (**p == left) { + ++nesting; + } else if (**p == '\n') { + break; } - state.UncheckedAdvance(); } return std::nullopt; } diff --git a/flang/test/Parser/recovery01.f90 b/flang/test/Parser/recovery01.f90 new file mode 100644 index 00000000000000..674abaccc7c7d0 --- /dev/null +++ b/flang/test/Parser/recovery01.f90 @@ -0,0 +1,10 @@ +! RUN: not %flang_fc1 -fsyntax-only %s 2>&1 | FileCheck %s +program main + call foo(i, & + j, & + k, & + 1$) +end + +!CHECK: error: expected ')' +!CHECK: in the context: CALL statement diff --git a/flang/test/Parser/recovery02.f90 b/flang/test/Parser/recovery02.f90 new file mode 100644 index 00000000000000..0d0d15545cf39a --- /dev/null +++ b/flang/test/Parser/recovery02.f90 @@ -0,0 +1,8 @@ +! RUN: not %flang_fc1 -fsyntax-only %s 2>&1 | FileCheck %s +continue ! 
force executable part +CALL ADD_HASH_BLOCK(d_c,f_c,dimc, & + (h2b-1+noab*(h1b-1+noab*(p4b-noab-1+nvab*(p3b-noab-1$))))) +end + +!CHECK: error: expected ')' +!CHECK: in the context: CALL statement From 2414a90730d87c20d9ff8d7951ed24e3328124ed Mon Sep 17 00:00:00 2001 From: Peter Klausler <35819229+klausler@users.noreply.github.com> Date: Thu, 13 Jun 2024 10:57:06 -0700 Subject: [PATCH 004/155] [flang] Catch NULL(MOLD=assumed-rank) (#95270) An assumed-rank dummy argument is not an acceptable MOLD argument to NULL(), whose result must have a known rank at compilation time. --- flang/lib/Evaluate/intrinsics.cpp | 4 ++++ flang/test/Semantics/null01.f90 | 8 +++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/flang/lib/Evaluate/intrinsics.cpp b/flang/lib/Evaluate/intrinsics.cpp index 58c1b6989f495a..ace316174a8928 100644 --- a/flang/lib/Evaluate/intrinsics.cpp +++ b/flang/lib/Evaluate/intrinsics.cpp @@ -2691,6 +2691,10 @@ SpecificCall IntrinsicProcTable::Implementation::HandleNull( mold = nullptr; } if (mold) { + if (IsAssumedRank(*arguments[0])) { + context.messages().Say(arguments[0]->sourceLocation(), + "MOLD= argument to NULL() must not be assumed-rank"_err_en_US); + } bool isProcPtrTarget{ IsProcedurePointerTarget(*mold) && !IsNullObjectPointer(*mold)}; if (isProcPtrTarget || IsAllocatableOrPointerObject(*mold)) { diff --git a/flang/test/Semantics/null01.f90 b/flang/test/Semantics/null01.f90 index 3bf620048e2f22..04d94865356b0c 100644 --- a/flang/test/Semantics/null01.f90 +++ b/flang/test/Semantics/null01.f90 @@ -151,10 +151,16 @@ subroutine s1(x) subroutine s2(x) type(pdt(*)), pointer, intent(in) :: x end - subroutine test + subroutine s3(ar) + real, pointer :: ar(..) + end + subroutine test(ar) + real, pointer :: ar(..) 
!ERROR: Actual argument associated with dummy argument 'x=' is a NULL() pointer without a MOLD= to provide a character length call s1(null()) !ERROR: Actual argument associated with dummy argument 'x=' is a NULL() pointer without a MOLD= to provide a value for the assumed type parameter 'n' call s2(null()) + !ERROR: MOLD= argument to NULL() must not be assumed-rank + call s3(null(ar)) end end From 4b493e31b2c5d72d993f0e914adb711f3ce4ba05 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 13 Jun 2024 11:08:17 -0700 Subject: [PATCH 005/155] [ProfileData] Add getValueArrayForSite (#95335) Without this patch, a typical traversal over the value data looks like: uint32_t NV = Func.getNumValueDataForSite(VK, S); std::unique_ptr VD = Func.getValueForSite(VK, S); for (uint32_t V = 0; V < NV; V++) Do something with VD[V].Value and/or VD[V].Count; This patch adds getValueArrayForSite, which returns ArrayRef, so we can do: for (const auto &V : Func.getValueArrayForSite(VK, S)) Do something with V.Value and/or V.Count; I'm planning to migrate the existing uses of getValueForSite to getValueArrayForSite in follow-up patches and remove getValueForSite and getNumValueDataForSite. --- llvm/include/llvm/ProfileData/InstrProf.h | 9 +++++++++ llvm/unittests/ProfileData/InstrProfTest.cpp | 14 +++++++------- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index 2772fddc087378..0c899e6d84965c 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -864,6 +864,10 @@ struct InstrProfRecord { /// Return the total number of ValueData for ValueKind. inline uint32_t getNumValueData(uint32_t ValueKind) const; + /// Return the array of profiled values at \p Site. + inline ArrayRef getValueArrayForSite(uint32_t ValueKind, + uint32_t Site) const; + /// Return the number of value data collected for ValueKind at profiling /// site: Site. 
inline uint32_t getNumValueDataForSite(uint32_t ValueKind, @@ -1060,6 +1064,11 @@ uint32_t InstrProfRecord::getNumValueDataForSite(uint32_t ValueKind, return getValueSitesForKind(ValueKind)[Site].ValueData.size(); } +ArrayRef +InstrProfRecord::getValueArrayForSite(uint32_t ValueKind, uint32_t Site) const { + return getValueSitesForKind(ValueKind)[Site].ValueData; +} + std::unique_ptr InstrProfRecord::getValueForSite(uint32_t ValueKind, uint32_t Site) const { uint32_t N = getNumValueDataForSite(ValueKind, Site); diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp index 8acb0fa0c717aa..0309be4eb10fb4 100644 --- a/llvm/unittests/ProfileData/InstrProfTest.cpp +++ b/llvm/unittests/ProfileData/InstrProfTest.cpp @@ -867,7 +867,7 @@ TEST_P(InstrProfReaderWriterTest, icall_and_vtable_data_read_write) { // First indirect site. { - auto VD = R->getValueForSite(IPVK_IndirectCallTarget, 0); + auto VD = R->getValueArrayForSite(IPVK_IndirectCallTarget, 0); EXPECT_EQ(VD[0].Count, 3U * getProfWeight()); EXPECT_EQ(VD[1].Count, 2U * getProfWeight()); @@ -880,7 +880,7 @@ TEST_P(InstrProfReaderWriterTest, icall_and_vtable_data_read_write) { // First vtable site. { - auto VD = R->getValueForSite(IPVK_VTableTarget, 0); + auto VD = R->getValueArrayForSite(IPVK_VTableTarget, 0); EXPECT_EQ(VD[0].Count, 3U * getProfWeight()); EXPECT_EQ(VD[1].Count, 2U * getProfWeight()); @@ -893,7 +893,7 @@ TEST_P(InstrProfReaderWriterTest, icall_and_vtable_data_read_write) { // Second vtable site. { - auto VD = R->getValueForSite(IPVK_VTableTarget, 1); + auto VD = R->getValueArrayForSite(IPVK_VTableTarget, 1); EXPECT_EQ(VD[0].Count, 2U * getProfWeight()); EXPECT_EQ(VD[1].Count, 1U * getProfWeight()); @@ -1125,7 +1125,7 @@ TEST_P(MaybeSparseInstrProfTest, icall_and_vtable_data_merge) { // Test the merged values for indirect calls. 
{ - auto VD = R->getValueForSite(IPVK_IndirectCallTarget, 0); + auto VD = R->getValueArrayForSite(IPVK_IndirectCallTarget, 0); EXPECT_STREQ((const char *)VD[0].Value, "callee2"); EXPECT_EQ(VD[0].Count, 7U); EXPECT_STREQ((const char *)VD[1].Value, "callee3"); @@ -1162,7 +1162,7 @@ TEST_P(MaybeSparseInstrProfTest, icall_and_vtable_data_merge) { // Test the merged values for vtables { - auto VD0 = R->getValueForSite(IPVK_VTableTarget, 0); + auto VD0 = R->getValueArrayForSite(IPVK_VTableTarget, 0); EXPECT_EQ(VD0[0].Value, getCalleeAddress(vtable2)); EXPECT_EQ(VD0[0].Count, 7U); EXPECT_EQ(VD0[1].Value, getCalleeAddress(vtable3)); @@ -1172,7 +1172,7 @@ TEST_P(MaybeSparseInstrProfTest, icall_and_vtable_data_merge) { EXPECT_EQ(VD0[3].Value, getCalleeAddress(vtable1)); EXPECT_EQ(VD0[3].Count, 1U); - auto VD1 = R->getValueForSite(IPVK_VTableTarget, 1); + auto VD1 = R->getValueArrayForSite(IPVK_VTableTarget, 1); EXPECT_EQ(VD1[0].Value, getCalleeAddress(vtable3)); EXPECT_EQ(VD1[0].Count, 6U); EXPECT_EQ(VD1[1].Value, getCalleeAddress(vtable4)); @@ -1182,7 +1182,7 @@ TEST_P(MaybeSparseInstrProfTest, icall_and_vtable_data_merge) { EXPECT_EQ(VD1[3].Value, getCalleeAddress(vtable1)); EXPECT_EQ(VD1[3].Count, 1U); - auto VD2 = R->getValueForSite(IPVK_VTableTarget, 2); + auto VD2 = R->getValueArrayForSite(IPVK_VTableTarget, 2); EXPECT_EQ(VD2[0].Value, getCalleeAddress(vtable3)); EXPECT_EQ(VD2[0].Count, 6U); EXPECT_EQ(VD2[1].Value, getCalleeAddress(vtable2)); From f8fc883da951064a310e365680b4b567fad58ebc Mon Sep 17 00:00:00 2001 From: Peter Klausler <35819229+klausler@users.noreply.github.com> Date: Thu, 13 Jun 2024 11:10:32 -0700 Subject: [PATCH 006/155] [flang][runtime] Distinguish VALUE from non-VALUE operations in REDUCE (#95297) Accommodate operations with VALUE dummy arguments in the runtime support for the REDUCE intrinsic function by splitting most entry points into Reduce...Ref and Reduce...Value variants. 
Further work will be needed in lowering to call the ...Value entry points. --- .../Optimizer/Builder/Runtime/RTBuilder.h | 24 +- flang/include/flang/Runtime/reduce.h | 425 ++++++++---- .../Optimizer/Builder/Runtime/Reduction.cpp | 130 ++-- flang/runtime/reduce.cpp | 615 ++++++++++++++---- flang/test/Lower/Intrinsics/reduce.f90 | 66 +- flang/unittests/Runtime/Reduction.cpp | 9 +- 6 files changed, 906 insertions(+), 363 deletions(-) diff --git a/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h b/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h index 99161c57fbe288..809d5b8d569dc9 100644 --- a/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h +++ b/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h @@ -53,10 +53,10 @@ namespace fir::runtime { using TypeBuilderFunc = mlir::Type (*)(mlir::MLIRContext *); using FuncTypeBuilderFunc = mlir::FunctionType (*)(mlir::MLIRContext *); -#define REDUCTION_OPERATION_MODEL(T) \ +#define REDUCTION_REF_OPERATION_MODEL(T) \ template <> \ constexpr TypeBuilderFunc \ - getModel>() { \ + getModel>() { \ return [](mlir::MLIRContext *context) -> mlir::Type { \ TypeBuilderFunc f{getModel()}; \ auto refTy = fir::ReferenceType::get(f(context)); \ @@ -480,18 +480,18 @@ constexpr TypeBuilderFunc getModel() { }; } -REDUCTION_OPERATION_MODEL(std::int8_t) -REDUCTION_OPERATION_MODEL(std::int16_t) -REDUCTION_OPERATION_MODEL(std::int32_t) -REDUCTION_OPERATION_MODEL(std::int64_t) -REDUCTION_OPERATION_MODEL(Fortran::common::int128_t) +REDUCTION_REF_OPERATION_MODEL(std::int8_t) +REDUCTION_REF_OPERATION_MODEL(std::int16_t) +REDUCTION_REF_OPERATION_MODEL(std::int32_t) +REDUCTION_REF_OPERATION_MODEL(std::int64_t) +REDUCTION_REF_OPERATION_MODEL(Fortran::common::int128_t) -REDUCTION_OPERATION_MODEL(float) -REDUCTION_OPERATION_MODEL(double) -REDUCTION_OPERATION_MODEL(long double) +REDUCTION_REF_OPERATION_MODEL(float) +REDUCTION_REF_OPERATION_MODEL(double) +REDUCTION_REF_OPERATION_MODEL(long double) 
-REDUCTION_OPERATION_MODEL(std::complex) -REDUCTION_OPERATION_MODEL(std::complex) +REDUCTION_REF_OPERATION_MODEL(std::complex) +REDUCTION_REF_OPERATION_MODEL(std::complex) REDUCTION_CHAR_OPERATION_MODEL(char) REDUCTION_CHAR_OPERATION_MODEL(char16_t) diff --git a/flang/include/flang/Runtime/reduce.h b/flang/include/flang/Runtime/reduce.h index 975aa6dea305f5..60f54c393b4bbd 100644 --- a/flang/include/flang/Runtime/reduce.h +++ b/flang/include/flang/Runtime/reduce.h @@ -28,7 +28,9 @@ namespace Fortran::runtime { class Descriptor; -template using ReductionOperation = T (*)(const T *, const T *); +template +using ReferenceReductionOperation = T (*)(const T *, const T *); +template using ValueReductionOperation = T (*)(T, T); template using ReductionCharOperation = void (*)(CHAR *hiddenResult, std::size_t resultLen, const CHAR *x, const CHAR *y, std::size_t xLen, @@ -38,185 +40,364 @@ using ReductionDerivedTypeOperation = void (*)( extern "C" { -std::int8_t RTDECL(ReduceInteger1)(const Descriptor &, - ReductionOperation, const char *source, int line, int dim = 0, - const Descriptor *mask = nullptr, const std::int8_t *identity = nullptr, - bool ordered = true); -void RTDECL(ReduceInteger1Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation, const char *source, int line, int dim, +std::int8_t RTDECL(ReduceInteger1Ref)(const Descriptor &, + ReferenceReductionOperation, const char *source, int line, + int dim = 0, const Descriptor *mask = nullptr, + const std::int8_t *identity = nullptr, bool ordered = true); +std::int8_t RTDECL(ReduceInteger1Value)(const Descriptor &, + ValueReductionOperation, const char *source, int line, + int dim = 0, const Descriptor *mask = nullptr, + const std::int8_t *identity = nullptr, bool ordered = true); +void RTDECL(ReduceInteger1DimRef)(Descriptor &result, const Descriptor &array, + ReferenceReductionOperation, const char *source, int line, + int dim, const Descriptor *mask = nullptr, + const std::int8_t *identity = 
nullptr, bool ordered = true); +void RTDECL(ReduceInteger1DimValue)(Descriptor &result, const Descriptor &array, + ValueReductionOperation, const char *source, int line, int dim, const Descriptor *mask = nullptr, const std::int8_t *identity = nullptr, bool ordered = true); -std::int16_t RTDECL(ReduceInteger2)(const Descriptor &, - ReductionOperation, const char *source, int line, int dim = 0, - const Descriptor *mask = nullptr, const std::int16_t *identity = nullptr, - bool ordered = true); -void RTDECL(ReduceInteger2Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation, const char *source, int line, int dim, - const Descriptor *mask = nullptr, const std::int16_t *identity = nullptr, - bool ordered = true); -std::int32_t RTDECL(ReduceInteger4)(const Descriptor &, - ReductionOperation, const char *source, int line, int dim = 0, - const Descriptor *mask = nullptr, const std::int32_t *identity = nullptr, - bool ordered = true); -void RTDECL(ReduceInteger4Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation, const char *source, int line, int dim, - const Descriptor *mask = nullptr, const std::int32_t *identity = nullptr, - bool ordered = true); -std::int64_t RTDECL(ReduceInteger8)(const Descriptor &, - ReductionOperation, const char *source, int line, int dim = 0, - const Descriptor *mask = nullptr, const std::int64_t *identity = nullptr, - bool ordered = true); -void RTDECL(ReduceInteger8Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation, const char *source, int line, int dim, - const Descriptor *mask = nullptr, const std::int64_t *identity = nullptr, - bool ordered = true); +std::int16_t RTDECL(ReduceInteger2Ref)(const Descriptor &, + ReferenceReductionOperation, const char *source, int line, + int dim = 0, const Descriptor *mask = nullptr, + const std::int16_t *identity = nullptr, bool ordered = true); +std::int16_t RTDECL(ReduceInteger2Value)(const Descriptor &, + ValueReductionOperation, const char *source, 
int line, + int dim = 0, const Descriptor *mask = nullptr, + const std::int16_t *identity = nullptr, bool ordered = true); +void RTDECL(ReduceInteger2DimRef)(Descriptor &result, const Descriptor &array, + ReferenceReductionOperation, const char *source, int line, + int dim, const Descriptor *mask = nullptr, + const std::int16_t *identity = nullptr, bool ordered = true); +void RTDECL(ReduceInteger2DimValue)(Descriptor &result, const Descriptor &array, + ValueReductionOperation, const char *source, int line, + int dim, const Descriptor *mask = nullptr, + const std::int16_t *identity = nullptr, bool ordered = true); +std::int32_t RTDECL(ReduceInteger4Ref)(const Descriptor &, + ReferenceReductionOperation, const char *source, int line, + int dim = 0, const Descriptor *mask = nullptr, + const std::int32_t *identity = nullptr, bool ordered = true); +std::int32_t RTDECL(ReduceInteger4Value)(const Descriptor &, + ValueReductionOperation, const char *source, int line, + int dim = 0, const Descriptor *mask = nullptr, + const std::int32_t *identity = nullptr, bool ordered = true); +void RTDECL(ReduceInteger4DimRef)(Descriptor &result, const Descriptor &array, + ReferenceReductionOperation, const char *source, int line, + int dim, const Descriptor *mask = nullptr, + const std::int32_t *identity = nullptr, bool ordered = true); +void RTDECL(ReduceInteger4DimValue)(Descriptor &result, const Descriptor &array, + ValueReductionOperation, const char *source, int line, + int dim, const Descriptor *mask = nullptr, + const std::int32_t *identity = nullptr, bool ordered = true); +std::int64_t RTDECL(ReduceInteger8Ref)(const Descriptor &, + ReferenceReductionOperation, const char *source, int line, + int dim = 0, const Descriptor *mask = nullptr, + const std::int64_t *identity = nullptr, bool ordered = true); +std::int64_t RTDECL(ReduceInteger8Value)(const Descriptor &, + ValueReductionOperation, const char *source, int line, + int dim = 0, const Descriptor *mask = nullptr, + const 
std::int64_t *identity = nullptr, bool ordered = true); +void RTDECL(ReduceInteger8DimRef)(Descriptor &result, const Descriptor &array, + ReferenceReductionOperation, const char *source, int line, + int dim, const Descriptor *mask = nullptr, + const std::int64_t *identity = nullptr, bool ordered = true); +void RTDECL(ReduceInteger8DimValue)(Descriptor &result, const Descriptor &array, + ValueReductionOperation, const char *source, int line, + int dim, const Descriptor *mask = nullptr, + const std::int64_t *identity = nullptr, bool ordered = true); #ifdef __SIZEOF_INT128__ -common::int128_t RTDECL(ReduceInteger16)(const Descriptor &, - ReductionOperation, const char *source, int line, +common::int128_t RTDECL(ReduceInteger16Ref)(const Descriptor &, + ReferenceReductionOperation, const char *source, int line, int dim = 0, const Descriptor *mask = nullptr, const common::int128_t *identity = nullptr, bool ordered = true); -void RTDECL(ReduceInteger16Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation, const char *source, int line, int dim, - const Descriptor *mask = nullptr, +common::int128_t RTDECL(ReduceInteger16Value)(const Descriptor &, + ValueReductionOperation, const char *source, int line, + int dim = 0, const Descriptor *mask = nullptr, + const common::int128_t *identity = nullptr, bool ordered = true); +void RTDECL(ReduceInteger16DimRef)(Descriptor &result, const Descriptor &array, + ReferenceReductionOperation, const char *source, int line, + int dim, const Descriptor *mask = nullptr, + const common::int128_t *identity = nullptr, bool ordered = true); +void RTDECL(ReduceInteger16DimValue)(Descriptor &result, + const Descriptor &array, ValueReductionOperation, + const char *source, int line, int dim, const Descriptor *mask = nullptr, const common::int128_t *identity = nullptr, bool ordered = true); #endif // REAL/COMPLEX(2 & 3) return 32-bit float results for the caller to downconvert -float RTDECL(ReduceReal2)(const Descriptor &, 
ReductionOperation, - const char *source, int line, int dim = 0, const Descriptor *mask = nullptr, +float RTDECL(ReduceReal2Ref)(const Descriptor &, + ReferenceReductionOperation, const char *source, int line, + int dim = 0, const Descriptor *mask = nullptr, const float *identity = nullptr, bool ordered = true); -void RTDECL(ReduceReal2Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation, const char *source, int line, int dim, +float RTDECL(ReduceReal2Value)(const Descriptor &, + ValueReductionOperation, const char *source, int line, int dim = 0, const Descriptor *mask = nullptr, const float *identity = nullptr, bool ordered = true); -float RTDECL(ReduceReal3)(const Descriptor &, ReductionOperation, - const char *source, int line, int dim = 0, const Descriptor *mask = nullptr, +void RTDECL(ReduceReal2DimRef)(Descriptor &result, const Descriptor &array, + ReferenceReductionOperation, const char *source, int line, int dim, + const Descriptor *mask = nullptr, const float *identity = nullptr, + bool ordered = true); +void RTDECL(ReduceReal2DimValue)(Descriptor &result, const Descriptor &array, + ValueReductionOperation, const char *source, int line, int dim, + const Descriptor *mask = nullptr, const float *identity = nullptr, + bool ordered = true); +float RTDECL(ReduceReal3Ref)(const Descriptor &, + ReferenceReductionOperation, const char *source, int line, + int dim = 0, const Descriptor *mask = nullptr, const float *identity = nullptr, bool ordered = true); -void RTDECL(ReduceReal3Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation, const char *source, int line, int dim, +float RTDECL(ReduceReal3Value)(const Descriptor &, + ValueReductionOperation, const char *source, int line, int dim = 0, const Descriptor *mask = nullptr, const float *identity = nullptr, bool ordered = true); -float RTDECL(ReduceReal4)(const Descriptor &, ReductionOperation, - const char *source, int line, int dim = 0, const Descriptor *mask = nullptr, +void 
RTDECL(ReduceReal3DimRef)(Descriptor &result, const Descriptor &array, + ReferenceReductionOperation, const char *source, int line, int dim, + const Descriptor *mask = nullptr, const float *identity = nullptr, + bool ordered = true); +void RTDECL(ReduceReal3DimValue)(Descriptor &result, const Descriptor &array, + ValueReductionOperation, const char *source, int line, int dim, + const Descriptor *mask = nullptr, const float *identity = nullptr, + bool ordered = true); +float RTDECL(ReduceReal4Ref)(const Descriptor &, + ReferenceReductionOperation, const char *source, int line, + int dim = 0, const Descriptor *mask = nullptr, const float *identity = nullptr, bool ordered = true); -void RTDECL(ReduceReal4Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation, const char *source, int line, int dim, +float RTDECL(ReduceReal4Value)(const Descriptor &, + ValueReductionOperation, const char *source, int line, int dim = 0, const Descriptor *mask = nullptr, const float *identity = nullptr, bool ordered = true); -double RTDECL(ReduceReal8)(const Descriptor &, ReductionOperation, - const char *source, int line, int dim = 0, const Descriptor *mask = nullptr, +void RTDECL(ReduceReal4DimRef)(Descriptor &result, const Descriptor &array, + ReferenceReductionOperation, const char *source, int line, int dim, + const Descriptor *mask = nullptr, const float *identity = nullptr, + bool ordered = true); +void RTDECL(ReduceReal4DimValue)(Descriptor &result, const Descriptor &array, + ValueReductionOperation, const char *source, int line, int dim, + const Descriptor *mask = nullptr, const float *identity = nullptr, + bool ordered = true); +double RTDECL(ReduceReal8Ref)(const Descriptor &, + ReferenceReductionOperation, const char *source, int line, + int dim = 0, const Descriptor *mask = nullptr, const double *identity = nullptr, bool ordered = true); -void RTDECL(ReduceReal8Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation, const char *source, int 
line, int dim, +double RTDECL(ReduceReal8Value)(const Descriptor &, + ValueReductionOperation, const char *source, int line, int dim = 0, const Descriptor *mask = nullptr, const double *identity = nullptr, bool ordered = true); -#if LDBL_MANT_DIG == 64 -long double RTDECL(ReduceReal10)(const Descriptor &, - ReductionOperation, const char *source, int line, int dim = 0, - const Descriptor *mask = nullptr, const long double *identity = nullptr, +void RTDECL(ReduceReal8DimRef)(Descriptor &result, const Descriptor &array, + ReferenceReductionOperation, const char *source, int line, int dim, + const Descriptor *mask = nullptr, const double *identity = nullptr, bool ordered = true); -void RTDECL(ReduceReal10Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation, const char *source, int line, int dim, +void RTDECL(ReduceReal8DimValue)(Descriptor &result, const Descriptor &array, + ValueReductionOperation, const char *source, int line, int dim, + const Descriptor *mask = nullptr, const double *identity = nullptr, + bool ordered = true); +#if LDBL_MANT_DIG == 64 +long double RTDECL(ReduceReal10Ref)(const Descriptor &, + ReferenceReductionOperation, const char *source, int line, + int dim = 0, const Descriptor *mask = nullptr, + const long double *identity = nullptr, bool ordered = true); +long double RTDECL(ReduceReal10Value)(const Descriptor &, + ValueReductionOperation, const char *source, int line, + int dim = 0, const Descriptor *mask = nullptr, + const long double *identity = nullptr, bool ordered = true); +void RTDECL(ReduceReal10DimRef)(Descriptor &result, const Descriptor &array, + ReferenceReductionOperation, const char *source, int line, + int dim, const Descriptor *mask = nullptr, + const long double *identity = nullptr, bool ordered = true); +void RTDECL(ReduceReal10DimValue)(Descriptor &result, const Descriptor &array, + ValueReductionOperation, const char *source, int line, int dim, const Descriptor *mask = nullptr, const long double *identity 
= nullptr, bool ordered = true); #endif #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 -CppFloat128Type RTDECL(ReduceReal16)(const Descriptor &, - ReductionOperation, const char *source, int line, +CppFloat128Type RTDECL(ReduceReal16Ref)(const Descriptor &, + ReferenceReductionOperation, const char *source, int line, int dim = 0, const Descriptor *mask = nullptr, const CppFloat128Type *identity = nullptr, bool ordered = true); -void RTDECL(ReduceReal16Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation, const char *source, int line, int dim, - const Descriptor *mask = nullptr, const CppFloat128Type *identity = nullptr, - bool ordered = true); +CppFloat128Type RTDECL(ReduceReal16Value)(const Descriptor &, + ValueReductionOperation, const char *source, int line, + int dim = 0, const Descriptor *mask = nullptr, + const CppFloat128Type *identity = nullptr, bool ordered = true); +void RTDECL(ReduceReal16DimRef)(Descriptor &result, const Descriptor &array, + ReferenceReductionOperation, const char *source, int line, + int dim, const Descriptor *mask = nullptr, + const CppFloat128Type *identity = nullptr, bool ordered = true); +void RTDECL(ReduceReal16DimValue)(Descriptor &result, const Descriptor &array, + ValueReductionOperation, const char *source, int line, + int dim, const Descriptor *mask = nullptr, + const CppFloat128Type *identity = nullptr, bool ordered = true); #endif -void RTDECL(CppReduceComplex2)(std::complex &, const Descriptor &, - ReductionOperation>, const char *source, int line, +void RTDECL(CppReduceComplex2Ref)(std::complex &, const Descriptor &, + ReferenceReductionOperation>, const char *source, + int line, int dim = 0, const Descriptor *mask = nullptr, + const std::complex *identity = nullptr, bool ordered = true); +void RTDECL(CppReduceComplex2Value)(std::complex &, const Descriptor &, + ValueReductionOperation>, const char *source, int line, int dim = 0, const Descriptor *mask = nullptr, const std::complex *identity = nullptr, bool 
ordered = true); -void RTDECL(CppReduceComplex2Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation>, const char *source, int line, - int dim, const Descriptor *mask = nullptr, +void RTDECL(CppReduceComplex2DimRef)(Descriptor &result, + const Descriptor &array, ReferenceReductionOperation>, + const char *source, int line, int dim, const Descriptor *mask = nullptr, + const std::complex *identity = nullptr, bool ordered = true); +void RTDECL(CppReduceComplex2DimValue)(Descriptor &result, + const Descriptor &array, ValueReductionOperation>, + const char *source, int line, int dim, const Descriptor *mask = nullptr, const std::complex *identity = nullptr, bool ordered = true); -void RTDECL(CppReduceComplex3)(std::complex &, const Descriptor &, - ReductionOperation>, const char *source, int line, +void RTDECL(CppReduceComplex3Ref)(std::complex &, const Descriptor &, + ReferenceReductionOperation>, const char *source, + int line, int dim = 0, const Descriptor *mask = nullptr, + const std::complex *identity = nullptr, bool ordered = true); +void RTDECL(CppReduceComplex3Value)(std::complex &, const Descriptor &, + ValueReductionOperation>, const char *source, int line, int dim = 0, const Descriptor *mask = nullptr, const std::complex *identity = nullptr, bool ordered = true); -void RTDECL(CppReduceComplex3Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation>, const char *source, int line, - int dim, const Descriptor *mask = nullptr, +void RTDECL(CppReduceComplex3DimRef)(Descriptor &result, + const Descriptor &array, ReferenceReductionOperation>, + const char *source, int line, int dim, const Descriptor *mask = nullptr, + const std::complex *identity = nullptr, bool ordered = true); +void RTDECL(CppReduceComplex3DimValue)(Descriptor &result, + const Descriptor &array, ValueReductionOperation>, + const char *source, int line, int dim, const Descriptor *mask = nullptr, const std::complex *identity = nullptr, bool ordered = true); -void 
RTDECL(CppReduceComplex4)(std::complex &, const Descriptor &, - ReductionOperation>, const char *source, int line, +void RTDECL(CppReduceComplex4Ref)(std::complex &, const Descriptor &, + ReferenceReductionOperation>, const char *source, + int line, int dim = 0, const Descriptor *mask = nullptr, + const std::complex *identity = nullptr, bool ordered = true); +void RTDECL(CppReduceComplex4Value)(std::complex &, const Descriptor &, + ValueReductionOperation>, const char *source, int line, int dim = 0, const Descriptor *mask = nullptr, const std::complex *identity = nullptr, bool ordered = true); -void RTDECL(CppReduceComplex4Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation>, const char *source, int line, - int dim, const Descriptor *mask = nullptr, +void RTDECL(CppReduceComplex4DimRef)(Descriptor &result, + const Descriptor &array, ReferenceReductionOperation>, + const char *source, int line, int dim, const Descriptor *mask = nullptr, const std::complex *identity = nullptr, bool ordered = true); -void RTDECL(CppReduceComplex8)(std::complex &, const Descriptor &, - ReductionOperation>, const char *source, int line, +void RTDECL(CppReduceComplex4DimValue)(Descriptor &result, + const Descriptor &array, ValueReductionOperation>, + const char *source, int line, int dim, const Descriptor *mask = nullptr, + const std::complex *identity = nullptr, bool ordered = true); +void RTDECL(CppReduceComplex8Ref)(std::complex &, const Descriptor &, + ReferenceReductionOperation>, const char *source, + int line, int dim = 0, const Descriptor *mask = nullptr, + const std::complex *identity = nullptr, bool ordered = true); +void RTDECL(CppReduceComplex8Value)(std::complex &, const Descriptor &, + ValueReductionOperation>, const char *source, int line, int dim = 0, const Descriptor *mask = nullptr, const std::complex *identity = nullptr, bool ordered = true); -void RTDECL(CppReduceComplex8Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation>, const 
char *source, int line, - int dim, const Descriptor *mask = nullptr, +void RTDECL(CppReduceComplex8DimRef)(Descriptor &result, + const Descriptor &array, ReferenceReductionOperation>, + const char *source, int line, int dim, const Descriptor *mask = nullptr, + const std::complex *identity = nullptr, bool ordered = true); +void RTDECL(CppReduceComplex8DimValue)(Descriptor &result, + const Descriptor &array, ValueReductionOperation>, + const char *source, int line, int dim, const Descriptor *mask = nullptr, const std::complex *identity = nullptr, bool ordered = true); #if LDBL_MANT_DIG == 64 -void RTDECL(CppReduceComplex10)(std::complex &, const Descriptor &, - ReductionOperation>, const char *source, int line, - int dim = 0, const Descriptor *mask = nullptr, +void RTDECL(CppReduceComplex10Ref)(std::complex &, + const Descriptor &, ReferenceReductionOperation>, + const char *source, int line, int dim = 0, const Descriptor *mask = nullptr, const std::complex *identity = nullptr, bool ordered = true); -void RTDECL(CppReduceComplex10Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation>, const char *source, int line, - int dim, const Descriptor *mask = nullptr, +void RTDECL(CppReduceComplex10Value)(std::complex &, + const Descriptor &, ValueReductionOperation>, + const char *source, int line, int dim = 0, const Descriptor *mask = nullptr, + const std::complex *identity = nullptr, bool ordered = true); +void RTDECL(CppReduceComplex10DimRef)(Descriptor &result, + const Descriptor &array, + ReferenceReductionOperation>, const char *source, + int line, int dim, const Descriptor *mask = nullptr, + const std::complex *identity = nullptr, bool ordered = true); +void RTDECL(CppReduceComplex10DimValue)(Descriptor &result, + const Descriptor &array, ValueReductionOperation>, + const char *source, int line, int dim, const Descriptor *mask = nullptr, const std::complex *identity = nullptr, bool ordered = true); #endif #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 -void 
RTDECL(CppReduceComplex16)(std::complex &, - const Descriptor &, ReductionOperation>, +void RTDECL(CppReduceComplex16Ref)(std::complex &, + const Descriptor &, + ReferenceReductionOperation>, const char *source, int line, int dim = 0, const Descriptor *mask = nullptr, const std::complex *identity = nullptr, bool ordered = true); -void RTDECL(CppReduceComplex16Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation>, const char *source, +void RTDECL(CppReduceComplex16Value)(std::complex &, + const Descriptor &, ValueReductionOperation>, + const char *source, int line, int dim = 0, const Descriptor *mask = nullptr, + const std::complex *identity = nullptr, + bool ordered = true); +void RTDECL(CppReduceComplex16DimRef)(Descriptor &result, + const Descriptor &array, + ReferenceReductionOperation>, + const char *source, int line, int dim, const Descriptor *mask = nullptr, + const std::complex *identity = nullptr, + bool ordered = true); +void RTDECL(CppReduceComplex16DimValue)(Descriptor &result, + const Descriptor &array, + ValueReductionOperation>, const char *source, int line, int dim, const Descriptor *mask = nullptr, const std::complex *identity = nullptr, bool ordered = true); #endif -bool RTDECL(ReduceLogical1)(const Descriptor &, ReductionOperation, - const char *source, int line, int dim = 0, const Descriptor *mask = nullptr, +bool RTDECL(ReduceLogical1Ref)(const Descriptor &, + ReferenceReductionOperation, const char *source, int line, + int dim = 0, const Descriptor *mask = nullptr, + const std::int8_t *identity = nullptr, bool ordered = true); +bool RTDECL(ReduceLogical1Value)(const Descriptor &, + ValueReductionOperation, const char *source, int line, + int dim = 0, const Descriptor *mask = nullptr, + const std::int8_t *identity = nullptr, bool ordered = true); +void RTDECL(ReduceLogical1DimRef)(Descriptor &result, const Descriptor &array, + ReferenceReductionOperation, const char *source, int line, + int dim, const Descriptor *mask = 
nullptr, const std::int8_t *identity = nullptr, bool ordered = true); -void RTDECL(ReduceLogical1Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation, const char *source, int line, int dim, +void RTDECL(ReduceLogical1DimValue)(Descriptor &result, const Descriptor &array, + ValueReductionOperation, const char *source, int line, int dim, const Descriptor *mask = nullptr, const std::int8_t *identity = nullptr, bool ordered = true); -bool RTDECL(ReduceLogical2)(const Descriptor &, - ReductionOperation, const char *source, int line, int dim = 0, - const Descriptor *mask = nullptr, const std::int16_t *identity = nullptr, - bool ordered = true); -void RTDECL(ReduceLogical2Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation, const char *source, int line, int dim, - const Descriptor *mask = nullptr, const std::int16_t *identity = nullptr, - bool ordered = true); -bool RTDECL(ReduceLogical4)(const Descriptor &, - ReductionOperation, const char *source, int line, int dim = 0, - const Descriptor *mask = nullptr, const std::int32_t *identity = nullptr, - bool ordered = true); -void RTDECL(ReduceLogical4Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation, const char *source, int line, int dim, - const Descriptor *mask = nullptr, const std::int32_t *identity = nullptr, - bool ordered = true); -bool RTDECL(ReduceLogical8)(const Descriptor &, - ReductionOperation, const char *source, int line, int dim = 0, - const Descriptor *mask = nullptr, const std::int64_t *identity = nullptr, - bool ordered = true); -void RTDECL(ReduceLogical8Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation, const char *source, int line, int dim, - const Descriptor *mask = nullptr, const std::int64_t *identity = nullptr, - bool ordered = true); +bool RTDECL(ReduceLogical2Ref)(const Descriptor &, + ReferenceReductionOperation, const char *source, int line, + int dim = 0, const Descriptor *mask = nullptr, + const std::int16_t *identity 
= nullptr, bool ordered = true); +bool RTDECL(ReduceLogical2Value)(const Descriptor &, + ValueReductionOperation, const char *source, int line, + int dim = 0, const Descriptor *mask = nullptr, + const std::int16_t *identity = nullptr, bool ordered = true); +void RTDECL(ReduceLogical2DimRef)(Descriptor &result, const Descriptor &array, + ReferenceReductionOperation, const char *source, int line, + int dim, const Descriptor *mask = nullptr, + const std::int16_t *identity = nullptr, bool ordered = true); +void RTDECL(ReduceLogical2DimValue)(Descriptor &result, const Descriptor &array, + ValueReductionOperation, const char *source, int line, + int dim, const Descriptor *mask = nullptr, + const std::int16_t *identity = nullptr, bool ordered = true); +bool RTDECL(ReduceLogical4Ref)(const Descriptor &, + ReferenceReductionOperation, const char *source, int line, + int dim = 0, const Descriptor *mask = nullptr, + const std::int32_t *identity = nullptr, bool ordered = true); +bool RTDECL(ReduceLogical4Value)(const Descriptor &, + ValueReductionOperation, const char *source, int line, + int dim = 0, const Descriptor *mask = nullptr, + const std::int32_t *identity = nullptr, bool ordered = true); +void RTDECL(ReduceLogical4DimRef)(Descriptor &result, const Descriptor &array, + ReferenceReductionOperation, const char *source, int line, + int dim, const Descriptor *mask = nullptr, + const std::int32_t *identity = nullptr, bool ordered = true); +void RTDECL(ReduceLogical4DimValue)(Descriptor &result, const Descriptor &array, + ValueReductionOperation, const char *source, int line, + int dim, const Descriptor *mask = nullptr, + const std::int32_t *identity = nullptr, bool ordered = true); +bool RTDECL(ReduceLogical8Ref)(const Descriptor &, + ReferenceReductionOperation, const char *source, int line, + int dim = 0, const Descriptor *mask = nullptr, + const std::int64_t *identity = nullptr, bool ordered = true); +bool RTDECL(ReduceLogical8Value)(const Descriptor &, + 
ValueReductionOperation, const char *source, int line, + int dim = 0, const Descriptor *mask = nullptr, + const std::int64_t *identity = nullptr, bool ordered = true); +void RTDECL(ReduceLogical8DimRef)(Descriptor &result, const Descriptor &array, + ReferenceReductionOperation, const char *source, int line, + int dim, const Descriptor *mask = nullptr, + const std::int64_t *identity = nullptr, bool ordered = true); +void RTDECL(ReduceLogical8DimValue)(Descriptor &result, const Descriptor &array, + ValueReductionOperation, const char *source, int line, + int dim, const Descriptor *mask = nullptr, + const std::int64_t *identity = nullptr, bool ordered = true); void RTDECL(ReduceChar1)(char *result, const Descriptor &array, ReductionCharOperation, const char *source, int line, int dim = 0, diff --git a/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp b/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp index 4b086a98de47b2..c306b50eb56983 100644 --- a/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp +++ b/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp @@ -469,7 +469,8 @@ struct ForcedIParity16 { /// Placeholder for real*10 version of Reduce Intrinsic struct ForcedReduceReal10 { - static constexpr const char *name = ExpandAndQuoteKey(RTNAME(ReduceReal10)); + static constexpr const char *name = + ExpandAndQuoteKey(RTNAME(ReduceReal10Ref)); static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() { return [](mlir::MLIRContext *ctx) { auto ty = mlir::FloatType::getF80(ctx); @@ -488,7 +489,8 @@ struct ForcedReduceReal10 { /// Placeholder for real*16 version of Reduce Intrinsic struct ForcedReduceReal16 { - static constexpr const char *name = ExpandAndQuoteKey(RTNAME(ReduceReal16)); + static constexpr const char *name = + ExpandAndQuoteKey(RTNAME(ReduceReal16Ref)); static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() { return [](mlir::MLIRContext *ctx) { auto ty = mlir::FloatType::getF128(ctx); @@ -508,7 +510,7 @@ struct ForcedReduceReal16 { /// 
Placeholder for DIM real*10 version of Reduce Intrinsic struct ForcedReduceReal10Dim { static constexpr const char *name = - ExpandAndQuoteKey(RTNAME(ReduceReal10Dim)); + ExpandAndQuoteKey(RTNAME(ReduceReal10DimRef)); static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() { return [](mlir::MLIRContext *ctx) { auto ty = mlir::FloatType::getF80(ctx); @@ -530,7 +532,7 @@ struct ForcedReduceReal10Dim { /// Placeholder for DIM real*16 version of Reduce Intrinsic struct ForcedReduceReal16Dim { static constexpr const char *name = - ExpandAndQuoteKey(RTNAME(ReduceReal16Dim)); + ExpandAndQuoteKey(RTNAME(ReduceReal16DimRef)); static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() { return [](mlir::MLIRContext *ctx) { auto ty = mlir::FloatType::getF128(ctx); @@ -552,7 +554,7 @@ struct ForcedReduceReal16Dim { /// Placeholder for integer*16 version of Reduce Intrinsic struct ForcedReduceInteger16 { static constexpr const char *name = - ExpandAndQuoteKey(RTNAME(ReduceInteger16)); + ExpandAndQuoteKey(RTNAME(ReduceInteger16Ref)); static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() { return [](mlir::MLIRContext *ctx) { auto ty = mlir::IntegerType::get(ctx, 128); @@ -572,7 +574,7 @@ struct ForcedReduceInteger16 { /// Placeholder for DIM integer*16 version of Reduce Intrinsic struct ForcedReduceInteger16Dim { static constexpr const char *name = - ExpandAndQuoteKey(RTNAME(ReduceInteger16Dim)); + ExpandAndQuoteKey(RTNAME(ReduceInteger16DimRef)); static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() { return [](mlir::MLIRContext *ctx) { auto ty = mlir::IntegerType::get(ctx, 128); @@ -594,7 +596,7 @@ struct ForcedReduceInteger16Dim { /// Placeholder for complex(10) version of Reduce Intrinsic struct ForcedReduceComplex10 { static constexpr const char *name = - ExpandAndQuoteKey(RTNAME(CppReduceComplex10)); + ExpandAndQuoteKey(RTNAME(CppReduceComplex10Ref)); static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() { return 
[](mlir::MLIRContext *ctx) { auto ty = mlir::ComplexType::get(mlir::FloatType::getF80(ctx)); @@ -615,7 +617,7 @@ struct ForcedReduceComplex10 { /// Placeholder for Dim complex(10) version of Reduce Intrinsic struct ForcedReduceComplex10Dim { static constexpr const char *name = - ExpandAndQuoteKey(RTNAME(CppReduceComplex10Dim)); + ExpandAndQuoteKey(RTNAME(CppReduceComplex10DimRef)); static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() { return [](mlir::MLIRContext *ctx) { auto ty = mlir::ComplexType::get(mlir::FloatType::getF80(ctx)); @@ -637,7 +639,7 @@ struct ForcedReduceComplex10Dim { /// Placeholder for complex(16) version of Reduce Intrinsic struct ForcedReduceComplex16 { static constexpr const char *name = - ExpandAndQuoteKey(RTNAME(CppReduceComplex16)); + ExpandAndQuoteKey(RTNAME(CppReduceComplex16Ref)); static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() { return [](mlir::MLIRContext *ctx) { auto ty = mlir::ComplexType::get(mlir::FloatType::getF128(ctx)); @@ -658,7 +660,7 @@ struct ForcedReduceComplex16 { /// Placeholder for Dim complex(16) version of Reduce Intrinsic struct ForcedReduceComplex16Dim { static constexpr const char *name = - ExpandAndQuoteKey(RTNAME(CppReduceComplex16Dim)); + ExpandAndQuoteKey(RTNAME(CppReduceComplex16DimRef)); static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() { return [](mlir::MLIRContext *ctx) { auto ty = mlir::ComplexType::get(mlir::FloatType::getF128(ctx)); @@ -1471,17 +1473,17 @@ void fir::runtime::genReduce(fir::FirOpBuilder &builder, mlir::Location loc, fir::factory::CharacterExprHelper charHelper{builder, loc}; if (eleTy == fir::ComplexType::get(ctx, 2)) - func = - fir::runtime::getRuntimeFunc(loc, builder); + func = fir::runtime::getRuntimeFunc(loc, + builder); else if (eleTy == fir::ComplexType::get(ctx, 3)) - func = - fir::runtime::getRuntimeFunc(loc, builder); + func = fir::runtime::getRuntimeFunc(loc, + builder); else if (eleTy == fir::ComplexType::get(ctx, 4)) - func = - 
fir::runtime::getRuntimeFunc(loc, builder); + func = fir::runtime::getRuntimeFunc(loc, + builder); else if (eleTy == fir::ComplexType::get(ctx, 8)) - func = - fir::runtime::getRuntimeFunc(loc, builder); + func = fir::runtime::getRuntimeFunc(loc, + builder); else if (eleTy == fir::ComplexType::get(ctx, 10)) func = fir::runtime::getRuntimeFunc(loc, builder); else if (eleTy == fir::ComplexType::get(ctx, 16)) @@ -1529,35 +1531,43 @@ mlir::Value fir::runtime::genReduce(fir::FirOpBuilder &builder, "expect real, interger or logical"); if (eleTy.isF16()) - func = fir::runtime::getRuntimeFunc(loc, builder); + func = fir::runtime::getRuntimeFunc(loc, builder); else if (eleTy.isBF16()) - func = fir::runtime::getRuntimeFunc(loc, builder); + func = fir::runtime::getRuntimeFunc(loc, builder); else if (eleTy.isF32()) - func = fir::runtime::getRuntimeFunc(loc, builder); + func = fir::runtime::getRuntimeFunc(loc, builder); else if (eleTy.isF64()) - func = fir::runtime::getRuntimeFunc(loc, builder); + func = fir::runtime::getRuntimeFunc(loc, builder); else if (eleTy.isF80()) func = fir::runtime::getRuntimeFunc(loc, builder); else if (eleTy.isF128()) func = fir::runtime::getRuntimeFunc(loc, builder); else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(1))) - func = fir::runtime::getRuntimeFunc(loc, builder); + func = + fir::runtime::getRuntimeFunc(loc, builder); else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(2))) - func = fir::runtime::getRuntimeFunc(loc, builder); + func = + fir::runtime::getRuntimeFunc(loc, builder); else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(4))) - func = fir::runtime::getRuntimeFunc(loc, builder); + func = + fir::runtime::getRuntimeFunc(loc, builder); else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(8))) - func = fir::runtime::getRuntimeFunc(loc, builder); + func = + fir::runtime::getRuntimeFunc(loc, builder); else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(16))) func = 
fir::runtime::getRuntimeFunc(loc, builder); else if (eleTy == fir::LogicalType::get(ctx, 1)) - func = fir::runtime::getRuntimeFunc(loc, builder); + func = + fir::runtime::getRuntimeFunc(loc, builder); else if (eleTy == fir::LogicalType::get(ctx, 2)) - func = fir::runtime::getRuntimeFunc(loc, builder); + func = + fir::runtime::getRuntimeFunc(loc, builder); else if (eleTy == fir::LogicalType::get(ctx, 4)) - func = fir::runtime::getRuntimeFunc(loc, builder); + func = + fir::runtime::getRuntimeFunc(loc, builder); else if (eleTy == fir::LogicalType::get(ctx, 8)) - func = fir::runtime::getRuntimeFunc(loc, builder); + func = + fir::runtime::getRuntimeFunc(loc, builder); else fir::intrinsicTypeTODO(builder, eleTy, loc, "REDUCE"); @@ -1586,59 +1596,63 @@ void fir::runtime::genReduceDim(fir::FirOpBuilder &builder, mlir::Location loc, fir::factory::CharacterExprHelper charHelper{builder, loc}; if (eleTy.isF16()) - func = fir::runtime::getRuntimeFunc(loc, builder); + func = + fir::runtime::getRuntimeFunc(loc, builder); else if (eleTy.isBF16()) - func = fir::runtime::getRuntimeFunc(loc, builder); + func = + fir::runtime::getRuntimeFunc(loc, builder); else if (eleTy.isF32()) - func = fir::runtime::getRuntimeFunc(loc, builder); + func = + fir::runtime::getRuntimeFunc(loc, builder); else if (eleTy.isF64()) - func = fir::runtime::getRuntimeFunc(loc, builder); + func = + fir::runtime::getRuntimeFunc(loc, builder); else if (eleTy.isF80()) func = fir::runtime::getRuntimeFunc(loc, builder); else if (eleTy.isF128()) func = fir::runtime::getRuntimeFunc(loc, builder); else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(1))) - func = - fir::runtime::getRuntimeFunc(loc, builder); + func = fir::runtime::getRuntimeFunc(loc, + builder); else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(2))) - func = - fir::runtime::getRuntimeFunc(loc, builder); + func = fir::runtime::getRuntimeFunc(loc, + builder); else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(4))) - 
func = - fir::runtime::getRuntimeFunc(loc, builder); + func = fir::runtime::getRuntimeFunc(loc, + builder); else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(8))) - func = - fir::runtime::getRuntimeFunc(loc, builder); + func = fir::runtime::getRuntimeFunc(loc, + builder); else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(16))) func = fir::runtime::getRuntimeFunc(loc, builder); else if (eleTy == fir::ComplexType::get(ctx, 2)) - func = fir::runtime::getRuntimeFunc(loc, - builder); + func = fir::runtime::getRuntimeFunc( + loc, builder); else if (eleTy == fir::ComplexType::get(ctx, 3)) - func = fir::runtime::getRuntimeFunc(loc, - builder); + func = fir::runtime::getRuntimeFunc( + loc, builder); else if (eleTy == fir::ComplexType::get(ctx, 4)) - func = fir::runtime::getRuntimeFunc(loc, - builder); + func = fir::runtime::getRuntimeFunc( + loc, builder); else if (eleTy == fir::ComplexType::get(ctx, 8)) - func = fir::runtime::getRuntimeFunc(loc, - builder); + func = fir::runtime::getRuntimeFunc( + loc, builder); else if (eleTy == fir::ComplexType::get(ctx, 10)) func = fir::runtime::getRuntimeFunc(loc, builder); else if (eleTy == fir::ComplexType::get(ctx, 16)) func = fir::runtime::getRuntimeFunc(loc, builder); else if (eleTy == fir::LogicalType::get(ctx, 1)) - func = - fir::runtime::getRuntimeFunc(loc, builder); + func = fir::runtime::getRuntimeFunc(loc, + builder); else if (eleTy == fir::LogicalType::get(ctx, 2)) - func = - fir::runtime::getRuntimeFunc(loc, builder); + func = fir::runtime::getRuntimeFunc(loc, + builder); else if (eleTy == fir::LogicalType::get(ctx, 4)) - func = - fir::runtime::getRuntimeFunc(loc, builder); + func = fir::runtime::getRuntimeFunc(loc, + builder); else if (eleTy == fir::LogicalType::get(ctx, 8)) - func = - fir::runtime::getRuntimeFunc(loc, builder); + func = fir::runtime::getRuntimeFunc(loc, + builder); else if (fir::isa_char(eleTy) && charHelper.getCharacterKind(eleTy) == 1) func = 
fir::runtime::getRuntimeFunc(loc, builder); diff --git a/flang/runtime/reduce.cpp b/flang/runtime/reduce.cpp index f8a5221a1ebf76..2f4bb6ea159cf4 100644 --- a/flang/runtime/reduce.cpp +++ b/flang/runtime/reduce.cpp @@ -16,11 +16,12 @@ namespace Fortran::runtime { -template class ReduceAccumulator { +template class ReduceAccumulator { public: - RT_API_ATTRS ReduceAccumulator(const Descriptor &array, - ReductionOperation operation, const T *identity, - Terminator &terminator) + using Operation = std::conditional_t, + ReferenceReductionOperation>; + RT_API_ATTRS ReduceAccumulator(const Descriptor &array, Operation operation, + const T *identity, Terminator &terminator) : array_{array}, operation_{operation}, identity_{identity}, terminator_{terminator} {} RT_API_ATTRS void Reinitialize() { result_.reset(); } @@ -28,7 +29,11 @@ template class ReduceAccumulator { RT_API_ATTRS bool AccumulateAt(const SubscriptValue at[]) { const auto *operand{array_.Element(at)}; if (result_) { - result_ = operation_(&*result_, operand); + if constexpr (isByValue) { + result_ = operation_(*result_, *operand); + } else { + result_ = operation_(&*result_, operand); + } } else { result_ = *operand; } @@ -48,7 +53,7 @@ template class ReduceAccumulator { private: const Descriptor &array_; common::optional result_; - ReductionOperation operation_; + Operation operation_; const T *identity_{nullptr}; Terminator &terminator_; }; @@ -104,104 +109,213 @@ class BufferedReduceAccumulator { extern "C" { RT_EXT_API_GROUP_BEGIN -std::int8_t RTDEF(ReduceInteger1)(const Descriptor &array, - ReductionOperation operation, const char *source, int line, - int dim, const Descriptor *mask, const std::int8_t *identity, +std::int8_t RTDEF(ReduceInteger1Ref)(const Descriptor &array, + ReferenceReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const std::int8_t *identity, + bool ordered) { + Terminator terminator{source, line}; + return GetTotalReduction(array, source, 
line, dim, + mask, + ReduceAccumulator{ + array, operation, identity, terminator}, + "REDUCE"); +} +std::int8_t RTDEF(ReduceInteger1Value)(const Descriptor &array, + ValueReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const std::int8_t *identity, bool ordered) { Terminator terminator{source, line}; return GetTotalReduction(array, source, line, dim, mask, - ReduceAccumulator{array, operation, identity, terminator}, + ReduceAccumulator{ + array, operation, identity, terminator}, "REDUCE"); } -void RTDEF(ReduceInteger1Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation operation, const char *source, int line, - int dim, const Descriptor *mask, const std::int8_t *identity, +void RTDEF(ReduceInteger1DimRef)(Descriptor &result, const Descriptor &array, + ReferenceReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const std::int8_t *identity, + bool ordered) { + Terminator terminator{source, line}; + using Accumulator = ReduceAccumulator; + Accumulator accumulator{array, operation, identity, terminator}; + PartialReduction(result, array, + array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator); +} +void RTDEF(ReduceInteger1DimValue)(Descriptor &result, const Descriptor &array, + ValueReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const std::int8_t *identity, bool ordered) { Terminator terminator{source, line}; - using Accumulator = ReduceAccumulator; + using Accumulator = ReduceAccumulator; Accumulator accumulator{array, operation, identity, terminator}; PartialReduction(result, array, array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator); } -std::int16_t RTDEF(ReduceInteger2)(const Descriptor &array, - ReductionOperation operation, const char *source, int line, - int dim, const Descriptor *mask, const std::int16_t *identity, +std::int16_t RTDEF(ReduceInteger2Ref)(const Descriptor &array, 
+ ReferenceReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const std::int16_t *identity, + bool ordered) { + Terminator terminator{source, line}; + return GetTotalReduction(array, source, line, dim, + mask, + ReduceAccumulator{ + array, operation, identity, terminator}, + "REDUCE"); +} +std::int16_t RTDEF(ReduceInteger2Value)(const Descriptor &array, + ValueReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const std::int16_t *identity, bool ordered) { Terminator terminator{source, line}; return GetTotalReduction(array, source, line, dim, mask, - ReduceAccumulator{array, operation, identity, terminator}, + ReduceAccumulator{ + array, operation, identity, terminator}, "REDUCE"); } -void RTDEF(ReduceInteger2Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation operation, const char *source, int line, - int dim, const Descriptor *mask, const std::int16_t *identity, +void RTDEF(ReduceInteger2DimRef)(Descriptor &result, const Descriptor &array, + ReferenceReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const std::int16_t *identity, + bool ordered) { + Terminator terminator{source, line}; + using Accumulator = ReduceAccumulator; + Accumulator accumulator{array, operation, identity, terminator}; + PartialReduction(result, array, + array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator); +} +void RTDEF(ReduceInteger2DimValue)(Descriptor &result, const Descriptor &array, + ValueReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const std::int16_t *identity, bool ordered) { Terminator terminator{source, line}; - using Accumulator = ReduceAccumulator; + using Accumulator = ReduceAccumulator; Accumulator accumulator{array, operation, identity, terminator}; PartialReduction(result, array, array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator); } 
-std::int32_t RTDEF(ReduceInteger4)(const Descriptor &array, - ReductionOperation operation, const char *source, int line, - int dim, const Descriptor *mask, const std::int32_t *identity, +std::int32_t RTDEF(ReduceInteger4Ref)(const Descriptor &array, + ReferenceReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const std::int32_t *identity, + bool ordered) { + Terminator terminator{source, line}; + return GetTotalReduction(array, source, line, dim, + mask, + ReduceAccumulator{ + array, operation, identity, terminator}, + "REDUCE"); +} +std::int32_t RTDEF(ReduceInteger4Value)(const Descriptor &array, + ValueReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const std::int32_t *identity, bool ordered) { Terminator terminator{source, line}; return GetTotalReduction(array, source, line, dim, mask, - ReduceAccumulator{array, operation, identity, terminator}, + ReduceAccumulator{ + array, operation, identity, terminator}, "REDUCE"); } -void RTDEF(ReduceInteger4Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation operation, const char *source, int line, - int dim, const Descriptor *mask, const std::int32_t *identity, +void RTDEF(ReduceInteger4DimRef)(Descriptor &result, const Descriptor &array, + ReferenceReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const std::int32_t *identity, + bool ordered) { + Terminator terminator{source, line}; + using Accumulator = ReduceAccumulator; + Accumulator accumulator{array, operation, identity, terminator}; + PartialReduction(result, array, + array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator); +} +void RTDEF(ReduceInteger4DimValue)(Descriptor &result, const Descriptor &array, + ValueReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const std::int32_t *identity, bool ordered) { Terminator terminator{source, line}; - using 
Accumulator = ReduceAccumulator; + using Accumulator = ReduceAccumulator; Accumulator accumulator{array, operation, identity, terminator}; PartialReduction(result, array, array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator); } -std::int64_t RTDEF(ReduceInteger8)(const Descriptor &array, - ReductionOperation operation, const char *source, int line, - int dim, const Descriptor *mask, const std::int64_t *identity, +std::int64_t RTDEF(ReduceInteger8Ref)(const Descriptor &array, + ReferenceReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const std::int64_t *identity, + bool ordered) { + Terminator terminator{source, line}; + return GetTotalReduction(array, source, line, dim, + mask, + ReduceAccumulator{ + array, operation, identity, terminator}, + "REDUCE"); +} +std::int64_t RTDEF(ReduceInteger8Value)(const Descriptor &array, + ValueReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const std::int64_t *identity, bool ordered) { Terminator terminator{source, line}; return GetTotalReduction(array, source, line, dim, mask, - ReduceAccumulator{array, operation, identity, terminator}, + ReduceAccumulator{ + array, operation, identity, terminator}, "REDUCE"); } -void RTDEF(ReduceInteger8Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation operation, const char *source, int line, - int dim, const Descriptor *mask, const std::int64_t *identity, +void RTDEF(ReduceInteger8DimRef)(Descriptor &result, const Descriptor &array, + ReferenceReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const std::int64_t *identity, + bool ordered) { + Terminator terminator{source, line}; + using Accumulator = ReduceAccumulator; + Accumulator accumulator{array, operation, identity, terminator}; + PartialReduction(result, array, + array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator); +} +void 
RTDEF(ReduceInteger8DimValue)(Descriptor &result, const Descriptor &array, + ValueReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const std::int64_t *identity, bool ordered) { Terminator terminator{source, line}; - using Accumulator = ReduceAccumulator; + using Accumulator = ReduceAccumulator; Accumulator accumulator{array, operation, identity, terminator}; PartialReduction(result, array, array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator); } #ifdef __SIZEOF_INT128__ -common::int128_t RTDEF(ReduceInteger16)(const Descriptor &array, - ReductionOperation operation, const char *source, +common::int128_t RTDEF(ReduceInteger16Ref)(const Descriptor &array, + ReferenceReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const common::int128_t *identity, + bool ordered) { + Terminator terminator{source, line}; + return GetTotalReduction(array, source, line, dim, + mask, + ReduceAccumulator{ + array, operation, identity, terminator}, + "REDUCE"); +} +common::int128_t RTDEF(ReduceInteger16Value)(const Descriptor &array, + ValueReductionOperation operation, const char *source, int line, int dim, const Descriptor *mask, const common::int128_t *identity, bool ordered) { Terminator terminator{source, line}; return GetTotalReduction(array, source, line, dim, mask, - ReduceAccumulator{ + ReduceAccumulator{ array, operation, identity, terminator}, "REDUCE"); } -void RTDEF(ReduceInteger16Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation operation, const char *source, +void RTDEF(ReduceInteger16DimRef)(Descriptor &result, const Descriptor &array, + ReferenceReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const common::int128_t *identity, + bool ordered) { + Terminator terminator{source, line}; + using Accumulator = ReduceAccumulator; + Accumulator accumulator{array, operation, identity, terminator}; + 
PartialReduction(result, array, + array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator); +} +void RTDEF(ReduceInteger16DimValue)(Descriptor &result, const Descriptor &array, + ValueReductionOperation operation, const char *source, int line, int dim, const Descriptor *mask, const common::int128_t *identity, bool ordered) { Terminator terminator{source, line}; - using Accumulator = ReduceAccumulator; + using Accumulator = ReduceAccumulator; Accumulator accumulator{array, operation, identity, terminator}; PartialReduction(result, array, array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator); @@ -209,231 +323,464 @@ void RTDEF(ReduceInteger16Dim)(Descriptor &result, const Descriptor &array, #endif // TODO: real/complex(2 & 3) -float RTDEF(ReduceReal4)(const Descriptor &array, - ReductionOperation operation, const char *source, int line, int dim, - const Descriptor *mask, const float *identity, bool ordered) { +float RTDEF(ReduceReal4Ref)(const Descriptor &array, + ReferenceReductionOperation operation, const char *source, int line, + int dim, const Descriptor *mask, const float *identity, bool ordered) { + Terminator terminator{source, line}; + return GetTotalReduction(array, source, line, dim, + mask, + ReduceAccumulator{array, operation, identity, terminator}, + "REDUCE"); +} +float RTDEF(ReduceReal4Value)(const Descriptor &array, + ValueReductionOperation operation, const char *source, int line, + int dim, const Descriptor *mask, const float *identity, bool ordered) { Terminator terminator{source, line}; return GetTotalReduction(array, source, line, dim, - mask, ReduceAccumulator{array, operation, identity, terminator}, + mask, + ReduceAccumulator{array, operation, identity, terminator}, "REDUCE"); } -void RTDEF(ReduceReal4Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation operation, const char *source, int line, int dim, - const Descriptor *mask, const float *identity, bool ordered) { +void 
RTDEF(ReduceReal4DimRef)(Descriptor &result, const Descriptor &array, + ReferenceReductionOperation operation, const char *source, int line, + int dim, const Descriptor *mask, const float *identity, bool ordered) { + Terminator terminator{source, line}; + using Accumulator = ReduceAccumulator; + Accumulator accumulator{array, operation, identity, terminator}; + PartialReduction(result, array, + array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator); +} +void RTDEF(ReduceReal4DimValue)(Descriptor &result, const Descriptor &array, + ValueReductionOperation operation, const char *source, int line, + int dim, const Descriptor *mask, const float *identity, bool ordered) { Terminator terminator{source, line}; - using Accumulator = ReduceAccumulator; + using Accumulator = ReduceAccumulator; Accumulator accumulator{array, operation, identity, terminator}; PartialReduction(result, array, array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator); } -double RTDEF(ReduceReal8)(const Descriptor &array, - ReductionOperation operation, const char *source, int line, int dim, - const Descriptor *mask, const double *identity, bool ordered) { +double RTDEF(ReduceReal8Ref)(const Descriptor &array, + ReferenceReductionOperation operation, const char *source, int line, + int dim, const Descriptor *mask, const double *identity, bool ordered) { Terminator terminator{source, line}; return GetTotalReduction(array, source, line, dim, - mask, ReduceAccumulator{array, operation, identity, terminator}, + mask, + ReduceAccumulator{array, operation, identity, terminator}, + "REDUCE"); +} +double RTDEF(ReduceReal8Value)(const Descriptor &array, + ValueReductionOperation operation, const char *source, int line, + int dim, const Descriptor *mask, const double *identity, bool ordered) { + Terminator terminator{source, line}; + return GetTotalReduction(array, source, line, dim, + mask, + ReduceAccumulator{array, operation, identity, terminator}, "REDUCE"); } -void 
RTDEF(ReduceReal8Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation operation, const char *source, int line, int dim, - const Descriptor *mask, const double *identity, bool ordered) { +void RTDEF(ReduceReal8DimRef)(Descriptor &result, const Descriptor &array, + ReferenceReductionOperation operation, const char *source, int line, + int dim, const Descriptor *mask, const double *identity, bool ordered) { Terminator terminator{source, line}; - using Accumulator = ReduceAccumulator; + using Accumulator = ReduceAccumulator; + Accumulator accumulator{array, operation, identity, terminator}; + PartialReduction(result, array, + array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator); +} +void RTDEF(ReduceReal8DimValue)(Descriptor &result, const Descriptor &array, + ValueReductionOperation operation, const char *source, int line, + int dim, const Descriptor *mask, const double *identity, bool ordered) { + Terminator terminator{source, line}; + using Accumulator = ReduceAccumulator; Accumulator accumulator{array, operation, identity, terminator}; PartialReduction(result, array, array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator); } #if LDBL_MANT_DIG == 64 -long double RTDEF(ReduceReal10)(const Descriptor &array, - ReductionOperation operation, const char *source, int line, - int dim, const Descriptor *mask, const long double *identity, +long double RTDEF(ReduceReal10Ref)(const Descriptor &array, + ReferenceReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const long double *identity, + bool ordered) { + Terminator terminator{source, line}; + return GetTotalReduction(array, source, line, dim, + mask, + ReduceAccumulator{ + array, operation, identity, terminator}, + "REDUCE"); +} +long double RTDEF(ReduceReal10Value)(const Descriptor &array, + ValueReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const long double *identity, bool ordered) { 
Terminator terminator{source, line}; return GetTotalReduction(array, source, line, dim, mask, - ReduceAccumulator{array, operation, identity, terminator}, + ReduceAccumulator{ + array, operation, identity, terminator}, "REDUCE"); } -void RTDEF(ReduceReal10Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation operation, const char *source, int line, - int dim, const Descriptor *mask, const long double *identity, +void RTDEF(ReduceReal10DimRef)(Descriptor &result, const Descriptor &array, + ReferenceReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const long double *identity, + bool ordered) { + Terminator terminator{source, line}; + using Accumulator = ReduceAccumulator; + Accumulator accumulator{array, operation, identity, terminator}; + PartialReduction(result, array, + array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator); +} +void RTDEF(ReduceReal10DimValue)(Descriptor &result, const Descriptor &array, + ValueReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const long double *identity, bool ordered) { Terminator terminator{source, line}; - using Accumulator = ReduceAccumulator; + using Accumulator = ReduceAccumulator; Accumulator accumulator{array, operation, identity, terminator}; PartialReduction(result, array, array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator); } #endif #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 -CppFloat128Type RTDEF(ReduceReal16)(const Descriptor &array, - ReductionOperation operation, const char *source, int line, - int dim, const Descriptor *mask, const CppFloat128Type *identity, +CppFloat128Type RTDEF(ReduceReal16Ref)(const Descriptor &array, + ReferenceReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const CppFloat128Type *identity, bool ordered) { Terminator terminator{source, line}; return GetTotalReduction(array, source, line, dim, mask, - 
ReduceAccumulator{ + ReduceAccumulator{ array, operation, identity, terminator}, "REDUCE"); } -void RTDEF(ReduceReal16Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation operation, const char *source, int line, - int dim, const Descriptor *mask, const CppFloat128Type *identity, +CppFloat128Type RTDEF(ReduceReal16Value)(const Descriptor &array, + ValueReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const CppFloat128Type *identity, bool ordered) { Terminator terminator{source, line}; - using Accumulator = ReduceAccumulator; + return GetTotalReduction(array, source, line, dim, + mask, + ReduceAccumulator{ + array, operation, identity, terminator}, + "REDUCE"); +} +void RTDEF(ReduceReal16DimRef)(Descriptor &result, const Descriptor &array, + ReferenceReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const CppFloat128Type *identity, + bool ordered) { + Terminator terminator{source, line}; + using Accumulator = ReduceAccumulator; + Accumulator accumulator{array, operation, identity, terminator}; + PartialReduction(result, array, + array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator); +} +void RTDEF(ReduceReal16DimValue)(Descriptor &result, const Descriptor &array, + ValueReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const CppFloat128Type *identity, + bool ordered) { + Terminator terminator{source, line}; + using Accumulator = ReduceAccumulator; Accumulator accumulator{array, operation, identity, terminator}; PartialReduction(result, array, array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator); } #endif -void RTDEF(CppReduceComplex4)(std::complex &result, - const Descriptor &array, ReductionOperation> operation, +void RTDEF(CppReduceComplex4Ref)(std::complex &result, + const Descriptor &array, + ReferenceReductionOperation> operation, const char *source, int line, int dim, const 
Descriptor *mask, const std::complex *identity, bool ordered) { Terminator terminator{source, line}; result = GetTotalReduction(array, source, line, dim, mask, - ReduceAccumulator>{ + ReduceAccumulator, false>{ + array, operation, identity, terminator}, + "REDUCE"); +} +void RTDEF(CppReduceComplex4Value)(std::complex &result, + const Descriptor &array, + ValueReductionOperation> operation, const char *source, + int line, int dim, const Descriptor *mask, + const std::complex *identity, bool ordered) { + Terminator terminator{source, line}; + result = GetTotalReduction(array, source, line, dim, + mask, + ReduceAccumulator, true>{ array, operation, identity, terminator}, "REDUCE"); } -void RTDEF(CppReduceComplex4Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation> operation, const char *source, +void RTDEF(CppReduceComplex4DimRef)(Descriptor &result, const Descriptor &array, + ReferenceReductionOperation> operation, + const char *source, int line, int dim, const Descriptor *mask, + const std::complex *identity, bool ordered) { + Terminator terminator{source, line}; + using Accumulator = ReduceAccumulator, false>; + Accumulator accumulator{array, operation, identity, terminator}; + PartialReduction(result, array, + array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator); +} +void RTDEF(CppReduceComplex4DimValue)(Descriptor &result, + const Descriptor &array, + ValueReductionOperation> operation, const char *source, int line, int dim, const Descriptor *mask, const std::complex *identity, bool ordered) { Terminator terminator{source, line}; - using Accumulator = ReduceAccumulator>; + using Accumulator = ReduceAccumulator, true>; Accumulator accumulator{array, operation, identity, terminator}; PartialReduction(result, array, array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator); } -void RTDEF(CppReduceComplex8)(std::complex &result, - const Descriptor &array, ReductionOperation> operation, +void 
RTDEF(CppReduceComplex8Ref)(std::complex &result, + const Descriptor &array, + ReferenceReductionOperation> operation, const char *source, int line, int dim, const Descriptor *mask, const std::complex *identity, bool ordered) { Terminator terminator{source, line}; result = GetTotalReduction(array, source, line, dim, mask, - ReduceAccumulator>{ + ReduceAccumulator, false>{ array, operation, identity, terminator}, "REDUCE"); } -void RTDEF(CppReduceComplex8Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation> operation, const char *source, +void RTDEF(CppReduceComplex8Value)(std::complex &result, + const Descriptor &array, + ValueReductionOperation> operation, const char *source, int line, int dim, const Descriptor *mask, const std::complex *identity, bool ordered) { Terminator terminator{source, line}; - using Accumulator = ReduceAccumulator>; + result = GetTotalReduction(array, source, line, dim, + mask, + ReduceAccumulator, true>{ + array, operation, identity, terminator}, + "REDUCE"); +} +void RTDEF(CppReduceComplex8DimRef)(Descriptor &result, const Descriptor &array, + ReferenceReductionOperation> operation, + const char *source, int line, int dim, const Descriptor *mask, + const std::complex *identity, bool ordered) { + Terminator terminator{source, line}; + using Accumulator = ReduceAccumulator, false>; Accumulator accumulator{array, operation, identity, terminator}; PartialReduction(result, array, array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator); } -#if LDBL_MANT_DIG == 64 -void RTDEF(CppReduceComplex10)(std::complex &result, +void RTDEF(CppReduceComplex8DimValue)(Descriptor &result, const Descriptor &array, - ReductionOperation> operation, const char *source, + ValueReductionOperation> operation, const char *source, int line, int dim, const Descriptor *mask, + const std::complex *identity, bool ordered) { + Terminator terminator{source, line}; + using Accumulator = ReduceAccumulator, true>; + Accumulator accumulator{array, 
operation, identity, terminator}; + PartialReduction(result, array, + array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator); +} +#if LDBL_MANT_DIG == 64 +void RTDEF(CppReduceComplex10Ref)(std::complex &result, + const Descriptor &array, + ReferenceReductionOperation> operation, + const char *source, int line, int dim, const Descriptor *mask, const std::complex *identity, bool ordered) { Terminator terminator{source, line}; result = GetTotalReduction(array, source, line, dim, mask, - ReduceAccumulator>{ + ReduceAccumulator, false>{ array, operation, identity, terminator}, "REDUCE"); } -void RTDEF(CppReduceComplex10Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation> operation, const char *source, - int line, int dim, const Descriptor *mask, +void RTDEF(CppReduceComplex10Value)(std::complex &result, + const Descriptor &array, + ValueReductionOperation> operation, + const char *source, int line, int dim, const Descriptor *mask, const std::complex *identity, bool ordered) { Terminator terminator{source, line}; - using Accumulator = ReduceAccumulator>; + result = GetTotalReduction(array, source, line, + dim, mask, + ReduceAccumulator, true>{ + array, operation, identity, terminator}, + "REDUCE"); +} +void RTDEF(CppReduceComplex10DimRef)(Descriptor &result, + const Descriptor &array, + ReferenceReductionOperation> operation, + const char *source, int line, int dim, const Descriptor *mask, + const std::complex *identity, bool ordered) { + Terminator terminator{source, line}; + using Accumulator = ReduceAccumulator, false>; + Accumulator accumulator{array, operation, identity, terminator}; + PartialReduction(result, array, + array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator); +} +void RTDEF(CppReduceComplex10DimValue)(Descriptor &result, + const Descriptor &array, + ValueReductionOperation> operation, + const char *source, int line, int dim, const Descriptor *mask, + const std::complex *identity, bool ordered) { + 
Terminator terminator{source, line}; + using Accumulator = ReduceAccumulator, true>; Accumulator accumulator{array, operation, identity, terminator}; PartialReduction(result, array, array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator); } #endif #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 -void RTDEF(CppReduceComplex16)(std::complex &result, +void RTDEF(CppReduceComplex16Ref)(std::complex &result, + const Descriptor &array, + ReferenceReductionOperation> operation, + const char *source, int line, int dim, const Descriptor *mask, + const std::complex *identity, bool ordered) { + Terminator terminator{source, line}; + result = GetTotalReduction(array, source, line, + dim, mask, + ReduceAccumulator, false>{ + array, operation, identity, terminator}, + "REDUCE"); +} +void RTDEF(CppReduceComplex16Value)(std::complex &result, const Descriptor &array, - ReductionOperation> operation, + ValueReductionOperation> operation, const char *source, int line, int dim, const Descriptor *mask, const std::complex *identity, bool ordered) { Terminator terminator{source, line}; result = GetTotalReduction(array, source, line, dim, mask, - ReduceAccumulator>{ + ReduceAccumulator, true>{ array, operation, identity, terminator}, "REDUCE"); } -void RTDEF(CppReduceComplex16Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation> operation, +void RTDEF(CppReduceComplex16DimRef)(Descriptor &result, + const Descriptor &array, + ReferenceReductionOperation> operation, const char *source, int line, int dim, const Descriptor *mask, const std::complex *identity, bool ordered) { Terminator terminator{source, line}; - using Accumulator = ReduceAccumulator>; + using Accumulator = ReduceAccumulator, false>; + Accumulator accumulator{array, operation, identity, terminator}; + PartialReduction(result, array, + array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator); +} +void RTDEF(CppReduceComplex16DimValue)(Descriptor &result, + const Descriptor &array, + 
ValueReductionOperation> operation, + const char *source, int line, int dim, const Descriptor *mask, + const std::complex *identity, bool ordered) { + Terminator terminator{source, line}; + using Accumulator = ReduceAccumulator, true>; Accumulator accumulator{array, operation, identity, terminator}; PartialReduction(result, array, array.ElementBytes(), dim, mask, terminator, "REDUCE", accumulator); } #endif -bool RTDEF(ReduceLogical1)(const Descriptor &array, - ReductionOperation operation, const char *source, int line, - int dim, const Descriptor *mask, const std::int8_t *identity, +bool RTDEF(ReduceLogical1Ref)(const Descriptor &array, + ReferenceReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const std::int8_t *identity, bool ordered) { - return RTNAME(ReduceInteger1)( + return RTNAME(ReduceInteger1Ref)( array, operation, source, line, dim, mask, identity, ordered) != 0; } -void RTDEF(ReduceLogical1Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation operation, const char *source, int line, - int dim, const Descriptor *mask, const std::int8_t *identity, +bool RTDEF(ReduceLogical1Value)(const Descriptor &array, + ValueReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const std::int8_t *identity, + bool ordered) { + return RTNAME(ReduceInteger1Value)( + array, operation, source, line, dim, mask, identity, ordered) != 0; +} +void RTDEF(ReduceLogical1DimRef)(Descriptor &result, const Descriptor &array, + ReferenceReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const std::int8_t *identity, + bool ordered) { + RTNAME(ReduceInteger1DimRef) + (result, array, operation, source, line, dim, mask, identity, ordered); +} +void RTDEF(ReduceLogical1DimValue)(Descriptor &result, const Descriptor &array, + ValueReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const std::int8_t 
*identity, bool ordered) { - RTNAME(ReduceInteger1Dim) + RTNAME(ReduceInteger1DimValue) (result, array, operation, source, line, dim, mask, identity, ordered); } -bool RTDEF(ReduceLogical2)(const Descriptor &array, - ReductionOperation operation, const char *source, int line, - int dim, const Descriptor *mask, const std::int16_t *identity, +bool RTDEF(ReduceLogical2Ref)(const Descriptor &array, + ReferenceReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const std::int16_t *identity, bool ordered) { - return RTNAME(ReduceInteger2)( + return RTNAME(ReduceInteger2Ref)( array, operation, source, line, dim, mask, identity, ordered) != 0; } -void RTDEF(ReduceLogical2Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation operation, const char *source, int line, - int dim, const Descriptor *mask, const std::int16_t *identity, +bool RTDEF(ReduceLogical2Value)(const Descriptor &array, + ValueReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const std::int16_t *identity, bool ordered) { - RTNAME(ReduceInteger2Dim) + return RTNAME(ReduceInteger2Value)( + array, operation, source, line, dim, mask, identity, ordered) != 0; +} +void RTDEF(ReduceLogical2DimRef)(Descriptor &result, const Descriptor &array, + ReferenceReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const std::int16_t *identity, + bool ordered) { + RTNAME(ReduceInteger2DimRef) (result, array, operation, source, line, dim, mask, identity, ordered); } -bool RTDEF(ReduceLogical4)(const Descriptor &array, - ReductionOperation operation, const char *source, int line, - int dim, const Descriptor *mask, const std::int32_t *identity, +void RTDEF(ReduceLogical2DimValue)(Descriptor &result, const Descriptor &array, + ValueReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const std::int16_t *identity, bool ordered) { - return 
RTNAME(ReduceInteger4)( + RTNAME(ReduceInteger2DimValue) + (result, array, operation, source, line, dim, mask, identity, ordered); +} +bool RTDEF(ReduceLogical4Ref)(const Descriptor &array, + ReferenceReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const std::int32_t *identity, + bool ordered) { + return RTNAME(ReduceInteger4Ref)( + array, operation, source, line, dim, mask, identity, ordered) != 0; +} +bool RTDEF(ReduceLogical4Value)(const Descriptor &array, + ValueReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const std::int32_t *identity, + bool ordered) { + return RTNAME(ReduceInteger4Value)( array, operation, source, line, dim, mask, identity, ordered) != 0; } -void RTDEF(ReduceLogical4Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation operation, const char *source, int line, - int dim, const Descriptor *mask, const std::int32_t *identity, +void RTDEF(ReduceLogical4DimRef)(Descriptor &result, const Descriptor &array, + ReferenceReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const std::int32_t *identity, + bool ordered) { + RTNAME(ReduceInteger4DimRef) + (result, array, operation, source, line, dim, mask, identity, ordered); +} +void RTDEF(ReduceLogical4DimValue)(Descriptor &result, const Descriptor &array, + ValueReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const std::int32_t *identity, bool ordered) { - RTNAME(ReduceInteger4Dim) + RTNAME(ReduceInteger4DimValue) (result, array, operation, source, line, dim, mask, identity, ordered); } -bool RTDEF(ReduceLogical8)(const Descriptor &array, - ReductionOperation operation, const char *source, int line, - int dim, const Descriptor *mask, const std::int64_t *identity, +bool RTDEF(ReduceLogical8Ref)(const Descriptor &array, + ReferenceReductionOperation operation, const char *source, + int line, int dim, 
const Descriptor *mask, const std::int64_t *identity, bool ordered) { - return RTNAME(ReduceInteger8)( + return RTNAME(ReduceInteger8Ref)( array, operation, source, line, dim, mask, identity, ordered) != 0; } -void RTDEF(ReduceLogical8Dim)(Descriptor &result, const Descriptor &array, - ReductionOperation operation, const char *source, int line, - int dim, const Descriptor *mask, const std::int64_t *identity, +bool RTDEF(ReduceLogical8Value)(const Descriptor &array, + ValueReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const std::int64_t *identity, + bool ordered) { + return RTNAME(ReduceInteger8Value)( + array, operation, source, line, dim, mask, identity, ordered) != 0; +} +void RTDEF(ReduceLogical8DimRef)(Descriptor &result, const Descriptor &array, + ReferenceReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const std::int64_t *identity, + bool ordered) { + RTNAME(ReduceInteger8DimRef) + (result, array, operation, source, line, dim, mask, identity, ordered); +} +void RTDEF(ReduceLogical8DimValue)(Descriptor &result, const Descriptor &array, + ValueReductionOperation operation, const char *source, + int line, int dim, const Descriptor *mask, const std::int64_t *identity, bool ordered) { - RTNAME(ReduceInteger8Dim) + RTNAME(ReduceInteger8DimValue) (result, array, operation, source, line, dim, mask, identity, ordered); } diff --git a/flang/test/Lower/Intrinsics/reduce.f90 b/flang/test/Lower/Intrinsics/reduce.f90 index 8d7b7798a94c56..7619edffd529e9 100644 --- a/flang/test/Lower/Intrinsics/reduce.f90 +++ b/flang/test/Lower/Intrinsics/reduce.f90 @@ -40,7 +40,7 @@ subroutine integer1(a, id) ! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[BOX_PROC]] : (!fir.boxproc<() -> ()>) -> ((!fir.ref, !fir.ref) -> !fir.ref) ! CHECK: %[[A_NONE:.*]] = fir.convert %[[A]]#1 : (!fir.box>) -> !fir.box ! CHECK: %[[MASK_NONE:.*]] = fir.convert %[[MASK]] : (!fir.box) -> !fir.box -! 
CHECK: %[[REDUCE_RES:.*]] = fir.call @_FortranAReduceInteger1(%[[A_NONE]], %[[BOX_ADDR]], %{{.*}}, %{{.*}}, %c1{{.*}}, %[[MASK_NONE]], %[[IDENTITY]], %false) fastmath : (!fir.box, (!fir.ref, !fir.ref) -> !fir.ref, !fir.ref, i32, i32, !fir.box, !fir.ref, i1) -> i8 +! CHECK: %[[REDUCE_RES:.*]] = fir.call @_FortranAReduceInteger1Ref(%[[A_NONE]], %[[BOX_ADDR]], %{{.*}}, %{{.*}}, %c1{{.*}}, %[[MASK_NONE]], %[[IDENTITY]], %false) fastmath : (!fir.box, (!fir.ref, !fir.ref) -> !fir.ref, !fir.ref, i32, i32, !fir.box, !fir.ref, i1) -> i8 ! CHECK: hlfir.assign %[[REDUCE_RES]] to %[[RES]]#0 : i8, !fir.ref ! CHECK: %[[ADDR_OP:.*]] = fir.address_of(@_QMreduce_modPred_int1) : (!fir.ref, !fir.ref) -> i8 ! CHECK: %[[BOX_PROC:.*]] = fir.emboxproc %[[ADDR_OP]] : ((!fir.ref, !fir.ref) -> i8) -> !fir.boxproc<() -> ()> @@ -48,13 +48,13 @@ subroutine integer1(a, id) ! CHECK: %[[BOX_ADDR:.*]] = fir.box_addr %[[BOX_PROC]] : (!fir.boxproc<() -> ()>) -> ((!fir.ref, !fir.ref) -> !fir.ref) ! CHECK: %[[A_NONE:.*]] = fir.convert %[[A]]#1 : (!fir.box>) -> !fir.box ! CHECK: %[[MASK_NONE:.*]] = fir.convert %[[MASK]] : (!fir.box) -> !fir.box -! CHECK: %{{.*}} = fir.call @_FortranAReduceInteger1(%[[A_NONE]], %[[BOX_ADDR]], %{{.*}}, %{{.*}}, %c1{{.*}}, %[[MASK_NONE]], %[[ID]]#1, %false{{.*}}) fastmath : (!fir.box, (!fir.ref, !fir.ref) -> !fir.ref, !fir.ref, i32, i32, !fir.box, !fir.ref, i1) -> i8 -! CHECK: fir.call @_FortranAReduceInteger1(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}#1, %true) +! CHECK: %{{.*}} = fir.call @_FortranAReduceInteger1Ref(%[[A_NONE]], %[[BOX_ADDR]], %{{.*}}, %{{.*}}, %c1{{.*}}, %[[MASK_NONE]], %[[ID]]#1, %false{{.*}}) fastmath : (!fir.box, (!fir.ref, !fir.ref) -> !fir.ref, !fir.ref, i32, i32, !fir.box, !fir.ref, i1) -> i8 +! CHECK: fir.call @_FortranAReduceInteger1Ref(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}#1, %true) ! 
CHECK: %[[MASK:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {fortran_attrs = #fir.var_attrs, uniq_name = "_QQro.3xl4.0"} : (!fir.ref>>, !fir.shape<1>) -> (!fir.ref>>, !fir.ref>>) ! CHECK: %[[SHAPE_C3:.*]] = fir.shape %c3{{.*}} : (index) -> !fir.shape<1> ! CHECK: %[[BOXED_MASK:.*]] = fir.embox %[[MASK]]#1(%[[SHAPE_C3]]) : (!fir.ref>>, !fir.shape<1>) -> !fir.box>> ! CHECK: %[[CONV_MASK:.*]] = fir.convert %[[BOXED_MASK]] : (!fir.box>>) -> !fir.box -! CHECK: fir.call @_FortranAReduceInteger1(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[CONV_MASK]], %{{.*}}, %false{{.*}}) +! CHECK: fir.call @_FortranAReduceInteger1Ref(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[CONV_MASK]], %{{.*}}, %false{{.*}}) pure function red_int2(a,b) integer(2), intent(in) :: a, b @@ -68,7 +68,7 @@ subroutine integer2(a) res = reduce(a, red_int2) end subroutine -! CHECK: fir.call @_FortranAReduceInteger2 +! CHECK: fir.call @_FortranAReduceInteger2Ref pure function red_int4(a,b) integer(4), intent(in) :: a, b @@ -82,7 +82,7 @@ subroutine integer4(a) res = reduce(a, red_int4) end subroutine -! CHECK: fir.call @_FortranAReduceInteger4 +! CHECK: fir.call @_FortranAReduceInteger4Ref pure function red_int8(a,b) integer(8), intent(in) :: a, b @@ -96,7 +96,7 @@ subroutine integer8(a) res = reduce(a, red_int8) end subroutine -! CHECK: fir.call @_FortranAReduceInteger8 +! CHECK: fir.call @_FortranAReduceInteger8Ref pure function red_int16(a,b) integer(16), intent(in) :: a, b @@ -110,7 +110,7 @@ subroutine integer16(a) res = reduce(a, red_int16) end subroutine -! CHECK: fir.call @_FortranAReduceInteger16 +! CHECK: fir.call @_FortranAReduceInteger16Ref pure function red_real2(a,b) real(2), intent(in) :: a, b @@ -124,7 +124,7 @@ subroutine real2(a) res = reduce(a, red_real2) end subroutine -! CHECK: fir.call @_FortranAReduceReal2 +! 
CHECK: fir.call @_FortranAReduceReal2Ref pure function red_real3(a,b) real(3), intent(in) :: a, b @@ -138,7 +138,7 @@ subroutine real3(a) res = reduce(a, red_real3) end subroutine -! CHECK: fir.call @_FortranAReduceReal3 +! CHECK: fir.call @_FortranAReduceReal3Ref pure function red_real4(a,b) real(4), intent(in) :: a, b @@ -152,7 +152,7 @@ subroutine real4(a) res = reduce(a, red_real4) end subroutine -! CHECK: fir.call @_FortranAReduceReal4 +! CHECK: fir.call @_FortranAReduceReal4Ref pure function red_real8(a,b) real(8), intent(in) :: a, b @@ -166,7 +166,7 @@ subroutine real8(a) res = reduce(a, red_real8) end subroutine -! CHECK: fir.call @_FortranAReduceReal8 +! CHECK: fir.call @_FortranAReduceReal8Ref pure function red_real10(a,b) real(10), intent(in) :: a, b @@ -180,7 +180,7 @@ subroutine real10(a) res = reduce(a, red_real10) end subroutine -! CHECK: fir.call @_FortranAReduceReal10 +! CHECK: fir.call @_FortranAReduceReal10Ref pure function red_real16(a,b) real(16), intent(in) :: a, b @@ -194,7 +194,7 @@ subroutine real16(a) res = reduce(a, red_real16) end subroutine -! CHECK: fir.call @_FortranAReduceReal16 +! CHECK: fir.call @_FortranAReduceReal16Ref pure function red_complex2(a,b) complex(2), intent(in) :: a, b @@ -292,7 +292,7 @@ subroutine log1(a) res = reduce(a, red_log1) end subroutine -! CHECK: fir.call @_FortranAReduceLogical1 +! CHECK: fir.call @_FortranAReduceLogical1Ref pure function red_log2(a,b) logical(2), intent(in) :: a, b @@ -306,7 +306,7 @@ subroutine log2(a) res = reduce(a, red_log2) end subroutine -! CHECK: fir.call @_FortranAReduceLogical2 +! CHECK: fir.call @_FortranAReduceLogical2Ref pure function red_log4(a,b) logical(4), intent(in) :: a, b @@ -320,7 +320,7 @@ subroutine log4(a) res = reduce(a, red_log4) end subroutine -! CHECK: fir.call @_FortranAReduceLogical4 +! 
CHECK: fir.call @_FortranAReduceLogical4Ref pure function red_log8(a,b) logical(8), intent(in) :: a, b @@ -334,7 +334,7 @@ subroutine log8(a) res = reduce(a, red_log8) end subroutine -! CHECK: fir.call @_FortranAReduceLogical8 +! CHECK: fir.call @_FortranAReduceLogical8Ref pure function red_char1(a,b) character(1), intent(in) :: a, b @@ -403,7 +403,7 @@ subroutine integer1dim(a, id) res = reduce(a, red_int1, 2) end subroutine -! CHECK: fir.call @_FortranAReduceInteger1Dim +! CHECK: fir.call @_FortranAReduceInteger1DimRef subroutine integer2dim(a, id) integer(2), intent(in) :: a(:,:) @@ -412,7 +412,7 @@ subroutine integer2dim(a, id) res = reduce(a, red_int2, 2) end subroutine -! CHECK: fir.call @_FortranAReduceInteger2Dim +! CHECK: fir.call @_FortranAReduceInteger2DimRef subroutine integer4dim(a, id) integer(4), intent(in) :: a(:,:) @@ -421,7 +421,7 @@ subroutine integer4dim(a, id) res = reduce(a, red_int4, 2) end subroutine -! CHECK: fir.call @_FortranAReduceInteger4Dim +! CHECK: fir.call @_FortranAReduceInteger4DimRef subroutine integer8dim(a, id) integer(8), intent(in) :: a(:,:) @@ -430,7 +430,7 @@ subroutine integer8dim(a, id) res = reduce(a, red_int8, 2) end subroutine -! CHECK: fir.call @_FortranAReduceInteger8Dim +! CHECK: fir.call @_FortranAReduceInteger8DimRef subroutine integer16dim(a, id) integer(16), intent(in) :: a(:,:) @@ -439,7 +439,7 @@ subroutine integer16dim(a, id) res = reduce(a, red_int16, 2) end subroutine -! CHECK: fir.call @_FortranAReduceInteger16Dim +! CHECK: fir.call @_FortranAReduceInteger16DimRef subroutine real2dim(a, id) real(2), intent(in) :: a(:,:) @@ -448,7 +448,7 @@ subroutine real2dim(a, id) res = reduce(a, red_real2, 2) end subroutine -! CHECK: fir.call @_FortranAReduceReal2Dim +! CHECK: fir.call @_FortranAReduceReal2DimRef subroutine real3dim(a, id) real(3), intent(in) :: a(:,:) @@ -457,7 +457,7 @@ subroutine real3dim(a, id) res = reduce(a, red_real3, 2) end subroutine -! CHECK: fir.call @_FortranAReduceReal3Dim +! 
CHECK: fir.call @_FortranAReduceReal3DimRef subroutine real4dim(a, id) real(4), intent(in) :: a(:,:) @@ -466,7 +466,7 @@ subroutine real4dim(a, id) res = reduce(a, red_real4, 2) end subroutine -! CHECK: fir.call @_FortranAReduceReal4Dim +! CHECK: fir.call @_FortranAReduceReal4DimRef subroutine real8dim(a, id) real(8), intent(in) :: a(:,:) @@ -475,7 +475,7 @@ subroutine real8dim(a, id) res = reduce(a, red_real8, 2) end subroutine -! CHECK: fir.call @_FortranAReduceReal8Dim +! CHECK: fir.call @_FortranAReduceReal8DimRef subroutine real10dim(a, id) real(10), intent(in) :: a(:,:) @@ -484,7 +484,7 @@ subroutine real10dim(a, id) res = reduce(a, red_real10, 2) end subroutine -! CHECK: fir.call @_FortranAReduceReal10Dim +! CHECK: fir.call @_FortranAReduceReal10DimRef subroutine real16dim(a, id) real(16), intent(in) :: a(:,:) @@ -493,7 +493,7 @@ subroutine real16dim(a, id) res = reduce(a, red_real16, 2) end subroutine -! CHECK: fir.call @_FortranAReduceReal16Dim +! CHECK: fir.call @_FortranAReduceReal16DimRef subroutine complex2dim(a, id) complex(2), intent(in) :: a(:,:) @@ -556,7 +556,7 @@ subroutine logical1dim(a, id) res = reduce(a, red_log1, 2) end subroutine -! CHECK: fir.call @_FortranAReduceLogical1Dim +! CHECK: fir.call @_FortranAReduceLogical1DimRef subroutine logical2dim(a, id) logical(2), intent(in) :: a(:,:) @@ -565,7 +565,7 @@ subroutine logical2dim(a, id) res = reduce(a, red_log2, 2) end subroutine -! CHECK: fir.call @_FortranAReduceLogical2Dim +! CHECK: fir.call @_FortranAReduceLogical2DimRef subroutine logical4dim(a, id) logical(4), intent(in) :: a(:,:) @@ -574,7 +574,7 @@ subroutine logical4dim(a, id) res = reduce(a, red_log4, 2) end subroutine -! CHECK: fir.call @_FortranAReduceLogical4Dim +! CHECK: fir.call @_FortranAReduceLogical4DimRef subroutine logical8dim(a, id) logical(8), intent(in) :: a(:,:) @@ -583,7 +583,7 @@ subroutine logical8dim(a, id) res = reduce(a, red_log8, 2) end subroutine -! CHECK: fir.call @_FortranAReduceLogical8Dim +! 
CHECK: fir.call @_FortranAReduceLogical8DimRef subroutine testtypeDim(a) type(t1), intent(in) :: a(:,:) diff --git a/flang/unittests/Runtime/Reduction.cpp b/flang/unittests/Runtime/Reduction.cpp index b2661e78abdf58..41c8d86c35b762 100644 --- a/flang/unittests/Runtime/Reduction.cpp +++ b/flang/unittests/Runtime/Reduction.cpp @@ -647,23 +647,24 @@ static std::int32_t IMultiply(const std::int32_t *x, const std::int32_t *y) { TEST(Reductions, ReduceInt4) { auto intVector{MakeArray( std::vector{4}, std::vector{1, 2, 3, 4})}; - EXPECT_EQ(RTNAME(ReduceInteger4)(*intVector, IAdd, __FILE__, __LINE__), 10); EXPECT_EQ( - RTNAME(ReduceInteger4)(*intVector, IMultiply, __FILE__, __LINE__), 24); + RTNAME(ReduceInteger4Ref)(*intVector, IAdd, __FILE__, __LINE__), 10); + EXPECT_EQ( + RTNAME(ReduceInteger4Ref)(*intVector, IMultiply, __FILE__, __LINE__), 24); } TEST(Reductions, ReduceInt4Dim) { auto intMatrix{MakeArray( std::vector{2, 2}, std::vector{1, 2, 3, 4})}; StaticDescriptor<1, true> statDesc; Descriptor &sums{statDesc.descriptor()}; - RTNAME(ReduceInteger4Dim)(sums, *intMatrix, IAdd, __FILE__, __LINE__, 1); + RTNAME(ReduceInteger4DimRef)(sums, *intMatrix, IAdd, __FILE__, __LINE__, 1); EXPECT_EQ(sums.rank(), 1); EXPECT_EQ(sums.GetDimension(0).LowerBound(), 1); EXPECT_EQ(sums.GetDimension(0).Extent(), 2); EXPECT_EQ(*sums.ZeroBasedIndexedElement(0), 3); EXPECT_EQ(*sums.ZeroBasedIndexedElement(1), 7); sums.Destroy(); - RTNAME(ReduceInteger4Dim)(sums, *intMatrix, IAdd, __FILE__, __LINE__, 2); + RTNAME(ReduceInteger4DimRef)(sums, *intMatrix, IAdd, __FILE__, __LINE__, 2); EXPECT_EQ(sums.rank(), 1); EXPECT_EQ(sums.GetDimension(0).LowerBound(), 1); EXPECT_EQ(sums.GetDimension(0).Extent(), 2); From 86bee819120b5ba4b7262c7800a88fbf904d4932 Mon Sep 17 00:00:00 2001 From: Peter Klausler <35819229+klausler@users.noreply.github.com> Date: Thu, 13 Jun 2024 11:22:27 -0700 Subject: [PATCH 007/155] =?UTF-8?q?[flang][preprocessor]=20Fixed-form=20co?= 
=?UTF-8?q?ntinuation=20across=20preprocessing=20di=E2=80=A6=20(#95332)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …rective Implement fixed-form line continuation when the continuation line is the result of text produced by an #include or other preprocessing directive. This accommodates the somewhat common practice of putting dummy or actual arguments into a header file and #including it into several code sites. Fixes https://github.com/llvm/llvm-project/issues/78928. --- flang/include/flang/Parser/provenance.h | 5 +++++ flang/include/flang/Parser/token-sequence.h | 1 + flang/lib/Parser/prescan.cpp | 20 +++++++++++++++++--- flang/lib/Parser/prescan.h | 2 ++ flang/lib/Parser/provenance.cpp | 10 ++++++++++ flang/lib/Parser/token-sequence.cpp | 12 ++++++++---- flang/test/Preprocessing/ff-args.h | 1 + flang/test/Preprocessing/ff-include-args.F | 14 ++++++++++++++ 8 files changed, 58 insertions(+), 7 deletions(-) create mode 100644 flang/test/Preprocessing/ff-args.h create mode 100644 flang/test/Preprocessing/ff-include-args.F diff --git a/flang/include/flang/Parser/provenance.h b/flang/include/flang/Parser/provenance.h index 73d500f32831b2..42c5b3de2cbe2f 100644 --- a/flang/include/flang/Parser/provenance.h +++ b/flang/include/flang/Parser/provenance.h @@ -257,6 +257,10 @@ class CookedSource { provenanceMap_.Put(pm); } + void MarkPossibleFixedFormContinuation() { + possibleFixedFormContinuations_.push_back(BufferedBytes()); + } + std::size_t BufferedBytes() const; void Marshal(AllCookedSources &); // marshals text into one contiguous block void CompileProvenanceRangeToOffsetMappings(AllSources &); @@ -269,6 +273,7 @@ class CookedSource { std::string data_; // all of it, prescanned and preprocessed OffsetToProvenanceMappings provenanceMap_; ProvenanceRangeToOffsetMappings invertedMap_; + std::list possibleFixedFormContinuations_; }; class AllCookedSources { diff --git a/flang/include/flang/Parser/token-sequence.h 
b/flang/include/flang/Parser/token-sequence.h index ee5f71edd03c81..1f82a3c1a203ac 100644 --- a/flang/include/flang/Parser/token-sequence.h +++ b/flang/include/flang/Parser/token-sequence.h @@ -125,6 +125,7 @@ class TokenSequence { TokenSequence &ClipComment(const Prescanner &, bool skipFirst = false); const TokenSequence &CheckBadFortranCharacters( Messages &, const Prescanner &, bool allowAmpersand) const; + bool BadlyNestedParentheses() const; const TokenSequence &CheckBadParentheses(Messages &) const; void Emit(CookedSource &) const; llvm::raw_ostream &Dump(llvm::raw_ostream &) const; diff --git a/flang/lib/Parser/prescan.cpp b/flang/lib/Parser/prescan.cpp index e4801c36505b40..8efcd617cf0f92 100644 --- a/flang/lib/Parser/prescan.cpp +++ b/flang/lib/Parser/prescan.cpp @@ -295,8 +295,13 @@ void Prescanner::CheckAndEmitLine( // Applications play shenanigans with line continuation before and // after #include'd subprogram argument lists. if (!isNestedInIncludeDirective_ && !omitNewline_ && - !afterIncludeDirective_) { - tokens.CheckBadParentheses(messages_); + !afterIncludeDirective_ && tokens.BadlyNestedParentheses()) { + if (inFixedForm_ && nextLine_ < limit_ && + IsPreprocessorDirectiveLine(nextLine_)) { + // don't complain + } else { + tokens.CheckBadParentheses(messages_); + } } tokens.Emit(cooked_); if (omitNewline_) { @@ -350,7 +355,16 @@ void Prescanner::LabelField(TokenSequence &token) { ++column_; } if (badColumn && !preprocessor_.IsNameDefined(token.CurrentOpenToken())) { - if (features_.ShouldWarn(common::UsageWarning::Scanning)) { + if (prescannerNesting_ > 0 && *badColumn == 6 && + cooked_.BufferedBytes() == firstCookedCharacterOffset_) { + // This is the first source line in #included text or conditional + // code under #if. + // If it turns out that the preprocessed text begins with a + // fixed form continuation line, the newline at the end + // of the latest source line beforehand will be deleted in + // CookedSource::Marshal(). 
+ cooked_.MarkPossibleFixedFormContinuation(); + } else if (features_.ShouldWarn(common::UsageWarning::Scanning)) { Say(GetProvenance(start + *badColumn - 1), *badColumn == 6 ? "Statement should not begin with a continuation line"_warn_en_US diff --git a/flang/lib/Parser/prescan.h b/flang/lib/Parser/prescan.h index cf64bdb02a9b7e..b6f6d2ca439ee7 100644 --- a/flang/lib/Parser/prescan.h +++ b/flang/lib/Parser/prescan.h @@ -247,6 +247,8 @@ class Prescanner { bool omitNewline_{false}; bool skipLeadingAmpersand_{false}; + const std::size_t firstCookedCharacterOffset_{cooked_.BufferedBytes()}; + const Provenance spaceProvenance_{ allSources_.CompilerInsertionProvenance(' ')}; const Provenance backslashProvenance_{ diff --git a/flang/lib/Parser/provenance.cpp b/flang/lib/Parser/provenance.cpp index 55ef67fd6288df..6e2e7326e2167a 100644 --- a/flang/lib/Parser/provenance.cpp +++ b/flang/lib/Parser/provenance.cpp @@ -513,6 +513,16 @@ void CookedSource::Marshal(AllCookedSources &allCookedSources) { "(after end of source)")); data_ = buffer_.Marshal(); buffer_.clear(); + for (std::size_t ffStart : possibleFixedFormContinuations_) { + if (ffStart > 0 && ffStart + 1 < data_.size() && + data_[ffStart - 1] == '\n' && data_[ffStart] == ' ') { + // This fixed form include line is the first source line in an + // #include file (or after an empty one). Connect it with the previous + // source line by deleting its terminal newline. 
+ data_[ffStart - 1] = ' '; + } + } + possibleFixedFormContinuations_.clear(); allCookedSources.Register(*this); } diff --git a/flang/lib/Parser/token-sequence.cpp b/flang/lib/Parser/token-sequence.cpp index 40560bbacb54f8..133e60ba4f0098 100644 --- a/flang/lib/Parser/token-sequence.cpp +++ b/flang/lib/Parser/token-sequence.cpp @@ -378,9 +378,7 @@ const TokenSequence &TokenSequence::CheckBadFortranCharacters( return *this; } -const TokenSequence &TokenSequence::CheckBadParentheses( - Messages &messages) const { - // First, a quick pass with no allocation for the common case +bool TokenSequence::BadlyNestedParentheses() const { int nesting{0}; std::size_t tokens{SizeInTokens()}; for (std::size_t j{0}; j < tokens; ++j) { @@ -394,8 +392,14 @@ const TokenSequence &TokenSequence::CheckBadParentheses( } } } - if (nesting != 0) { + return nesting != 0; +} + +const TokenSequence &TokenSequence::CheckBadParentheses( + Messages &messages) const { + if (BadlyNestedParentheses()) { // There's an error; diagnose it + std::size_t tokens{SizeInTokens()}; std::vector stack; for (std::size_t j{0}; j < tokens; ++j) { CharBlock token{TokenAt(j)}; diff --git a/flang/test/Preprocessing/ff-args.h b/flang/test/Preprocessing/ff-args.h new file mode 100644 index 00000000000000..99562784006c90 --- /dev/null +++ b/flang/test/Preprocessing/ff-args.h @@ -0,0 +1 @@ + +3.14159) \ No newline at end of file diff --git a/flang/test/Preprocessing/ff-include-args.F b/flang/test/Preprocessing/ff-include-args.F new file mode 100644 index 00000000000000..81e4102598c2fd --- /dev/null +++ b/flang/test/Preprocessing/ff-include-args.F @@ -0,0 +1,14 @@ +! RUN: %flang -E %s 2>&1 | FileCheck %s +! CHECK: call foo ( 3.14159) +! 
CHECK: subroutine foo(test) + call foo ( +#include "ff-args.h" + end +#define TEST + subroutine foo( +#ifdef TEST + +test) +#else + +) +#endif + end From 2146fd0d8d0ede4657354594c012e7543534cd87 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 13 Jun 2024 11:49:21 -0700 Subject: [PATCH 008/155] Revert "Reland "[AArch64] Decouple feature dependency expansion. (#94279)" (#95231)" This reverts commit 70510733af33c70ff7877eaf30d7718b9358a725. The following code is now incorrectly rejected. ``` % cat neon.c #include __attribute__((target("arch=armv8-a"))) uint64x2_t foo(uint64x2_t a, uint64x2_t b) { return veorq_u64(a, b); } % newclang --target=aarch64-linux-gnu -c neon.c neon.c:5:10: error: always_inline function 'veorq_u64' requires target feature 'outline-atomics', but would be inlined into function 'foo' that is compiled without support for 'outline-atomics' 5 | return veorq_u64(a, b); | ^ 1 error generated. ``` "+outline-atomics" seems misleading here. --- clang/include/clang/AST/ASTContext.h | 3 + clang/lib/AST/ASTContext.cpp | 59 +++++----- clang/lib/AST/CMakeLists.txt | 2 - clang/lib/Basic/Targets/AArch64.cpp | 105 +++++++++++------ clang/lib/Basic/Targets/AArch64.h | 4 + .../CodeGen/aarch64-cpu-supports-target.c | 4 +- .../aarch64-sme-attrs.cpp | 2 +- clang/test/CodeGen/aarch64-targetattr.c | 48 ++++---- clang/test/CodeGen/attr-target-version.c | 46 ++++---- clang/test/Sema/aarch64-neon-target.c | 4 +- .../llvm/TargetParser/AArch64TargetParser.h | 107 +++++++----------- llvm/lib/TargetParser/AArch64TargetParser.cpp | 51 +++------ 12 files changed, 222 insertions(+), 213 deletions(-) diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index f1f20fca477a47..53ece996769a87 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -3210,6 +3210,9 @@ class ASTContext : public RefCountedBase { /// valid feature names. 
ParsedTargetAttr filterFunctionTargetAttrs(const TargetAttr *TD) const; + std::vector + filterFunctionTargetVersionAttrs(const TargetVersionAttr *TV) const; + void getFunctionFeatureMap(llvm::StringMap &FeatureMap, const FunctionDecl *) const; void getFunctionFeatureMap(llvm::StringMap &FeatureMap, diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index aa22825602a40f..34aa399fda2f86 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -87,7 +87,6 @@ #include "llvm/Support/MD5.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/TargetParser/AArch64TargetParser.h" #include "llvm/TargetParser/Triple.h" #include #include @@ -13677,20 +13676,17 @@ QualType ASTContext::getCorrespondingSignedFixedPointType(QualType Ty) const { } } -// Given a list of FMV features, return a concatenated list of the -// corresponding backend features (which may contain duplicates). -static std::vector getFMVBackendFeaturesFor( - const llvm::SmallVectorImpl &FMVFeatStrings) { - std::vector BackendFeats; - for (StringRef F : FMVFeatStrings) { - if (auto FMVExt = llvm::AArch64::parseArchExtension(F)) { - SmallVector Feats; - FMVExt->DependentFeatures.split(Feats, ',', -1, false); - for (StringRef F : Feats) - BackendFeats.push_back(F.str()); - } - } - return BackendFeats; +std::vector ASTContext::filterFunctionTargetVersionAttrs( + const TargetVersionAttr *TV) const { + assert(TV != nullptr); + llvm::SmallVector Feats; + std::vector ResFeats; + TV->getFeatures(Feats); + for (auto &Feature : Feats) + if (Target->validateCpuSupports(Feature.str())) + // Use '?' to mark features that came from TargetVersion. + ResFeats.push_back("?" 
+ Feature.str()); + return ResFeats; } ParsedTargetAttr @@ -13725,12 +13721,10 @@ void ASTContext::getFunctionFeatureMap(llvm::StringMap &FeatureMap, // Make a copy of the features as passed on the command line into the // beginning of the additional features from the function to override. - // AArch64 handles command line option features in parseTargetAttr(). - if (!Target->getTriple().isAArch64()) - ParsedAttr.Features.insert( - ParsedAttr.Features.begin(), - Target->getTargetOpts().FeaturesAsWritten.begin(), - Target->getTargetOpts().FeaturesAsWritten.end()); + ParsedAttr.Features.insert( + ParsedAttr.Features.begin(), + Target->getTargetOpts().FeaturesAsWritten.begin(), + Target->getTargetOpts().FeaturesAsWritten.end()); if (ParsedAttr.CPU != "" && Target->isValidCPUName(ParsedAttr.CPU)) TargetCPU = ParsedAttr.CPU; @@ -13751,31 +13745,32 @@ void ASTContext::getFunctionFeatureMap(llvm::StringMap &FeatureMap, Target->getTargetOpts().FeaturesAsWritten.end()); Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features); } else if (const auto *TC = FD->getAttr()) { + std::vector Features; if (Target->getTriple().isAArch64()) { + // TargetClones for AArch64 llvm::SmallVector Feats; TC->getFeatures(Feats, GD.getMultiVersionIndex()); - std::vector Features = getFMVBackendFeaturesFor(Feats); + for (StringRef Feat : Feats) + if (Target->validateCpuSupports(Feat.str())) + // Use '?' to mark features that came from AArch64 TargetClones. + Features.push_back("?" 
+ Feat.str()); Features.insert(Features.begin(), Target->getTargetOpts().FeaturesAsWritten.begin(), Target->getTargetOpts().FeaturesAsWritten.end()); - Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features); } else { - std::vector Features; StringRef VersionStr = TC->getFeatureStr(GD.getMultiVersionIndex()); if (VersionStr.starts_with("arch=")) TargetCPU = VersionStr.drop_front(sizeof("arch=") - 1); else if (VersionStr != "default") Features.push_back((StringRef{"+"} + VersionStr).str()); - Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features); } - } else if (const auto *TV = FD->getAttr()) { - llvm::SmallVector Feats; - TV->getFeatures(Feats); - std::vector Features = getFMVBackendFeaturesFor(Feats); - Features.insert(Features.begin(), - Target->getTargetOpts().FeaturesAsWritten.begin(), - Target->getTargetOpts().FeaturesAsWritten.end()); Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Features); + } else if (const auto *TV = FD->getAttr()) { + std::vector Feats = filterFunctionTargetVersionAttrs(TV); + Feats.insert(Feats.begin(), + Target->getTargetOpts().FeaturesAsWritten.begin(), + Target->getTargetOpts().FeaturesAsWritten.end()); + Target->initFeatureMap(FeatureMap, getDiagnostics(), TargetCPU, Feats); } else { FeatureMap = Target->getTargetOpts().FeatureMap; } diff --git a/clang/lib/AST/CMakeLists.txt b/clang/lib/AST/CMakeLists.txt index 0328666d59b1fc..a5d3dacfc1a84e 100644 --- a/clang/lib/AST/CMakeLists.txt +++ b/clang/lib/AST/CMakeLists.txt @@ -139,6 +139,4 @@ add_clang_library(clangAST omp_gen ClangDriverOptions intrinsics_gen - # These generated headers are included transitively. 
- AArch64TargetParserTableGen ) diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index 6fba5fff7bcc19..08d13c41a48572 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -1052,18 +1052,57 @@ bool AArch64TargetInfo::handleTargetFeatures(std::vector &Features, return true; } +bool AArch64TargetInfo::initFeatureMap( + llvm::StringMap &Features, DiagnosticsEngine &Diags, StringRef CPU, + const std::vector &FeaturesVec) const { + std::vector UpdatedFeaturesVec; + // Parse the CPU and add any implied features. + std::optional CpuInfo = llvm::AArch64::parseCpu(CPU); + if (CpuInfo) { + auto Exts = CpuInfo->getImpliedExtensions(); + std::vector CPUFeats; + llvm::AArch64::getExtensionFeatures(Exts, CPUFeats); + for (auto F : CPUFeats) { + assert((F[0] == '+' || F[0] == '-') && "Expected +/- in target feature!"); + UpdatedFeaturesVec.push_back(F.str()); + } + } + + // Process target and dependent features. This is done in two loops collecting + // them into UpdatedFeaturesVec: first to add dependent '+'features, second to + // add target '+/-'features that can later disable some of features added on + // the first loop. Function Multi Versioning features begin with '?'. + for (const auto &Feature : FeaturesVec) + if (((Feature[0] == '?' 
|| Feature[0] == '+')) && + AArch64TargetInfo::doesFeatureAffectCodeGen(Feature.substr(1))) { + StringRef DepFeatures = + AArch64TargetInfo::getFeatureDependencies(Feature.substr(1)); + SmallVector AttrFeatures; + DepFeatures.split(AttrFeatures, ","); + for (auto F : AttrFeatures) + UpdatedFeaturesVec.push_back(F.str()); + } + for (const auto &Feature : FeaturesVec) + if (Feature[0] != '?') { + std::string UpdatedFeature = Feature; + if (Feature[0] == '+') { + std::optional Extension = + llvm::AArch64::parseArchExtension(Feature.substr(1)); + if (Extension) + UpdatedFeature = Extension->Feature.str(); + } + UpdatedFeaturesVec.push_back(UpdatedFeature); + } + + return TargetInfo::initFeatureMap(Features, Diags, CPU, UpdatedFeaturesVec); +} + // Parse AArch64 Target attributes, which are a comma separated list of: // "arch=" - parsed to features as per -march=.. // "cpu=" - parsed to features as per -mcpu=.., with CPU set to // "tune=" - TuneCPU set to // "feature", "no-feature" - Add (or remove) feature. // "+feature", "+nofeature" - Add (or remove) feature. -// -// A feature may correspond to an Extension (anything with a corresponding -// AEK_), in which case an ExtensionSet is used to parse it and expand its -// dependencies. Otherwise the feature is passed through (e.g. +v8.1a, -// +outline-atomics, -fmv, etc). Features coming from the command line are -// already parsed, therefore their dependencies do not need expansion. 
ParsedTargetAttr AArch64TargetInfo::parseTargetAttr(StringRef Features) const { ParsedTargetAttr Ret; if (Features == "default") @@ -1073,26 +1112,23 @@ ParsedTargetAttr AArch64TargetInfo::parseTargetAttr(StringRef Features) const { bool FoundArch = false; auto SplitAndAddFeatures = [](StringRef FeatString, - std::vector &Features, - llvm::AArch64::ExtensionSet &FeatureBits) { + std::vector &Features) { SmallVector SplitFeatures; FeatString.split(SplitFeatures, StringRef("+"), -1, false); for (StringRef Feature : SplitFeatures) { - if (FeatureBits.parseModifier(Feature, /* AllowNoDashForm = */ true)) - continue; - // Pass through features that are not extensions, e.g. +v8.1a, - // +outline-atomics, -fmv, etc. - if (Feature.starts_with("no")) - Features.push_back("-" + Feature.drop_front(2).str()); + StringRef FeatureName = llvm::AArch64::getArchExtFeature(Feature); + if (!FeatureName.empty()) + Features.push_back(FeatureName.str()); else - Features.push_back("+" + Feature.str()); + // Pushing the original feature string to give a sema error later on + // when they get checked. + if (Feature.starts_with("no")) + Features.push_back("-" + Feature.drop_front(2).str()); + else + Features.push_back("+" + Feature.str()); } }; - llvm::AArch64::ExtensionSet FeatureBits; - // Reconstruct the bitset from the command line option features. - FeatureBits.reconstructFromParsedFeatures(getTargetOpts().FeaturesAsWritten); - for (auto &Feature : AttrFeatures) { Feature = Feature.trim(); if (Feature.starts_with("fpmath=")) @@ -1115,9 +1151,9 @@ ParsedTargetAttr AArch64TargetInfo::parseTargetAttr(StringRef Features) const { // Ret.Features. 
if (!AI) continue; - FeatureBits.addArchDefaults(*AI); + Ret.Features.push_back(AI->ArchFeature.str()); // Add any extra features, after the + - SplitAndAddFeatures(Split.second, Ret.Features, FeatureBits); + SplitAndAddFeatures(Split.second, Ret.Features); } else if (Feature.starts_with("cpu=")) { if (!Ret.CPU.empty()) Ret.Duplicate = "cpu="; @@ -1127,10 +1163,7 @@ ParsedTargetAttr AArch64TargetInfo::parseTargetAttr(StringRef Features) const { std::pair Split = Feature.split("=").second.trim().split("+"); Ret.CPU = Split.first; - if (auto CpuInfo = llvm::AArch64::parseCpu(Ret.CPU)) { - FeatureBits.addCPUDefaults(*CpuInfo); - SplitAndAddFeatures(Split.second, Ret.Features, FeatureBits); - } + SplitAndAddFeatures(Split.second, Ret.Features); } } else if (Feature.starts_with("tune=")) { if (!Ret.Tune.empty()) @@ -1138,19 +1171,25 @@ ParsedTargetAttr AArch64TargetInfo::parseTargetAttr(StringRef Features) const { else Ret.Tune = Feature.split("=").second.trim(); } else if (Feature.starts_with("+")) { - SplitAndAddFeatures(Feature, Ret.Features, FeatureBits); + SplitAndAddFeatures(Feature, Ret.Features); + } else if (Feature.starts_with("no-")) { + StringRef FeatureName = + llvm::AArch64::getArchExtFeature(Feature.split("-").second); + if (!FeatureName.empty()) + Ret.Features.push_back("-" + FeatureName.drop_front(1).str()); + else + Ret.Features.push_back("-" + Feature.split("-").second.str()); } else { - if (FeatureBits.parseModifier(Feature, /* AllowNoDashForm = */ true)) - continue; - // Pass through features that are not extensions, e.g. +v8.1a, - // +outline-atomics, -fmv, etc. - if (Feature.starts_with("no-")) - Ret.Features.push_back("-" + Feature.drop_front(3).str()); + // Try parsing the string to the internal target feature name. If it is + // invalid, add the original string (which could already be an internal + // name). These should be checked later by isValidFeatureName. 
+ StringRef FeatureName = llvm::AArch64::getArchExtFeature(Feature); + if (!FeatureName.empty()) + Ret.Features.push_back(FeatureName.str()); else Ret.Features.push_back("+" + Feature.str()); } } - FeatureBits.toLLVMFeatureList(Ret.Features); return Ret; } diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h index 696553ef8038a8..12fb50286f7511 100644 --- a/clang/lib/Basic/Targets/AArch64.h +++ b/clang/lib/Basic/Targets/AArch64.h @@ -107,6 +107,10 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo { unsigned multiVersionSortPriority(StringRef Name) const override; unsigned multiVersionFeatureCost() const override; + bool + initFeatureMap(llvm::StringMap &Features, DiagnosticsEngine &Diags, + StringRef CPU, + const std::vector &FeaturesVec) const override; bool useFP16ConversionIntrinsics() const override { return false; } diff --git a/clang/test/CodeGen/aarch64-cpu-supports-target.c b/clang/test/CodeGen/aarch64-cpu-supports-target.c index 28187bcf745331..e023944b24e53a 100644 --- a/clang/test/CodeGen/aarch64-cpu-supports-target.c +++ b/clang/test/CodeGen/aarch64-cpu-supports-target.c @@ -48,5 +48,5 @@ int test_versions() { return code(); } // CHECK: attributes #0 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -// CHECK: attributes #1 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon" } -// CHECK: attributes #2 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+sve" } +// CHECK: attributes #1 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+neon" } +// CHECK: attributes #2 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon,+sve" } diff --git 
a/clang/test/CodeGen/aarch64-sme-intrinsics/aarch64-sme-attrs.cpp b/clang/test/CodeGen/aarch64-sme-intrinsics/aarch64-sme-attrs.cpp index 9885ac45e6a0e0..af8933d93d6cbb 100644 --- a/clang/test/CodeGen/aarch64-sme-intrinsics/aarch64-sme-attrs.cpp +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/aarch64-sme-attrs.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +bf16 \ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme \ // RUN: -disable-O0-optnone -Werror -emit-llvm -o - %s \ // RUN: | opt -S -passes=mem2reg \ // RUN: | opt -S -passes=inline \ diff --git a/clang/test/CodeGen/aarch64-targetattr.c b/clang/test/CodeGen/aarch64-targetattr.c index 644e6a692c3be9..3e7a2092456071 100644 --- a/clang/test/CodeGen/aarch64-targetattr.c +++ b/clang/test/CodeGen/aarch64-targetattr.c @@ -58,50 +58,58 @@ void v1msve() {} // CHECK-LABEL: @plussve() #12 __attribute__((target("+sve"))) void plussve() {} -// CHECK-LABEL: @plussveplussve2() #12 +// CHECK-LABEL: @plussveplussve2() #13 __attribute__((target("+sve+nosve2"))) void plussveplussve2() {} -// CHECK-LABEL: @plussveminusnosve2() #12 +// CHECK-LABEL: @plussveminusnosve2() #13 __attribute__((target("sve,no-sve2"))) void plussveminusnosve2() {} -// CHECK-LABEL: @plusfp16() #13 +// CHECK-LABEL: @plusfp16() #14 __attribute__((target("+fp16"))) void plusfp16() {} -// CHECK-LABEL: @all() #14 +// CHECK-LABEL: @all() #15 __attribute__((target("cpu=neoverse-n1,tune=cortex-a710,arch=armv8.6-a+sve2"))) void all() {} -// CHECK-LABEL: @allplusbranchprotection() #15 +// CHECK-LABEL: @allplusbranchprotection() #16 __attribute__((target("cpu=neoverse-n1,tune=cortex-a710,arch=armv8.6-a+sve2,branch-protection=standard"))) void allplusbranchprotection() {} -// CHECK-LABEL: @plusnosimd() #16 +// These tests check that the user facing and internal llvm name are both accepted. 
+// CHECK-LABEL: @plusnoneon() #17 +__attribute__((target("+noneon"))) +void plusnoneon() {} +// CHECK-LABEL: @plusnosimd() #17 __attribute__((target("+nosimd"))) void plusnosimd() {} -// CHECK-LABEL: @nosimd() #16 +// CHECK-LABEL: @noneon() #17 +__attribute__((target("no-neon"))) +void noneon() {} +// CHECK-LABEL: @nosimd() #17 __attribute__((target("no-simd"))) void nosimd() {} // This isn't part of the standard interface, but test that -arch features should not apply anything else. -// CHECK-LABEL: @minusarch() #17 +// CHECK-LABEL: @minusarch() #18 __attribute__((target("no-v9.3a"))) void minusarch() {} // CHECK: attributes #0 = { {{.*}} "target-features"="+crc,+fp-armv8,+lse,+neon,+ras,+rdm,+v8.1a,+v8.2a,+v8a" } // CHECK: attributes #1 = { {{.*}} "target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+v8.1a,+v8.2a,+v8a" } // CHECK: attributes #2 = { {{.*}} "target-features"="+crc,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rdm,+sve,+sve2,+v8.1a,+v8.2a,+v8a" } -// CHECK: attributes #3 = { {{.*}} "target-features"="+bf16,+complxnum,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+ras,+rcpc,+rdm,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" } -// CHECK: attributes #4 = { {{.*}} "target-cpu"="cortex-a710" "target-features"="+bf16,+complxnum,+crc,+dotprod,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+mte,+neon,+pauth,+ras,+rcpc,+rdm,+sb,+sve,+sve2,+sve2-bitperm,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+v9a" } +// CHECK: attributes #3 = { {{.*}} "target-features"="+bf16,+complxnum,+crc,+dotprod,+fp-armv8,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+ras,+rcpc,+rdm,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" } +// CHECK: attributes #4 = { {{.*}} "target-cpu"="cortex-a710" "target-features"="+bf16,+complxnum,+crc,+dotprod,+flagm,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+mte,+neon,+pauth,+ras,+rcpc,+rdm,+sb,+sve,+sve2,+sve2-bitperm" } // CHECK: attributes #5 = { {{.*}} "tune-cpu"="cortex-a710" } 
// CHECK: attributes #6 = { {{.*}} "target-cpu"="generic" } // CHECK: attributes #7 = { {{.*}} "tune-cpu"="generic" } -// CHECK: attributes #8 = { {{.*}} "target-cpu"="neoverse-n1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+spe,+ssbs,+v8.1a,+v8.2a,+v8a" "tune-cpu"="cortex-a710" } -// CHECK: attributes #9 = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+sve" "tune-cpu"="cortex-a710" } -// CHECK: attributes #10 = { {{.*}} "target-cpu"="neoverse-v1" "target-features"="+aes,+bf16,+complxnum,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+rand,+ras,+rcpc,+rdm,+sha2,+sha3,+sm4,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a" } -// CHECK: attributes #11 = { {{.*}} "target-cpu"="neoverse-v1" "target-features"="+aes,+bf16,+complxnum,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+rand,+ras,+rcpc,+rdm,+sha2,+sha3,+sm4,+spe,+ssbs,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8a,-sve" } -// CHECK: attributes #12 = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+sve" } -// CHECK: attributes #13 = { {{.*}} "target-features"="+fp-armv8,+fullfp16" } -// CHECK: attributes #14 = { {{.*}} "target-cpu"="neoverse-n1" "target-features"="+aes,+bf16,+complxnum,+crc,+dotprod,+fp-armv8,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+ras,+rcpc,+rdm,+sha2,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" "tune-cpu"="cortex-a710" } -// CHECK: attributes #15 = { {{.*}} "branch-target-enforcement"="true" "guarded-control-stack"="true" {{.*}} "target-features"="+aes,+bf16,+complxnum,+crc,+dotprod,+fp-armv8,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+ras,+rcpc,+rdm,+sha2,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" "tune-cpu"="cortex-a710" } -// CHECK-NOT: attributes #16 = {{.*}} "target-features" -// CHECK: attributes #17 = { {{.*}} "target-features"="-v9.3a" } +// CHECK: attributes #8 = { {{.*}} "target-cpu"="neoverse-n1" 
"target-features"="+aes,+crc,+dotprod,+fp-armv8,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+spe,+ssbs" "tune-cpu"="cortex-a710" } +// CHECK: attributes #9 = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+sve" "tune-cpu"="cortex-a710" } +// CHECK: attributes #10 = { {{.*}} "target-cpu"="neoverse-v1" "target-features"="+aes,+bf16,+complxnum,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+rand,+ras,+rcpc,+rdm,+sha2,+sha3,+sm4,+spe,+ssbs,+sve,+sve2" } +// CHECK: attributes #11 = { {{.*}} "target-cpu"="neoverse-v1" "target-features"="+aes,+bf16,+complxnum,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+rand,+ras,+rcpc,+rdm,+sha2,+sha3,+sm4,+spe,+ssbs,-sve" } +// CHECK: attributes #12 = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+sve" } +// CHECK: attributes #13 = { {{.*}} "target-features"="+fp-armv8,+fullfp16,+neon,+sve,-sve2" } +// CHECK: attributes #14 = { {{.*}} "target-features"="+fullfp16" } +// CHECK: attributes #15 = { {{.*}} "target-cpu"="neoverse-n1" "target-features"="+aes,+bf16,+complxnum,+crc,+dotprod,+fp-armv8,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+ras,+rcpc,+rdm,+sha2,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" "tune-cpu"="cortex-a710" } +// CHECK: attributes #16 = { {{.*}} "branch-target-enforcement"="true" "guarded-control-stack"="true" {{.*}} "target-features"="+aes,+bf16,+complxnum,+crc,+dotprod,+fp-armv8,+fullfp16,+i8mm,+jsconv,+lse,+neon,+pauth,+ras,+rcpc,+rdm,+sha2,+spe,+ssbs,+sve,+sve2,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8.6a,+v8a" "tune-cpu"="cortex-a710" } +// CHECK: attributes #17 = { {{.*}} "target-features"="-neon" } +// CHECK: attributes #18 = { {{.*}} "target-features"="-v9.3a" } diff --git a/clang/test/CodeGen/attr-target-version.c b/clang/test/CodeGen/attr-target-version.c index 75f8734e5aaf37..3597711333d341 100644 --- a/clang/test/CodeGen/attr-target-version.c +++ b/clang/test/CodeGen/attr-target-version.c @@ -1129,42 +1129,42 @@ 
int caller(void) { return used_def_without_default_decl() + used_decl_without_de // CHECK-NOFMV-NEXT: ret i32 0 // //. -// CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+flagm,+fp-armv8,+fp16fml,+fullfp16,+neon,+rand,-v9.5a" } +// CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+flagm,+fp16fml,+fullfp16,+neon,+rand,-fp-armv8,-v9.5a" } // CHECK: attributes #[[ATTR1]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+altnzcv,+bf16,+flagm,+sme,+sme-i16i64,-fp-armv8,-v9.5a" } -// CHECK: attributes #[[ATTR2]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+lse,+neon,+sha2,-v9.5a" } -// CHECK: attributes #[[ATTR3]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+dotprod,+fp-armv8,+ls64,+neon,-v9.5a" } -// CHECK: attributes #[[ATTR4]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fp16fml,+fullfp16,+neon,-v9.5a" } -// CHECK: attributes #[[ATTR5]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon,-v9.5a" } +// CHECK: attributes #[[ATTR2]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+lse,+neon,+sha2,-fp-armv8,-v9.5a" } +// CHECK: attributes #[[ATTR3]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+dotprod,+ls64,+neon,-fp-armv8,-v9.5a" } +// CHECK: attributes #[[ATTR4]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp16fml,+fullfp16,+neon,-fp-armv8,-v9.5a" } +// CHECK: 
attributes #[[ATTR5]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+neon,-fp-armv8,-v9.5a" } // CHECK: attributes #[[ATTR6]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,-fp-armv8,-v9.5a" } // CHECK: attributes #[[ATTR7]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bti,-fp-armv8,-v9.5a" } // CHECK: attributes #[[ATTR8]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+sme,+sme2,-fp-armv8,-v9.5a" } // CHECK: attributes #[[ATTR9:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fp-armv8,-v9.5a" } // CHECK: attributes #[[ATTR10]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ccpp,-fp-armv8,-v9.5a" } // CHECK: attributes #[[ATTR11]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fp-armv8,-v9.5a" } -// CHECK: attributes #[[ATTR12]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon,-v9.5a" } +// CHECK: attributes #[[ATTR12]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+neon,-fp-armv8,-v9.5a" } // CHECK: attributes #[[ATTR13]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+sb,-fp-armv8,-v9.5a" } // CHECK: attributes #[[ATTR14]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+mops,-fp-armv8,-v9.5a" } -// CHECK: attributes #[[ATTR15]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+dotprod,+fp-armv8,+neon,-v9.5a" } -// CHECK: attributes #[[ATTR16]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon,+sve,-v9.5a" } +// CHECK: attributes #[[ATTR15]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+dotprod,+neon,-fp-armv8,-v9.5a" } +// CHECK: attributes #[[ATTR16]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+neon,+sve,-fp-armv8,-v9.5a" } // CHECK: attributes #[[ATTR17]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+lse,-fp-armv8,-v9.5a" } -// CHECK: attributes #[[ATTR18]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon,+rdm,-v9.5a" } -// CHECK: attributes #[[ATTR19:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+jsconv,+neon,-v9.5a" } -// CHECK: attributes #[[ATTR20:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon,+rdm,-v9.5a" } -// CHECK: attributes #[[ATTR21]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+jsconv,+neon,-v9.5a" } -// CHECK: attributes #[[ATTR22]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+aes,+f64mm,+fp-armv8,+fullfp16,+neon,+sve,-v9.5a" } -// CHECK: attributes #[[ATTR23]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+complxnum,+fp-armv8,+fullfp16,+neon,+rdm,+sme,-v9.5a" } -// CHECK: attributes #[[ATTR24]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+f32mm,+fp-armv8,+fullfp16,+i8mm,+neon,+sha2,+sha3,+sve,-v9.5a" } -// CHECK: attributes #[[ATTR25]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+dit,+fp-armv8,+fullfp16,+neon,+sve,-v9.5a" } +// CHECK: attributes #[[ATTR18]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+neon,+rdm,-fp-armv8,-v9.5a" } +// CHECK: attributes #[[ATTR19:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+jsconv,+neon,-fp-armv8,-v9.5a" } +// CHECK: attributes #[[ATTR20:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+neon,+rdm,-fp-armv8,-v9.5a" } +// CHECK: attributes #[[ATTR21]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+jsconv,+neon,-fp-armv8,-v9.5a" } +// CHECK: attributes #[[ATTR22]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+aes,+f64mm,+fullfp16,+neon,+sve,-fp-armv8,-v9.5a" } +// CHECK: attributes #[[ATTR23]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+complxnum,+fullfp16,+neon,+rdm,+sme,-fp-armv8,-v9.5a" } +// CHECK: attributes #[[ATTR24]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+f32mm,+fullfp16,+i8mm,+neon,+sha2,+sha3,+sve,-fp-armv8,-v9.5a" } +// CHECK: attributes #[[ATTR25]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+dit,+fullfp16,+neon,+sve,-fp-armv8,-v9.5a" } // CHECK: attributes #[[ATTR26]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ccpp,+rcpc,-fp-armv8,-v9.5a" } -// CHECK: attributes #[[ATTR27]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ccdp,+ccpp,+fp-armv8,+jsconv,+neon,-v9.5a" } +// CHECK: attributes #[[ATTR27]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ccdp,+ccpp,+jsconv,+neon,-fp-armv8,-v9.5a" } // CHECK: attributes #[[ATTR28]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fptoint,+rcpc,-fp-armv8,-v9.5a" } -// CHECK: attributes #[[ATTR29]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+fp-armv8,+fullfp16,+neon,+sve,-v9.5a" } -// CHECK: attributes #[[ATTR30]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon,+sve,+sve2,+sve2-aes,+sve2-sha3,-v9.5a" } -// CHECK: attributes #[[ATTR31]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon,+sve,+sve2,+sve2-aes,+sve2-bitperm,-v9.5a" } -// CHECK: attributes #[[ATTR32]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+mte,+neon,+sve,+sve2,+sve2-sm4,-v9.5a" } +// CHECK: attributes #[[ATTR29]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bf16,+fullfp16,+neon,+sve,-fp-armv8,-v9.5a" } +// CHECK: attributes #[[ATTR30]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+neon,+sve,+sve2,+sve2-aes,+sve2-sha3,-fp-armv8,-v9.5a" } +// CHECK: attributes #[[ATTR31]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+neon,+sve,+sve2,+sve2-aes,+sve2-bitperm,-fp-armv8,-v9.5a" } +// CHECK: attributes #[[ATTR32]] = { noinline nounwind optnone 
"no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+mte,+neon,+sve,+sve2,+sve2-sm4,-fp-armv8,-v9.5a" } // CHECK: attributes #[[ATTR33]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+mops,+mte,+rcpc,+rcpc3,-fp-armv8,-v9.5a" } -// CHECK: attributes #[[ATTR34]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon,+sm4,-v9.5a" } -// CHECK: attributes #[[ATTR35]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+lse,+neon,+rdm,-v9.5a" } +// CHECK: attributes #[[ATTR34]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+neon,+sm4,-fp-armv8,-v9.5a" } +// CHECK: attributes #[[ATTR35]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+lse,+neon,+rdm,-fp-armv8,-v9.5a" } //. 
// CHECK-NOFMV: attributes #[[ATTR0]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fmv" } // CHECK-NOFMV: attributes #[[ATTR1:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fmv" } diff --git a/clang/test/Sema/aarch64-neon-target.c b/clang/test/Sema/aarch64-neon-target.c index 642afddd88c154..fa45fff1d183d6 100644 --- a/clang/test/Sema/aarch64-neon-target.c +++ b/clang/test/Sema/aarch64-neon-target.c @@ -69,8 +69,8 @@ void undefined(uint32x2_t v2i32, uint32x4_t v4i32, uint16x8_t v8i16, uint8x16_t vrnd_f16(v4f16); // expected-error {{always_inline function 'vrnd_f16' requires target feature 'fullfp16'}} vmaxnm_f16(v4f16, v4f16); // expected-error {{always_inline function 'vmaxnm_f16' requires target feature 'fullfp16'}} vrndi_f16(v4f16); // expected-error {{always_inline function 'vrndi_f16' requires target feature 'fullfp16'}} - // fp16fml depends on fp-armv8 - vfmlal_low_f16(v2f32, v4f16, v4f16); // expected-error {{always_inline function 'vfmlal_low_f16' requires target feature 'fp-armv8'}} + // fp16fml + vfmlal_low_f16(v2f32, v4f16, v4f16); // expected-error {{always_inline function 'vfmlal_low_f16' requires target feature 'fp16fml'}} // i8mm vmmlaq_s32(v4i32, v8i16, v8i16); // expected-error {{always_inline function 'vmmlaq_s32' requires target feature 'i8mm'}} vusdot_laneq_s32(v2i32, v8i8, v8i16, 0); // expected-error {{always_inline function 'vusdot_s32' requires target feature 'i8mm'}} diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h index 9da4bf4471c9bb..df8e685eb6667b 100644 --- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h +++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h @@ -132,6 +132,48 @@ struct ExtensionInfo { #define EMIT_EXTENSIONS #include "llvm/TargetParser/AArch64TargetParserDef.inc" +struct ExtensionSet { + // Set of extensions which are currently 
enabled. + ExtensionBitset Enabled; + // Set of extensions which have been enabled or disabled at any point. Used + // to avoid cluttering the cc1 command-line with lots of unneeded features. + ExtensionBitset Touched; + // Base architecture version, which we need to know because some feature + // dependencies change depending on this. + const ArchInfo *BaseArch; + + ExtensionSet() : Enabled(), Touched(), BaseArch(nullptr) {} + + // Enable the given architecture extension, and any other extensions it + // depends on. Does not change the base architecture, or follow dependencies + // between features which are only related by required arcitecture versions. + void enable(ArchExtKind E); + + // Disable the given architecture extension, and any other extensions which + // depend on it. Does not change the base architecture, or follow + // dependencies between features which are only related by required + // arcitecture versions. + void disable(ArchExtKind E); + + // Add default extensions for the given CPU. Records the base architecture, + // to later resolve dependencies which depend on it. + void addCPUDefaults(const CpuInfo &CPU); + + // Add default extensions for the given architecture version. Records the + // base architecture, to later resolve dependencies which depend on it. + void addArchDefaults(const ArchInfo &Arch); + + // Add or remove a feature based on a modifier string. The string must be of + // the form "" to enable a feature or "no" to disable it. This + // will also enable or disable any features as required by the dependencies + // between them. + bool parseModifier(StringRef Modifier); + + // Convert the set of enabled extension to an LLVM feature list, appending + // them to Features. + void toLLVMFeatureList(std::vector &Features) const; +}; + // Represents a dependency between two architecture extensions. Later is the // feature which was added to the architecture after Earlier, and expands the // functionality provided by it. 
If Later is enabled, then Earlier will also be @@ -554,65 +596,6 @@ inline constexpr CpuInfo CpuInfos[] = { AArch64::AEK_PROFILE}))}, }; -struct ExtensionSet { - // Set of extensions which are currently enabled. - ExtensionBitset Enabled; - // Set of extensions which have been enabled or disabled at any point. Used - // to avoid cluttering the cc1 command-line with lots of unneeded features. - ExtensionBitset Touched; - // Base architecture version, which we need to know because some feature - // dependencies change depending on this. - const ArchInfo *BaseArch; - - ExtensionSet() : Enabled(), Touched(), BaseArch(nullptr) {} - - // Enable the given architecture extension, and any other extensions it - // depends on. Does not change the base architecture, or follow dependencies - // between features which are only related by required arcitecture versions. - void enable(ArchExtKind E); - - // Disable the given architecture extension, and any other extensions which - // depend on it. Does not change the base architecture, or follow - // dependencies between features which are only related by required - // arcitecture versions. - void disable(ArchExtKind E); - - // Add default extensions for the given CPU. Records the base architecture, - // to later resolve dependencies which depend on it. - void addCPUDefaults(const CpuInfo &CPU); - - // Add default extensions for the given architecture version. Records the - // base architecture, to later resolve dependencies which depend on it. - void addArchDefaults(const ArchInfo &Arch); - - // Add or remove a feature based on a modifier string. The string must be of - // the form "" to enable a feature or "no" to disable it. This - // will also enable or disable any features as required by the dependencies - // between them. 
- bool parseModifier(StringRef Modifier, const bool AllowNoDashForm = false); - - // Constructs a new ExtensionSet by toggling the corresponding bits for every - // feature in the \p Features list without expanding their dependencies. Used - // for reconstructing an ExtensionSet from the output of toLLVMFeatures(). - void reconstructFromParsedFeatures(const std::vector &Features); - - // Convert the set of enabled extension to an LLVM feature list, appending - // them to Features. - template void toLLVMFeatureList(std::vector &Features) const { - if (BaseArch && !BaseArch->ArchFeature.empty()) - Features.emplace_back(T(BaseArch->ArchFeature)); - - for (const auto &E : Extensions) { - if (E.Feature.empty() || !Touched.test(E.ID)) - continue; - if (Enabled.test(E.ID)) - Features.emplace_back(T(E.Feature)); - else - Features.emplace_back(T(E.NegFeature)); - } - } -}; - // Name alias. struct Alias { StringRef AltName; @@ -636,13 +619,7 @@ const ArchInfo *getArchForCpu(StringRef CPU); // Parser const ArchInfo *parseArch(StringRef Arch); - -// Return the extension which has the given -target-feature name. -std::optional targetFeatureToExtension(StringRef TargetFeature); - -// Parse a name as defined by the Extension class in tablegen. std::optional parseArchExtension(StringRef Extension); - // Given the name of a CPU or alias, return the correponding CpuInfo. 
std::optional parseCpu(StringRef Name); // Used by target parser tests diff --git a/llvm/lib/TargetParser/AArch64TargetParser.cpp b/llvm/lib/TargetParser/AArch64TargetParser.cpp index d1cc306790522d..ca356ec82bf1f9 100644 --- a/llvm/lib/TargetParser/AArch64TargetParser.cpp +++ b/llvm/lib/TargetParser/AArch64TargetParser.cpp @@ -122,14 +122,6 @@ AArch64::parseArchExtension(StringRef ArchExt) { return {}; } -std::optional -AArch64::targetFeatureToExtension(StringRef TargetFeature) { - for (const auto &E : Extensions) - if (TargetFeature == E.Feature) - return E; - return {}; -} - std::optional AArch64::parseCpu(StringRef Name) { // Resolve aliases first. Name = resolveCPUAlias(Name); @@ -221,6 +213,21 @@ void AArch64::ExtensionSet::disable(ArchExtKind E) { disable(Dep.Later); } +void AArch64::ExtensionSet::toLLVMFeatureList( + std::vector &Features) const { + if (BaseArch && !BaseArch->ArchFeature.empty()) + Features.push_back(BaseArch->ArchFeature); + + for (const auto &E : Extensions) { + if (E.Feature.empty() || !Touched.test(E.ID)) + continue; + if (Enabled.test(E.ID)) + Features.push_back(E.Feature); + else + Features.push_back(E.NegFeature); + } +} + void AArch64::ExtensionSet::addCPUDefaults(const CpuInfo &CPU) { LLVM_DEBUG(llvm::dbgs() << "addCPUDefaults(" << CPU.Name << ")\n"); BaseArch = &CPU.Arch; @@ -240,18 +247,11 @@ void AArch64::ExtensionSet::addArchDefaults(const ArchInfo &Arch) { enable(E.ID); } -bool AArch64::ExtensionSet::parseModifier(StringRef Modifier, - const bool AllowNoDashForm) { +bool AArch64::ExtensionSet::parseModifier(StringRef Modifier) { LLVM_DEBUG(llvm::dbgs() << "parseModifier(" << Modifier << ")\n"); - size_t NChars = 0; - // The "no-feat" form is allowed in the target attribute but nowhere else. 
- if (AllowNoDashForm && Modifier.starts_with("no-")) - NChars = 3; - else if (Modifier.starts_with("no")) - NChars = 2; - bool IsNegated = NChars != 0; - StringRef ArchExt = Modifier.drop_front(NChars); + bool IsNegated = Modifier.starts_with("no"); + StringRef ArchExt = IsNegated ? Modifier.drop_front(2) : Modifier; if (auto AE = parseArchExtension(ArchExt)) { if (AE->Feature.empty() || AE->NegFeature.empty()) @@ -265,21 +265,6 @@ bool AArch64::ExtensionSet::parseModifier(StringRef Modifier, return false; } -void AArch64::ExtensionSet::reconstructFromParsedFeatures( - const std::vector &Features) { - assert(Touched.none() && "Bitset already initialized"); - for (auto &F : Features) { - bool IsNegated = F[0] == '-'; - if (auto AE = targetFeatureToExtension(F)) { - Touched.set(AE->ID); - if (IsNegated) - Enabled.reset(AE->ID); - else - Enabled.set(AE->ID); - } - } -} - const AArch64::ExtensionInfo & AArch64::getExtensionByID(AArch64::ArchExtKind ExtID) { return lookupExtensionByID(ExtID); From cd94fa7e7595cbd0c22e898170d8ee9648b47285 Mon Sep 17 00:00:00 2001 From: Andrew Browne Date: Thu, 13 Jun 2024 11:54:40 -0700 Subject: [PATCH 009/155] [DFSan] Fix sscanf checking that ordinary characters match. 
(#95333) Fixes: #94769 --- compiler-rt/lib/dfsan/dfsan_custom.cpp | 522 ++++++++++++++----------- compiler-rt/test/dfsan/sscanf.c | 111 +++++- 2 files changed, 393 insertions(+), 240 deletions(-) diff --git a/compiler-rt/lib/dfsan/dfsan_custom.cpp b/compiler-rt/lib/dfsan/dfsan_custom.cpp index af3c1f4d1673c4..050f5232c04087 100644 --- a/compiler-rt/lib/dfsan/dfsan_custom.cpp +++ b/compiler-rt/lib/dfsan/dfsan_custom.cpp @@ -2198,50 +2198,12 @@ struct Formatter { return retval; } - int scan() { - char *tmp_fmt = build_format_string(true); - int read_count = 0; - int retval = sscanf(str + str_off, tmp_fmt, &read_count); - if (retval > 0) { - if (-1 == num_scanned) - num_scanned = 0; - num_scanned += retval; - } - free(tmp_fmt); - return read_count; - } - - template - int scan(T arg) { - char *tmp_fmt = build_format_string(true); - int read_count = 0; - int retval = sscanf(str + str_off, tmp_fmt, arg, &read_count); - if (retval > 0) { - if (-1 == num_scanned) - num_scanned = 0; - num_scanned += retval; - } - free(tmp_fmt); - return read_count; - } - - // with_n -> toggles adding %n on/off; off by default - char *build_format_string(bool with_n = false) { + char *build_format_string() { size_t fmt_size = fmt_cur - fmt_start + 1; - size_t add_size = 0; - if (with_n) - add_size = 2; - char *new_fmt = (char *)malloc(fmt_size + 1 + add_size); + char *new_fmt = (char *)malloc(fmt_size + 1); assert(new_fmt); internal_memcpy(new_fmt, fmt_start, fmt_size); - if (!with_n) { - new_fmt[fmt_size] = '\0'; - } else { - new_fmt[fmt_size] = '%'; - new_fmt[fmt_size + 1] = 'n'; - new_fmt[fmt_size + 2] = '\0'; - } - + new_fmt[fmt_size] = '\0'; return new_fmt; } @@ -2467,6 +2429,102 @@ static int format_buffer(char *str, size_t size, const char *fmt, return formatter.str_off; } +// Scans a chunk either a constant string or a single format directive (e.g., +// '%.3f'). 
+struct Scanner { + Scanner(char *str_, const char *fmt_, size_t size_) + : str(str_), + str_off(0), + size(size_), + fmt_start(fmt_), + fmt_cur(fmt_), + width(-1), + num_scanned(0), + skip(false) {} + + // Consumes a chunk of ordinary characters. + // Returns number of matching ordinary characters. + // Returns -1 if the match failed. + // In format strings, a space will match multiple spaces. + int check_match_ordinary() { + char *tmp_fmt = build_format_string_with_n(); + int read_count = -1; + sscanf(str + str_off, tmp_fmt, &read_count); + free(tmp_fmt); + if (read_count > 0) { + str_off += read_count; + } + return read_count; + } + + int scan() { + char *tmp_fmt = build_format_string_with_n(); + int read_count = 0; + int retval = sscanf(str + str_off, tmp_fmt, &read_count); + free(tmp_fmt); + if (retval > 0) { + num_scanned += retval; + } + return read_count; + } + + template + int scan(T arg) { + char *tmp_fmt = build_format_string_with_n(); + int read_count = 0; + int retval = sscanf(str + str_off, tmp_fmt, arg, &read_count); + free(tmp_fmt); + if (retval > 0) { + num_scanned += retval; + } + return read_count; + } + + // Adds %n onto current format string to measure length. + char *build_format_string_with_n() { + size_t fmt_size = fmt_cur - fmt_start + 1; + // +2 for %n, +1 for \0 + char *new_fmt = (char *)malloc(fmt_size + 2 + 1); + assert(new_fmt); + internal_memcpy(new_fmt, fmt_start, fmt_size); + new_fmt[fmt_size] = '%'; + new_fmt[fmt_size + 1] = 'n'; + new_fmt[fmt_size + 2] = '\0'; + return new_fmt; + } + + char *str_cur() { return str + str_off; } + + size_t num_written_bytes(int retval) { + if (retval < 0) { + return 0; + } + + size_t num_avail = str_off < size ? size - str_off : 0; + if (num_avail == 0) { + return 0; + } + + size_t num_written = retval; + // A return value of {v,}snprintf of size or more means that the output was + // truncated. 
+ if (num_written >= num_avail) { + num_written -= num_avail; + } + + return num_written; + } + + char *str; + size_t str_off; + size_t size; + const char *fmt_start; + const char *fmt_cur; + int width; + int num_scanned; + bool skip; +}; + // This function is an inverse of format_buffer: we take the input buffer, // scan it in search for format strings and store the results in the varargs. // The labels are propagated from the input buffer to the varargs. @@ -2474,220 +2532,222 @@ static int scan_buffer(char *str, size_t size, const char *fmt, dfsan_label *va_labels, dfsan_label *ret_label, dfsan_origin *str_origin, dfsan_origin *ret_origin, va_list ap) { - Formatter formatter(str, fmt, size); - while (*formatter.fmt_cur) { - formatter.fmt_start = formatter.fmt_cur; - formatter.width = -1; - formatter.skip = false; + Scanner scanner(str, fmt, size); + while (*scanner.fmt_cur) { + scanner.fmt_start = scanner.fmt_cur; + scanner.width = -1; + scanner.skip = false; int read_count = 0; void *dst_ptr = 0; size_t write_size = 0; - if (*formatter.fmt_cur != '%') { - // Ordinary character. Consume all the characters until a '%' or the end - // of the string. - for (; *(formatter.fmt_cur + 1) && *(formatter.fmt_cur + 1) != '%'; - ++formatter.fmt_cur) { + if (*scanner.fmt_cur != '%') { + // Ordinary character and spaces. + // Consume all the characters until a '%' or the end of the string. + for (; *(scanner.fmt_cur + 1) && *(scanner.fmt_cur + 1) != '%'; + ++scanner.fmt_cur) { + } + if (scanner.check_match_ordinary() < 0) { + // The ordinary characters did not match. + break; } - read_count = formatter.scan(); - dfsan_set_label(0, formatter.str_cur(), - formatter.num_written_bytes(read_count)); } else { // Conversion directive. Consume all the characters until a conversion // specifier or the end of the string. 
bool end_fmt = false; - for (; *formatter.fmt_cur && !end_fmt;) { - switch (*++formatter.fmt_cur) { - case 'd': - case 'i': - case 'o': - case 'u': - case 'x': - case 'X': - if (formatter.skip) { - read_count = formatter.scan(); - } else { - switch (*(formatter.fmt_cur - 1)) { - case 'h': - // Also covers the 'hh' case (since the size of the arg is still - // an int). - dst_ptr = va_arg(ap, int *); - read_count = formatter.scan((int *)dst_ptr); - write_size = sizeof(int); - break; - case 'l': - if (formatter.fmt_cur - formatter.fmt_start >= 2 && - *(formatter.fmt_cur - 2) == 'l') { - dst_ptr = va_arg(ap, long long int *); - read_count = formatter.scan((long long int *)dst_ptr); - write_size = sizeof(long long int); - } else { - dst_ptr = va_arg(ap, long int *); - read_count = formatter.scan((long int *)dst_ptr); - write_size = sizeof(long int); + for (; *scanner.fmt_cur && !end_fmt;) { + switch (*++scanner.fmt_cur) { + case 'd': + case 'i': + case 'o': + case 'u': + case 'x': + case 'X': + if (scanner.skip) { + read_count = scanner.scan(); + } else { + switch (*(scanner.fmt_cur - 1)) { + case 'h': + // Also covers the 'hh' case (since the size of the arg is + // still an int). 
+ dst_ptr = va_arg(ap, int *); + read_count = scanner.scan((int *)dst_ptr); + write_size = sizeof(int); + break; + case 'l': + if (scanner.fmt_cur - scanner.fmt_start >= 2 && + *(scanner.fmt_cur - 2) == 'l') { + dst_ptr = va_arg(ap, long long int *); + read_count = scanner.scan((long long int *)dst_ptr); + write_size = sizeof(long long int); + } else { + dst_ptr = va_arg(ap, long int *); + read_count = scanner.scan((long int *)dst_ptr); + write_size = sizeof(long int); + } + break; + case 'q': + dst_ptr = va_arg(ap, long long int *); + read_count = scanner.scan((long long int *)dst_ptr); + write_size = sizeof(long long int); + break; + case 'j': + dst_ptr = va_arg(ap, intmax_t *); + read_count = scanner.scan((intmax_t *)dst_ptr); + write_size = sizeof(intmax_t); + break; + case 'z': + case 't': + dst_ptr = va_arg(ap, size_t *); + read_count = scanner.scan((size_t *)dst_ptr); + write_size = sizeof(size_t); + break; + default: + dst_ptr = va_arg(ap, int *); + read_count = scanner.scan((int *)dst_ptr); + write_size = sizeof(int); + } + // get the label associated with the string at the corresponding + // place + dfsan_label l = dfsan_read_label( + scanner.str_cur(), scanner.num_written_bytes(read_count)); + dfsan_set_label(l, dst_ptr, write_size); + if (str_origin != nullptr) { + dfsan_set_label(l, dst_ptr, write_size); + size_t scan_count = scanner.num_written_bytes(read_count); + size_t size = scan_count > write_size ? 
write_size : scan_count; + dfsan_mem_origin_transfer(dst_ptr, scanner.str_cur(), size); } - break; - case 'q': - dst_ptr = va_arg(ap, long long int *); - read_count = formatter.scan((long long int *)dst_ptr); - write_size = sizeof(long long int); - break; - case 'j': - dst_ptr = va_arg(ap, intmax_t *); - read_count = formatter.scan((intmax_t *)dst_ptr); - write_size = sizeof(intmax_t); - break; - case 'z': - case 't': - dst_ptr = va_arg(ap, size_t *); - read_count = formatter.scan((size_t *)dst_ptr); - write_size = sizeof(size_t); - break; - default: - dst_ptr = va_arg(ap, int *); - read_count = formatter.scan((int *)dst_ptr); - write_size = sizeof(int); - } - // get the label associated with the string at the corresponding - // place - dfsan_label l = dfsan_read_label( - formatter.str_cur(), formatter.num_written_bytes(read_count)); - dfsan_set_label(l, dst_ptr, write_size); - if (str_origin != nullptr) { - dfsan_set_label(l, dst_ptr, write_size); - size_t scan_count = formatter.num_written_bytes(read_count); - size_t size = scan_count > write_size ? 
write_size : scan_count; - dfsan_mem_origin_transfer(dst_ptr, formatter.str_cur(), size); } - } - end_fmt = true; + end_fmt = true; - break; + break; - case 'a': - case 'A': - case 'e': - case 'E': - case 'f': - case 'F': - case 'g': - case 'G': - if (formatter.skip) { - read_count = formatter.scan(); - } else { - if (*(formatter.fmt_cur - 1) == 'L') { - dst_ptr = va_arg(ap, long double *); - read_count = formatter.scan((long double *)dst_ptr); - write_size = sizeof(long double); - } else if (*(formatter.fmt_cur - 1) == 'l') { - dst_ptr = va_arg(ap, double *); - read_count = formatter.scan((double *)dst_ptr); - write_size = sizeof(double); + case 'a': + case 'A': + case 'e': + case 'E': + case 'f': + case 'F': + case 'g': + case 'G': + if (scanner.skip) { + read_count = scanner.scan(); } else { - dst_ptr = va_arg(ap, float *); - read_count = formatter.scan((float *)dst_ptr); - write_size = sizeof(float); - } - dfsan_label l = dfsan_read_label( - formatter.str_cur(), formatter.num_written_bytes(read_count)); - dfsan_set_label(l, dst_ptr, write_size); - if (str_origin != nullptr) { - dfsan_set_label(l, dst_ptr, write_size); - size_t scan_count = formatter.num_written_bytes(read_count); - size_t size = scan_count > write_size ? 
write_size : scan_count; - dfsan_mem_origin_transfer(dst_ptr, formatter.str_cur(), size); + if (*(scanner.fmt_cur - 1) == 'L') { + dst_ptr = va_arg(ap, long double *); + read_count = scanner.scan((long double *)dst_ptr); + write_size = sizeof(long double); + } else if (*(scanner.fmt_cur - 1) == 'l') { + dst_ptr = va_arg(ap, double *); + read_count = scanner.scan((double *)dst_ptr); + write_size = sizeof(double); + } else { + dst_ptr = va_arg(ap, float *); + read_count = scanner.scan((float *)dst_ptr); + write_size = sizeof(float); + } + dfsan_label l = dfsan_read_label( + scanner.str_cur(), scanner.num_written_bytes(read_count)); + dfsan_set_label(l, dst_ptr, write_size); + if (str_origin != nullptr) { + dfsan_set_label(l, dst_ptr, write_size); + size_t scan_count = scanner.num_written_bytes(read_count); + size_t size = scan_count > write_size ? write_size : scan_count; + dfsan_mem_origin_transfer(dst_ptr, scanner.str_cur(), size); + } } - } - end_fmt = true; - break; + end_fmt = true; + break; - case 'c': - if (formatter.skip) { - read_count = formatter.scan(); - } else { - dst_ptr = va_arg(ap, char *); - read_count = formatter.scan((char *)dst_ptr); - write_size = sizeof(char); - dfsan_label l = dfsan_read_label( - formatter.str_cur(), formatter.num_written_bytes(read_count)); - dfsan_set_label(l, dst_ptr, write_size); - if (str_origin != nullptr) { - size_t scan_count = formatter.num_written_bytes(read_count); - size_t size = scan_count > write_size ? 
write_size : scan_count; - dfsan_mem_origin_transfer(dst_ptr, formatter.str_cur(), size); + case 'c': + if (scanner.skip) { + read_count = scanner.scan(); + } else { + dst_ptr = va_arg(ap, char *); + read_count = scanner.scan((char *)dst_ptr); + write_size = sizeof(char); + dfsan_label l = dfsan_read_label( + scanner.str_cur(), scanner.num_written_bytes(read_count)); + dfsan_set_label(l, dst_ptr, write_size); + if (str_origin != nullptr) { + size_t scan_count = scanner.num_written_bytes(read_count); + size_t size = scan_count > write_size ? write_size : scan_count; + dfsan_mem_origin_transfer(dst_ptr, scanner.str_cur(), size); + } } - } - end_fmt = true; - break; + end_fmt = true; + break; - case 's': { - if (formatter.skip) { - read_count = formatter.scan(); - } else { - dst_ptr = va_arg(ap, char *); - read_count = formatter.scan((char *)dst_ptr); - if (1 == read_count) { - // special case: we have parsed a single string and we need to - // update read_count with the string size - read_count = strlen((char *)dst_ptr); + case 's': { + if (scanner.skip) { + read_count = scanner.scan(); + } else { + dst_ptr = va_arg(ap, char *); + read_count = scanner.scan((char *)dst_ptr); + if (1 == read_count) { + // special case: we have parsed a single string and we need to + // update read_count with the string size + read_count = strlen((char *)dst_ptr); + } + if (str_origin) + dfsan_mem_origin_transfer( + dst_ptr, scanner.str_cur(), + scanner.num_written_bytes(read_count)); + va_labels++; + dfsan_mem_shadow_transfer(dst_ptr, scanner.str_cur(), + scanner.num_written_bytes(read_count)); } - if (str_origin) - dfsan_mem_origin_transfer(dst_ptr, formatter.str_cur(), - formatter.num_written_bytes(read_count)); - va_labels++; - dfsan_mem_shadow_transfer(dst_ptr, formatter.str_cur(), - formatter.num_written_bytes(read_count)); + end_fmt = true; + break; } - end_fmt = true; - break; - } - case 'p': - if (formatter.skip) { - read_count = formatter.scan(); - } else { - dst_ptr = 
va_arg(ap, void *); - read_count = - formatter.scan((int *)dst_ptr); // note: changing void* to int* + case 'p': + if (scanner.skip) { + read_count = scanner.scan(); + } else { + dst_ptr = va_arg(ap, void *); + read_count = + scanner.scan((int *)dst_ptr); // note: changing void* to int* // since we need to call sizeof - write_size = sizeof(int); - - dfsan_label l = dfsan_read_label( - formatter.str_cur(), formatter.num_written_bytes(read_count)); - dfsan_set_label(l, dst_ptr, write_size); - if (str_origin != nullptr) { - dfsan_set_label(l, dst_ptr, write_size); - size_t scan_count = formatter.num_written_bytes(read_count); - size_t size = scan_count > write_size ? write_size : scan_count; - dfsan_mem_origin_transfer(dst_ptr, formatter.str_cur(), size); + write_size = sizeof(int); + + dfsan_label l = dfsan_read_label( + scanner.str_cur(), scanner.num_written_bytes(read_count)); + dfsan_set_label(l, dst_ptr, write_size); + if (str_origin != nullptr) { + dfsan_set_label(l, dst_ptr, write_size); + size_t scan_count = scanner.num_written_bytes(read_count); + size_t size = scan_count > write_size ? 
write_size : scan_count; + dfsan_mem_origin_transfer(dst_ptr, scanner.str_cur(), size); + } } - } - end_fmt = true; - break; + end_fmt = true; + break; - case 'n': { - if (!formatter.skip) { - int *ptr = va_arg(ap, int *); - *ptr = (int)formatter.str_off; - *va_labels++ = 0; - dfsan_set_label(0, ptr, sizeof(*ptr)); - if (str_origin != nullptr) - *str_origin++ = 0; + case 'n': { + if (!scanner.skip) { + int *ptr = va_arg(ap, int *); + *ptr = (int)scanner.str_off; + *va_labels++ = 0; + dfsan_set_label(0, ptr, sizeof(*ptr)); + if (str_origin != nullptr) + *str_origin++ = 0; + } + end_fmt = true; + break; } - end_fmt = true; - break; - } - case '%': - read_count = formatter.scan(); - end_fmt = true; - break; + case '%': + read_count = scanner.scan(); + end_fmt = true; + break; - case '*': - formatter.skip = true; - break; + case '*': + scanner.skip = true; + break; - default: - break; + default: + break; } } } @@ -2697,8 +2757,8 @@ static int scan_buffer(char *str, size_t size, const char *fmt, return read_count; } - formatter.fmt_cur++; - formatter.str_off += read_count; + scanner.fmt_cur++; + scanner.str_off += read_count; } (void)va_labels; // Silence unused-but-set-parameter warning @@ -2707,7 +2767,7 @@ static int scan_buffer(char *str, size_t size, const char *fmt, *ret_origin = 0; // Number of items scanned in total. 
- return formatter.num_scanned; + return scanner.num_scanned; } extern "C" { diff --git a/compiler-rt/test/dfsan/sscanf.c b/compiler-rt/test/dfsan/sscanf.c index dbc2de4ba96c1d..88325642ef5e39 100644 --- a/compiler-rt/test/dfsan/sscanf.c +++ b/compiler-rt/test/dfsan/sscanf.c @@ -1,18 +1,111 @@ // RUN: %clang_dfsan %s -o %t && %run %t -// XFAIL: * #include #include int main(int argc, char *argv[]) { - char buf[256] = "10000000000-100000000000 rw-p 00000000 00:00 0"; - long rss = 0; - // This test exposes a bug in DFSan's sscanf, that leads to flakiness - // in release_shadow_space.c (see - // https://github.com/llvm/llvm-project/issues/91287) - if (sscanf(buf, "Garbage text before, %ld, Garbage text after", &rss) == 1) { - printf("Error: matched %ld\n", rss); - return 1; + { + char buf[256] = "10000000000-100000000000 rw-p 00000000 00:00 0"; + long rss = 0; + // This test exposes a bug in DFSan's sscanf, that leads to flakiness + // in release_shadow_space.c (see + // https://github.com/llvm/llvm-project/issues/91287) + int r = sscanf(buf, "Garbage text before, %ld, Garbage text after", &rss); + assert(r == 0); + } + + // Testing other variations of sscanf behavior. 
+ { + int a = 0; + int b = 0; + int r = sscanf("abc42 cat 99", "abc%d cat %d", &a, &b); + assert(a == 42); + assert(b == 99); + assert(r == 2); + } + + { + int a = 0; + int b = 0; + int r = sscanf("abc42 cat 99", "abc%d dog %d", &a, &b); + assert(a == 42); + assert(r == 1); + } + + { + int a = 0; + int b = 0; + int r = sscanf("abx42 dog 99", "abc%d dog %d", &a, &b); + assert(r == 0); + } + + { + int r = sscanf("abx", "abc"); + assert(r == 0); + } + + { + int r = sscanf("abc", "abc"); + assert(r == 0); + } + + { + int n = 0; + int r = sscanf("abc", "abc%n", &n); + assert(n == 3); + assert(r == 0); + } + + { + int n = 1234; + int r = sscanf("abxy", "abcd%n", &n); + assert(n == 1234); + assert(r == 0); + } + + { + int a = 0; + int n = 1234; + int r = sscanf("abcd99", "abcd%d%n", &a, &n); + assert(a == 99); + assert(n == 6); + assert(r == 1); + } + + { + int n = 1234; + int r = sscanf("abcdsuffix", "abcd%n", &n); + assert(n == 4); + assert(r == 0); + } + + { + int n = 1234; + int r = sscanf("abxxsuffix", "abcd%n", &n); + assert(n == 1234); + assert(r == 0); + } + + { + int a = 0; + int b = 0; + int n = 1234; + int r = sscanf("abcd99 xy100", "abcd%d xy%d%n", &a, &b, &n); + assert(a == 99); + assert(b == 100); + assert(n == 12); + assert(r == 2); + } + + { + int a = 0; + int b = 0; + int n = 1234; + int r = sscanf("abcd99 xy100", "abcd%d zz%d%n", &a, &b, &n); + assert(a == 99); + assert(b == 0); + assert(n == 1234); + assert(r == 1); } return 0; From d4a0154902fb9b0611ed857134b26a64a1d5ad1e Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Thu, 13 Jun 2024 20:20:27 +0100 Subject: [PATCH 010/155] [llvm-project] Fix typo "seperate" (#95373) --- clang-tools-extra/clangd/TidyProvider.cpp | 10 ++++---- .../include/clang/Frontend/FrontendOptions.h | 2 +- .../include/clang/InstallAPI/DylibVerifier.h | 2 +- clang/lib/InstallAPI/Visitor.cpp | 2 +- clang/lib/Serialization/ASTWriterStmt.cpp | 2 +- compiler-rt/test/dfsan/custom.cpp | 2 +- .../Linux/ppc64/trivial-tls-pwr10.test | 2 +- 
.../FlangOmpReport/yaml_summarizer.py | 2 +- flang/lib/Semantics/check-omp-structure.cpp | 10 ++++---- flang/test/Driver/mllvm_vs_mmlir.f90 | 2 +- libc/src/__support/FPUtil/x86_64/FEnvImpl.h | 2 +- .../stdio/printf_core/float_hex_converter.h | 10 ++++---- .../str_to_float_comparison_test.cpp | 2 +- lld/test/wasm/data-segments.ll | 2 +- .../lldb/Expression/DWARFExpressionList.h | 2 +- lldb/include/lldb/Target/MemoryTagManager.h | 2 +- .../NativeRegisterContextLinux_arm64.cpp | 2 +- lldb/test/API/CMakeLists.txt | 2 +- .../TestGdbRemoteMemoryTagging.py | 2 +- .../DW_AT_data_bit_offset-DW_OP_stack_value.s | 2 +- llvm/include/llvm/CodeGen/LiveRegUnits.h | 2 +- llvm/include/llvm/CodeGen/MIRFormatter.h | 2 +- llvm/include/llvm/MC/MCAsmInfo.h | 2 +- llvm/include/llvm/Support/raw_socket_stream.h | 2 +- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h | 2 +- .../CodeGen/AssignmentTrackingAnalysis.cpp | 6 ++--- .../SelectionDAG/SelectionDAGBuilder.cpp | 4 ++-- llvm/lib/FileCheck/FileCheck.cpp | 2 +- llvm/lib/IR/DebugInfo.cpp | 2 +- llvm/lib/MC/MCPseudoProbe.cpp | 2 +- llvm/lib/Support/VirtualFileSystem.cpp | 2 +- llvm/lib/Support/raw_socket_stream.cpp | 2 +- llvm/lib/Target/ARM/ARMISelLowering.cpp | 2 +- .../Target/RISCV/RISCVMachineFunctionInfo.h | 2 +- llvm/lib/TargetParser/RISCVISAInfo.cpp | 2 +- llvm/lib/TextAPI/Utils.cpp | 2 +- llvm/lib/Transforms/IPO/Attributor.cpp | 4 ++-- .../lib/Transforms/IPO/SampleProfileProbe.cpp | 2 +- .../Scalar/RewriteStatepointsForGC.cpp | 2 +- .../Transforms/Utils/LoopUnrollRuntime.cpp | 2 +- llvm/test/CodeGen/X86/AMX/amx-greedy-ra.ll | 2 +- llvm/test/CodeGen/X86/apx/shift-eflags.ll | 24 +++++++++---------- .../X86/merge-consecutive-stores-nt.ll | 4 ++-- llvm/test/CodeGen/X86/shift-eflags.ll | 24 +++++++++---------- .../InstSimplify/constant-fold-fp-denormal.ll | 2 +- .../LoopVectorize/LoongArch/defaults.ll | 2 +- .../LoopVectorize/RISCV/defaults.ll | 2 +- .../split-gep-or-as-add.ll | 2 +- llvm/test/Verifier/alloc-size-failedparse.ll | 2 +- 
llvm/test/tools/llvm-ar/windows-path.test | 2 +- .../ELF/mirror-permissions-win.test | 2 +- llvm/tools/llvm-cov/CodeCoverage.cpp | 2 +- llvm/tools/llvm-profgen/PerfReader.cpp | 2 +- llvm/unittests/Support/Path.cpp | 4 ++-- .../Analysis/Presburger/IntegerRelation.h | 2 +- .../Analysis/Presburger/PresburgerSpace.h | 2 +- .../mlir/Dialect/OpenMP/OpenMPInterfaces.h | 2 +- .../Analysis/Presburger/PresburgerSpace.cpp | 2 +- .../lib/Conversion/GPUCommon/GPUOpsLowering.h | 2 +- .../LLVMIR/IR/BasicPtxBuilderInterface.cpp | 2 +- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 6 ++--- .../CPU/sparse_reduce_custom_prod.mlir | 2 +- .../omptarget-constant-alloca-raise.mlir | 2 +- openmp/tools/Modules/FindOpenMPTarget.cmake | 2 +- 64 files changed, 106 insertions(+), 106 deletions(-) diff --git a/clang-tools-extra/clangd/TidyProvider.cpp b/clang-tools-extra/clangd/TidyProvider.cpp index a4121df30d3dfa..a87238e0c0938c 100644 --- a/clang-tools-extra/clangd/TidyProvider.cpp +++ b/clang-tools-extra/clangd/TidyProvider.cpp @@ -195,10 +195,10 @@ TidyProvider addTidyChecks(llvm::StringRef Checks, } TidyProvider disableUnusableChecks(llvm::ArrayRef ExtraBadChecks) { - constexpr llvm::StringLiteral Seperator(","); + constexpr llvm::StringLiteral Separator(","); static const std::string BadChecks = llvm::join_items( - Seperator, - // We want this list to start with a seperator to + Separator, + // We want this list to start with a separator to // simplify appending in the lambda. So including an // empty string here will force that. 
"", @@ -227,7 +227,7 @@ TidyProvider disableUnusableChecks(llvm::ArrayRef ExtraBadChecks) { for (const std::string &Str : ExtraBadChecks) { if (Str.empty()) continue; - Size += Seperator.size(); + Size += Separator.size(); if (LLVM_LIKELY(Str.front() != '-')) ++Size; Size += Str.size(); @@ -238,7 +238,7 @@ TidyProvider disableUnusableChecks(llvm::ArrayRef ExtraBadChecks) { for (const std::string &Str : ExtraBadChecks) { if (Str.empty()) continue; - DisableGlob += Seperator; + DisableGlob += Separator; if (LLVM_LIKELY(Str.front() != '-')) DisableGlob.push_back('-'); DisableGlob += Str; diff --git a/clang/include/clang/Frontend/FrontendOptions.h b/clang/include/clang/Frontend/FrontendOptions.h index bd4981ca0ac08c..ebb8e9e59c6b64 100644 --- a/clang/include/clang/Frontend/FrontendOptions.h +++ b/clang/include/clang/Frontend/FrontendOptions.h @@ -520,7 +520,7 @@ class FrontendOptions { std::string ProductName; // Currently this is only used as part of the `-extract-api` action. - // A comma seperated list of files providing a list of APIs to + // A comma separated list of files providing a list of APIs to // ignore when extracting documentation. std::vector ExtractAPIIgnoresFileList; diff --git a/clang/include/clang/InstallAPI/DylibVerifier.h b/clang/include/clang/InstallAPI/DylibVerifier.h index ae0428abbb9c71..333f0cff077fda 100644 --- a/clang/include/clang/InstallAPI/DylibVerifier.h +++ b/clang/include/clang/InstallAPI/DylibVerifier.h @@ -135,7 +135,7 @@ class DylibVerifier : llvm::MachO::RecordVisitor { // Check if an internal declaration in zippered library has an // external declaration for a different platform. This results - // in the symbol being in a "seperate" platform slice. + // in the symbol being in a "separate" platform slice. 
bool shouldIgnoreInternalZipperedSymbol(const Record *R, const SymbolContext &SymCtx) const; diff --git a/clang/lib/InstallAPI/Visitor.cpp b/clang/lib/InstallAPI/Visitor.cpp index cf3aaa4c6ec931..367ae53b208b63 100644 --- a/clang/lib/InstallAPI/Visitor.cpp +++ b/clang/lib/InstallAPI/Visitor.cpp @@ -218,7 +218,7 @@ bool InstallAPIVisitor::VisitVarDecl(const VarDecl *D) { if (isa(D)) return true; - // Skip variables in records. They are handled seperately for C++. + // Skip variables in records. They are handled separately for C++. if (D->getDeclContext()->isRecord()) return true; diff --git a/clang/lib/Serialization/ASTWriterStmt.cpp b/clang/lib/Serialization/ASTWriterStmt.cpp index 1a98e30e0f89fa..1ba6d5501fd102 100644 --- a/clang/lib/Serialization/ASTWriterStmt.cpp +++ b/clang/lib/Serialization/ASTWriterStmt.cpp @@ -37,7 +37,7 @@ namespace clang { unsigned AbbrevToUse; /// A helper that can help us to write a packed bit across function - /// calls. For example, we may write seperate bits in seperate functions: + /// calls. For example, we may write separate bits in separate functions: /// /// void VisitA(A* a) { /// Record.push_back(a->isSomething()); diff --git a/compiler-rt/test/dfsan/custom.cpp b/compiler-rt/test/dfsan/custom.cpp index cede0d64dbcf2f..54bb17cb4a0354 100644 --- a/compiler-rt/test/dfsan/custom.cpp +++ b/compiler-rt/test/dfsan/custom.cpp @@ -2280,7 +2280,7 @@ void test_sscanf() { // %n, %s, %d, %f, and %% already tested } -// Tested by a seperate source file. This empty function is here to appease the +// Tested by a separate source file. This empty function is here to appease the // check-wrappers script. 
void test_fork() {} diff --git a/compiler-rt/test/orc/TestCases/Linux/ppc64/trivial-tls-pwr10.test b/compiler-rt/test/orc/TestCases/Linux/ppc64/trivial-tls-pwr10.test index 93561b1645c332..a393a009d2f5a6 100644 --- a/compiler-rt/test/orc/TestCases/Linux/ppc64/trivial-tls-pwr10.test +++ b/compiler-rt/test/orc/TestCases/Linux/ppc64/trivial-tls-pwr10.test @@ -2,7 +2,7 @@ // RUN: %clangxx -fPIC -c -o %t/main.o %S/Inputs/trivial-tls-main.cpp // RUN: %clangxx -fPIC -c -o %t/pwr10.o %S/Inputs/trivial-tls-pwr10.cpp // RUN: %llvm_jitlink %t/main.o %t/pwr10.o -// FIXME: We seperate pwr10 code from main object file due to currrent +// FIXME: We separate pwr10 code from main object file due to current // implementation only supports one PLT stub for the same symbol. // For example, `bl __tls_get_addr` in one object file has only one PLT stub, // however we need another different PLT stub for `bl __tls_get_addr@notoc` diff --git a/flang/examples/FlangOmpReport/yaml_summarizer.py b/flang/examples/FlangOmpReport/yaml_summarizer.py index 5726ff8da77f5d..1e522c00aab8d6 100644 --- a/flang/examples/FlangOmpReport/yaml_summarizer.py +++ b/flang/examples/FlangOmpReport/yaml_summarizer.py @@ -21,7 +21,7 @@ Parameters: -d --directory Specify which directory to scan. Multiple directories can be searched by - providing a semicolon seperated list of directories. + providing a semicolon separated list of directories. -l --log Combine all yaml files into one log (instead of generating a summary) diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 2d3ccd1c0c195f..e5baddf5994027 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -2781,7 +2781,7 @@ void OmpStructureChecker::CheckIsLoopIvPartOfClause( } } } -// Following clauses have a seperate node in parse-tree.h. +// Following clauses have a separate node in parse-tree.h.
// Atomic-clause CHECK_SIMPLE_PARSER_CLAUSE(OmpAtomicRead, OMPC_read) CHECK_SIMPLE_PARSER_CLAUSE(OmpAtomicWrite, OMPC_write) @@ -2887,18 +2887,18 @@ void OmpStructureChecker::CheckAllowedMapTypes( const parser::OmpMapType::Type &type, const std::list &allowedMapTypeList) { if (!llvm::is_contained(allowedMapTypeList, type)) { - std::string commaSeperatedMapTypes; + std::string commaSeparatedMapTypes; llvm::interleave( allowedMapTypeList.begin(), allowedMapTypeList.end(), [&](const parser::OmpMapType::Type &mapType) { - commaSeperatedMapTypes.append(parser::ToUpperCaseLetters( + commaSeparatedMapTypes.append(parser::ToUpperCaseLetters( parser::OmpMapType::EnumToString(mapType))); }, - [&] { commaSeperatedMapTypes.append(", "); }); + [&] { commaSeparatedMapTypes.append(", "); }); context_.Say(GetContext().clauseSource, "Only the %s map types are permitted " "for MAP clauses on the %s directive"_err_en_US, - commaSeperatedMapTypes, ContextDirectiveAsFortran()); + commaSeparatedMapTypes, ContextDirectiveAsFortran()); } } diff --git a/flang/test/Driver/mllvm_vs_mmlir.f90 b/flang/test/Driver/mllvm_vs_mmlir.f90 index 99c3418dc6a1f9..074843fe993686 100644 --- a/flang/test/Driver/mllvm_vs_mmlir.f90 +++ b/flang/test/Driver/mllvm_vs_mmlir.f90 @@ -1,6 +1,6 @@ ! Verify that `-mllvm` options are forwarded to LLVM and `-mmlir` to MLIR. -! In practice, '-mmlir --help' is a super-set of '-mllvm --help' and that limits what we can test here. With a better seperation of +! In practice, '-mmlir --help' is a super-set of '-mllvm --help' and that limits what we can test here. With a better separation of ! LLVM, MLIR and Flang global options, we should be able to write a stricter test. ! 
RUN: %flang_fc1 -mmlir --help | FileCheck %s --check-prefix=MLIR diff --git a/libc/src/__support/FPUtil/x86_64/FEnvImpl.h b/libc/src/__support/FPUtil/x86_64/FEnvImpl.h index a157b81aaaf325..2aa69565efc58e 100644 --- a/libc/src/__support/FPUtil/x86_64/FEnvImpl.h +++ b/libc/src/__support/FPUtil/x86_64/FEnvImpl.h @@ -248,7 +248,7 @@ LIBC_INLINE int raise_except(int excepts) { // of the "Intel 64 and IA-32 Architectures Software Developer's // Manual, Vol 1". - // FPU status word is read for each exception seperately as the + // FPU status word is read for each exception separately as the // exception handler can potentially write to it (typically to clear // the corresponding exception flag). By reading it separately, we // ensure that the writes by the exception handler are maintained diff --git a/libc/src/stdio/printf_core/float_hex_converter.h b/libc/src/stdio/printf_core/float_hex_converter.h index 68a4ba6209d091..8fac36df3d1e2e 100644 --- a/libc/src/stdio/printf_core/float_hex_converter.h +++ b/libc/src/stdio/printf_core/float_hex_converter.h @@ -199,13 +199,13 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer, constexpr cpp::string_view HEXADECIMAL_POINT("."); // This is for the letter 'p' before the exponent. - const char exp_seperator = a + ('p' - 'a'); - constexpr int EXP_SEPERATOR_LEN = 1; + const char exp_separator = a + ('p' - 'a'); + constexpr int EXP_SEPARATOR_LEN = 1; padding = static_cast(to_conv.min_width - (sign_char > 0 ? 
1 : 0) - PREFIX_LEN - mant_digits - trailing_zeroes - static_cast(has_hexadecimal_point) - - EXP_SEPERATOR_LEN - (EXP_LEN - exp_cur)); + EXP_SEPARATOR_LEN - (EXP_LEN - exp_cur)); if (padding < 0) padding = 0; @@ -223,7 +223,7 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer, RET_IF_RESULT_NEGATIVE(writer->write({mant_buffer + 1, mant_digits - 1})); if (trailing_zeroes > 0) RET_IF_RESULT_NEGATIVE(writer->write('0', trailing_zeroes)); - RET_IF_RESULT_NEGATIVE(writer->write(exp_seperator)); + RET_IF_RESULT_NEGATIVE(writer->write(exp_separator)); RET_IF_RESULT_NEGATIVE( writer->write({exp_buffer + exp_cur, EXP_LEN - exp_cur})); if (padding > 0) @@ -247,7 +247,7 @@ LIBC_INLINE int convert_float_hex_exp(Writer *writer, RET_IF_RESULT_NEGATIVE(writer->write({mant_buffer + 1, mant_digits - 1})); if (trailing_zeroes > 0) RET_IF_RESULT_NEGATIVE(writer->write('0', trailing_zeroes)); - RET_IF_RESULT_NEGATIVE(writer->write(exp_seperator)); + RET_IF_RESULT_NEGATIVE(writer->write(exp_separator)); RET_IF_RESULT_NEGATIVE( writer->write({exp_buffer + exp_cur, EXP_LEN - exp_cur})); } diff --git a/libc/test/src/__support/str_to_float_comparison_test.cpp b/libc/test/src/__support/str_to_float_comparison_test.cpp index 19f3f869f3c4cd..7641c594c3d3b8 100644 --- a/libc/test/src/__support/str_to_float_comparison_test.cpp +++ b/libc/test/src/__support/str_to_float_comparison_test.cpp @@ -143,7 +143,7 @@ int main(int argc, char *argv[]) { int fails = 0; // Bitdiffs are cases where the expected result and actual result only differ - // by +/- the least significant bit. They are tracked seperately from larger + // by +/- the least significant bit. They are tracked separately from larger // failures since a bitdiff is most likely the result of a rounding error, and // splitting them off makes them easier to track down. 
int bitdiffs = 0; diff --git a/lld/test/wasm/data-segments.ll b/lld/test/wasm/data-segments.ll index 7ff949794d8e93..9354e6c8e4d2b0 100644 --- a/lld/test/wasm/data-segments.ll +++ b/lld/test/wasm/data-segments.ll @@ -161,7 +161,7 @@ ; DIS-EMPTY: ; DIS-NEXT: end -; In PIC mode __wasm_apply_data_relocs is export seperatly to __wasm_call_ctors +; In PIC mode __wasm_apply_data_relocs is exported separately to __wasm_call_ctors ; PIC-DIS: <__wasm_apply_data_relocs>: ; PIC-DIS-EMPTY: diff --git a/lldb/include/lldb/Expression/DWARFExpressionList.h b/lldb/include/lldb/Expression/DWARFExpressionList.h index f711a1cbe9bbd8..664c2603770f62 100644 --- a/lldb/include/lldb/Expression/DWARFExpressionList.h +++ b/lldb/include/lldb/Expression/DWARFExpressionList.h @@ -92,7 +92,7 @@ class DWARFExpressionList { lldb::addr_t func_load_addr, lldb::addr_t file_addr, ABI *abi) const; - /// Dump all locaitons with each seperated by new line. + /// Dump all locations with each separated by new line. void GetDescription(Stream *s, lldb::DescriptionLevel level, ABI *abi) const; /// Search for a load address in the dwarf location list diff --git a/lldb/include/lldb/Target/MemoryTagManager.h b/lldb/include/lldb/Target/MemoryTagManager.h index b082224c38edbc..6bd4180fff703f 100644 --- a/lldb/include/lldb/Target/MemoryTagManager.h +++ b/lldb/include/lldb/Target/MemoryTagManager.h @@ -103,7 +103,7 @@ class MemoryTagManager { // transport. virtual size_t GetTagSizeInBytes() const = 0; - // Unpack tags from their stored format (e.g. gdb qMemTags data) into seperate + // Unpack tags from their stored format (e.g. gdb qMemTags data) into separate // tags.
// // Checks that each tag is within the expected value range and if granules is diff --git a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp index 9b5f7aef1efe53..5397e90022f397 100644 --- a/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp +++ b/lldb/source/Plugins/Process/Linux/NativeRegisterContextLinux_arm64.cpp @@ -388,7 +388,7 @@ NativeRegisterContextLinux_arm64::ReadRegister(const RegisterInfo *reg_info, return error; } - // ZA is part of the SME set but uses a seperate member buffer for + // ZA is part of the SME set but uses a separate member buffer for // storage. Therefore its effective byte offset is always 0 even if it // isn't 0 within the SME register set. src = (uint8_t *)GetZABuffer() + GetZAHeaderSize(); diff --git a/lldb/test/API/CMakeLists.txt b/lldb/test/API/CMakeLists.txt index 856beb894208c0..27f285230cafaf 100644 --- a/lldb/test/API/CMakeLists.txt +++ b/lldb/test/API/CMakeLists.txt @@ -36,7 +36,7 @@ set(LLDB_TEST_ARCH # Users can override LLDB_TEST_USER_ARGS to specify arbitrary arguments to pass to the script set(LLDB_TEST_USER_ARGS "" - CACHE STRING "Specify additional arguments to pass to test runner. Seperate \ + CACHE STRING "Specify additional arguments to pass to test runner. Separate \ items with \";\". 
For example: '-C;gcc;-C;clang;-A;i386;-A;x86_64'") set(LLDB_TEST_COMMON_ARGS_VAR diff --git a/lldb/test/API/tools/lldb-server/memory-tagging/TestGdbRemoteMemoryTagging.py b/lldb/test/API/tools/lldb-server/memory-tagging/TestGdbRemoteMemoryTagging.py index 6ddd264057c3c3..87507583724e6b 100644 --- a/lldb/test/API/tools/lldb-server/memory-tagging/TestGdbRemoteMemoryTagging.py +++ b/lldb/test/API/tools/lldb-server/memory-tagging/TestGdbRemoteMemoryTagging.py @@ -170,7 +170,7 @@ def test_tag_write_QMemTags_packets(self): self.check_tag_write("{:x},20".format(buf_address), "E03") # Missing data self.check_tag_write("{:x},20:1".format(buf_address), "E03") - # Zero length write must still include seperator after type + # Zero length write must still include separator after type self.check_tag_write("{:x},0:1".format(buf_address), "E03") # Empty address self.check_tag_write(",10:1:01", "E03") diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/DW_AT_data_bit_offset-DW_OP_stack_value.s b/lldb/test/Shell/SymbolFile/DWARF/x86/DW_AT_data_bit_offset-DW_OP_stack_value.s index 074da09bc61eed..f82dd19466e63f 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/DW_AT_data_bit_offset-DW_OP_stack_value.s +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/DW_AT_data_bit_offset-DW_OP_stack_value.s @@ -51,7 +51,7 @@ # # DW_AT_location (DW_OP_constu 0x64a40101, DW_OP_stack_value) # -# to work-around a seperate bug. +# to work-around a separate bug. .zerofill __DATA,__bss,__type_anchor,4,2 ## @_type_anchor .zerofill __DATA,__bss,_ug.0,1,2 ## @ug.0 diff --git a/llvm/include/llvm/CodeGen/LiveRegUnits.h b/llvm/include/llvm/CodeGen/LiveRegUnits.h index e96165d6b3bbf3..405a9c123d041d 100644 --- a/llvm/include/llvm/CodeGen/LiveRegUnits.h +++ b/llvm/include/llvm/CodeGen/LiveRegUnits.h @@ -43,7 +43,7 @@ class LiveRegUnits { /// For a machine instruction \p MI, adds all register units used in /// \p UsedRegUnits and defined or clobbered in \p ModifiedRegUnits. 
This is /// useful when walking over a range of instructions to track registers - /// used or defined seperately. + /// used or defined separately. static void accumulateUsedDefed(const MachineInstr &MI, LiveRegUnits &ModifiedRegUnits, LiveRegUnits &UsedRegUnits, diff --git a/llvm/include/llvm/CodeGen/MIRFormatter.h b/llvm/include/llvm/CodeGen/MIRFormatter.h index 42087f549426b7..ab2244f255f3bc 100644 --- a/llvm/include/llvm/CodeGen/MIRFormatter.h +++ b/llvm/include/llvm/CodeGen/MIRFormatter.h @@ -46,7 +46,7 @@ class MIRFormatter { } /// Implement target specific parsing of immediate mnemonics. The mnemonic is - /// dot seperated strings. + /// dot separated strings. virtual bool parseImmMnemonic(const unsigned OpCode, const unsigned OpIdx, StringRef Src, int64_t &Imm, ErrorCallbackType ErrorCallback) const { diff --git a/llvm/include/llvm/MC/MCAsmInfo.h b/llvm/include/llvm/MC/MCAsmInfo.h index f9dd6012d5e942..48056b6ad61379 100644 --- a/llvm/include/llvm/MC/MCAsmInfo.h +++ b/llvm/include/llvm/MC/MCAsmInfo.h @@ -393,7 +393,7 @@ class MCAsmInfo { /// for ELF targets. Defaults to true. bool HasSingleParameterDotFile = true; - /// True if the target has a four strings .file directive, strings seperated + /// True if the target has a four strings .file directive, strings separated /// by comma. Defaults to false. 
bool HasFourStringsDotFile = false; diff --git a/llvm/include/llvm/Support/raw_socket_stream.h b/llvm/include/llvm/Support/raw_socket_stream.h index bddd47eb75e1a9..eed865fb5af49c 100644 --- a/llvm/include/llvm/Support/raw_socket_stream.h +++ b/llvm/include/llvm/Support/raw_socket_stream.h @@ -61,7 +61,7 @@ class ListeningSocket { std::atomic FD; std::string SocketPath; // Not modified after construction - /// If a seperate thread calls ListeningSocket::shutdown, the ListeningSocket + /// If a separate thread calls ListeningSocket::shutdown, the ListeningSocket /// file descriptor (FD) could be closed while ::poll is waiting for it to be /// ready to perform a I/O operations. ::poll will continue to block even /// after FD is closed so use a self-pipe mechanism to get ::poll to return diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h index 4c03bf79d04da7..55d149e049c948 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -220,7 +220,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { // DIGlobalVariableExpression referencing the DIGlobalVariable. DenseMap CVGlobalVariableOffsets; - // Map used to seperate variables according to the lexical scope they belong + // Map used to separate variables according to the lexical scope they belong // in. This is populated by recordLocalVariable() before // collectLexicalBlocks() separates the variables between the FunctionInfo // and LexicalBlocks. 
diff --git a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp index ec5fc06d01fb11..8afd75069589e1 100644 --- a/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp +++ b/llvm/lib/CodeGen/AssignmentTrackingAnalysis.cpp @@ -2493,7 +2493,7 @@ removeRedundantDbgLocsUsingBackwardScan(const BasicBlock *BB, bool Changed = false; SmallDenseMap VariableDefinedBytes; // Scan over the entire block, not just over the instructions mapped by - // FnVarLocs, because wedges in FnVarLocs may only be seperated by debug + // FnVarLocs, because wedges in FnVarLocs may only be separated by debug // instructions. for (const Instruction &I : reverse(*BB)) { if (!isa(I)) { @@ -2593,7 +2593,7 @@ removeRedundantDbgLocsUsingForwardScan(const BasicBlock *BB, VariableMap; // Scan over the entire block, not just over the instructions mapped by - // FnVarLocs, because wedges in FnVarLocs may only be seperated by debug + // FnVarLocs, because wedges in FnVarLocs may only be separated by debug // instructions. for (const Instruction &I : *BB) { // Get the defs that come just before this instruction. @@ -2681,7 +2681,7 @@ removeUndefDbgLocsFromEntryBlock(const BasicBlock *BB, DenseMap> VariableMap; // Scan over the entire block, not just over the instructions mapped by - // FnVarLocs, because wedges in FnVarLocs may only be seperated by debug + // FnVarLocs, because wedges in FnVarLocs may only be separated by debug // instructions. for (const Instruction &I : *BB) { // Get the defs that come just before this instruction. 
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index be5e0f6ef058b0..58cf7de6fc20db 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6606,7 +6606,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, return; } case Intrinsic::dbg_assign: { - // Debug intrinsics are handled seperately in assignment tracking mode. + // Debug intrinsics are handled separately in assignment tracking mode. if (AssignmentTrackingEnabled) return; // If assignment tracking hasn't been enabled then fall through and treat @@ -6614,7 +6614,7 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, [[fallthrough]]; } case Intrinsic::dbg_value: { - // Debug intrinsics are handled seperately in assignment tracking mode. + // Debug intrinsics are handled separately in assignment tracking mode. if (AssignmentTrackingEnabled) return; const DbgValueInst &DI = cast(I); diff --git a/llvm/lib/FileCheck/FileCheck.cpp b/llvm/lib/FileCheck/FileCheck.cpp index 1719f8ef2b4367..1eb8330232321e 100644 --- a/llvm/lib/FileCheck/FileCheck.cpp +++ b/llvm/lib/FileCheck/FileCheck.cpp @@ -624,7 +624,7 @@ Expected> Pattern::parseNumericSubstitutionBlock( ExpressionFormat ExplicitFormat = ExpressionFormat(); unsigned Precision = 0; - // Parse format specifier (NOTE: ',' is also an argument seperator). + // Parse format specifier (NOTE: ',' is also an argument separator). 
size_t FormatSpecEnd = Expr.find(','); size_t FunctionStart = Expr.find('('); if (FormatSpecEnd != StringRef::npos && FormatSpecEnd < FunctionStart) { diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index 4c3f37ceaaa46a..228e17641ffc48 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -2010,7 +2010,7 @@ bool calculateFragmentIntersectImpl( // SliceSizeInBits=32, Dest=%dest, Assign=dbg.assign) // // Drawing the store (s) in memory followed by the shortened version ($), - // then the dbg.assign (d), with the fragment information on a seperate scale + // then the dbg.assign (d), with the fragment information on a separate scale // underneath: // // Memory diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp index 040f3aab881288..2a75f46c57aa87 100644 --- a/llvm/lib/MC/MCPseudoProbe.cpp +++ b/llvm/lib/MC/MCPseudoProbe.cpp @@ -146,7 +146,7 @@ void MCPseudoProbeInlineTree::emit(MCObjectStreamer *MCOS, dbgs() << "Group [\n"; MCPseudoProbeTable::DdgPrintIndent += 2; }); - assert(!isRoot() && "Root should be handled seperately"); + assert(!isRoot() && "Root should be handled separately"); // Emit probes grouped by GUID. LLVM_DEBUG({ diff --git a/llvm/lib/Support/VirtualFileSystem.cpp b/llvm/lib/Support/VirtualFileSystem.cpp index 7360901f2962c9..f9c15bf7809eb2 100644 --- a/llvm/lib/Support/VirtualFileSystem.cpp +++ b/llvm/lib/Support/VirtualFileSystem.cpp @@ -1373,7 +1373,7 @@ std::error_code RedirectingFileSystem::makeAbsolute(SmallVectorImpl &Path) llvm::sys::path::is_absolute(Path, llvm::sys::path::Style::windows_backslash)) // This covers windows absolute path with forward slash as well, as the - // forward slashes are treated as path seperation in llvm::path + // forward slashes are treated as path separation in llvm::path // regardless of what path::Style is used. 
return {}; diff --git a/llvm/lib/Support/raw_socket_stream.cpp b/llvm/lib/Support/raw_socket_stream.cpp index 549d537709bf21..4cd3d58b80198e 100644 --- a/llvm/lib/Support/raw_socket_stream.cpp +++ b/llvm/lib/Support/raw_socket_stream.cpp @@ -266,7 +266,7 @@ void ListeningSocket::shutdown() { ::close(ObservedFD); ::unlink(SocketPath.c_str()); - // Ensure ::poll returns if shutdown is called by a seperate thread + // Ensure ::poll returns if shutdown is called by a separate thread char Byte = 'A'; ssize_t written = ::write(PipeFD[1], &Byte, 1); diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index e3270471981cc7..5a617968307ddf 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -13709,7 +13709,7 @@ static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG, // t2: i64 = build_pair t1, t1:1 // t3: i64 = add t2, y // Otherwise we try to push the add up above VADDLVAx, to potentially allow - // the add to be simplified seperately. + // the add to be simplified separately. // We also need to check for sext / zext and commutitive adds. auto MakeVecReduce = [&](unsigned Opcode, unsigned OpcodeA, SDValue NA, SDValue NB) { diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h index fcc20c17c6b403..779c652b4d8fc4 100644 --- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h +++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h @@ -135,7 +135,7 @@ class RISCVMachineFunctionInfo : public MachineFunctionInfo { bool isPushable(const MachineFunction &MF) const { // We cannot use fixed locations for the callee saved spill slots if the // function uses a varargs save area. - // TODO: Use a seperate placement for vararg registers to enable Zcmp. + // TODO: Use a separate placement for vararg registers to enable Zcmp. 
return MF.getSubtarget().hasStdExtZcmp() && !MF.getTarget().Options.DisableFramePointerElim(MF) && VarArgsSaveSize == 0; diff --git a/llvm/lib/TargetParser/RISCVISAInfo.cpp b/llvm/lib/TargetParser/RISCVISAInfo.cpp index 4e0d07d39f9ba3..258238bfb212d0 100644 --- a/llvm/lib/TargetParser/RISCVISAInfo.cpp +++ b/llvm/lib/TargetParser/RISCVISAInfo.cpp @@ -757,7 +757,7 @@ RISCVISAInfo::parseArchString(StringRef Arch, bool EnableExperimentalExtension, Ext, SeenExtMap, IgnoreUnknown, EnableExperimentalExtension, ExperimentalExtensionVersionCheck)) return std::move(E); - // Multi-letter extension must be seperate following extension with + // Multi-letter extension must be separate following extension with // underscore break; } else { diff --git a/llvm/lib/TextAPI/Utils.cpp b/llvm/lib/TextAPI/Utils.cpp index 01021e3a264dd8..8a06d53942a947 100644 --- a/llvm/lib/TextAPI/Utils.cpp +++ b/llvm/lib/TextAPI/Utils.cpp @@ -215,7 +215,7 @@ llvm::MachO::parseAliasList(std::unique_ptr &Buffer) { if (L.starts_with("#")) continue; StringRef Symbol, Remain, Alias; - // Base symbol is seperated by whitespace. + // Base symbol is separated by whitespace. std::tie(Symbol, Remain) = getToken(L); // The Alias symbol ends before a comment or EOL. 
std::tie(Alias, Remain) = getToken(Remain, "#"); diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index b6866580ccd3fc..09d7ee7c9478d5 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -132,13 +132,13 @@ static cl::opt #ifndef NDEBUG static cl::list SeedAllowList("attributor-seed-allow-list", cl::Hidden, - cl::desc("Comma seperated list of attribute names that are " + cl::desc("Comma separated list of attribute names that are " "allowed to be seeded."), cl::CommaSeparated); static cl::list FunctionSeedAllowList( "attributor-function-seed-allow-list", cl::Hidden, - cl::desc("Comma seperated list of function names that are " + cl::desc("Comma separated list of function names that are " "allowed to be seeded."), cl::CommaSeparated); #endif diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp index 3daa05a76d3643..882b15c23ede58 100644 --- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp @@ -343,7 +343,7 @@ uint32_t SampleProfileProber::getCallsiteId(const Instruction *Call) const { void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) { Module *M = F.getParent(); MDBuilder MDB(F.getContext()); - // Since the GUID from probe desc and inline stack are computed seperately, we + // Since the GUID from probe desc and inline stack are computed separately, we // need to make sure their names are consistent, so here also use the name // from debug info. 
StringRef FName = F.getName(); diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 858e54c4a9bc2d..10fc4050966bdf 100644 --- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -1323,7 +1323,7 @@ static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache, IsKnownBaseMapTy &KnownBases) { StatepointLiveSetTy PotentiallyDerivedPointers = result.LiveSet; // We assume that all pointers passed to deopt are base pointers; as an - // optimization, we can use this to avoid seperately materializing the base + // optimization, we can use this to avoid separately materializing the base // pointer graph. This is only relevant since we're very conservative about // generating new conflict nodes during base pointer insertion. If we were // smarter there, this would be irrelevant. diff --git a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp index dd7150bc63ec4f..15303a17467b42 100644 --- a/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -849,7 +849,7 @@ bool llvm::UnrollRuntimeLoopRemainder( for (unsigned i = 0; i < oldNumOperands; i++){ auto *PredBB =PN.getIncomingBlock(i); if (PredBB == Latch) - // The latch exit is handled seperately, see connectX + // The latch exit is handled separately, see connectX continue; if (!L->contains(PredBB)) // Even if we had dedicated exits, the code above inserted an diff --git a/llvm/test/CodeGen/X86/AMX/amx-greedy-ra.ll b/llvm/test/CodeGen/X86/AMX/amx-greedy-ra.ll index 2ca0e3e37107f5..8f314fe2a843b2 100644 --- a/llvm/test/CodeGen/X86/AMX/amx-greedy-ra.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-greedy-ra.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+amx-int8 -mattr=+avx512f 
-verify-machineinstrs -stop-after tileconfig | FileCheck %s -; Test the tile register is allocated in a seperate pass. +; Test the tile register is allocated in a separate pass. define i16 @foo(i32 noundef %t, i16 %row, i16 %col) nounwind { ; CHECK-LABEL: name: foo diff --git a/llvm/test/CodeGen/X86/apx/shift-eflags.ll b/llvm/test/CodeGen/X86/apx/shift-eflags.ll index 932cdc189bf9f8..5da5090307e627 100644 --- a/llvm/test/CodeGen/X86/apx/shift-eflags.ll +++ b/llvm/test/CodeGen/X86/apx/shift-eflags.ll @@ -159,7 +159,7 @@ define i32 @shl_const1_self_select(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ret i32 %r } -; ashr by variable - use seperate test +; ashr by variable - use separate test define i32 @ashr_var(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ; CHECK-LABEL: ashr_var: ; CHECK: # %bb.0: @@ -176,7 +176,7 @@ define i32 @ashr_var(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ret i32 %r } -; lshr by variable - use seperate test +; lshr by variable - use separate test define i32 @lshr_var(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ; CHECK-LABEL: lshr_var: ; CHECK: # %bb.0: @@ -193,7 +193,7 @@ define i32 @lshr_var(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ret i32 %r } -; shl by variable - use seperate test +; shl by variable - use separate test define i32 @shl_var(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ; CHECK-LABEL: shl_var: ; CHECK: # %bb.0: @@ -210,7 +210,7 @@ define i32 @shl_var(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ret i32 %r } -; ashr by variable and using result - use seperate test +; ashr by variable and using result - use separate test define i32 @ashr_var_self_select(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ; CHECK-LABEL: ashr_var_self_select: ; CHECK: # %bb.0: @@ -226,7 +226,7 @@ define i32 @ashr_var_self_select(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ret i32 %r } -; lshr by variable and using result - use seperate test +; lshr by variable and using result - use separate test define i32 @lshr_var_self_select(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ; CHECK-LABEL: lshr_var_self_select: ; 
CHECK: # %bb.0: @@ -242,7 +242,7 @@ define i32 @lshr_var_self_select(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ret i32 %r } -; shl by variable and using result - use seperate test +; shl by variable and using result - use separate test define i32 @shl_var_self_select(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ; CHECK-LABEL: shl_var_self_select: ; CHECK: # %bb.0: @@ -258,7 +258,7 @@ define i32 @shl_var_self_select(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ret i32 %r } -; ashr by non-zero variable - use seperate test +; ashr by non-zero variable - use separate test define i32 @ashr_var_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ; CHECK-LABEL: ashr_var_amt_never_zero: ; CHECK: # %bb.0: @@ -275,7 +275,7 @@ define i32 @ashr_var_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ret i32 %r } -; lshr by non-zero variable - use seperate test +; lshr by non-zero variable - use separate test define i32 @lshr_var_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ; CHECK-LABEL: lshr_var_amt_never_zero: ; CHECK: # %bb.0: @@ -292,7 +292,7 @@ define i32 @lshr_var_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ret i32 %r } -; shl by non-zero variable - use seperate test +; shl by non-zero variable - use separate test define i32 @shl_var_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ; CHECK-LABEL: shl_var_amt_never_zero: ; CHECK: # %bb.0: @@ -309,7 +309,7 @@ define i32 @shl_var_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ret i32 %r } -; ashr by non-zero variable and using result - use seperate test +; ashr by non-zero variable and using result - use separate test define i32 @ashr_var_self_select_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ; CHECK-LABEL: ashr_var_self_select_amt_never_zero: ; CHECK: # %bb.0: @@ -325,7 +325,7 @@ define i32 @ashr_var_self_select_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 % ret i32 %r } -; lshr by non-zero variable and using result - use seperate test +; lshr by non-zero variable and using result - use separate test 
define i32 @lshr_var_self_select_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ; CHECK-LABEL: lshr_var_self_select_amt_never_zero: ; CHECK: # %bb.0: @@ -341,7 +341,7 @@ define i32 @lshr_var_self_select_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 % ret i32 %r } -; shl by non-zero variable and using result - use seperate test +; shl by non-zero variable and using result - use separate test define i32 @shl_var_self_select_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ; CHECK-LABEL: shl_var_self_select_amt_never_zero: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/X86/merge-consecutive-stores-nt.ll b/llvm/test/CodeGen/X86/merge-consecutive-stores-nt.ll index c08056982bd836..9c3057e4e42a41 100644 --- a/llvm/test/CodeGen/X86/merge-consecutive-stores-nt.ll +++ b/llvm/test/CodeGen/X86/merge-consecutive-stores-nt.ll @@ -158,7 +158,7 @@ define void @merge_2_v4f32_align32_mix_ntstore(ptr %a0, ptr %a1) nounwind { } ; AVX2 can't perform NT-load-ymm on 16-byte aligned memory. -; Must be kept seperate as VMOVNTDQA xmm. +; Must be kept separate as VMOVNTDQA xmm. define void @merge_2_v4f32_align16_ntload(ptr %a0, ptr %a1) nounwind { ; X86-LABEL: merge_2_v4f32_align16_ntload: ; X86: # %bb.0: @@ -211,7 +211,7 @@ define void @merge_2_v4f32_align16_ntload(ptr %a0, ptr %a1) nounwind { } ; AVX can't perform NT-store-ymm on 16-byte aligned memory. -; Must be kept seperate as VMOVNTPS xmm. +; Must be kept separate as VMOVNTPS xmm. 
define void @merge_2_v4f32_align16_ntstore(ptr %a0, ptr %a1) nounwind { ; X86-LABEL: merge_2_v4f32_align16_ntstore: ; X86: # %bb.0: diff --git a/llvm/test/CodeGen/X86/shift-eflags.ll b/llvm/test/CodeGen/X86/shift-eflags.ll index 8d4597ec21bcdd..6eddf50ce5c9d5 100644 --- a/llvm/test/CodeGen/X86/shift-eflags.ll +++ b/llvm/test/CodeGen/X86/shift-eflags.ll @@ -171,7 +171,7 @@ define i32 @shl_const1_self_select(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ret i32 %r } -; ashr by variable - use seperate test +; ashr by variable - use separate test define i32 @ashr_var(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ; CHECK-LABEL: ashr_var: ; CHECK: # %bb.0: @@ -188,7 +188,7 @@ define i32 @ashr_var(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ret i32 %r } -; lshr by variable - use seperate test +; lshr by variable - use separate test define i32 @lshr_var(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ; CHECK-LABEL: lshr_var: ; CHECK: # %bb.0: @@ -205,7 +205,7 @@ define i32 @lshr_var(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ret i32 %r } -; shl by variable - use seperate test +; shl by variable - use separate test define i32 @shl_var(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ; CHECK-LABEL: shl_var: ; CHECK: # %bb.0: @@ -222,7 +222,7 @@ define i32 @shl_var(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ret i32 %r } -; ashr by variable and using result - use seperate test +; ashr by variable and using result - use separate test define i32 @ashr_var_self_select(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ; CHECK-LABEL: ashr_var_self_select: ; CHECK: # %bb.0: @@ -239,7 +239,7 @@ define i32 @ashr_var_self_select(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ret i32 %r } -; lshr by variable and using result - use seperate test +; lshr by variable and using result - use separate test define i32 @lshr_var_self_select(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ; CHECK-LABEL: lshr_var_self_select: ; CHECK: # %bb.0: @@ -256,7 +256,7 @@ define i32 @lshr_var_self_select(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ret i32 %r } -; shl by variable and using 
result - use seperate test +; shl by variable and using result - use separate test define i32 @shl_var_self_select(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ; CHECK-LABEL: shl_var_self_select: ; CHECK: # %bb.0: @@ -273,7 +273,7 @@ define i32 @shl_var_self_select(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ret i32 %r } -; ashr by non-zero variable - use seperate test +; ashr by non-zero variable - use separate test define i32 @ashr_var_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ; CHECK-LABEL: ashr_var_amt_never_zero: ; CHECK: # %bb.0: @@ -292,7 +292,7 @@ define i32 @ashr_var_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ret i32 %r } -; lshr by non-zero variable - use seperate test +; lshr by non-zero variable - use separate test define i32 @lshr_var_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ; CHECK-LABEL: lshr_var_amt_never_zero: ; CHECK: # %bb.0: @@ -311,7 +311,7 @@ define i32 @lshr_var_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ret i32 %r } -; shl by non-zero variable - use seperate test +; shl by non-zero variable - use separate test define i32 @shl_var_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ; CHECK-LABEL: shl_var_amt_never_zero: ; CHECK: # %bb.0: @@ -330,7 +330,7 @@ define i32 @shl_var_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ret i32 %r } -; ashr by non-zero variable and using result - use seperate test +; ashr by non-zero variable and using result - use separate test define i32 @ashr_var_self_select_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ; CHECK-LABEL: ashr_var_self_select_amt_never_zero: ; CHECK: # %bb.0: @@ -349,7 +349,7 @@ define i32 @ashr_var_self_select_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 % ret i32 %r } -; lshr by non-zero variable and using result - use seperate test +; lshr by non-zero variable and using result - use separate test define i32 @lshr_var_self_select_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ; CHECK-LABEL: lshr_var_self_select_amt_never_zero: ; CHECK: # 
%bb.0: @@ -368,7 +368,7 @@ define i32 @lshr_var_self_select_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 % ret i32 %r } -; shl by non-zero variable and using result - use seperate test +; shl by non-zero variable and using result - use separate test define i32 @shl_var_self_select_amt_never_zero(i32 %a0, i32 %a1, i32 %a2, i32 %a3) { ; CHECK-LABEL: shl_var_self_select_amt_never_zero: ; CHECK: # %bb.0: diff --git a/llvm/test/Transforms/InstSimplify/constant-fold-fp-denormal.ll b/llvm/test/Transforms/InstSimplify/constant-fold-fp-denormal.ll index e3ff9416abdd10..bcd75898a4ffd9 100644 --- a/llvm/test/Transforms/InstSimplify/constant-fold-fp-denormal.ll +++ b/llvm/test/Transforms/InstSimplify/constant-fold-fp-denormal.ll @@ -2,7 +2,7 @@ ; RUN: opt -S -passes=instsimplify < %s | FileCheck %s ; Test cases for denormal handling mode when constant folding floating point -; operations. Input and output modes are checked seperately. +; operations. Input and output modes are checked separately. ; ============================================================================ ; ; fadd tests diff --git a/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll b/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll index 28c1eef84e2575..2cee1dacea37dc 100644 --- a/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll +++ b/llvm/test/Transforms/LoopVectorize/LoongArch/defaults.ll @@ -4,7 +4,7 @@ ;; This is a collection of tests whose only purpose is to show changes in the ;; default configuration. Please keep these tests minimal - if you're testing ;; functionality of some specific configuration, please place that in a -;; seperate test file with a hard coded configuration (even if that +;; separate test file with a hard coded configuration (even if that ;; configuration is the current default). 
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll b/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll index db0be4c6281575..1d1b20d16d0902 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/defaults.ll @@ -4,7 +4,7 @@ ; This is a collection of tests whose only purpose is to show changes in the ; default configuration. Please keep these tests minimal - if you're testing ; functionality of some specific configuration, please place that in a -; seperate test file with a hard coded configuration (even if that +; separate test file with a hard coded configuration (even if that ; configuration is the current default). target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-or-as-add.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-or-as-add.ll index e405bbd5347ee0..b3096820425b8e 100644 --- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-or-as-add.ll +++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep-or-as-add.ll @@ -3,7 +3,7 @@ ;; Check that or operations, either with operands with no bits in common or that ;; are disjoint are lowered into constant GEPs. Note that because this is a -;; target-independent test, the GEP seperator will lower the seperated-off constant +;; target-independent test, the GEP separator will lower the separated-off constant ;; part to ptrtoint-based arithmetic. 
define void @testOrDoesntSplit(ptr %p) { diff --git a/llvm/test/Verifier/alloc-size-failedparse.ll b/llvm/test/Verifier/alloc-size-failedparse.ll index ec492269d2ef5e..64e3b162ed9b77 100644 --- a/llvm/test/Verifier/alloc-size-failedparse.ll +++ b/llvm/test/Verifier/alloc-size-failedparse.ll @@ -1,7 +1,7 @@ ; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s ; ; We handle allocsize with identical args in the parser, rather than the -; verifier. So, a seperate test is needed. +; verifier. So, a separate test is needed. ; CHECK: 'allocsize' indices can't refer to the same parameter declare ptr @a(i32, i32) allocsize(0, 0) diff --git a/llvm/test/tools/llvm-ar/windows-path.test b/llvm/test/tools/llvm-ar/windows-path.test index e33fbf532c338c..9c5cf1319553ba 100644 --- a/llvm/test/tools/llvm-ar/windows-path.test +++ b/llvm/test/tools/llvm-ar/windows-path.test @@ -1,4 +1,4 @@ -# Test that windows path seperators are handled correctly. +# Test that windows path separators are handled correctly. REQUIRES: system-windows # Note: many of these tests depend on relative paths, so we have to cd to a diff --git a/llvm/test/tools/llvm-objcopy/ELF/mirror-permissions-win.test b/llvm/test/tools/llvm-objcopy/ELF/mirror-permissions-win.test index c35df343c0d328..f8b0d389df9b85 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/mirror-permissions-win.test +++ b/llvm/test/tools/llvm-objcopy/ELF/mirror-permissions-win.test @@ -1,7 +1,7 @@ ## Test that permissions for ouput files are mirrored ## from their input files. -## The Unix version of this test is seperated because it needs +## The Unix version of this test is separated because it needs ## to use umask(1). Windows has no umask, so it can be considered ## to be always 0, the required behavior. 
# REQUIRES: system-windows diff --git a/llvm/tools/llvm-cov/CodeCoverage.cpp b/llvm/tools/llvm-cov/CodeCoverage.cpp index 1e5bfbe5c3aade..75028ac0bb505b 100644 --- a/llvm/tools/llvm-cov/CodeCoverage.cpp +++ b/llvm/tools/llvm-cov/CodeCoverage.cpp @@ -515,7 +515,7 @@ void CodeCoverageTool::remapPathNames(const CoverageMapping &Coverage) { if (!PathRemappings) return; - // Convert remapping paths to native paths with trailing seperators. + // Convert remapping paths to native paths with trailing separators. auto nativeWithTrailing = [](StringRef Path) -> std::string { if (Path.empty()) return ""; diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp index e1f5cc900cfd71..111c546f5329fb 100644 --- a/llvm/tools/llvm-profgen/PerfReader.cpp +++ b/llvm/tools/llvm-profgen/PerfReader.cpp @@ -564,7 +564,7 @@ bool PerfScriptReader::extractLBRStack(TraceStream &TraceIt, // The raw format of LBR stack is like: // 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ... // ... 0x4005c8/0x4005dc/P/-/-/0 - // It's in FIFO order and seperated by whitespace. + // It's in FIFO order and separated by whitespace. SmallVector Records; TraceIt.getCurrentLine().rtrim().split(Records, " ", -1, false); auto WarnInvalidLBR = [](TraceStream &TraceIt) { diff --git a/llvm/unittests/Support/Path.cpp b/llvm/unittests/Support/Path.cpp index bdae7a8ee4b55b..463a6991dac515 100644 --- a/llvm/unittests/Support/Path.cpp +++ b/llvm/unittests/Support/Path.cpp @@ -626,8 +626,8 @@ TEST(SupportDeathTest, TempDirectoryOnWindows) { // different values of specific env vars. To prevent corrupting env vars of // the current process all checks are done in separated processes. 
EXPECT_TEMP_DIR(_wputenv_s(L"TMP", L"C:\\OtherFolder"), "C:\\OtherFolder"); - EXPECT_TEMP_DIR(_wputenv_s(L"TMP", L"C:/Unix/Path/Seperators"), - "C:\\Unix\\Path\\Seperators"); + EXPECT_TEMP_DIR(_wputenv_s(L"TMP", L"C:/Unix/Path/Separators"), + "C:\\Unix\\Path\\Separators"); EXPECT_TEMP_DIR(_wputenv_s(L"TMP", L"Local Path"), ".+\\Local Path$"); EXPECT_TEMP_DIR(_wputenv_s(L"TMP", L"F:\\TrailingSep\\"), "F:\\TrailingSep"); EXPECT_TEMP_DIR( diff --git a/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h b/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h index 40e96e2583d221..ad26e9786d0b0a 100644 --- a/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h +++ b/mlir/include/mlir/Analysis/Presburger/IntegerRelation.h @@ -642,7 +642,7 @@ class IntegerRelation { /// the split become symbols, or some of the symbols immediately after the /// split become dimensions. void setDimSymbolSeparation(unsigned newSymbolCount) { - space.setVarSymbolSeperation(newSymbolCount); + space.setVarSymbolSeparation(newSymbolCount); } /// Return a set corresponding to all points in the domain of the relation. diff --git a/mlir/include/mlir/Analysis/Presburger/PresburgerSpace.h b/mlir/include/mlir/Analysis/Presburger/PresburgerSpace.h index a8ec373f885e21..9a41e4aee61d00 100644 --- a/mlir/include/mlir/Analysis/Presburger/PresburgerSpace.h +++ b/mlir/include/mlir/Analysis/Presburger/PresburgerSpace.h @@ -236,7 +236,7 @@ class PresburgerSpace { /// symbol count, either a chunk of dimensional variables immediately before /// the split become symbols, or some of the symbols immediately after the /// split become dimensions. - void setVarSymbolSeperation(unsigned newSymbolCount); + void setVarSymbolSeparation(unsigned newSymbolCount); /// Swaps the posA^th variable of kindA and posB^th variable of kindB. 
void swapVar(VarKind kindA, VarKind kindB, unsigned posA, unsigned posB); diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPInterfaces.h b/mlir/include/mlir/Dialect/OpenMP/OpenMPInterfaces.h index 787c48b05c5c5c..989ab1710c211e 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPInterfaces.h +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPInterfaces.h @@ -28,7 +28,7 @@ namespace mlir::omp { // You can override defaults here or implement more complex implementations of -// functions. Or define a completely seperate external model implementation, +// functions. Or define a completely separate external model implementation, // to override the existing implementation. struct OffloadModuleDefaultModel : public OffloadModuleInterface::ExternalModel { /// The lowering of gpu.printf to a call to an external printf() function /// /// This pass will add a declaration of printf() to the GPUModule if needed -/// and seperate out the format strings into global constants. For some +/// and separate out the format strings into global constants. For some /// runtimes, such as OpenCL on AMD, this is sufficient setup, as the compiler /// will lower printf calls to appropriate device-side code struct GPUPrintfOpToLLVMCallLowering diff --git a/mlir/lib/Dialect/LLVMIR/IR/BasicPtxBuilderInterface.cpp b/mlir/lib/Dialect/LLVMIR/IR/BasicPtxBuilderInterface.cpp index f7f1e944d637d0..3dacc70838e3e5 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/BasicPtxBuilderInterface.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/BasicPtxBuilderInterface.cpp @@ -65,7 +65,7 @@ void PtxBuilder::insertValue(Value v, PTXRegisterMod itype) { auto getModifier = [&]() -> const char * { if (itype == PTXRegisterMod::ReadWrite) { assert(false && "Read-Write modifier is not supported. 
Try setting the " - "same value as Write and Read seperately."); + "same value as Write and Read separately."); return "+"; } if (itype == PTXRegisterMod::Write) { diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 6ec4c120c11ea5..b7b2f8c4fa8c44 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -2263,7 +2263,7 @@ static llvm::omp::OpenMPOffloadMappingFlags mapParentWithMembers( // This creates the initial MEMBER_OF mapping that consists of // the parent/top level container (same as above effectively, except - // with a fixed initial compile time size and seperate maptype which + // with a fixed initial compile time size and separate maptype which // indicates the true mape type (tofrom etc.). This parent mapping is // only relevant if the structure in its totality is being mapped, // otherwise the above suffices. @@ -2388,7 +2388,7 @@ static void processMapWithMembersOf( // If we have a partial map (no parent referenced in the map clauses of the // directive, only members) and only a single member, we do not need to bind - // the map of the member to the parent, we can pass the member seperately. + // the map of the member to the parent, we can pass the member separately. if (parentClause.getMembers().size() == 1 && parentClause.getPartialMap()) { auto memberClause = llvm::cast( parentClause.getMembers()[0].getDefiningOp()); @@ -2425,7 +2425,7 @@ createAlteredByCaptureMap(MapInfoData &mapData, LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder) { for (size_t i = 0; i < mapData.MapClause.size(); ++i) { - // if it's declare target, skip it, it's handled seperately. + // if it's declare target, skip it, it's handled separately. 
if (!mapData.IsDeclareTarget[i]) { auto mapOp = mlir::dyn_cast_if_present(mapData.MapClause[i]); diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom_prod.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom_prod.mlir index d32a92e337ba72..8cbe8fd1ed695e 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom_prod.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/sparse_reduce_custom_prod.mlir @@ -27,7 +27,7 @@ // REDEFINE: %{sparsifier_opts} = enable-runtime-library=false enable-buffer-initialization=true vl=2 reassociate-fp-reductions=true enable-index-optimizations=true // RUN: %{compile} | %{run} | FileCheck %s -// Product reductions - kept in a seperate file as these are not supported by +// Product reductions - kept in a separate file as these are not supported by // the AArch64 SVE backend (so the set-up is a bit different to // sparse_reducitons.mlir) diff --git a/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir b/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir index 7a785301eb16b6..aa4ac111a8a503 100644 --- a/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir @@ -5,7 +5,7 @@ // compiler. Certain LLVM IR optimisation passes will perform runtime breaking // transformations on allocations not found to be in the entry block, current // OpenMP dialect lowering of TargetOp's will inject user allocations after -// compiler generated entry code, in a seperate block, this test checks that +// compiler generated entry code, in a separate block, this test checks that // a small function which attempts to raise some of these (specifically // constant sized) allocations performs its task reasonably in these // scenarios. 
diff --git a/openmp/tools/Modules/FindOpenMPTarget.cmake b/openmp/tools/Modules/FindOpenMPTarget.cmake index 424294090d5d0f..d478e3dfff1109 100644 --- a/openmp/tools/Modules/FindOpenMPTarget.cmake +++ b/openmp/tools/Modules/FindOpenMPTarget.cmake @@ -200,7 +200,7 @@ function(_OPENMP_TARGET_DEVICE_GET_FLAGS LANG DEVICE OPENMP_FLAG_VAR OPENMP_LIB_ file(APPEND ${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeOutput.log "Compilation successful, adding flags for ${DEVICE}.\n\n") - # Clang has a seperate library for target offloading. + # Clang has a separate library for target offloading. if(CMAKE_${LANG}_COMPILER_ID STREQUAL "Clang") find_library(OpenMPTarget_libomptarget_LIBRARY NAMES omptarget From c7b5be86f00beac6d806318888c4198986b2c84c Mon Sep 17 00:00:00 2001 From: MaheshRavishankar <1663364+MaheshRavishankar@users.noreply.github.com> Date: Thu, 13 Jun 2024 12:38:40 -0700 Subject: [PATCH 011/155] [mlir][TilingInterface] Update documentation for `TilingInterface.td`. (#95178) --- .../mlir/Interfaces/TilingInterface.td | 251 +++++++++++++----- 1 file changed, 182 insertions(+), 69 deletions(-) diff --git a/mlir/include/mlir/Interfaces/TilingInterface.td b/mlir/include/mlir/Interfaces/TilingInterface.td index bc83c81c0086ca..8865aba3b4ef09 100644 --- a/mlir/include/mlir/Interfaces/TilingInterface.td +++ b/mlir/include/mlir/Interfaces/TilingInterface.td @@ -18,9 +18,46 @@ include "mlir/IR/OpBase.td" def TilingInterface : OpInterface<"TilingInterface"> { let description = [{ - Interface for allowing operations to expose information needed to - tile them (similar to LinalgOp, but without having access to - indexing maps) + This interface allows operations to expose information needed to tile them. + + The intent of this interface is to separate the generation of the loop + structure (and constructs used for it) from the information needed from + the operation to be able to tile them. 
As a result an implementation of + the tiling algorithm (like `scf::tileUsingSCF`) can generate the inter-tile + loop structure, and call into the methods of the interface to be able to + tile any operation that implements the interface. + + This interface is also meant to help with "tile and fuse", i.e. the process + of fusing a producer with a consumer by + a) Tiling the consumer + b) Based on the tile of the producer used by the tiled consumer, + materialize the tiled implementation of a producer to generate that + tile (and use it immediately in the consumer) + You could also fuse a consumer with a producer by + a) Tiling the producer + b) Based on the tile produced, materialize the tiled implementation of + a consumer that uses this tile. + Note that the tile and fuse does not make any calculation on whether it + is "profitable to do this", but simply provides a mechanism to implement + the transformation when such a fusion is needed by the caller. + + For any operation to be tilable, an operation has to implement the + following four methods (see description below) + - `getLoopIteratorTypes` + - `getIterationDomain` + - `getTiledImplementation` + - `getResultTilePosition` + + For an operation to be "tiled and fused" with its (already tiled) consumer, + an operation has to implement the following additional method (see + description below): + - `generateResultTileValue` + + For an operation to be "tiled and fused" with its (already tiled) producer, + an operation has to implement the following additional methods (see + description below): + - `getTiledImplementationFromOperandTile` + - `getIterationDomainTileFromOperandTile`. }]; let cppNamespace = "::mlir"; let methods = [ @@ -49,19 +86,22 @@ def TilingInterface : OpInterface<"TilingInterface"> { /*desc=*/[{ Method to generate the tiled implementation of an operation. - The iteration space of the operation is returned by - `getIterationDomain`.
The caller provides the information of the - tile within this iteration space whose implementation the - caller needs. + Given a tile of the iteration space (as returned by + `getIterationDomain`), generate in-place the code that represents + the computation corresponding to that tile of the iteration space. + It is the responsibility of the implementation of this method in + the operation to generate the slices of the operands needed for the + tiled implementation. - `offsets` provides the offset of the tile in the coordinate system of the original iteration space, i.e., if an iteration space - dimension had non-zero offset, it must be included in the offset + dimension had non-zero offset, it will be included in the offset provided here (as opposed to zero-based offset "relative" to the iteration space). - `sizes` provides the size of the tile. - The method returns the operation that is the tiled - implementation. + The returned `TilingResult` must return for each result of the + untiled operation, a `Value` that is the result of the tiled + operation. }], /*retType=*/"FailureOr<::mlir::TilingResult>", /*methodName=*/"getTiledImplementation", @@ -76,11 +116,34 @@ def TilingInterface : OpInterface<"TilingInterface"> { >, InterfaceMethod< /*desc=*/[{ - Method to return the position of the result tile computed by the tiled operation. + Method to return the position of the result tile computed by the + tiled operation. + + For operations that return a value (typically a value of type + `RankedTensorType`), the generated tiled computation has to also + recompute a replacement for the results of the original operation. + The tiled implementation of the operation returns a tile of the + result(s). This method returns information about what part of the + result tensor is computed by the tiled implementation. The manner in + which these tiles get put together to get the final result is up to + the surrounding loop construct.
If an operation has no results (for + example an operation that operates only on memrefs), then this method + need not be implemented by the operation. + - `resultNumber` is the result number of the original operation + being processed. + - `offsets` provides the offset of the tile in the coordinate system + of the original iteration space, i.e., if an iteration space + dimension had non-zero offset, it will be included in the offset + provided here (as opposed to zero-based offset "relative" to the + iteration space). + - `sizes` provides the size of the tile. + - `resultOffsets` is the offsets of the tile of the result generated + by the tiled implementation (returned by value). + - `resultSizes` is the size of the tile of the result generated + by the tiled implementation (returned by value). - Specifies what tile of the result of the original tensor is computed - by the tiled implementation. Expects the same `offsets` and `sizes` as - used to obtain the tiled implementation of the operation. + Note: It is undefined behaviour if there is overlap between the + tiles of the result generated by the tiled implementation. }], /*retType=*/"::mlir::LogicalResult", /*methodName=*/"getResultTilePosition", @@ -98,18 +161,38 @@ def TilingInterface : OpInterface<"TilingInterface"> { >, InterfaceMethod< /*desc=*/[{ - Method to return the tile of the iteration domain where - values from the given tile of the operand are used. + Method to generate the code that produces a tile of the result. + + This method is required to allow operations to be "tiled and fused" + with an (already tiled) consumer. Typically, for two operations with + producer -> consumer relationship, to compute a tile of the + consumer a `slice` of the producer is needed. This method allows + computing that slice of the producer in-place, thereby "fusing" + the operations at tile-granularity.
This method is different from + `getTiledImplementation`, which produces a tiled implementation + for a tile of the iteration space. This method produces a tiled + implementation based on the tile of producer required. + - `resultNumber` is the result of the producer used by the consumer. + - `offsets` is the offset of the slice of the producer result used by + the tiled implementation of the consumer. + - `sizes` is the size of the slice of the producer result used by the + consumer. + If fusion of the producer with the consumer is not legal for the + operation/result, this method should return failure. + + Note: This method only deals with the mechanism of implementing the + fusion. In general the fusion might result in recomputation (based on + the way the result is produced by the producer and the access pattern + used in the consumer to access). This is up to the caller to handle + appropriately. }], - /*retType=*/"::mlir::LogicalResult", - /*methodName=*/"getIterationDomainTileFromOperandTile", + /*retType=*/"FailureOr<::mlir::TilingResult>", + /*methodName=*/"generateResultTileValue", /*args=*/(ins "OpBuilder &":$b, - "unsigned":$operandNumber, - "ArrayRef ":$offsets, - "ArrayRef ":$sizes, - "SmallVectorImpl &":$iterDomainOffsets, - "SmallVectorImpl &":$iterDomainSizes), + "unsigned":$resultNumber, + "ArrayRef":$offsets, + "ArrayRef":$sizes), /*methodBody=*/"", /*defaultImplementation=*/[{ return failure(); @@ -117,32 +200,27 @@ def TilingInterface : OpInterface<"TilingInterface"> { >, InterfaceMethod< /*desc=*/[{ - Method to generate the code that produces a tile of the result. - - Generates the IR that computes the tile of a result of the - operation. The `offsets` and `sizes` describe the tile of - the output required. This is different from - `getTiledImplementation` which generates the tiled - implementation of the operation given a tile of the - iteration space.
This method generates a tiled - implementation of the operation based on the tile of the - result required. This method enables fusion by using tile - and fuse. The method returns failure if the operation can't be - tiled to generate the result tile. In practical terms this - implies it cannot be tiled and fused with its consumers. + Method to generate the tiled implementation of an operation that uses + exactly a tile of the given operand. - - `offsets` provides the offset of the tile in the coordinate system - of the original iteration space, i.e., if an iteration space - dimension had non-zero offset, it must be included in the offset - provided here (as opposed to zero-based offset "relative" to the - iteration space). - - `sizes` provides the size of the tile. + This method is required to allow operations to be "tiled and fused" + with an (already tiled) producer. Given a tile of the producer, this + method generates the tile of the consumer that uses exactly this + produced tile. In some sense it is the "reverse" of + `generateResultTileValue`. + - `operandNumber` is the result of the producer used by the consumer. + - `offsets` is the offset of the slice of the producer result used by + the tiled implementation of the consumer. + - `sizes` is the size of the slice of the producer result used by the + consumer. + If it is illegal to fuse with a producer along the given operand for + an operation, the implementation should return a failure. }], /*retType=*/"FailureOr<::mlir::TilingResult>", - /*methodName=*/"generateResultTileValue", + /*methodName=*/"getTiledImplementationFromOperandTile", /*args=*/(ins "OpBuilder &":$b, - "unsigned":$resultNumber, + "unsigned":$operandNumber, "ArrayRef":$offsets, "ArrayRef":$sizes), /*methodBody=*/"", @@ -152,38 +230,73 @@ def TilingInterface : OpInterface<"TilingInterface"> { >, InterfaceMethod< /*desc=*/[{ - Method to generate the tiled implementation of an operation from - operand tile position. 
+ Method to return the tile of the iteration domain that uses a given + tile of the operand. - NOTE: For most operations, this should be a trivial composition of - getIterationDomainTileFromOperandTile and getTiledImplementation. + This method is required to allow operations to be "tiled and fused" + with an (already tiled) producer. Given a tile of an operand, + returns the tile of the iteration space that uses this tile. + - `operandNumber` is the result of the producer used by the consumer. + - `offsets` is the offset of the slice of the producer result used by + the tiled implementation of the consumer. + - `sizes` is the size of the slice of the producer result used by the + consumer. + If it is illegal to fuse with a producer along the given operand for + an operation, or if this mapping cannot be computed, the + implementation should return a failure. - Generates the IR that computes the tiled implementation of an - operation from operand tile. The `offsets` and `sizes` - describe the tile of the operand required. This is different from - `getTiledImplementation` which generates the tiled - implementation of the operation given a tile of the - iteration space. This method generates a tiled - implementation of the operation based on the tile of the - operand required. This method enables consumer fusion by using - tile and fuse. The method returns failure if the operation - can't be tiled to generate the operand tile. In practical terms - this implies it cannot be tiled and fused with its producers. + Note that unlike the "tile consumer and fuse producer" case, the + "tile producer and fuse consumer" requires an additional method to get + the iteration tile space that encompasses all uses of the given operand + tile. The reason for this is, consider + ```mlir + %1 = scf.for... { + %2 = + %3 = tensor.insert_slice %2 into ... + scf.yield %3 + } + %4 = )(... %1... ) + ... (... %4 ...) 
+ ``` - - `offsets` provides the offset of the tile in the coordinate system - of the original iteration space, i.e., if an iteration space - dimension had non-zero offset, it must be included in the offset - provided here (as opposed to zero-based offset "relative" to the - iteration space). - - `sizes` provides the size of the tile. + when fused this becomes + ``` + %1 = scf.for... { + %2 = + %3 = (... %2...) + %4 = tensor.insert_slice %3 into ... + scf.yield %4 + } + ... (... %1 ...) + ``` + + i.e., when fusing the consumer, the replacement for the result of the + consumer needs to be returned to replace the uses of the consumer. + For the tile+fuse algorithm to do this it needs information about + which tile of the iteration space encompasses all uses of the tile + produced and use that to compute what are the results produced. Note + that this iteration space might be the entire iteration space of the + operation, or multiple operand tiles might map to intersecting + iteration spaces. It is up to the caller to make sure that it is still + fusable with producer in this scenario, or it must return a failure. + + Note that this method is only used as a way to implement the + transformation. It does not provide guarantees on whether such a + transformation is profitable. + + For most cases `getTiledImplementationFromOperandTile` could be + implemented using `getIterationDomainTileFromOperandTile` + + `getTiledImplementation` methods.
}], - /*retType=*/"FailureOr<::mlir::TilingResult>", - /*methodName=*/"getTiledImplementationFromOperandTile", + /*retType=*/"::mlir::LogicalResult", + /*methodName=*/"getIterationDomainTileFromOperandTile", /*args=*/(ins "OpBuilder &":$b, "unsigned":$operandNumber, - "ArrayRef":$offsets, - "ArrayRef":$sizes), + "ArrayRef ":$offsets, + "ArrayRef ":$sizes, + "SmallVectorImpl &":$iterDomainOffsets, + "SmallVectorImpl &":$iterDomainSizes), /*methodBody=*/"", /*defaultImplementation=*/[{ return failure(); From 52d29eb2874580f0fe96e5cbb96faffbbdc432a7 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 13 Jun 2024 20:42:53 +0100 Subject: [PATCH 012/155] [LV] Add extra cost model tests with truncated inductions. Extra test cases that caused revert of https://github.com/llvm/llvm-project/pull/92555 --- .../LoopVectorize/X86/induction-costs.ll | 402 ++++++++++++++++++ 1 file changed, 402 insertions(+) diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll index 9ae36ae4d58755..d22ccf6671e843 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/induction-costs.ll @@ -214,6 +214,140 @@ exit: } define void @multiple_pointer_ivs_with_scalar_uses_only(ptr %A, ptr %B) #0 { +; CHECK-LABEL: define void @multiple_pointer_ivs_with_scalar_uses_only( +; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] +; CHECK: vector.memcheck: +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i64 8589934391 +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[B]], i64 4294967196 +; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[A]], [[SCEVGEP1]] +; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[B]], [[SCEVGEP]] +; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] +; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], 
label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[A]], i64 8589934368 +; CHECK-NEXT: [[IND_END3:%.*]] = getelementptr i8, ptr [[B]], i64 4294967184 +; CHECK-NEXT: [[IND_END5:%.*]] = getelementptr i8, ptr [[B]], i64 4294967184 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <16 x i32> [ , [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 8 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 10 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 12 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 14 +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 16 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 18 +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 20 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 22 +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 24 +; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26 +; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28 +; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30 +; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP0]] +; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP1]] +; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP2]] +; CHECK-NEXT: [[NEXT_GEP9:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP3]] +; CHECK-NEXT: [[NEXT_GEP10:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP4]] +; CHECK-NEXT: [[NEXT_GEP11:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP5]] +; 
CHECK-NEXT: [[NEXT_GEP12:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP6]] +; CHECK-NEXT: [[NEXT_GEP13:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP7]] +; CHECK-NEXT: [[NEXT_GEP14:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP8]] +; CHECK-NEXT: [[NEXT_GEP15:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP9]] +; CHECK-NEXT: [[NEXT_GEP16:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP10]] +; CHECK-NEXT: [[NEXT_GEP17:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP11]] +; CHECK-NEXT: [[NEXT_GEP18:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP12]] +; CHECK-NEXT: [[NEXT_GEP19:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP13]] +; CHECK-NEXT: [[NEXT_GEP20:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP14]] +; CHECK-NEXT: [[NEXT_GEP21:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[NEXT_GEP22:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP16]] +; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[NEXT_GEP23:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[NEXT_GEP23]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP18]], align 1, !alias.scope [[META13:![0-9]+]] +; CHECK-NEXT: [[TMP19:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[NEXT_GEP22]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD24:%.*]] = load <16 x i8>, ptr [[TMP20]], align 1, !alias.scope [[META13]] +; CHECK-NEXT: [[TMP21:%.*]] = zext <16 x i8> [[WIDE_LOAD24]] to <16 x i32> +; CHECK-NEXT: [[TMP22]] = add <16 x i32> [[TMP19]], [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <16 x i32> [[VECTOR_RECUR]], <16 x i32> [[TMP22]], <16 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = lshr <16 x i32> [[TMP23]], +; CHECK-NEXT: [[TMP25:%.*]] = trunc <16 x i32> [[TMP24]] to <16 x i8> +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i8> [[TMP25]], i32 0 +; CHECK-NEXT: store i8 [[TMP26]], ptr [[NEXT_GEP]], align 1, !alias.scope 
[[META16:![0-9]+]], !noalias [[META13]] +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i8> [[TMP25]], i32 1 +; CHECK-NEXT: store i8 [[TMP27]], ptr [[NEXT_GEP7]], align 1, !alias.scope [[META16]], !noalias [[META13]] +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i8> [[TMP25]], i32 2 +; CHECK-NEXT: store i8 [[TMP28]], ptr [[NEXT_GEP8]], align 1, !alias.scope [[META16]], !noalias [[META13]] +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i8> [[TMP25]], i32 3 +; CHECK-NEXT: store i8 [[TMP29]], ptr [[NEXT_GEP9]], align 1, !alias.scope [[META16]], !noalias [[META13]] +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i8> [[TMP25]], i32 4 +; CHECK-NEXT: store i8 [[TMP30]], ptr [[NEXT_GEP10]], align 1, !alias.scope [[META16]], !noalias [[META13]] +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i8> [[TMP25]], i32 5 +; CHECK-NEXT: store i8 [[TMP31]], ptr [[NEXT_GEP11]], align 1, !alias.scope [[META16]], !noalias [[META13]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i8> [[TMP25]], i32 6 +; CHECK-NEXT: store i8 [[TMP32]], ptr [[NEXT_GEP12]], align 1, !alias.scope [[META16]], !noalias [[META13]] +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i8> [[TMP25]], i32 7 +; CHECK-NEXT: store i8 [[TMP33]], ptr [[NEXT_GEP13]], align 1, !alias.scope [[META16]], !noalias [[META13]] +; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i8> [[TMP25]], i32 8 +; CHECK-NEXT: store i8 [[TMP34]], ptr [[NEXT_GEP14]], align 1, !alias.scope [[META16]], !noalias [[META13]] +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i8> [[TMP25]], i32 9 +; CHECK-NEXT: store i8 [[TMP35]], ptr [[NEXT_GEP15]], align 1, !alias.scope [[META16]], !noalias [[META13]] +; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i8> [[TMP25]], i32 10 +; CHECK-NEXT: store i8 [[TMP36]], ptr [[NEXT_GEP16]], align 1, !alias.scope [[META16]], !noalias [[META13]] +; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i8> [[TMP25]], i32 11 +; CHECK-NEXT: store i8 [[TMP37]], ptr [[NEXT_GEP17]], align 1, 
!alias.scope [[META16]], !noalias [[META13]] +; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i8> [[TMP25]], i32 12 +; CHECK-NEXT: store i8 [[TMP38]], ptr [[NEXT_GEP18]], align 1, !alias.scope [[META16]], !noalias [[META13]] +; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i8> [[TMP25]], i32 13 +; CHECK-NEXT: store i8 [[TMP39]], ptr [[NEXT_GEP19]], align 1, !alias.scope [[META16]], !noalias [[META13]] +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i8> [[TMP25]], i32 14 +; CHECK-NEXT: store i8 [[TMP40]], ptr [[NEXT_GEP20]], align 1, !alias.scope [[META16]], !noalias [[META13]] +; CHECK-NEXT: [[TMP41:%.*]] = extractelement <16 x i8> [[TMP25]], i32 15 +; CHECK-NEXT: store i8 [[TMP41]], ptr [[NEXT_GEP21]], align 1, !alias.scope [[META16]], !noalias [[META13]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4294967184 +; CHECK-NEXT: br i1 [[TMP42]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP22]], i32 15 +; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 2048, [[VECTOR_MEMCHECK]] ], [ 2048, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ -12, [[MIDDLE_BLOCK]] ], [ 100, [[ENTRY]] ], [ 100, [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[A]], [[ENTRY]] ], [ [[A]], [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi ptr [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ], [ [[B]], [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[IND_END5]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ], [ [[B]], [[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV_1:%.*]] = phi i32 
[ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[DEC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[ADD38:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[PTR_IV_1:%.*]] = phi ptr [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[OUTPTR_0:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[PTR_IV_2:%.*]] = phi ptr [ [[BC_RESUME_VAL4]], [[SCALAR_PH]] ], [ [[INCDEC_PTR36:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[PTR_IV_3:%.*]] = phi ptr [ [[BC_RESUME_VAL6]], [[SCALAR_PH]] ], [ [[INCDEC_PTR33:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[INCDEC_PTR33]] = getelementptr i8, ptr [[PTR_IV_3]], i64 1 +; CHECK-NEXT: [[TMP43:%.*]] = load i8, ptr [[PTR_IV_3]], align 1 +; CHECK-NEXT: [[CONV34:%.*]] = zext i8 [[TMP43]] to i32 +; CHECK-NEXT: [[INCDEC_PTR36]] = getelementptr i8, ptr [[PTR_IV_2]], i64 1 +; CHECK-NEXT: [[TMP44:%.*]] = load i8, ptr [[PTR_IV_2]], align 1 +; CHECK-NEXT: [[CONV37:%.*]] = zext i8 [[TMP44]] to i32 +; CHECK-NEXT: [[ADD38]] = add i32 [[CONV34]], [[CONV37]] +; CHECK-NEXT: [[SHR42:%.*]] = lshr i32 [[SCALAR_RECUR]], 1 +; CHECK-NEXT: [[CONV43:%.*]] = trunc i32 [[SHR42]] to i8 +; CHECK-NEXT: store i8 [[CONV43]], ptr [[PTR_IV_1]], align 1 +; CHECK-NEXT: [[DEC]] = add i32 [[IV_1]], 1 +; CHECK-NEXT: [[OUTPTR_0]] = getelementptr i8, ptr [[PTR_IV_1]], i64 2 +; CHECK-NEXT: [[CMP30_NOT:%.*]] = icmp eq i32 [[DEC]], 0 +; CHECK-NEXT: br i1 [[CMP30_NOT]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: ret void +; entry: br label %loop @@ -243,6 +377,44 @@ exit: } define i16 @iv_and_step_trunc() { +; CHECK-LABEL: define i16 @iv_and_step_trunc() { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], 
[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <2 x i16> [ , [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <2 x i16> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i64> [[TMP0]] to <2 x i16> +; CHECK-NEXT: [[TMP2]] = mul <2 x i16> [[VEC_IND1]], [[TMP1]] +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], +; CHECK-NEXT: [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i16> [[TMP2]], i32 0 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i16> [[TMP2]], i32 1 +; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 2, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[IV]] to i16 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[IV_NEXT]] to i16 +; CHECK-NEXT: [[REC_NEXT]] = mul i16 [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1 +; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: [[REC_LCSSA:%.*]] = phi i16 [ [[SCALAR_RECUR]], [[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ] +; 
CHECK-NEXT: ret i16 [[REC_LCSSA]] +; entry: br label %loop @@ -261,6 +433,80 @@ exit: } define i32 @test_scalar_predicated_cost(i64 %x, i64 %y, ptr %A) #0 { +; CHECK-LABEL: define i32 @test_scalar_predicated_cost( +; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], ptr [[A:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[Y]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <8 x i64> poison, i64 [[X]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT4]], <8 x i64> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], +; CHECK-NEXT: [[STEP_ADD1:%.*]] = add <8 x i64> [[STEP_ADD]], +; CHECK-NEXT: [[STEP_ADD2:%.*]] = add <8 x i64> [[STEP_ADD1]], +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 8 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 16 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 24 +; CHECK-NEXT: [[TMP4:%.*]] = icmp ule <8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ule <8 x i64> [[STEP_ADD]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp ule <8 x i64> [[STEP_ADD1]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <8 x i64> [[STEP_ADD2]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i1> [[TMP4]], +; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i1> [[TMP5]], +; CHECK-NEXT: 
[[TMP10:%.*]] = xor <8 x i1> [[TMP6]], +; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i1> [[TMP7]], +; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[BROADCAST_SPLAT5]], [[VEC_IND]] +; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[BROADCAST_SPLAT5]], [[STEP_ADD]] +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[BROADCAST_SPLAT5]], [[STEP_ADD1]] +; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[BROADCAST_SPLAT5]], [[STEP_ADD2]] +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP20:%.*]] = trunc <8 x i64> [[TMP12]] to <8 x i32> +; CHECK-NEXT: [[TMP21:%.*]] = trunc <8 x i64> [[TMP13]] to <8 x i32> +; CHECK-NEXT: [[TMP22:%.*]] = trunc <8 x i64> [[TMP14]] to <8 x i32> +; CHECK-NEXT: [[TMP23:%.*]] = trunc <8 x i64> [[TMP15]] to <8 x i32> +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i32, ptr [[TMP16]], i32 0 +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr i32, ptr [[TMP16]], i32 8 +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr i32, ptr [[TMP16]], i32 16 +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr i32, ptr [[TMP16]], i32 24 +; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP20]], ptr [[TMP24]], i32 4, <8 x i1> [[TMP8]]) +; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP21]], ptr [[TMP25]], i32 4, <8 x i1> [[TMP9]]) +; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP22]], ptr [[TMP26]], i32 4, <8 x i1> [[TMP10]]) +; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP23]], ptr [[TMP27]], i32 4, <8 x i1> [[TMP11]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD2]], +; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96 +; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], 
!llvm.loop [[LOOP22:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: loop.header: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[CMP9_NOT:%.*]] = icmp ule i64 [[IV]], [[Y]] +; CHECK-NEXT: br i1 [[CMP9_NOT]], label [[LOOP_LATCH]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[OR:%.*]] = or i64 [[X]], [[IV]] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[A]], i64 [[IV]] +; CHECK-NEXT: [[T:%.*]] = trunc i64 [[OR]] to i32 +; CHECK-NEXT: store i32 [[T]], ptr [[GEP]], align 4 +; CHECK-NEXT: br label [[LOOP_LATCH]] +; CHECK: loop.latch: +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 100 +; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP23:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: ret i32 0 +; entry: br label %loop.header @@ -285,6 +531,147 @@ exit: ret i32 0 } +define void @wide_iv_trunc(ptr %dst, i64 %N) { +; CHECK-LABEL: define void @wide_iv_trunc( +; CHECK-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[N]], 8 +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] +; CHECK: loop.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], 3 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP0]], 1 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 +; 
CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ] +; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[INDEX]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0 +; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; CHECK: pred.store.if: +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[TMP1]], 0 +; CHECK-NEXT: store i32 [[TMP4]], ptr [[DST]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] +; CHECK: pred.store.continue: +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1 +; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]] +; CHECK: pred.store.if1: +; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[TMP1]], 1 +; CHECK-NEXT: store i32 [[TMP6]], ptr [[DST]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]] +; CHECK: pred.store.continue2: +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2 +; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] +; CHECK: pred.store.if3: +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP1]], 2 +; CHECK-NEXT: store i32 [[TMP8]], ptr [[DST]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] +; CHECK: pred.store.continue4: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3 +; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]] +; CHECK: pred.store.if5: +; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP1]], 3 +; CHECK-NEXT: store i32 [[TMP10]], ptr [[DST]], align 4 
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] +; CHECK: pred.store.continue6: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 true, label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i32 +; CHECK-NEXT: store i32 [[IV_TRUNC]], ptr [[DST]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label [[EXIT_LOOPEXIT]], label [[LOOP]], !llvm.loop [[LOOP25:![0-9]+]] +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %cmp = icmp ult i64 %N, 8 + br i1 %cmp, label %loop, label %exit + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %iv.trunc = trunc i64 %iv to i32 + store i32 %iv.trunc, ptr %dst, align 4 + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, %N + br i1 %ec, label %exit, label %loop + +exit: + ret void +} + +define void @wide_iv_trunc_reuse(ptr %dst) { +; CHECK-LABEL: define void @wide_iv_trunc_reuse( +; CHECK-SAME: ptr [[DST:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32 +; 
CHECK-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], 3 +; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], 5 +; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[OFFSET_IDX]], 6 +; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], 7 +; CHECK-NEXT: store i32 [[TMP7]], ptr [[DST]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV_TRUNC:%.*]], [[LOOP]] ] +; CHECK-NEXT: store i32 [[IV_2]], ptr [[DST]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 0 +; CHECK-NEXT: [[IV_TRUNC]] = trunc i64 [[IV]] to i32 +; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP27:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ] + %iv.2 = phi i32 [ 0, %entry ], [ %iv.trunc, %loop ] + store i32 %iv.2, ptr %dst, align 4 + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, 0 + %iv.trunc = trunc i64 %iv to i32 + br i1 %ec, label %exit, label %loop + +exit: + ret void +} attributes #0 = { 
"min-legal-vector-width"="0" "target-cpu"="skylake-avx512" } ;. @@ -301,4 +688,19 @@ attributes #0 = { "min-legal-vector-width"="0" "target-cpu"="skylake-avx512" } ; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]]} ; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META1]], [[META2]]} ; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META2]], [[META1]]} +; CHECK: [[META13]] = !{[[META14:![0-9]+]]} +; CHECK: [[META14]] = distinct !{[[META14]], [[META15:![0-9]+]]} +; CHECK: [[META15]] = distinct !{[[META15]], !"LVerDomain"} +; CHECK: [[META16]] = !{[[META17:![0-9]+]]} +; CHECK: [[META17]] = distinct !{[[META17]], [[META15]]} +; CHECK: [[LOOP18]] = distinct !{[[LOOP18]], [[META1]], [[META2]]} +; CHECK: [[LOOP19]] = distinct !{[[LOOP19]], [[META1]]} +; CHECK: [[LOOP20]] = distinct !{[[LOOP20]], [[META1]], [[META2]]} +; CHECK: [[LOOP21]] = distinct !{[[LOOP21]], [[META2]], [[META1]]} +; CHECK: [[LOOP22]] = distinct !{[[LOOP22]], [[META1]], [[META2]]} +; CHECK: [[LOOP23]] = distinct !{[[LOOP23]], [[META2]], [[META1]]} +; CHECK: [[LOOP24]] = distinct !{[[LOOP24]], [[META1]], [[META2]]} +; CHECK: [[LOOP25]] = distinct !{[[LOOP25]], [[META2]], [[META1]]} +; CHECK: [[LOOP26]] = distinct !{[[LOOP26]], [[META1]], [[META2]]} +; CHECK: [[LOOP27]] = distinct !{[[LOOP27]], [[META2]], [[META1]]} ;. 
From ba7d5ebe4bb2dc9b6885adf8346529e763cd6fce Mon Sep 17 00:00:00 2001 From: OverMighty Date: Thu, 13 Jun 2024 21:48:36 +0200 Subject: [PATCH 013/155] [libc] Fix build breaks caused by f16sqrtf changes (#95459) See Buildbot failures: - https://lab.llvm.org/buildbot/#/builders/78/builds/13 - https://lab.llvm.org/buildbot/#/builders/182/builds/7 --- libc/test/src/math/smoke/CMakeLists.txt | 3 +++ libc/test/src/stdfix/ISqrtTest.h | 2 +- libc/test/src/stdfix/SqrtTest.h | 3 ++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt index 3bb87d2b0d0f3c..3e9edc51b004f0 100644 --- a/libc/test/src/math/smoke/CMakeLists.txt +++ b/libc/test/src/math/smoke/CMakeLists.txt @@ -2597,6 +2597,9 @@ add_fp_unittest( HDRS SqrtTest.h DEPENDS + # The dependency on sqrtf128 is used to disable the test when float128 + # support is not available. + libc.src.math.sqrtf128 libc.src.__support.FPUtil.generic.sqrt COMPILE_OPTIONS -O3 diff --git a/libc/test/src/stdfix/ISqrtTest.h b/libc/test/src/stdfix/ISqrtTest.h index ddf292fdd083f3..692488b383551c 100644 --- a/libc/test/src/stdfix/ISqrtTest.h +++ b/libc/test/src/stdfix/ISqrtTest.h @@ -55,7 +55,7 @@ template class ISqrtTest : public LIBC_NAMESPACE::testing::Test { x_d += 1.0; ++x; OutType result = func(x); - double expected = LIBC_NAMESPACE::fputil::sqrt(x_d); + double expected = LIBC_NAMESPACE::fputil::sqrt(x_d); testSpecificInput(x, result, expected, ERR); } } diff --git a/libc/test/src/stdfix/SqrtTest.h b/libc/test/src/stdfix/SqrtTest.h index 47ec129fab2aed..2a8a825abb460c 100644 --- a/libc/test/src/stdfix/SqrtTest.h +++ b/libc/test/src/stdfix/SqrtTest.h @@ -49,7 +49,8 @@ template class SqrtTest : public LIBC_NAMESPACE::testing::Test { T v = LIBC_NAMESPACE::cpp::bit_cast(x); double v_d = static_cast(v); double errors = LIBC_NAMESPACE::fputil::abs( - static_cast(func(v)) - LIBC_NAMESPACE::fputil::sqrt(v_d)); + static_cast(func(v)) - + 
LIBC_NAMESPACE::fputil::sqrt(v_d)); if (errors > ERR) { // Print out the failure input and output. EXPECT_EQ(v, zero); From 41587739a63f7622c36715421d215f07d79f9a7d Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 13 Jun 2024 13:04:50 -0700 Subject: [PATCH 014/155] [ProfileData] Migrate to getValueArrayForSite (#95457) This patch is a collection of one-liner migrations to getValueArrayForSite. --- llvm/unittests/ProfileData/InstrProfTest.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp index 0309be4eb10fb4..8a04281efeb50d 100644 --- a/llvm/unittests/ProfileData/InstrProfTest.cpp +++ b/llvm/unittests/ProfileData/InstrProfTest.cpp @@ -1135,7 +1135,7 @@ TEST_P(MaybeSparseInstrProfTest, icall_and_vtable_data_merge) { EXPECT_STREQ((const char *)VD[3].Value, "callee1"); EXPECT_EQ(VD[3].Count, 1U); - auto VD_2(R->getValueForSite(IPVK_IndirectCallTarget, 2)); + auto VD_2 = R->getValueArrayForSite(IPVK_IndirectCallTarget, 2); EXPECT_STREQ((const char *)VD_2[0].Value, "callee3"); EXPECT_EQ(VD_2[0].Count, 6U); EXPECT_STREQ((const char *)VD_2[1].Value, "callee4"); @@ -1145,13 +1145,13 @@ TEST_P(MaybeSparseInstrProfTest, icall_and_vtable_data_merge) { EXPECT_STREQ((const char *)VD_2[3].Value, "callee1"); EXPECT_EQ(VD_2[3].Count, 1U); - auto VD_3(R->getValueForSite(IPVK_IndirectCallTarget, 3)); + auto VD_3 = R->getValueArrayForSite(IPVK_IndirectCallTarget, 3); EXPECT_STREQ((const char *)VD_3[0].Value, "callee8"); EXPECT_EQ(VD_3[0].Count, 2U); EXPECT_STREQ((const char *)VD_3[1].Value, "callee7"); EXPECT_EQ(VD_3[1].Count, 1U); - auto VD_4(R->getValueForSite(IPVK_IndirectCallTarget, 4)); + auto VD_4 = R->getValueArrayForSite(IPVK_IndirectCallTarget, 4); EXPECT_STREQ((const char *)VD_4[0].Value, "callee3"); EXPECT_EQ(VD_4[0].Count, 6U); EXPECT_STREQ((const char *)VD_4[1].Value, "callee2"); @@ -1255,8 +1255,7 @@ TEST_P(ValueProfileMergeEdgeCaseTest, 
value_profile_data_merge_saturation) { Reader->getInstrProfRecord("baz", 0x5678); ASSERT_TRUE(bool(ReadRecord2)); ASSERT_EQ(1U, ReadRecord2->getNumValueSites(ValueKind)); - std::unique_ptr VD = - ReadRecord2->getValueForSite(ValueKind, 0); + auto VD = ReadRecord2->getValueArrayForSite(ValueKind, 0); EXPECT_EQ(ProfiledValue, VD[0].Value); EXPECT_EQ(MaxValCount, VD[0].Count); } From 93181db7fb6cd738bc807e510f87e3a61fb5b3e6 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Thu, 13 Jun 2024 20:48:17 +0000 Subject: [PATCH 015/155] [mlir][bzl] Add missing dep The file was added to MLIRBindingsPythonCoreNoCAPI but objects weren't. Signed-off-by: Jacques Pienaar --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 8e217ff8c12480..421063cc3e930a 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -1051,6 +1051,7 @@ cc_library( ":CAPIDebugObjects", ":CAPIIRObjects", ":CAPIInterfacesObjects", + ":CAPITransformObjects", ], ) From 1365ce22e9a419c992cb81824f5176390de83ee6 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 13 Jun 2024 13:59:01 -0700 Subject: [PATCH 016/155] [llvm-profdata] Clean up traverseAllValueSites (NFC) (#95467) If NV == 0, nothing interesting happens after the "if" statement. We should just "continue" to the next value site. While I am at it, this patch migrates a use of getValueForSite to getValueArrayForSite. 
--- llvm/tools/llvm-profdata/llvm-profdata.cpp | 30 +++++++++++----------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index fae6d1e989ab5a..6c8ab14e7c2456 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -2695,30 +2695,30 @@ static void traverseAllValueSites(const InstrProfRecord &Func, uint32_t VK, uint32_t NS = Func.getNumValueSites(VK); Stats.TotalNumValueSites += NS; for (size_t I = 0; I < NS; ++I) { - uint32_t NV = Func.getNumValueDataForSite(VK, I); - std::unique_ptr VD = Func.getValueForSite(VK, I); + auto VD = Func.getValueArrayForSite(VK, I); + uint32_t NV = VD.size(); + if (NV == 0) + continue; Stats.TotalNumValues += NV; - if (NV) { - Stats.TotalNumValueSitesWithValueProfile++; - if (NV > Stats.ValueSitesHistogram.size()) - Stats.ValueSitesHistogram.resize(NV, 0); - Stats.ValueSitesHistogram[NV - 1]++; - } + Stats.TotalNumValueSitesWithValueProfile++; + if (NV > Stats.ValueSitesHistogram.size()) + Stats.ValueSitesHistogram.resize(NV, 0); + Stats.ValueSitesHistogram[NV - 1]++; uint64_t SiteSum = 0; - for (uint32_t V = 0; V < NV; V++) - SiteSum += VD[V].Count; + for (const auto &V : VD) + SiteSum += V.Count; if (SiteSum == 0) SiteSum = 1; - for (uint32_t V = 0; V < NV; V++) { + for (const auto &V : VD) { OS << "\t[ " << format("%2u", I) << ", "; if (Symtab == nullptr) - OS << format("%4" PRIu64, VD[V].Value); + OS << format("%4" PRIu64, V.Value); else - OS << Symtab->getFuncOrVarName(VD[V].Value); - OS << ", " << format("%10" PRId64, VD[V].Count) << " ] (" - << format("%.2f%%", (VD[V].Count * 100.0 / SiteSum)) << ")\n"; + OS << Symtab->getFuncOrVarName(V.Value); + OS << ", " << format("%10" PRId64, V.Count) << " ] (" + << format("%.2f%%", (V.Count * 100.0 / SiteSum)) << ")\n"; } } } From b6688a0b17361e1f4164f52e8d3b17defd7d432d Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 13 Jun 
2024 14:06:20 -0700 Subject: [PATCH 017/155] [MC] flushPendingLabels: revert setAtom change The setAtom call introduced by e17bc023f4e5b79f08bfc7f624f8ff0f0cf17ce4 was due to my misunderstanding of flushPendingLabels (see https://discourse.llvm.org/t/mc-removing-aligned-bundling-support/79518). When evaluating `.quad x-y`, MCExpr.cpp:AttemptToFoldSymbolOffsetDifference gives different results at parse time and layout time because the `if (FA->getAtom() == FB.getAtom())` condition in isSymbolRefDifferenceFullyResolvedImpl only works when `setAtom` with a non-null pointer has been called. Calling setAtom in flushPendingLabels does not help anything. --- llvm/lib/MC/MCSection.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/llvm/lib/MC/MCSection.cpp b/llvm/lib/MC/MCSection.cpp index 1d9fe2cafd6174..59fdfd76f444ae 100644 --- a/llvm/lib/MC/MCSection.cpp +++ b/llvm/lib/MC/MCSection.cpp @@ -102,12 +102,9 @@ void MCSection::flushPendingLabels() { while (!PendingLabels.empty()) { PendingLabel& Label = PendingLabels[0]; switchSubsection(Label.Subsection); - const MCSymbol *Atom = - CurFragList->Tail ? CurFragList->Tail->getAtom() : nullptr; MCFragment *F = new MCDataFragment(); addFragment(*F); F->setParent(this); - F->setAtom(Atom); flushPendingLabels(F, 0, Label.Subsection); } } From 597cde155a008364c83870b24306fbae93e80cf8 Mon Sep 17 00:00:00 2001 From: Angel Zhang Date: Thu, 13 Jun 2024 17:16:53 -0400 Subject: [PATCH 018/155] [mlir][spirv] Implement SPIR-V lowering for `vector.deinterleave` (#95313) 1. Added a conversion for `vector.deinterleave` to the `VectorToSPIRV` pass. 2. Added LIT tests for the new conversion. 
--------- Co-authored-by: Jakub Kuderski --- .../VectorToSPIRV/VectorToSPIRV.cpp | 66 ++++++++++++++++++- .../VectorToSPIRV/vector-to-spirv.mlir | 26 ++++++++ 2 files changed, 89 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Conversion/VectorToSPIRV/VectorToSPIRV.cpp b/mlir/lib/Conversion/VectorToSPIRV/VectorToSPIRV.cpp index 92168cfa361471..8baa31a235950b 100644 --- a/mlir/lib/Conversion/VectorToSPIRV/VectorToSPIRV.cpp +++ b/mlir/lib/Conversion/VectorToSPIRV/VectorToSPIRV.cpp @@ -618,6 +618,66 @@ struct VectorInterleaveOpConvert final } }; +struct VectorDeinterleaveOpConvert final + : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + + LogicalResult + matchAndRewrite(vector::DeinterleaveOp deinterleaveOp, OpAdaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + + // Check the result vector type. + VectorType oldResultType = deinterleaveOp.getResultVectorType(); + Type newResultType = getTypeConverter()->convertType(oldResultType); + if (!newResultType) + return rewriter.notifyMatchFailure(deinterleaveOp, + "unsupported result vector type"); + + Location loc = deinterleaveOp->getLoc(); + + // Deinterleave the indices. + Value sourceVector = adaptor.getSource(); + VectorType sourceType = deinterleaveOp.getSourceVectorType(); + int n = sourceType.getNumElements(); + + // Output vectors of size 1 are converted to scalars by the type converter. + // We cannot use `spirv::VectorShuffleOp` directly in this case, and need to + // use `spirv::CompositeExtractOp`. + if (n == 2) { + auto elem0 = rewriter.create( + loc, newResultType, sourceVector, rewriter.getI32ArrayAttr({0})); + + auto elem1 = rewriter.create( + loc, newResultType, sourceVector, rewriter.getI32ArrayAttr({1})); + + rewriter.replaceOp(deinterleaveOp, {elem0, elem1}); + return success(); + } + + // Indices for `shuffleEven` (result 0). 
+ auto seqEven = llvm::seq(n / 2); + auto indicesEven = + llvm::map_to_vector(seqEven, [](int i) { return i * 2; }); + + // Indices for `shuffleOdd` (result 1). + auto seqOdd = llvm::seq(n / 2); + auto indicesOdd = + llvm::map_to_vector(seqOdd, [](int i) { return i * 2 + 1; }); + + // Create two SPIR-V shuffles. + auto shuffleEven = rewriter.create( + loc, newResultType, sourceVector, sourceVector, + rewriter.getI32ArrayAttr(indicesEven)); + + auto shuffleOdd = rewriter.create( + loc, newResultType, sourceVector, sourceVector, + rewriter.getI32ArrayAttr(indicesOdd)); + + rewriter.replaceOp(deinterleaveOp, {shuffleEven, shuffleOdd}); + return success(); + } +}; + struct VectorLoadOpConverter final : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; @@ -862,9 +922,9 @@ void mlir::populateVectorToSPIRVPatterns(SPIRVTypeConverter &typeConverter, VectorReductionFloatMinMax, VectorReductionFloatMinMax, VectorShapeCast, VectorInsertStridedSliceOpConvert, VectorShuffleOpConvert, - VectorInterleaveOpConvert, VectorSplatPattern, VectorLoadOpConverter, - VectorStoreOpConverter>(typeConverter, patterns.getContext(), - PatternBenefit(1)); + VectorInterleaveOpConvert, VectorDeinterleaveOpConvert, + VectorSplatPattern, VectorLoadOpConverter, VectorStoreOpConverter>( + typeConverter, patterns.getContext(), PatternBenefit(1)); // Make sure that the more specialized dot product pattern has higher benefit // than the generic one that extracts all elements. 
diff --git a/mlir/test/Conversion/VectorToSPIRV/vector-to-spirv.mlir b/mlir/test/Conversion/VectorToSPIRV/vector-to-spirv.mlir index 2592d0fc04111d..6c6a9a1d0c6c5a 100644 --- a/mlir/test/Conversion/VectorToSPIRV/vector-to-spirv.mlir +++ b/mlir/test/Conversion/VectorToSPIRV/vector-to-spirv.mlir @@ -507,6 +507,32 @@ func.func @interleave_size1(%a: vector<1xf32>, %b: vector<1xf32>) -> vector<2xf3 // ----- +// CHECK-LABEL: func @deinterleave +// CHECK-SAME: (%[[ARG0:.+]]: vector<4xf32>) +// CHECK: %[[SHUFFLE0:.*]] = spirv.VectorShuffle [0 : i32, 2 : i32] %[[ARG0]], %[[ARG0]] : vector<4xf32>, vector<4xf32> -> vector<2xf32> +// CHECK: %[[SHUFFLE1:.*]] = spirv.VectorShuffle [1 : i32, 3 : i32] %[[ARG0]], %[[ARG0]] : vector<4xf32>, vector<4xf32> -> vector<2xf32> +// CHECK: return %[[SHUFFLE0]], %[[SHUFFLE1]] +func.func @deinterleave(%a: vector<4xf32>) -> (vector<2xf32>, vector<2xf32>) { + %0, %1 = vector.deinterleave %a : vector<4xf32> -> vector<2xf32> + return %0, %1 : vector<2xf32>, vector<2xf32> +} + +// ----- + +// CHECK-LABEL: func @deinterleave_scalar +// CHECK-SAME: (%[[ARG0:.+]]: vector<2xf32>) +// CHECK: %[[EXTRACT0:.*]] = spirv.CompositeExtract %[[ARG0]][0 : i32] : vector<2xf32> +// CHECK: %[[EXTRACT1:.*]] = spirv.CompositeExtract %[[ARG0]][1 : i32] : vector<2xf32> +// CHECK: %[[CAST0:.*]] = builtin.unrealized_conversion_cast %[[EXTRACT0]] : f32 to vector<1xf32> +// CHECK: %[[CAST1:.*]] = builtin.unrealized_conversion_cast %[[EXTRACT1]] : f32 to vector<1xf32> +// CHECK: return %[[CAST0]], %[[CAST1]] +func.func @deinterleave_scalar(%a: vector<2xf32>) -> (vector<1xf32>, vector<1xf32>) { + %0, %1 = vector.deinterleave %a: vector<2xf32> -> vector<1xf32> + return %0, %1 : vector<1xf32>, vector<1xf32> +} + +// ----- + // CHECK-LABEL: func @reduction_add // CHECK-SAME: (%[[V:.+]]: vector<4xi32>) // CHECK: %[[S0:.+]] = spirv.CompositeExtract %[[V]][0 : i32] : vector<4xi32> From 27588fe2057a3e6b69c1d6e4885a7a539b3123ff Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 
13 Jun 2024 14:37:15 -0700 Subject: [PATCH 019/155] [MC] Move MCFragment::Atom to MCSectionMachO::Atoms Mach-O's `.subsections_via_symbols` mechanism associates a fragment with an atom (a non-temporary defined symbol). The current approach (`MCFragment::Atom`) wastes space for other object file formats. After #95077, `MCFragment::LayoutOrder` is only used by `AttemptToFoldSymbolOffsetDifference`. While it could be removed, we might explore future uses for `LayoutOrder`. @aengelke suggests one use case: move `Atom` into MCSection. This works because Mach-O doesn't support `.subsection`, and `LayoutOrder`, as the index into the fragment list, is unchanged. This patch moves MCFragment::Atom to MCSectionMachO::Atoms. `getAtom` may be called at parse time before `Atoms` is initialized, so a bound checking is needed to keep the hack working. Pull Request: https://github.com/llvm/llvm-project/pull/95341 --- llvm/include/llvm/MC/MCFragment.h | 6 +----- llvm/include/llvm/MC/MCSectionMachO.h | 7 +++++++ llvm/lib/MC/MCFragment.cpp | 5 +++++ llvm/lib/MC/MCMachOStreamer.cpp | 4 +++- llvm/lib/MC/MCSectionMachO.cpp | 12 ++++++++++++ 5 files changed, 28 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/MC/MCFragment.h b/llvm/include/llvm/MC/MCFragment.h index 2f62bdb462f837..555730828b4625 100644 --- a/llvm/include/llvm/MC/MCFragment.h +++ b/llvm/include/llvm/MC/MCFragment.h @@ -60,9 +60,6 @@ class MCFragment { /// The data for the section this fragment is in. MCSection *Parent; - /// The atom this fragment is in, as represented by its defining symbol. - const MCSymbol *Atom = nullptr; - /// The offset of this fragment in its section. 
uint64_t Offset = 0; @@ -96,8 +93,7 @@ class MCFragment { MCSection *getParent() const { return Parent; } void setParent(MCSection *Value) { Parent = Value; } - const MCSymbol *getAtom() const { return Atom; } - void setAtom(const MCSymbol *Value) { Atom = Value; } + const MCSymbol *getAtom() const; unsigned getLayoutOrder() const { return LayoutOrder; } void setLayoutOrder(unsigned Value) { LayoutOrder = Value; } diff --git a/llvm/include/llvm/MC/MCSectionMachO.h b/llvm/include/llvm/MC/MCSectionMachO.h index fdf1773d400253..e268562786b2fb 100644 --- a/llvm/include/llvm/MC/MCSectionMachO.h +++ b/llvm/include/llvm/MC/MCSectionMachO.h @@ -32,6 +32,9 @@ class MCSectionMachO final : public MCSection { /// for example. unsigned Reserved2; + // The defining non-temporary symbol for each fragment. + SmallVector Atoms; + MCSectionMachO(StringRef Segment, StringRef Section, unsigned TAA, unsigned reserved2, SectionKind K, MCSymbol *Begin); friend class MCContext; @@ -74,6 +77,10 @@ class MCSectionMachO final : public MCSection { bool useCodeAlign() const override; bool isVirtualSection() const override; + void allocAtoms(); + const MCSymbol *getAtom(size_t I) const; + void setAtom(size_t I, const MCSymbol *Sym); + static bool classof(const MCSection *S) { return S->getVariant() == SV_MachO; } diff --git a/llvm/lib/MC/MCFragment.cpp b/llvm/lib/MC/MCFragment.cpp index ffd3a503e733b5..0d8d639b0f8aff 100644 --- a/llvm/lib/MC/MCFragment.cpp +++ b/llvm/lib/MC/MCFragment.cpp @@ -17,6 +17,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCSection.h" +#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/Casting.h" @@ -264,6 +265,10 @@ void MCFragment::destroy() { } } +const MCSymbol *MCFragment::getAtom() const { + return cast(Parent)->getAtom(LayoutOrder); +} + // Debugging methods namespace llvm { diff --git a/llvm/lib/MC/MCMachOStreamer.cpp b/llvm/lib/MC/MCMachOStreamer.cpp index 
10f9988b9d16a0..466aa633f00871 100644 --- a/llvm/lib/MC/MCMachOStreamer.cpp +++ b/llvm/lib/MC/MCMachOStreamer.cpp @@ -519,11 +519,13 @@ void MCMachOStreamer::finishImpl() { // Set the fragment atom associations by tracking the last seen atom defining // symbol. for (MCSection &Sec : getAssembler()) { + cast(Sec).allocAtoms(); const MCSymbol *CurrentAtom = nullptr; + size_t I = 0; for (MCFragment &Frag : Sec) { if (const MCSymbol *Symbol = DefiningSymbolMap.lookup(&Frag)) CurrentAtom = Symbol; - Frag.setAtom(CurrentAtom); + cast(Sec).setAtom(I++, CurrentAtom); } } diff --git a/llvm/lib/MC/MCSectionMachO.cpp b/llvm/lib/MC/MCSectionMachO.cpp index f7eedac3f2d1ad..53b76663084e8e 100644 --- a/llvm/lib/MC/MCSectionMachO.cpp +++ b/llvm/lib/MC/MCSectionMachO.cpp @@ -291,3 +291,15 @@ Error MCSectionMachO::ParseSectionSpecifier(StringRef Spec, // In. return Error::success(); } + +void MCSectionMachO::allocAtoms() { + auto *L = curFragList(); + if (L->Tail) + Atoms.resize(L->Tail->getLayoutOrder() + 1); +} + +const MCSymbol *MCSectionMachO::getAtom(size_t I) const { + return I < Atoms.size() ? Atoms[I] : nullptr; +} + +void MCSectionMachO::setAtom(size_t I, const MCSymbol *Sym) { Atoms[I] = Sym; } From 00ed887454f7d7522f3eac8549661e51f864a9a7 Mon Sep 17 00:00:00 2001 From: Aleksandr Korepanov Date: Thu, 13 Jun 2024 23:50:31 +0200 Subject: [PATCH 020/155] [LLDB][Windows] Fix watchpoints for Windows (#95446) Hello! Currently, watchpoints don't work on Windows (this can be reproduced with the existing tests). This patch fixes the related issues so that the tests and watchpoints start working. 
Here is the list of tests that are fixed by this patch (on Windows, checked in **release/18.x** branch): - commands/watchpoints/hello_watchpoint/TestMyFirstWatchpoint.py - commands/watchpoints/multiple_hits/TestMultipleHits.py - commands/watchpoints/multiple_threads/TestWatchpointMultipleThreads.py - commands/watchpoints/step_over_watchpoint/TestStepOverWatchpoint.py - commands/watchpoints/unaligned-watchpoint/TestUnalignedWatchpoint.py - commands/watchpoints/watchpoint_commands/TestWatchpointCommands.py - commands/watchpoints/watchpoint_commands/command/TestWatchpointCommandLLDB.py - commands/watchpoints/watchpoint_commands/command/TestWatchpointCommandPython.py - commands/watchpoints/watchpoint_commands/condition/TestWatchpointConditionCmd.py - commands/watchpoints/watchpoint_count/TestWatchpointCount.py - commands/watchpoints/watchpoint_disable/TestWatchpointDisable.py - commands/watchpoints/watchpoint_size/TestWatchpointSizes.py - python_api/watchpoint/TestSetWatchpoint.py - python_api/watchpoint/TestWatchpointIgnoreCount.py - python_api/watchpoint/TestWatchpointIter.py - python_api/watchpoint/condition/TestWatchpointConditionAPI.py - python_api/watchpoint/watchlocation/TestTargetWatchAddress.py --------- Co-authored-by: Jason Molenda --- .../source/Plugins/Process/Windows/Common/ProcessWindows.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lldb/source/Plugins/Process/Windows/Common/ProcessWindows.cpp b/lldb/source/Plugins/Process/Windows/Common/ProcessWindows.cpp index eb0834b1159f64..f383b3d40a4f3a 100644 --- a/lldb/source/Plugins/Process/Windows/Common/ProcessWindows.cpp +++ b/lldb/source/Plugins/Process/Windows/Common/ProcessWindows.cpp @@ -405,8 +405,7 @@ void ProcessWindows::RefreshStateAfterStop() { "{1:x} with watchpoint {2}", m_session_data->m_debugger->GetProcess().GetProcessId(), pc, id); - stop_info = StopInfo::CreateStopReasonWithWatchpointID( - *stop_thread, id, m_watchpoints[id].address); + stop_info = 
StopInfo::CreateStopReasonWithWatchpointID(*stop_thread, id); stop_thread->SetStopInfo(stop_info); return; @@ -857,7 +856,7 @@ Status ProcessWindows::EnableWatchpoint(WatchpointSP wp_sp, bool notify) { info.address = wp_sp->GetLoadAddress(); info.size = wp_sp->GetByteSize(); info.read = wp_sp->WatchpointRead(); - info.write = wp_sp->WatchpointWrite(); + info.write = wp_sp->WatchpointWrite() || wp_sp->WatchpointModify(); for (unsigned i = 0U; i < m_thread_list.GetSize(); i++) { Thread *thread = m_thread_list.GetThreadAtIndex(i).get(); From 8fa7cf000aa17ca14f576dc3f5669ba7b84a05d1 Mon Sep 17 00:00:00 2001 From: Florian Mayer Date: Thu, 13 Jun 2024 14:51:58 -0700 Subject: [PATCH 021/155] [HWASan] disable hwasan_symbolize_stack_uas on x86 --- .../test/hwasan/TestCases/hwasan_symbolize_stack_uas.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/compiler-rt/test/hwasan/TestCases/hwasan_symbolize_stack_uas.cpp b/compiler-rt/test/hwasan/TestCases/hwasan_symbolize_stack_uas.cpp index 500bc71d5ff06f..b96ec9e415c75f 100644 --- a/compiler-rt/test/hwasan/TestCases/hwasan_symbolize_stack_uas.cpp +++ b/compiler-rt/test/hwasan/TestCases/hwasan_symbolize_stack_uas.cpp @@ -1,7 +1,8 @@ // RUN: %clang_hwasan -Wl,--build-id -g %s -o %t // RUN: %env_hwasan_opts=symbolize=0 not %run %t 2>&1 | hwasan_symbolize --symbols $(dirname %t) --index | FileCheck %s -// REQUIRES: pointer-tagging +// TODO: find out why this fails on sanitizer-x86_64-linux-qemu bot +// REQUIRES: aarch64-target-arch #include #include From 3106a23155b905ea86100798d277278f5be47ebd Mon Sep 17 00:00:00 2001 From: PiJoules <6019989+PiJoules@users.noreply.github.com> Date: Thu, 13 Jun 2024 14:55:09 -0700 Subject: [PATCH 022/155] [libc][stdlib] Fix UB in freelist (#95330) Some of the freelist code uses type punning which is UB in C++, namely because we read from a union member that is not the active union member. 
--- libc/src/stdlib/CMakeLists.txt | 3 +- libc/src/stdlib/freelist.h | 53 ++++++++++++---------------------- 2 files changed, 20 insertions(+), 36 deletions(-) diff --git a/libc/src/stdlib/CMakeLists.txt b/libc/src/stdlib/CMakeLists.txt index 971b39bb900de2..6d2c5acca96057 100644 --- a/libc/src/stdlib/CMakeLists.txt +++ b/libc/src/stdlib/CMakeLists.txt @@ -398,8 +398,9 @@ else() freelist.h DEPENDS libc.src.__support.fixedvector - libc.src.__support.CPP.cstddef libc.src.__support.CPP.array + libc.src.__support.CPP.cstddef + libc.src.__support.CPP.new libc.src.__support.CPP.span ) add_header_library( diff --git a/libc/src/stdlib/freelist.h b/libc/src/stdlib/freelist.h index c01ed6eddb7d46..789bc164fb161b 100644 --- a/libc/src/stdlib/freelist.h +++ b/libc/src/stdlib/freelist.h @@ -11,6 +11,7 @@ #include "src/__support/CPP/array.h" #include "src/__support/CPP/cstddef.h" +#include "src/__support/CPP/new.h" #include "src/__support/CPP/span.h" #include "src/__support/fixedvector.h" @@ -92,19 +93,12 @@ bool FreeList::add_chunk(span chunk) { if (chunk.size() < sizeof(FreeListNode)) return false; - union { - FreeListNode *node; - cpp::byte *bytes; - } aliased; - - aliased.bytes = chunk.data(); - + // Add it to the correct list. size_t chunk_ptr = find_chunk_ptr_for_size(chunk.size(), false); - // Add it to the correct list. 
- aliased.node->size = chunk.size(); - aliased.node->next = chunks_[chunk_ptr]; - chunks_[chunk_ptr] = aliased.node; + FreeListNode *node = + ::new (chunk.data()) FreeListNode{chunks_[chunk_ptr], chunk.size()}; + chunks_[chunk_ptr] = node; return true; } @@ -123,17 +117,13 @@ span FreeList::find_chunk(size_t size) const { // Now iterate up the buckets, walking each list to find a good candidate for (size_t i = chunk_ptr; i < chunks_.size(); i++) { - union { - FreeListNode *node; - cpp::byte *data; - } aliased; - aliased.node = chunks_[static_cast(i)]; + FreeListNode *node = chunks_[static_cast(i)]; - while (aliased.node != nullptr) { - if (aliased.node->size >= size) - return span(aliased.data, aliased.node->size); + while (node != nullptr) { + if (node->size >= size) + return span(reinterpret_cast(node), node->size); - aliased.node = aliased.node->next; + node = node->next; } } @@ -146,34 +136,27 @@ template bool FreeList::remove_chunk(span chunk) { size_t chunk_ptr = find_chunk_ptr_for_size(chunk.size(), true); - // Walk that list, finding the chunk. - union { - FreeListNode *node; - cpp::byte *data; - } aliased, aliased_next; - // Check head first. if (chunks_[chunk_ptr] == nullptr) return false; - aliased.node = chunks_[chunk_ptr]; - if (aliased.data == chunk.data()) { - chunks_[chunk_ptr] = aliased.node->next; + FreeListNode *node = chunks_[chunk_ptr]; + if (reinterpret_cast(node) == chunk.data()) { + chunks_[chunk_ptr] = node->next; return true; } // No? Walk the nodes. 
- aliased.node = chunks_[chunk_ptr]; + node = chunks_[chunk_ptr]; - while (aliased.node->next != nullptr) { - aliased_next.node = aliased.node->next; - if (aliased_next.data == chunk.data()) { + while (node->next != nullptr) { + if (reinterpret_cast(node->next) == chunk.data()) { // Found it, remove this node out of the chain - aliased.node->next = aliased_next.node->next; + node->next = node->next->next; return true; } - aliased.node = aliased.node->next; + node = node->next; } return false; From 1ebda1173186c4c0ab776d1f140f903a49ace2a3 Mon Sep 17 00:00:00 2001 From: Maksim Panchenko Date: Thu, 13 Jun 2024 14:56:01 -0700 Subject: [PATCH 023/155] [BOLT] Fix duplicate diagnostic message (#95167) Print .altinstructions parsing stats only once. --- bolt/lib/Rewrite/LinuxKernelRewriter.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp index 6b3f5bce9f0f58..7e0141b003bd0e 100644 --- a/bolt/lib/Rewrite/LinuxKernelRewriter.cpp +++ b/bolt/lib/Rewrite/LinuxKernelRewriter.cpp @@ -1479,8 +1479,9 @@ Error LinuxKernelRewriter::tryReadAltInstructions(uint32_t AltInstFeatureSize, } } - BC.outs() << "BOLT-INFO: parsed " << EntryID - << " alternative instruction entries\n"; + if (!ParseOnly) + BC.outs() << "BOLT-INFO: parsed " << EntryID + << " alternative instruction entries\n"; return Error::success(); } From 01a429c432620cad6deac99d48cf6ef96c7f86e8 Mon Sep 17 00:00:00 2001 From: Arda Unal Date: Thu, 13 Jun 2024 15:09:47 -0700 Subject: [PATCH 024/155] [mlir][mesh] Fix wrong argument passed to targetShardingInUnsplitLastAxis (#95059) In unsplitLastAxisInResharding, wrong argument was passed when calling targetShardingInUnsplitLastAxis.There weren't any tests to uncover this. I added one in mesh-spmdization.mlir for Linalg and one in resharding-spmdization.mlir for Mesh dialects. 
--- .../Dialect/Mesh/Transforms/Spmdization.cpp | 2 +- .../test/Dialect/Linalg/mesh-spmdization.mlir | 35 +++++++++++++++++++ .../Dialect/Mesh/resharding-spmdization.mlir | 13 +++++++ 3 files changed, 49 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Dialect/Mesh/Transforms/Spmdization.cpp b/mlir/lib/Dialect/Mesh/Transforms/Spmdization.cpp index f3e4b15aec118e..4ecc897103af71 100644 --- a/mlir/lib/Dialect/Mesh/Transforms/Spmdization.cpp +++ b/mlir/lib/Dialect/Mesh/Transforms/Spmdization.cpp @@ -266,7 +266,7 @@ unsplitLastAxisInResharding(ImplicitLocOpBuilder &builder, builder.setInsertionPointAfterValue(sourceShard); MeshShardingAttr targetSharding = - targetShardingInUnsplitLastAxis(ctx, sourceSharding, splitMeshAxis); + targetShardingInUnsplitLastAxis(ctx, sourceSharding, splitTensorAxis); ShapedType allGatherResultShape = allGatherResultShapeInUnsplitLastAxis( sourceShard.getType(), mesh.getShape()[splitMeshAxis], splitTensorAxis); Value allGatherResult = builder.create( diff --git a/mlir/test/Dialect/Linalg/mesh-spmdization.mlir b/mlir/test/Dialect/Linalg/mesh-spmdization.mlir index bd56c801283b17..52f352cfedd8e2 100644 --- a/mlir/test/Dialect/Linalg/mesh-spmdization.mlir +++ b/mlir/test/Dialect/Linalg/mesh-spmdization.mlir @@ -162,3 +162,38 @@ func.func @matmul_1d_mesh_static_tensors_reduction_iterator_sharding_with_partia // CHECK: return %[[SHARDED_MATMUL]] : tensor<4x8xi8> return %res_shared2 : tensor<4x8xi8> } + +// ----- + +mesh.mesh @mesh_1d(shape = 4) + +// CHECK-LABEL: func @matmul_1d_mesh_static_tensors_parallel_iterator_unsplit_last_axis +func.func @matmul_1d_mesh_static_tensors_parallel_iterator_unsplit_last_axis( + // CHECK-SAME: %[[IN1:[A-Za-z0-9_]+]]: tensor<4x6xi8>, + %in1: tensor<4x6xi8>, + // CHECK-SAME: %[[IN2:[A-Za-z0-9_]+]]: tensor<6x8xi8>, + %in2: tensor<6x8xi8>, + // CHECK-SAME: %[[DPS_OUT:[A-Za-z0-9_]+]]: tensor<4x8xi8> + %dps_out: tensor<4x8xi8> + // CHECK-SAME: -> tensor<4x8xi8> { +) -> tensor<4x8xi8> { + %in1_replicated1 = 
mesh.shard %in1 to <@mesh_1d, [[], []]> : tensor<4x6xi8> + %in1_replicated2 = mesh.shard %in1_replicated1 to <@mesh_1d, [[], []]> annotate_for_users : tensor<4x6xi8> + // CHECK: %[[ALL_SLICE1:.*]] = mesh.all_slice %[[IN2]] on @mesh_1d mesh_axes = [0] slice_axis = 1 + %in2_replicated = mesh.shard %in2 to <@mesh_1d, [[], []]> : tensor<6x8xi8> + %in2_sharded = mesh.shard %in2_replicated to <@mesh_1d, [[], [0]]> annotate_for_users : tensor<6x8xi8> + // CHECK: %[[ALL_SLICE2:.*]] = mesh.all_slice %[[DPS_OUT]] on @mesh_1d mesh_axes = [0] slice_axis = 1 + %dps_out_replicated = mesh.shard %dps_out to <@mesh_1d, [[], []]> : tensor<4x8xi8> + %dps_out_sharded = mesh.shard %dps_out_replicated to <@mesh_1d, [[], [0]]> annotate_for_users: tensor<4x8xi8> + // CHECK: %[[MATMUL_RES:.*]] = linalg.matmul + // CHECK-SAME: ins(%[[IN1]], %[[ALL_SLICE1]] : tensor<4x6xi8>, tensor<6x2xi8>) + // CHECK-SAME: outs(%[[ALL_SLICE2]] : tensor<4x2xi8>) + // CHECK-SAME: -> tensor<4x2xi8> + %res = linalg.matmul ins(%in1_replicated2, %in2_sharded : tensor<4x6xi8>, tensor<6x8xi8>) + outs(%dps_out_sharded : tensor<4x8xi8>) -> tensor<4x8xi8> + // CHECK: %[[ALL_GATHER:.*]] = mesh.all_gather %[[MATMUL_RES]] on @mesh_1d mesh_axes = [0] gather_axis = 1 : tensor<4x2xi8> -> tensor<4x8xi8> + %res_sharded = mesh.shard %res to <@mesh_1d, [[], [0]]> : tensor<4x8xi8> + %res_replicated = mesh.shard %res_sharded to <@mesh_1d, [[], []]> annotate_for_users: tensor<4x8xi8> + // CHECK: return %[[ALL_GATHER]] : tensor<4x8xi8> + return %res_replicated : tensor<4x8xi8> +} diff --git a/mlir/test/Dialect/Mesh/resharding-spmdization.mlir b/mlir/test/Dialect/Mesh/resharding-spmdization.mlir index ba05306598bcc6..b3e305135ad8b7 100644 --- a/mlir/test/Dialect/Mesh/resharding-spmdization.mlir +++ b/mlir/test/Dialect/Mesh/resharding-spmdization.mlir @@ -96,6 +96,19 @@ func.func @unshard_static_axis( return %1 : tensor<10x14xf32> } +// CHECK-LABEL: func @unshard_static_last_axis +func.func @unshard_static_last_axis( + // CHECK-SAME: 
%[[ARG:.*]]: tensor<10x14xf32> + %arg0: tensor<10x14xf32> +) -> tensor<10x14xf32> { + // CHECK: %[[SOURCE_SHARD:.*]] = builtin.unrealized_conversion_cast %[[ARG]] : tensor<10x14xf32> to tensor<10x7xf32> + // CHECK: %[[ALL_GATHER:.*]] = mesh.all_gather %[[SOURCE_SHARD]] on @mesh_1d mesh_axes = [0] gather_axis = 1 : tensor<10x7xf32> -> tensor<10x14xf32> + %0 = mesh.shard %arg0 to <@mesh_1d, [[], [0]]> : tensor<10x14xf32> + %1 = mesh.shard %0 to <@mesh_1d, [[], []]> annotate_for_users : tensor<10x14xf32> + // CHECK: return %[[ALL_GATHER]] : tensor<10x14xf32> + return %1 : tensor<10x14xf32> +} + // CHECK-LABEL: func @unshard_dynamic_axis func.func @unshard_dynamic_axis( // CHECK-SAME: %[[ARG:.*]]: tensor From c54f5f67b80a41abfb1848aba480fee43b5d8245 Mon Sep 17 00:00:00 2001 From: Peter Klausler <35819229+klausler@users.noreply.github.com> Date: Thu, 13 Jun 2024 15:39:08 -0700 Subject: [PATCH 025/155] [flang] Address missed cases for REDUCE change, fixes shared lib build (#95481) My recent change that distinguishes pass-by-reference from pass-by-value reduction operation functions missed the "CppReduceComplex" cases, and also broke the shared library build-bots. Fix. 
--- flang/runtime/complex-reduction.c | 34 ++++++++--- flang/runtime/complex-reduction.h | 96 ++++++++++++++++++++++--------- 2 files changed, 94 insertions(+), 36 deletions(-) diff --git a/flang/runtime/complex-reduction.c b/flang/runtime/complex-reduction.c index 7654de8080a152..37ce3fa410016b 100644 --- a/flang/runtime/complex-reduction.c +++ b/flang/runtime/complex-reduction.c @@ -157,23 +157,39 @@ ADAPT_REDUCTION(DotProductComplex16, CFloat128ComplexType, CppComplexFloat128, #endif /* REDUCE() */ -#define RARGS REDUCE_ARGS(float_Complex_t) -ADAPT_REDUCTION(ReduceComplex4, float_Complex_t, CppComplexFloat, CMPLXF, RARGS, - REDUCE_ARG_NAMES) +#define RARGS REDUCE_ARGS(float_Complex_t, float_Complex_t_ref_op) +ADAPT_REDUCTION(ReduceComplex4Ref, float_Complex_t, CppComplexFloat, CMPLXF, + RARGS, REDUCE_ARG_NAMES) +#undef RARGS +#define RARGS REDUCE_ARGS(float_Complex_t, float_Complex_t_value_op) +ADAPT_REDUCTION(ReduceComplex4Value, float_Complex_t, CppComplexFloat, CMPLXF, + RARGS, REDUCE_ARG_NAMES) +#undef RARGS +#define RARGS REDUCE_ARGS(double_Complex_t, double_Complex_t_ref_op) +ADAPT_REDUCTION(ReduceComplex8Ref, double_Complex_t, CppComplexDouble, CMPLX, + RARGS, REDUCE_ARG_NAMES) #undef RARGS -#define RARGS REDUCE_ARGS(double_Complex_t) -ADAPT_REDUCTION(ReduceComplex8, double_Complex_t, CppComplexDouble, CMPLX, +#define RARGS REDUCE_ARGS(double_Complex_t, double_Complex_t_value_op) +ADAPT_REDUCTION(ReduceComplex8Value, double_Complex_t, CppComplexDouble, CMPLX, RARGS, REDUCE_ARG_NAMES) #undef RARGS #if LDBL_MANT_DIG == 64 -#define RARGS REDUCE_ARGS(long_double_Complex_t) -ADAPT_REDUCTION(ReduceComplex10, long_double_Complex_t, CppComplexLongDouble, +#define RARGS REDUCE_ARGS(long_double_Complex_t, long_double_Complex_t_ref_op) +ADAPT_REDUCTION(ReduceComplex10Ref, long_double_Complex_t, CppComplexLongDouble, CMPLXL, RARGS, REDUCE_ARG_NAMES) #undef RARGS +#define RARGS REDUCE_ARGS(long_double_Complex_t, long_double_Complex_t_value_op) 
+ADAPT_REDUCTION(ReduceComplex10Value, long_double_Complex_t, + CppComplexLongDouble, CMPLXL, RARGS, REDUCE_ARG_NAMES) +#undef RARGS #endif #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 -#define RARGS REDUCE_ARGS(CFloat128ComplexType) -ADAPT_REDUCTION(ReduceComplex16, CFloat128ComplexType, CppComplexFloat128, +#define RARGS REDUCE_ARGS(CFloat128ComplexType, CFloat128ComplexType_ref_op) +ADAPT_REDUCTION(ReduceComplex16Ref, CFloat128ComplexType, CppComplexFloat128, + CMPLXF128, RARGS, REDUCE_ARG_NAMES) +#undef RARGS +#define RARGS REDUCE_ARGS(CFloat128ComplexType, CFloat128ComplexType_value_op) +ADAPT_REDUCTION(ReduceComplex16Value, CFloat128ComplexType, CppComplexFloat128, CMPLXF128, RARGS, REDUCE_ARG_NAMES) #undef RARGS #endif diff --git a/flang/runtime/complex-reduction.h b/flang/runtime/complex-reduction.h index 98b20d1e592be8..b0f19622fdb1a5 100644 --- a/flang/runtime/complex-reduction.h +++ b/flang/runtime/complex-reduction.h @@ -69,49 +69,91 @@ long_double_Complex_t RTNAME(DotProductComplex10)(DOT_PRODUCT_ARGS); CFloat128ComplexType RTNAME(DotProductComplex16)(DOT_PRODUCT_ARGS); #endif -#define REDUCE_ARGS(T) \ - T##_op operation, const struct CppDescriptor *x, \ - const struct CppDescriptor *y, const char *source, int line, \ - int dim /*=0*/, const struct CppDescriptor *mask /*=NULL*/, \ - const T *identity /*=NULL*/, _Bool ordered /*=true*/ +#define REDUCE_ARGS(T, OP) \ + OP operation, const struct CppDescriptor *x, const struct CppDescriptor *y, \ + const char *source, int line, int dim /*=0*/, \ + const struct CppDescriptor *mask /*=NULL*/, const T *identity /*=NULL*/, \ + _Bool ordered /*=true*/ #define REDUCE_ARG_NAMES \ operation, x, y, source, line, dim, mask, identity, ordered -typedef float_Complex_t (*float_Complex_t_op)( +typedef float_Complex_t (*float_Complex_t_ref_op)( const float_Complex_t *, const float_Complex_t *); -typedef double_Complex_t (*double_Complex_t_op)( +typedef float_Complex_t (*float_Complex_t_value_op)( + float_Complex_t, 
float_Complex_t); +typedef double_Complex_t (*double_Complex_t_ref_op)( const double_Complex_t *, const double_Complex_t *); -typedef long_double_Complex_t (*long_double_Complex_t_op)( +typedef double_Complex_t (*double_Complex_t_value_op)( + double_Complex_t, double_Complex_t); +typedef long_double_Complex_t (*long_double_Complex_t_ref_op)( const long_double_Complex_t *, const long_double_Complex_t *); - -float_Complex_t RTNAME(ReduceComplex2)(REDUCE_ARGS(float_Complex_t)); -float_Complex_t RTNAME(ReduceComplex3)(REDUCE_ARGS(float_Complex_t)); -float_Complex_t RTNAME(ReduceComplex4)(REDUCE_ARGS(float_Complex_t)); -double_Complex_t RTNAME(ReduceComplex8)(REDUCE_ARGS(double_Complex_t)); -long_double_Complex_t RTNAME(ReduceComplex10)( - REDUCE_ARGS(long_double_Complex_t)); +typedef long_double_Complex_t (*long_double_Complex_t_value_op)( + long_double_Complex_t, long_double_Complex_t); + +float_Complex_t RTNAME(ReduceComplex2Ref)( + REDUCE_ARGS(float_Complex_t, float_Complex_t_ref_op)); +float_Complex_t RTNAME(ReduceComplex2Value)( + REDUCE_ARGS(float_Complex_t, float_Complex_t_value_op)); +float_Complex_t RTNAME(ReduceComplex3Ref)( + REDUCE_ARGS(float_Complex_t, float_Complex_t_ref_op)); +float_Complex_t RTNAME(ReduceComplex3Value)( + REDUCE_ARGS(float_Complex_t, float_Complex_t_value_op)); +float_Complex_t RTNAME(ReduceComplex4Ref)( + REDUCE_ARGS(float_Complex_t, float_Complex_t_ref_op)); +float_Complex_t RTNAME(ReduceComplex4Value)( + REDUCE_ARGS(float_Complex_t, float_Complex_t_value_op)); +double_Complex_t RTNAME(ReduceComplex8Ref)( + REDUCE_ARGS(double_Complex_t, double_Complex_t_ref_op)); +double_Complex_t RTNAME(ReduceComplex8Value)( + REDUCE_ARGS(double_Complex_t, double_Complex_t_value_op)); +long_double_Complex_t RTNAME(ReduceComplex10Ref)( + REDUCE_ARGS(long_double_Complex_t, long_double_Complex_t_ref_op)); +long_double_Complex_t RTNAME(ReduceComplex10Value)( + REDUCE_ARGS(long_double_Complex_t, long_double_Complex_t_value_op)); #if LDBL_MANT_DIG == 113 
|| HAS_FLOAT128 -typedef CFloat128ComplexType (*CFloat128ComplexType_op)( +typedef CFloat128ComplexType (*CFloat128ComplexType_ref_op)( const CFloat128ComplexType *, const CFloat128ComplexType *); -CFloat128ComplexType RTNAME(ReduceComplex16)(REDUCE_ARGS(CFloat128ComplexType)); +typedef CFloat128ComplexType (*CFloat128ComplexType_value_op)( + CFloat128ComplexType, CFloat128ComplexType); +CFloat128ComplexType RTNAME(ReduceComplex16Ref)( + REDUCE_ARGS(CFloat128ComplexType, CFloat128ComplexType_ref_op)); +CFloat128ComplexType RTNAME(ReduceComplex16Value)( + REDUCE_ARGS(CFloat128ComplexType, CFloat128ComplexType_value_op)); #endif -#define REDUCE_DIM_ARGS(T) \ - struct CppDescriptor *result, T##_op operation, \ - const struct CppDescriptor *x, const struct CppDescriptor *y, \ - const char *source, int line, int dim, \ +#define REDUCE_DIM_ARGS(T, OP) \ + struct CppDescriptor *result, OP operation, const struct CppDescriptor *x, \ + const struct CppDescriptor *y, const char *source, int line, int dim, \ const struct CppDescriptor *mask /*=NULL*/, const T *identity /*=NULL*/, \ _Bool ordered /*=true*/ #define REDUCE_DIM_ARG_NAMES \ result, operation, x, y, source, line, dim, mask, identity, ordered -void RTNAME(ReduceComplex2Dim)(REDUCE_DIM_ARGS(float_Complex_t)); -void RTNAME(ReduceComplex3Dim)(REDUCE_DIM_ARGS(float_Complex_t)); -void RTNAME(ReduceComplex4Dim)(REDUCE_DIM_ARGS(float_Complex_t)); -void RTNAME(ReduceComplex8Dim)(REDUCE_DIM_ARGS(double_Complex_t)); -void RTNAME(ReduceComplex10Dim)(REDUCE_DIM_ARGS(long_double_Complex_t)); +void RTNAME(ReduceComplex2DimRef)( + REDUCE_DIM_ARGS(float_Complex_t, float_Complex_t_ref_op)); +void RTNAME(ReduceComplex2DimValue)( + REDUCE_DIM_ARGS(float_Complex_t, float_Complex_t_value_op)); +void RTNAME(ReduceComplex3DimRef)( + REDUCE_DIM_ARGS(float_Complex_t, float_Complex_t_ref_op)); +void RTNAME(ReduceComplex3DimValue)( + REDUCE_DIM_ARGS(float_Complex_t, float_Complex_t_value_op)); +void RTNAME(ReduceComplex4DimRef)( + 
REDUCE_DIM_ARGS(float_Complex_t, float_Complex_t_ref_op)); +void RTNAME(ReduceComplex4DimValue)( + REDUCE_DIM_ARGS(float_Complex_t, float_Complex_t_value_op)); +void RTNAME(ReduceComplex8DimRef)( + REDUCE_DIM_ARGS(double_Complex_t, double_Complex_t_ref_op)); +void RTNAME(ReduceComplex8DimValue)( + REDUCE_DIM_ARGS(double_Complex_t, double_Complex_t_value_op)); +void RTNAME(ReduceComplex10DimRef)( + REDUCE_DIM_ARGS(long_double_Complex_t, long_double_Complex_t_ref_op)); +void RTNAME(ReduceComplex10DimValue)( + REDUCE_DIM_ARGS(long_double_Complex_t, long_double_Complex_t_value_op)); #if LDBL_MANT_DIG == 113 || HAS_FLOAT128 -void RTNAME(ReduceComplex16Dim)(REDUCE_DIM_ARGS(CFloat128ComplexType)); +void RTNAME(ReduceComplex16DimRef)( + REDUCE_DIM_ARGS(CFloat128ComplexType, CFloat128ComplexType_ref_op)); +void RTNAME(ReduceComplex16DimValue)( + REDUCE_DIM_ARGS(CFloat128ComplexType, CFloat128ComplexType_value_op)); #endif #endif // FORTRAN_RUNTIME_COMPLEX_REDUCTION_H_ From 22ea97d7bfd65abf68a68b13bf96ad69be23df54 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Thu, 13 Jun 2024 16:00:12 -0700 Subject: [PATCH 026/155] [lldb] Use packaging module instead of pkg_resources (#93712) Use the packaging [1] module for parsing version numbers, instead of pkg_resources which is distributed with setuptools. I recently switched over to using the latter, knowing it was deprecated (in favor of the packaging module) because it comes with Python out of the box. Newer versions of setuptools have removed `pkg_resources` so we have to use packaging. 
[1] https://pypi.org/project/packaging/ --- lldb/packages/Python/lldbsuite/test/decorators.py | 6 ++---- .../Python/lldbsuite/test/lldbplatformutil.py | 15 +++++++-------- .../tools/lldb-server/TestAppleSimulatorOSType.py | 4 ++-- lldb/test/Shell/helper/build.py | 10 ++++------ 4 files changed, 15 insertions(+), 20 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/decorators.py b/lldb/packages/Python/lldbsuite/test/decorators.py index 79cc0a2aeacbeb..ecc7b81035f11f 100644 --- a/lldb/packages/Python/lldbsuite/test/decorators.py +++ b/lldb/packages/Python/lldbsuite/test/decorators.py @@ -1,6 +1,6 @@ # System modules from functools import wraps -from pkg_resources import packaging +from packaging import version import ctypes import locale import os @@ -66,9 +66,7 @@ def fn_neq(x, y): "<=": fn_leq, } - return op_lookup[comparison]( - packaging.version.parse(actual), packaging.version.parse(expected) - ) + return op_lookup[comparison](version.parse(actual), version.parse(expected)) def _match_decorator_property(expected, actual): diff --git a/lldb/packages/Python/lldbsuite/test/lldbplatformutil.py b/lldb/packages/Python/lldbsuite/test/lldbplatformutil.py index 187d16aa1baa68..21f2095db90f8f 100644 --- a/lldb/packages/Python/lldbsuite/test/lldbplatformutil.py +++ b/lldb/packages/Python/lldbsuite/test/lldbplatformutil.py @@ -8,8 +8,7 @@ import subprocess import sys import os -from urllib.parse import urlparse -from pkg_resources import packaging +from packaging import version # LLDB modules import lldb @@ -309,17 +308,17 @@ def expectedCompilerVersion(compiler_version): # Assume the compiler version is at or near the top of trunk. 
return operator in [">", ">=", "!", "!=", "not"] - version = packaging.version.parse(version_str) - test_compiler_version = packaging.version.parse(test_compiler_version_str) + actual_version = version.parse(version_str) + test_compiler_version = version.parse(test_compiler_version_str) if operator == ">": - return test_compiler_version > version + return test_compiler_version > actual_version if operator == ">=" or operator == "=>": - return test_compiler_version >= version + return test_compiler_version >= actual_version if operator == "<": - return test_compiler_version < version + return test_compiler_version < actual_version if operator == "<=" or operator == "=<": - return test_compiler_version <= version + return test_compiler_version <= actual_version if operator == "!=" or operator == "!" or operator == "not": return version_str not in test_compiler_version_str return version_str in test_compiler_version_str diff --git a/lldb/test/API/tools/lldb-server/TestAppleSimulatorOSType.py b/lldb/test/API/tools/lldb-server/TestAppleSimulatorOSType.py index d770447f0771cd..754579a59c11e5 100644 --- a/lldb/test/API/tools/lldb-server/TestAppleSimulatorOSType.py +++ b/lldb/test/API/tools/lldb-server/TestAppleSimulatorOSType.py @@ -61,9 +61,9 @@ def check_simulator_ostype(self, sdk, platform_name, arch=platform.machine()): # Older versions of watchOS (<7.0) only support i386 if platform_name == "watchos": - from pkg_resources import packaging + from packaging import version - if packaging.version.parse(vers) < packaging.version.parse("7.0"): + if version.parse(vers) < version.parse("7.0"): arch = "i386" triple = "-".join([arch, "apple", platform_name + vers, "simulator"]) diff --git a/lldb/test/Shell/helper/build.py b/lldb/test/Shell/helper/build.py index d3c25bd944e983..b2b8146e88c75b 100755 --- a/lldb/test/Shell/helper/build.py +++ b/lldb/test/Shell/helper/build.py @@ -441,9 +441,9 @@ def _get_vctools_version(self): if not subdirs: return None - from distutils.version 
import StrictVersion + from packaging import version - subdirs.sort(key=lambda x: StrictVersion(x)) + subdirs.sort(key=lambda x: version.parse(x)) if self.verbose: full_path = os.path.join(vcinstalldir, subdirs[-1]) @@ -517,11 +517,9 @@ def _find_windows_sdk_in_registry_view(self, view): if not sdk_versions: return (None, None) - # Windows SDK version numbers consist of 4 dotted components, so we - # have to use LooseVersion, as StrictVersion supports 3 or fewer. - from pkg_resources import packaging + from packaging import version - sdk_versions.sort(key=lambda x: packaging.version.parse(x), reverse=True) + sdk_versions.sort(key=lambda x: version.parse(x), reverse=True) option_value_name = "OptionId.DesktopCPP" + self.msvc_arch_str for v in sdk_versions: try: From 602634d70cba2c51f6177740c4a98a377d10ab6a Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 13 Jun 2024 16:12:51 -0700 Subject: [PATCH 027/155] [Transforms] Migrate to a new version of getValueProfDataFromInst (#95477) Note that the version of getValueProfDataFromInst that returns bool has been "deprecated" since: commit 1e15371dd8843dfc52b9435afaa133997c1773d8 Author: Mingming Liu Date: Mon Apr 1 15:14:49 2024 -0700 --- llvm/lib/Transforms/Instrumentation/CGProfile.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/CGProfile.cpp b/llvm/lib/Transforms/Instrumentation/CGProfile.cpp index c322d0abd6bc1d..651239bee91f9e 100644 --- a/llvm/lib/Transforms/Instrumentation/CGProfile.cpp +++ b/llvm/lib/Transforms/Instrumentation/CGProfile.cpp @@ -78,14 +78,14 @@ static bool runCGProfilePass(Module &M, FunctionAnalysisManager &FAM, if (!CB) continue; if (CB->isIndirectCall()) { - InstrProfValueData ValueData[8]; uint32_t ActualNumValueData; uint64_t TotalC; - if (!getValueProfDataFromInst(*CB, IPVK_IndirectCallTarget, 8, - ValueData, ActualNumValueData, TotalC)) + auto ValueData = getValueProfDataFromInst( + *CB, IPVK_IndirectCallTarget, 8, 
ActualNumValueData, TotalC); + if (!ValueData) continue; - for (const auto &VD : - ArrayRef(ValueData, ActualNumValueData)) { + for (const auto &VD : ArrayRef( + ValueData.get(), ActualNumValueData)) { UpdateCounts(TTI, &F, Symtab.getFunction(VD.Value), VD.Count); } continue; From 0ca05e8221d20c7bb06f59dd4eb3d486228b962a Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Thu, 13 Jun 2024 17:47:04 -0700 Subject: [PATCH 028/155] Reland '[hwasan] Add fixed_shadow_base flag (#73980)' (#95445) This was reverted in https://github.com/llvm/llvm-project/pull/95435 because it broke Android static hwasan binaries. This reland limits the change to !SANITIZER_ANDROID. Original commit message: When set to non-zero, the HWASan runtime will map the shadow base at the specified constant address. This is particularly useful in conjunction with the existing compiler option 'hwasan-mapping-offset', which bakes a hardcoded constant address into the instrumentation. --------- Co-authored-by: Vitaly Buka --- compiler-rt/lib/hwasan/hwasan_flags.inc | 7 ++ compiler-rt/lib/hwasan/hwasan_linux.cpp | 9 ++- .../hwasan/TestCases/Linux/fixed-shadow.c | 76 +++++++++++++++++++ 3 files changed, 90 insertions(+), 2 deletions(-) create mode 100644 compiler-rt/test/hwasan/TestCases/Linux/fixed-shadow.c diff --git a/compiler-rt/lib/hwasan/hwasan_flags.inc b/compiler-rt/lib/hwasan/hwasan_flags.inc index 978fa46b705cb9..058a0457b9e7f6 100644 --- a/compiler-rt/lib/hwasan/hwasan_flags.inc +++ b/compiler-rt/lib/hwasan/hwasan_flags.inc @@ -84,3 +84,10 @@ HWASAN_FLAG(bool, malloc_bisect_dump, false, // are untagged before the call. HWASAN_FLAG(bool, fail_without_syscall_abi, true, "Exit if fail to request relaxed syscall ABI.") + +HWASAN_FLAG( + uptr, fixed_shadow_base, -1, + "If not -1, HWASan will attempt to allocate the shadow at this address, " + "instead of choosing one dynamically." 
+ "Tip: this can be combined with the compiler option, " + "-hwasan-mapping-offset, to optimize the instrumentation.") diff --git a/compiler-rt/lib/hwasan/hwasan_linux.cpp b/compiler-rt/lib/hwasan/hwasan_linux.cpp index c254670ee2d484..ae14906fd30428 100644 --- a/compiler-rt/lib/hwasan/hwasan_linux.cpp +++ b/compiler-rt/lib/hwasan/hwasan_linux.cpp @@ -106,8 +106,13 @@ static uptr GetHighMemEnd() { } static void InitializeShadowBaseAddress(uptr shadow_size_bytes) { - __hwasan_shadow_memory_dynamic_address = - FindDynamicShadowStart(shadow_size_bytes); + // FIXME: Android should init flags before shadow. + if (!SANITIZER_ANDROID && flags()->fixed_shadow_base != (uptr)-1) { + __hwasan_shadow_memory_dynamic_address = flags()->fixed_shadow_base; + } else { + __hwasan_shadow_memory_dynamic_address = + FindDynamicShadowStart(shadow_size_bytes); + } } static void MaybeDieIfNoTaggingAbi(const char *message) { diff --git a/compiler-rt/test/hwasan/TestCases/Linux/fixed-shadow.c b/compiler-rt/test/hwasan/TestCases/Linux/fixed-shadow.c new file mode 100644 index 00000000000000..4ff1d3e64c1d0e --- /dev/null +++ b/compiler-rt/test/hwasan/TestCases/Linux/fixed-shadow.c @@ -0,0 +1,76 @@ +// Test fixed shadow base functionality. +// +// Default compiler instrumentation works with any shadow base (dynamic or fixed). +// RUN: %clang_hwasan %s -o %t && %run %t +// RUN: %clang_hwasan %s -o %t && HWASAN_OPTIONS=fixed_shadow_base=263878495698944 %run %t +// RUN: %clang_hwasan %s -o %t && HWASAN_OPTIONS=fixed_shadow_base=4398046511104 %run %t +// +// If -hwasan-mapping-offset is set, then the fixed_shadow_base needs to match. 
+// RUN: %clang_hwasan %s -mllvm -hwasan-mapping-offset=263878495698944 -o %t && HWASAN_OPTIONS=fixed_shadow_base=263878495698944 %run %t +// RUN: %clang_hwasan %s -mllvm -hwasan-mapping-offset=4398046511104 -o %t && HWASAN_OPTIONS=fixed_shadow_base=4398046511104 %run %t +// RUN: %clang_hwasan %s -mllvm -hwasan-mapping-offset=263878495698944 -o %t && HWASAN_OPTIONS=fixed_shadow_base=4398046511104 not %run %t +// RUN: %clang_hwasan %s -mllvm -hwasan-mapping-offset=4398046511104 -o %t && HWASAN_OPTIONS=fixed_shadow_base=263878495698944 not %run %t +// +// Note: if fixed_shadow_base is not set, compiler-rt will dynamically choose a +// shadow base, which has a tiny but non-zero probability of matching the +// compiler instrumentation. To avoid test flake, we do not test this case. +// +// Assume 48-bit VMA +// REQUIRES: aarch64-target-arch +// +// REQUIRES: Clang +// +// UNSUPPORTED: android + +#include +#include +#include +#include +#include +#include + +int main() { + __hwasan_enable_allocator_tagging(); + + // We test that the compiler instrumentation is able to access shadow memory + // for many different addresses. If we only test a small number of addresses, + // it might work by chance even if the shadow base does not match between the + // compiler instrumentation and compiler-rt. + void **mmaps[256]; + // 48-bit VMA + for (int i = 0; i < 256; i++) { + unsigned long long addr = (i * (1ULL << 40)); + + void *p = mmap((void *)addr, 4096, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + // We don't use MAP_FIXED, to avoid overwriting critical memory. + // However, if we don't get allocated the requested address, it + // isn't a useful test. 
+ if ((unsigned long long)p != addr) { + munmap(p, 4096); + mmaps[i] = MAP_FAILED; + } else { + mmaps[i] = p; + } + } + + int failures = 0; + for (int i = 0; i < 256; i++) { + if (mmaps[i] == MAP_FAILED) { + failures++; + } else { + printf("%d %p\n", i, mmaps[i]); + munmap(mmaps[i], 4096); + } + } + + // We expect roughly 17 failures: + // - the page at address zero + // - 16 failures because the shadow memory takes up 1/16th of the address space + // We could also get unlucky e.g., if libraries or binaries are loaded into the + // exact addresses where we tried to map. + // To avoid test flake, we allow some margin of error. + printf("Failed: %d\n", failures); + assert(failures < 48); + return 0; +} From 41c50f0836439f4d53e7209cd9e9ce54341ed9a3 Mon Sep 17 00:00:00 2001 From: Florian Mayer Date: Thu, 13 Jun 2024 17:55:10 -0700 Subject: [PATCH 029/155] [HWASan] comment why hwasan_symbolize_stack_uas is arm64 only --- .../test/hwasan/TestCases/hwasan_symbolize_stack_uas.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/compiler-rt/test/hwasan/TestCases/hwasan_symbolize_stack_uas.cpp b/compiler-rt/test/hwasan/TestCases/hwasan_symbolize_stack_uas.cpp index b96ec9e415c75f..62caf1bd25fb03 100644 --- a/compiler-rt/test/hwasan/TestCases/hwasan_symbolize_stack_uas.cpp +++ b/compiler-rt/test/hwasan/TestCases/hwasan_symbolize_stack_uas.cpp @@ -1,7 +1,8 @@ // RUN: %clang_hwasan -Wl,--build-id -g %s -o %t // RUN: %env_hwasan_opts=symbolize=0 not %run %t 2>&1 | hwasan_symbolize --symbols $(dirname %t) --index | FileCheck %s -// TODO: find out why this fails on sanitizer-x86_64-linux-qemu bot +// This doesn't work on X86, because that uses instrument-with-calls which +// disables frame records. // REQUIRES: aarch64-target-arch #include From 785dc76c6667d0ea81c8b877dbff9c1e843918d6 Mon Sep 17 00:00:00 2001 From: Jon Roelofs Date: Thu, 13 Jun 2024 18:05:52 -0700 Subject: [PATCH 030/155] [llvm][SelectionDAG] Fix up chains in lowerInvokeable. 
rdar://113994760 (#94004) lowerInvokeable wasn't updating the returned chain after emitting the lowerEndEH, which caused SwiftErrorVal-handling code to re-set the DAG root, and thus accidentally skip the EH_LABEL node it was supposed to have addeed. After fixing that, a few places needed to be adjusted that assume the specific shape of the returned DAG. Fixes: #64826 Fixes: rdar://113994760 --- .../SelectionDAG/SelectionDAGBuilder.cpp | 3 + .../SelectionDAG/StatepointLowering.cpp | 3 + .../CodeGen/X86/issue64826-switferror-eh.ll | 77 +++++++++++ llvm/test/CodeGen/X86/statepoint-invoke.ll | 130 ++++++++++++++++++ .../CodeGen/X86/statepoint-spill-lowering.ll | 36 ++++- 5 files changed, 247 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/X86/issue64826-switferror-eh.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 58cf7de6fc20db..2b82d874293910 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -8606,6 +8606,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, if (EHPadBB) { DAG.setRoot(lowerEndEH(getRoot(), cast_or_null(CLI.CB), EHPadBB, BeginLabel)); + Result.second = getRoot(); } return Result; @@ -10448,6 +10449,8 @@ void SelectionDAGBuilder::visitPatchpoint(const CallBase &CB, std::pair Result = lowerInvokable(CLI, EHPadBB); SDNode *CallEnd = Result.second.getNode(); + if (CallEnd->getOpcode() == ISD::EH_LABEL) + CallEnd = CallEnd->getOperand(0).getNode(); if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg)) CallEnd = CallEnd->getOperand(0).getNode(); diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index d7f4d1c8937563..671ec84fb94163 100644 --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -340,6 +340,9 @@ 
static std::pair lowerCallFromStatepointLoweringInfo( // to grab the return value from the return register(s), or it can be a LOAD // to load a value returned by reference via a stack slot. + if (CallEnd->getOpcode() == ISD::EH_LABEL) + CallEnd = CallEnd->getOperand(0).getNode(); + bool HasDef = !SI.CLI.RetTy->isVoidTy(); if (HasDef) { if (CallEnd->getOpcode() == ISD::LOAD) diff --git a/llvm/test/CodeGen/X86/issue64826-switferror-eh.ll b/llvm/test/CodeGen/X86/issue64826-switferror-eh.ll new file mode 100644 index 00000000000000..83b6fb11bc9853 --- /dev/null +++ b/llvm/test/CodeGen/X86/issue64826-switferror-eh.ll @@ -0,0 +1,77 @@ +; RUN: llc %s -filetype=obj -o - | llvm-readobj -r - | FileCheck %s --check-prefix=RELOC +; RUN: llc %s -o - | FileCheck %s --check-prefix=ASM + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx" + +declare void @issue64826(i64, ptr, ptr swifterror) + +define swiftcc void @rdar113994760() personality ptr @__gcc_personality_v0 { +entry: + %swifterror = alloca swifterror ptr, align 8 + invoke swiftcc void @issue64826(i64 0, ptr null, ptr swifterror %swifterror) + to label %.noexc unwind label %tsan_cleanup + +.noexc: ; preds = %entry + ret void + +tsan_cleanup: ; preds = %entry + %cleanup.lpad = landingpad { ptr, i32 } + cleanup + resume { ptr, i32 } zeroinitializer +} + +declare i32 @__gcc_personality_v0(...) 
+ +; RELOC-LABEL: Relocations [ +; RELOC-NEXT: Section __text { +; RELOC-NEXT: 0x19 1 2 1 X86_64_RELOC_BRANCH 0 __Unwind_Resume +; RELOC-NEXT: 0xB 1 2 1 X86_64_RELOC_BRANCH 0 _issue64826 +; RELOC-NEXT: } +; RELOC-NEXT: Section __eh_frame { +; RELOC-NEXT: 0x13 1 2 1 X86_64_RELOC_GOT 0 ___gcc_personality_v0 +; RELOC-NEXT: } +; RELOC-NEXT: ] + +; ASM-LABEL: rdar113994760: +; ASM: ## %bb.0: ## %entry +; ASM-NEXT: pushq %r12 +; ASM-NEXT: .cfi_def_cfa_offset 16 +; ASM-NEXT: subq $16, %rsp +; ASM-NEXT: .cfi_def_cfa_offset 32 +; ASM-NEXT: .cfi_offset %r12, -16 +; ASM-NEXT: Ltmp0: +; ASM-NEXT: xorl %edi, %edi +; ASM-NEXT: xorl %esi, %esi +; ASM-NEXT: callq _issue64826 +; ASM-NEXT: Ltmp1: +; ASM-NEXT: ## %bb.1: ## %.noexc +; ASM-NEXT: addq $16, %rsp +; ASM-NEXT: popq %r12 +; ASM-NEXT: retq +; ASM-NEXT: LBB0_2: ## %tsan_cleanup +; ASM-NEXT: Ltmp2: +; ASM-NEXT: xorl %edi, %edi +; ASM-NEXT: callq __Unwind_Resume +; ASM-NEXT: Lfunc_end0: +; ASM-NEXT: .cfi_endproc +; ASM-NEXT: .section __TEXT,__gcc_except_tab +; ASM-NEXT: .p2align 2, 0x0 +; ASM-NEXT: GCC_except_table0: +; ASM-NEXT: Lexception0: +; ASM-NEXT: .byte 255 ## @LPStart Encoding = omit +; ASM-NEXT: .byte 255 ## @TType Encoding = omit +; ASM-NEXT: .byte 1 ## Call site Encoding = uleb128 +; ASM-NEXT: .uleb128 Lcst_end0-Lcst_begin0 +; ASM-NEXT: Lcst_begin0: +; ASM-NEXT: .uleb128 Ltmp0-Lfunc_begin0 ## >> Call Site 1 << +; ASM-NEXT: .uleb128 Ltmp1-Ltmp0 ## Call between Ltmp0 and Ltmp1 +; ASM-NEXT: .uleb128 Ltmp2-Lfunc_begin0 ## jumps to Ltmp2 +; ASM-NEXT: .byte 0 ## On action: cleanup +; ASM-NEXT: .uleb128 Ltmp1-Lfunc_begin0 ## >> Call Site 2 << +; ASM-NEXT: .uleb128 Lfunc_end0-Ltmp1 ## Call between Ltmp1 and Lfunc_end0 +; ASM-NEXT: .byte 0 ## has no landing pad +; ASM-NEXT: .byte 0 ## On action: cleanup +; ASM-NEXT: Lcst_end0: +; ASM-NEXT: .p2align 2, 0x0 +; ASM-NEXT: ## -- End function diff --git a/llvm/test/CodeGen/X86/statepoint-invoke.ll b/llvm/test/CodeGen/X86/statepoint-invoke.ll index d0b08c05af3fff..34dbc21a8a8cb8 
100644 --- a/llvm/test/CodeGen/X86/statepoint-invoke.ll +++ b/llvm/test/CodeGen/X86/statepoint-invoke.ll @@ -9,6 +9,28 @@ declare ptr addrspace(1) @"some_other_call"(ptr addrspace(1)) declare i32 @"personality_function"() define ptr addrspace(1) @test_basic(ptr addrspace(1) %obj, +; CHECK-LABEL: test_basic: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: subq $24, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: movq %rdi, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movq %rsi, {{[0-9]+}}(%rsp) +; CHECK-NEXT: .Ltmp0: +; CHECK-NEXT: callq some_call@PLT +; CHECK-NEXT: .Ltmp3: +; CHECK-NEXT: .Ltmp1: +; CHECK-NEXT: # %bb.1: # %invoke_safepoint_normal_dest +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax +; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB0_2: # %exceptional_return +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .Ltmp2: +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax +; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq ptr addrspace(1) %obj1) gc "statepoint-example" personality ptr @"personality_function" { entry: @@ -37,6 +59,26 @@ exceptional_return: ; CHECK: .p2align 4 define ptr addrspace(1) @test_result(ptr addrspace(1) %obj, +; CHECK-LABEL: test_result: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movq %rdi, (%rsp) +; CHECK-NEXT: .Ltmp4: +; CHECK-NEXT: callq some_other_call@PLT +; CHECK-NEXT: .Ltmp7: +; CHECK-NEXT: .Ltmp5: +; CHECK-NEXT: # %bb.1: # %normal_return +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB1_2: # %exceptional_return +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .Ltmp6: +; CHECK-NEXT: movq (%rsp), %rax +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq ptr addrspace(1) %obj1) gc "statepoint-example" personality ptr @personality_function { entry: @@ -60,6 +102,57 @@ exceptional_return: ; CHECK: .p2align 4 
define ptr addrspace(1) @test_same_val(i1 %cond, ptr addrspace(1) %val1, ptr addrspace(1) %val2, ptr addrspace(1) %val3) +; CHECK-LABEL: test_same_val: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: subq $16, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movl %edi, %ebx +; CHECK-NEXT: testb $1, %bl +; CHECK-NEXT: je .LBB2_3 +; CHECK-NEXT: # %bb.1: # %left +; CHECK-NEXT: movq %rsi, (%rsp) +; CHECK-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; CHECK-NEXT: .Ltmp11: +; CHECK-NEXT: movq %rsi, %rdi +; CHECK-NEXT: callq some_call@PLT +; CHECK-NEXT: .Ltmp14: +; CHECK-NEXT: .Ltmp12: +; CHECK-NEXT: # %bb.2: # %left.relocs +; CHECK-NEXT: movq (%rsp), %rax +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; CHECK-NEXT: jmp .LBB2_5 +; CHECK-NEXT: .LBB2_3: # %right +; CHECK-NEXT: movq %rdx, (%rsp) +; CHECK-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; CHECK-NEXT: .Ltmp8: +; CHECK-NEXT: movq %rsi, %rdi +; CHECK-NEXT: callq some_call@PLT +; CHECK-NEXT: .Ltmp15: +; CHECK-NEXT: .Ltmp9: +; CHECK-NEXT: # %bb.4: # %right.relocs +; CHECK-NEXT: movq (%rsp), %rcx +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax +; CHECK-NEXT: .LBB2_5: # %normal_return +; CHECK-NEXT: testb $1, %bl +; CHECK-NEXT: cmoveq %rcx, %rax +; CHECK-NEXT: .LBB2_6: # %normal_return +; CHECK-NEXT: addq $16, %rsp +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB2_9: # %exceptional_return.right +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .Ltmp10: +; CHECK-NEXT: movq (%rsp), %rax +; CHECK-NEXT: jmp .LBB2_6 +; CHECK-NEXT: .LBB2_7: # %exceptional_return.left +; CHECK-NEXT: .Ltmp13: +; CHECK-NEXT: movq (%rsp), %rax +; CHECK-NEXT: jmp .LBB2_6 gc "statepoint-example" personality ptr @"personality_function" { entry: br i1 %cond, label %left, label %right @@ -102,6 +195,23 @@ exceptional_return.right: } define ptr addrspace(1) @test_null_undef(ptr 
addrspace(1) %val1) +; CHECK-LABEL: test_null_undef: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .Ltmp16: +; CHECK-NEXT: callq some_call@PLT +; CHECK-NEXT: .Ltmp19: +; CHECK-NEXT: .Ltmp17: +; CHECK-NEXT: .LBB3_1: # %normal_return +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB3_2: # %exceptional_return +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .Ltmp18: +; CHECK-NEXT: jmp .LBB3_1 gc "statepoint-example" personality ptr @"personality_function" { entry: %sp1 = invoke token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void (ptr addrspace(1))) @some_call, i32 1, i32 0, ptr addrspace(1) %val1, i32 0, i32 0) ["gc-live"(ptr addrspace(1) null, ptr addrspace(1) undef)] @@ -121,6 +231,26 @@ exceptional_return: } define ptr addrspace(1) @test_alloca_and_const(ptr addrspace(1) %val1) +; CHECK-LABEL: test_alloca_and_const: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .Ltmp20: +; CHECK-NEXT: callq some_call@PLT +; CHECK-NEXT: .Ltmp23: +; CHECK-NEXT: .Ltmp21: +; CHECK-NEXT: # %bb.1: # %normal_return +; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rax +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB4_2: # %exceptional_return +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .Ltmp22: +; CHECK-NEXT: movl $15, %eax +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq gc "statepoint-example" personality ptr @"personality_function" { entry: %a = alloca i32 diff --git a/llvm/test/CodeGen/X86/statepoint-spill-lowering.ll b/llvm/test/CodeGen/X86/statepoint-spill-lowering.ll index 180925d3235252..2b921b11eb48b9 100644 --- a/llvm/test/CodeGen/X86/statepoint-spill-lowering.ll +++ b/llvm/test/CodeGen/X86/statepoint-spill-lowering.ll @@ -8,8 +8,29 @@ 
target triple = "x86_64-pc-linux-gnu" declare void @"some_call"(ptr addrspace(1)) declare i32 @"personality_function"() -; CHECK-LABEL: test_invoke: define ptr addrspace(1) @test_invoke(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e, ptr addrspace(1) %f, ptr addrspace(1) %g, ptr addrspace(1) %h, ptr addrspace(1) %j, ptr addrspace(1) %k, ptr addrspace(1) %l, ptr addrspace(1) %m, ptr addrspace(1) %n, ptr addrspace(1) %o, ptr addrspace(1) %p, ptr addrspace(1) %q, ptr addrspace(1) %r, ptr addrspace(1) %s, ptr addrspace(1) %t) +; CHECK-LABEL: test_invoke: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdi +; CHECK-NEXT: movq %rdi, (%rsp) +; CHECK-NEXT: .Ltmp0: +; CHECK-NEXT: callq some_call@PLT +; CHECK-NEXT: .Ltmp3: +; CHECK-NEXT: .Ltmp1: +; CHECK-NEXT: # %bb.1: # %invoke_safepoint_normal_dest +; CHECK-NEXT: movq (%rsp), %rax +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +; CHECK-NEXT: .LBB0_2: # %exceptional_return +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .Ltmp2: +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq gc "statepoint-example" personality ptr @"personality_function" { entry: %0 = invoke token (i64, i32, ptr, i32, i32, ...) 
@llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void (ptr addrspace(1))) @some_call, i32 1, i32 0, ptr addrspace(1) %t, i32 0, i32 0) ["gc-live" (ptr addrspace(1) %t)] @@ -25,8 +46,19 @@ exceptional_return: ret ptr addrspace(1) null } -; CHECK-LABEL: test_call: define ptr addrspace(1) @test_call(ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(1) %d, ptr addrspace(1) %e, ptr addrspace(1) %f, ptr addrspace(1) %g, ptr addrspace(1) %h, ptr addrspace(1) %j, ptr addrspace(1) %k, ptr addrspace(1) %l, ptr addrspace(1) %m, ptr addrspace(1) %n, ptr addrspace(1) %o, ptr addrspace(1) %p, ptr addrspace(1) %q, ptr addrspace(1) %r, ptr addrspace(1) %s, ptr addrspace(1) %t) +; CHECK-LABEL: test_call: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdi +; CHECK-NEXT: movq %rdi, (%rsp) +; CHECK-NEXT: callq some_call@PLT +; CHECK-NEXT: .Ltmp4: +; CHECK-NEXT: movq (%rsp), %rax +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq gc "statepoint-example" personality ptr @"personality_function" { entry: %0 = call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(void (ptr addrspace(1))) @some_call, i32 1, i32 0, ptr addrspace(1) %t, i32 0, i32 0) ["gc-live" (ptr addrspace(1) %t)] From 4f8c961924c2e15eed54e5207111ceedc1da6568 Mon Sep 17 00:00:00 2001 From: Jon Roelofs Date: Thu, 13 Jun 2024 18:06:20 -0700 Subject: [PATCH 031/155] [compiler-rt][AArch64][FMV] Use the hw.optional.arm.caps fast path (#95275) MacOS 15.0 and iOS 18.0 added a new sysctl to fetch a bitvector of all the hw.optional.arm.FEAT_*'s in one go. Using this has a perf advantage over doing multiple round-trips to the kernel and back, but since it's not present in older oses, we still need the slow fallback. 
--- .../builtins/cpu_model/aarch64/fmv/apple.inc | 77 ++++++++++++++++++- 1 file changed, 75 insertions(+), 2 deletions(-) diff --git a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc index 6fef109567b613..6f4b9ab37e36ba 100644 --- a/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc +++ b/compiler-rt/lib/builtins/cpu_model/aarch64/fmv/apple.inc @@ -2,6 +2,11 @@ #if TARGET_OS_OSX || TARGET_OS_IPHONE #include +#if __has_include() +#include +#define HAS_CPU_CAPABILITIES_PUBLIC_H 1 +#endif + static bool isKnownAndSupported(const char *name) { int32_t val = 0; size_t size = sizeof(val); @@ -10,6 +15,19 @@ static bool isKnownAndSupported(const char *name) { return val; } +static uint64_t deriveImplicitFeatures(uint64_t features) { + // FEAT_SSBS2 implies FEAT_SSBS + if ((1ULL << FEAT_SSBS2) & features) + features |= (1ULL << FEAT_SSBS); + + // FEAT_FP is always enabled + features |= (1ULL << FEAT_FP); + + features |= (1ULL << FEAT_INIT); + + return features; +} + void __init_cpu_features_resolver(void) { // On Darwin platforms, this may be called concurrently by multiple threads // because the resolvers that use it are called lazily at runtime (unlike on @@ -21,6 +39,62 @@ void __init_cpu_features_resolver(void) { uint64_t features = 0; +#ifdef HAS_CPU_CAPABILITIES_PUBLIC_H + uint8_t feats_bitvec[(CAP_BIT_NB + 7) / 8] = {0}; + size_t len = sizeof(feats_bitvec); + // When hw.optional.arm.feats is available (macOS 15.0+, iOS 18.0+), use the + // fast path to get all the feature bits, otherwise fall back to the slow + // ~20-something sysctls path. 
+ if (!sysctlbyname("hw.optional.arm.caps", &feats_bitvec, &len, 0, 0)) { + +#define CHECK_BIT(FROM, TO) \ + do { \ + if (feats_bitvec[FROM / 8] & (1u << ((FROM) & 7))) { \ + features |= (1ULL << TO); \ + } \ + } while (0) + + CHECK_BIT(CAP_BIT_FEAT_FlagM, FEAT_FLAGM); + CHECK_BIT(CAP_BIT_FEAT_FlagM2, FEAT_FLAGM2); + CHECK_BIT(CAP_BIT_FEAT_FHM, FEAT_FP16FML); + CHECK_BIT(CAP_BIT_FEAT_DotProd, FEAT_DOTPROD); + CHECK_BIT(CAP_BIT_FEAT_SHA3, FEAT_SHA3); + CHECK_BIT(CAP_BIT_FEAT_RDM, FEAT_RDM); + CHECK_BIT(CAP_BIT_FEAT_LSE, FEAT_LSE); + CHECK_BIT(CAP_BIT_FEAT_SHA256, FEAT_SHA2); + CHECK_BIT(CAP_BIT_FEAT_SHA1, FEAT_SHA1); + CHECK_BIT(CAP_BIT_FEAT_AES, FEAT_AES); + CHECK_BIT(CAP_BIT_FEAT_PMULL, FEAT_PMULL); + CHECK_BIT(CAP_BIT_FEAT_SPECRES, FEAT_PREDRES); + CHECK_BIT(CAP_BIT_FEAT_SB, FEAT_SB); + CHECK_BIT(CAP_BIT_FEAT_FRINTTS, FEAT_FRINTTS); + CHECK_BIT(CAP_BIT_FEAT_LRCPC, FEAT_RCPC); + CHECK_BIT(CAP_BIT_FEAT_LRCPC2, FEAT_RCPC2); + CHECK_BIT(CAP_BIT_FEAT_FCMA, FEAT_FCMA); + CHECK_BIT(CAP_BIT_FEAT_JSCVT, FEAT_JSCVT); + CHECK_BIT(CAP_BIT_FEAT_DPB, FEAT_DPB); + CHECK_BIT(CAP_BIT_FEAT_DPB2, FEAT_DPB2); + CHECK_BIT(CAP_BIT_FEAT_BF16, FEAT_BF16); + CHECK_BIT(CAP_BIT_FEAT_I8MM, FEAT_I8MM); + CHECK_BIT(CAP_BIT_FEAT_DIT, FEAT_DIT); + CHECK_BIT(CAP_BIT_FEAT_FP16, FEAT_FP16); + CHECK_BIT(CAP_BIT_FEAT_SSBS, FEAT_SSBS2); + CHECK_BIT(CAP_BIT_FEAT_BTI, FEAT_BTI); + CHECK_BIT(CAP_BIT_AdvSIMD, FEAT_SIMD); + CHECK_BIT(CAP_BIT_CRC32, FEAT_CRC); + CHECK_BIT(CAP_BIT_FEAT_SME, FEAT_SME); + CHECK_BIT(CAP_BIT_FEAT_SME2, FEAT_SME2); + CHECK_BIT(CAP_BIT_FEAT_SME_F64F64, FEAT_SME_F64); + CHECK_BIT(CAP_BIT_FEAT_SME_I16I64, FEAT_SME_I64); + + features = deriveImplicitFeatures(features); + + __atomic_store(&__aarch64_cpu_features.features, &features, + __ATOMIC_RELAXED); + return; + } +#endif + // https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics static const struct { const char *sysctl_name; @@ -32,7 +106,6 @@ void 
__init_cpu_features_resolver(void) { {"hw.optional.arm.FEAT_DotProd", FEAT_DOTPROD}, {"hw.optional.arm.FEAT_RDM", FEAT_RDM}, {"hw.optional.arm.FEAT_LSE", FEAT_LSE}, - {"hw.optional.floatingpoint", FEAT_FP}, {"hw.optional.AdvSIMD", FEAT_SIMD}, {"hw.optional.armv8_crc32", FEAT_CRC}, {"hw.optional.arm.FEAT_SHA1", FEAT_SHA1}, @@ -62,7 +135,7 @@ void __init_cpu_features_resolver(void) { if (isKnownAndSupported(feature_checks[I].sysctl_name)) features |= (1ULL << feature_checks[I].feature); - features |= (1ULL << FEAT_INIT); + features = deriveImplicitFeatures(features); __atomic_store(&__aarch64_cpu_features.features, &features, __ATOMIC_RELAXED); From 50ead2ee93bf1b59f35d7afda553a026b87855bb Mon Sep 17 00:00:00 2001 From: Shengchen Kan Date: Fri, 14 Jun 2024 08:53:01 +0800 Subject: [PATCH 032/155] [X86][AsmParser] Avoid duplicated code in MatchAndEmit(ATT/Intel)Instruction, NFC And VEXEncoding_* are renamed to OpcodePrefix_*. This is in preparation for the coming pseudo rex/rex2 prefixes support. 
--- .../lib/Target/X86/AsmParser/X86AsmParser.cpp | 200 ++++++++---------- 1 file changed, 83 insertions(+), 117 deletions(-) diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index 6623106109316b..ffd66aa800584b 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -88,15 +88,15 @@ class X86AsmParser : public MCTargetAsmParser { bool Code16GCC; unsigned ForcedDataPrefix = 0; - enum VEXEncoding { - VEXEncoding_Default, - VEXEncoding_VEX, - VEXEncoding_VEX2, - VEXEncoding_VEX3, - VEXEncoding_EVEX, + enum OpcodePrefix { + OpcodePrefix_Default, + OpcodePrefix_VEX, + OpcodePrefix_VEX2, + OpcodePrefix_VEX3, + OpcodePrefix_EVEX, }; - VEXEncoding ForcedVEXEncoding = VEXEncoding_Default; + OpcodePrefix ForcedOpcodePrefix = OpcodePrefix_Default; enum DispEncoding { DispEncoding_Default, @@ -1197,12 +1197,11 @@ class X86AsmParser : public MCTargetAsmParser { bool ErrorMissingFeature(SMLoc IDLoc, const FeatureBitset &MissingFeatures, bool MatchingInlineAsm); - bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, + bool matchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, MCInst &Inst, OperandVector &Operands, MCStreamer &Out, - uint64_t &ErrorInfo, - bool MatchingInlineAsm); + uint64_t &ErrorInfo, bool MatchingInlineAsm); - bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode, + bool matchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode, MCInst &Inst, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm); @@ -3186,7 +3185,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, InstInfo = &Info; // Reset the forced VEX encoding. 
- ForcedVEXEncoding = VEXEncoding_Default; + ForcedOpcodePrefix = OpcodePrefix_Default; ForcedDispEncoding = DispEncoding_Default; UseApxExtendedReg = false; ForcedNoFlag = false; @@ -3203,13 +3202,13 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, Parser.Lex(); // Eat curly. if (Prefix == "vex") - ForcedVEXEncoding = VEXEncoding_VEX; + ForcedOpcodePrefix = OpcodePrefix_VEX; else if (Prefix == "vex2") - ForcedVEXEncoding = VEXEncoding_VEX2; + ForcedOpcodePrefix = OpcodePrefix_VEX2; else if (Prefix == "vex3") - ForcedVEXEncoding = VEXEncoding_VEX3; + ForcedOpcodePrefix = OpcodePrefix_VEX3; else if (Prefix == "evex") - ForcedVEXEncoding = VEXEncoding_EVEX; + ForcedOpcodePrefix = OpcodePrefix_EVEX; else if (Prefix == "disp8") ForcedDispEncoding = DispEncoding_Disp8; else if (Prefix == "disp32") @@ -3235,15 +3234,15 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // Parse MASM style pseudo prefixes. if (isParsingMSInlineAsm()) { if (Name.equals_insensitive("vex")) - ForcedVEXEncoding = VEXEncoding_VEX; + ForcedOpcodePrefix = OpcodePrefix_VEX; else if (Name.equals_insensitive("vex2")) - ForcedVEXEncoding = VEXEncoding_VEX2; + ForcedOpcodePrefix = OpcodePrefix_VEX2; else if (Name.equals_insensitive("vex3")) - ForcedVEXEncoding = VEXEncoding_VEX3; + ForcedOpcodePrefix = OpcodePrefix_VEX3; else if (Name.equals_insensitive("evex")) - ForcedVEXEncoding = VEXEncoding_EVEX; + ForcedOpcodePrefix = OpcodePrefix_EVEX; - if (ForcedVEXEncoding != VEXEncoding_Default) { + if (ForcedOpcodePrefix != OpcodePrefix_Default) { if (getLexer().isNot(AsmToken::Identifier)) return Error(Parser.getTok().getLoc(), "Expected identifier"); // FIXME: The mnemonic won't match correctly if its not in lower case. 
@@ -3741,7 +3740,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, } bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) { - if (ForcedVEXEncoding != VEXEncoding_VEX3 && + if (ForcedOpcodePrefix != OpcodePrefix_VEX3 && X86::optimizeInstFromVEX3ToVEX2(Inst, MII.get(Inst.getOpcode()))) return true; @@ -4002,15 +4001,55 @@ void X86AsmParser::emitInstruction(MCInst &Inst, OperandVector &Operands, applyLVILoadHardeningMitigation(Inst, Out); } +static unsigned getPrefixes(OperandVector &Operands) { + unsigned Result = 0; + X86Operand &Prefix = static_cast(*Operands.back()); + if (Prefix.isPrefix()) { + Result = Prefix.getPrefix(); + Operands.pop_back(); + } + return Result; +} + bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) { - if (isParsingIntelSyntax()) - return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo, - MatchingInlineAsm); - return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo, - MatchingInlineAsm); + assert(!Operands.empty() && "Unexpect empty operand list!"); + assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!"); + + // First, handle aliases that expand to multiple instructions. + MatchFPUWaitAlias(IDLoc, static_cast(*Operands[0]), Operands, + Out, MatchingInlineAsm); + unsigned Prefixes = getPrefixes(Operands); + + MCInst Inst; + + // If VEX/EVEX encoding is forced, we need to pass the USE_* flag to the + // encoder and printer. + if (ForcedOpcodePrefix == OpcodePrefix_VEX) + Prefixes |= X86::IP_USE_VEX; + else if (ForcedOpcodePrefix == OpcodePrefix_VEX2) + Prefixes |= X86::IP_USE_VEX2; + else if (ForcedOpcodePrefix == OpcodePrefix_VEX3) + Prefixes |= X86::IP_USE_VEX3; + else if (ForcedOpcodePrefix == OpcodePrefix_EVEX) + Prefixes |= X86::IP_USE_EVEX; + + // Set encoded flags for {disp8} and {disp32}. 
+ if (ForcedDispEncoding == DispEncoding_Disp8) + Prefixes |= X86::IP_USE_DISP8; + else if (ForcedDispEncoding == DispEncoding_Disp32) + Prefixes |= X86::IP_USE_DISP32; + + if (Prefixes) + Inst.setFlags(Prefixes); + + return isParsingIntelSyntax() + ? matchAndEmitIntelInstruction(IDLoc, Opcode, Inst, Operands, Out, + ErrorInfo, MatchingInlineAsm) + : matchAndEmitATTInstruction(IDLoc, Opcode, Inst, Operands, Out, + ErrorInfo, MatchingInlineAsm); } void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, @@ -4053,16 +4092,6 @@ bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm); } -static unsigned getPrefixes(OperandVector &Operands) { - unsigned Result = 0; - X86Operand &Prefix = static_cast(*Operands.back()); - if (Prefix.isPrefix()) { - Result = Prefix.getPrefix(); - Operands.pop_back(); - } - return Result; -} - unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) { unsigned Opc = Inst.getOpcode(); const MCInstrDesc &MCID = MII.get(Opc); @@ -4072,63 +4101,31 @@ unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) { if (ForcedNoFlag == !(MCID.TSFlags & X86II::EVEX_NF) && !X86::isCFCMOVCC(Opc)) return Match_Unsupported; - if (ForcedVEXEncoding == VEXEncoding_EVEX && + if (ForcedOpcodePrefix == OpcodePrefix_EVEX && (MCID.TSFlags & X86II::EncodingMask) != X86II::EVEX) return Match_Unsupported; - if ((ForcedVEXEncoding == VEXEncoding_VEX || - ForcedVEXEncoding == VEXEncoding_VEX2 || - ForcedVEXEncoding == VEXEncoding_VEX3) && + if ((ForcedOpcodePrefix == OpcodePrefix_VEX || + ForcedOpcodePrefix == OpcodePrefix_VEX2 || + ForcedOpcodePrefix == OpcodePrefix_VEX3) && (MCID.TSFlags & X86II::EncodingMask) != X86II::VEX) return Match_Unsupported; if ((MCID.TSFlags & X86II::ExplicitOpPrefixMask) == X86II::ExplicitVEXPrefix && - (ForcedVEXEncoding != VEXEncoding_VEX && - ForcedVEXEncoding != VEXEncoding_VEX2 && - ForcedVEXEncoding != VEXEncoding_VEX3)) + (ForcedOpcodePrefix != 
OpcodePrefix_VEX && + ForcedOpcodePrefix != OpcodePrefix_VEX2 && + ForcedOpcodePrefix != OpcodePrefix_VEX3)) return Match_Unsupported; return Match_Success; } -bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, - OperandVector &Operands, - MCStreamer &Out, - uint64_t &ErrorInfo, - bool MatchingInlineAsm) { - assert(!Operands.empty() && "Unexpect empty operand list!"); - assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!"); - SMRange EmptyRange = std::nullopt; - - // First, handle aliases that expand to multiple instructions. - MatchFPUWaitAlias(IDLoc, static_cast(*Operands[0]), Operands, - Out, MatchingInlineAsm); +bool X86AsmParser::matchAndEmitATTInstruction( + SMLoc IDLoc, unsigned &Opcode, MCInst &Inst, OperandVector &Operands, + MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) { X86Operand &Op = static_cast(*Operands[0]); - unsigned Prefixes = getPrefixes(Operands); - - MCInst Inst; - - // If VEX/EVEX encoding is forced, we need to pass the USE_* flag to the - // encoder and printer. - if (ForcedVEXEncoding == VEXEncoding_VEX) - Prefixes |= X86::IP_USE_VEX; - else if (ForcedVEXEncoding == VEXEncoding_VEX2) - Prefixes |= X86::IP_USE_VEX2; - else if (ForcedVEXEncoding == VEXEncoding_VEX3) - Prefixes |= X86::IP_USE_VEX3; - else if (ForcedVEXEncoding == VEXEncoding_EVEX) - Prefixes |= X86::IP_USE_EVEX; - - // Set encoded flags for {disp8} and {disp32}. - if (ForcedDispEncoding == DispEncoding_Disp8) - Prefixes |= X86::IP_USE_DISP8; - else if (ForcedDispEncoding == DispEncoding_Disp32) - Prefixes |= X86::IP_USE_DISP32; - - if (Prefixes) - Inst.setFlags(Prefixes); - + SMRange EmptyRange = std::nullopt; // In 16-bit mode, if data32 is specified, temporarily switch to 32-bit mode // when matching the instruction. 
if (ForcedDataPrefix == X86::Is32Bit) @@ -4350,44 +4347,11 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode, return true; } -bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode, - OperandVector &Operands, - MCStreamer &Out, - uint64_t &ErrorInfo, - bool MatchingInlineAsm) { - assert(!Operands.empty() && "Unexpect empty operand list!"); - assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!"); - StringRef Mnemonic = (static_cast(*Operands[0])).getToken(); - SMRange EmptyRange = std::nullopt; - StringRef Base = (static_cast(*Operands[0])).getToken(); - unsigned Prefixes = getPrefixes(Operands); - - // First, handle aliases that expand to multiple instructions. - MatchFPUWaitAlias(IDLoc, static_cast(*Operands[0]), Operands, Out, MatchingInlineAsm); +bool X86AsmParser::matchAndEmitIntelInstruction( + SMLoc IDLoc, unsigned &Opcode, MCInst &Inst, OperandVector &Operands, + MCStreamer &Out, uint64_t &ErrorInfo, bool MatchingInlineAsm) { X86Operand &Op = static_cast(*Operands[0]); - - MCInst Inst; - - // If VEX/EVEX encoding is forced, we need to pass the USE_* flag to the - // encoder and printer. - if (ForcedVEXEncoding == VEXEncoding_VEX) - Prefixes |= X86::IP_USE_VEX; - else if (ForcedVEXEncoding == VEXEncoding_VEX2) - Prefixes |= X86::IP_USE_VEX2; - else if (ForcedVEXEncoding == VEXEncoding_VEX3) - Prefixes |= X86::IP_USE_VEX3; - else if (ForcedVEXEncoding == VEXEncoding_EVEX) - Prefixes |= X86::IP_USE_EVEX; - - // Set encoded flags for {disp8} and {disp32}. - if (ForcedDispEncoding == DispEncoding_Disp8) - Prefixes |= X86::IP_USE_DISP8; - else if (ForcedDispEncoding == DispEncoding_Disp32) - Prefixes |= X86::IP_USE_DISP32; - - if (Prefixes) - Inst.setFlags(Prefixes); - + SMRange EmptyRange = std::nullopt; // Find one unsized memory operand, if present. 
X86Operand *UnsizedMemOp = nullptr; for (const auto &Op : Operands) { @@ -4402,6 +4366,7 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode, // Allow some instructions to have implicitly pointer-sized operands. This is // compatible with gas. + StringRef Mnemonic = (static_cast(*Operands[0])).getToken(); if (UnsizedMemOp) { static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"}; for (const char *Instr : PtrSizedInstrs) { @@ -4415,6 +4380,7 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode, SmallVector Match; FeatureBitset ErrorInfoMissingFeatures; FeatureBitset MissingFeatures; + StringRef Base = (static_cast(*Operands[0])).getToken(); // If unsized push has immediate operand we should default the default pointer // size for the size. From cb021f5e46d259876ccf0aa24db48c10369f3d61 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 13 Jun 2024 17:35:23 -0700 Subject: [PATCH 033/155] [RISCV] Don't use SEW=16 .vf instructions to move scalar bf16 into a vector. The instructions are only defined to operator f16 data. If the scalar FPR register isn't properly nan-boxed, these instructions will create a fp16 nan not a bf16 nan in the vector register. 
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 19 +- .../RISCV/rvv/fixed-vectors-vpmerge.ll | 88 +++---- llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll | 231 ++++-------------- llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll | 138 ++++------- 4 files changed, 139 insertions(+), 337 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 042b13418a2000..b1b27f03252e0e 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1102,12 +1102,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::EXTRACT_SUBVECTOR}, VT, Custom); setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); - if (Subtarget.hasStdExtZfbfmin()) { - if (Subtarget.hasVInstructionsF16()) - setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); - else - setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); - } + if (Subtarget.hasStdExtZfbfmin()) + setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT, Custom); setOperationAction(ISD::SELECT_CC, VT, Expand); @@ -1340,12 +1336,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, ISD::EXTRACT_SUBVECTOR}, VT, Custom); setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom); - if (Subtarget.hasStdExtZfbfmin()) { - if (Subtarget.hasVInstructionsF16()) - setOperationAction(ISD::SPLAT_VECTOR, VT, Legal); - else - setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); - } + if (Subtarget.hasStdExtZfbfmin()) + setOperationAction(ISD::SPLAT_VECTOR, VT, Custom); setOperationAction( {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT, Custom); @@ -6738,8 +6730,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, Subtarget.hasStdExtZfhminOrZhinxmin() && !Subtarget.hasVInstructionsF16())) || (Op.getValueType().getScalarType() == MVT::bf16 && - (Subtarget.hasVInstructionsBF16() && Subtarget.hasStdExtZfbfmin() && - 
!Subtarget.hasVInstructionsF16()))) { + (Subtarget.hasVInstructionsBF16() && Subtarget.hasStdExtZfbfmin()))) { if (Op.getValueType() == MVT::nxv32f16 || Op.getValueType() == MVT::nxv32bf16) return SplitVectorOp(Op, DAG); diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll index d360c3f635b5c3..d6f158b0c00e2c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll @@ -1255,20 +1255,14 @@ define <2 x bfloat> @vpmerge_vv_v2bf16(<2 x bfloat> %va, <2 x bfloat> %vb, <2 x } define <2 x bfloat> @vpmerge_vf_v2bf16(bfloat %a, <2 x bfloat> %vb, <2 x i1> %m, i32 zeroext %evl) { -; ZVFH-LABEL: vpmerge_vf_v2bf16: -; ZVFH: # %bb.0: -; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, tu, ma -; ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; ZVFH-NEXT: ret -; -; ZVFHMIN-LABEL: vpmerge_vf_v2bf16: -; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t -; ZVFHMIN-NEXT: ret +; CHECK-LABEL: vpmerge_vf_v2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa5 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t +; CHECK-NEXT: ret %elt.head = insertelement <2 x bfloat> poison, bfloat %a, i32 0 %va = shufflevector <2 x bfloat> %elt.head, <2 x bfloat> poison, <2 x i32> zeroinitializer %v = call <2 x bfloat> @llvm.vp.merge.v2bf16(<2 x i1> %m, <2 x bfloat> %va, <2 x bfloat> %vb, i32 %evl) @@ -1289,20 +1283,14 @@ define <4 x bfloat> @vpmerge_vv_v4bf16(<4 x bfloat> %va, <4 x bfloat> %vb, <4 x } define <4 x bfloat> @vpmerge_vf_v4bf16(bfloat %a, <4 x bfloat> %vb, <4 x i1> %m, i32 zeroext %evl) { -; ZVFH-LABEL: vpmerge_vf_v4bf16: -; ZVFH: # %bb.0: -; ZVFH-NEXT: 
vsetvli zero, a0, e16, mf2, tu, ma -; ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; ZVFH-NEXT: ret -; -; ZVFHMIN-LABEL: vpmerge_vf_v4bf16: -; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; ZVFHMIN-NEXT: vfmv.v.f v9, fa5 -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t -; ZVFHMIN-NEXT: ret +; CHECK-LABEL: vpmerge_vf_v4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa5 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t +; CHECK-NEXT: ret %elt.head = insertelement <4 x bfloat> poison, bfloat %a, i32 0 %va = shufflevector <4 x bfloat> %elt.head, <4 x bfloat> poison, <4 x i32> zeroinitializer %v = call <4 x bfloat> @llvm.vp.merge.v4bf16(<4 x i1> %m, <4 x bfloat> %va, <4 x bfloat> %vb, i32 %evl) @@ -1323,20 +1311,14 @@ define <8 x bfloat> @vpmerge_vv_v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x } define <8 x bfloat> @vpmerge_vf_v8bf16(bfloat %a, <8 x bfloat> %vb, <8 x i1> %m, i32 zeroext %evl) { -; ZVFH-LABEL: vpmerge_vf_v8bf16: -; ZVFH: # %bb.0: -; ZVFH-NEXT: vsetvli zero, a0, e16, m1, tu, ma -; ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; ZVFH-NEXT: ret -; -; ZVFHMIN-LABEL: vpmerge_vf_v8bf16: -; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vfmv.v.f v10, fa5 -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t -; ZVFHMIN-NEXT: ret +; CHECK-LABEL: vpmerge_vf_v8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; CHECK-NEXT: vfmv.v.f v10, fa5 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t +; CHECK-NEXT: ret %elt.head = insertelement <8 x bfloat> poison, bfloat %a, i32 0 %va = shufflevector <8 x bfloat> 
%elt.head, <8 x bfloat> poison, <8 x i32> zeroinitializer %v = call <8 x bfloat> @llvm.vp.merge.v8bf16(<8 x i1> %m, <8 x bfloat> %va, <8 x bfloat> %vb, i32 %evl) @@ -1357,20 +1339,14 @@ define <16 x bfloat> @vpmerge_vv_v16bf16(<16 x bfloat> %va, <16 x bfloat> %vb, < } define <16 x bfloat> @vpmerge_vf_v16bf16(bfloat %a, <16 x bfloat> %vb, <16 x i1> %m, i32 zeroext %evl) { -; ZVFH-LABEL: vpmerge_vf_v16bf16: -; ZVFH: # %bb.0: -; ZVFH-NEXT: vsetvli zero, a0, e16, m2, tu, ma -; ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; ZVFH-NEXT: ret -; -; ZVFHMIN-LABEL: vpmerge_vf_v16bf16: -; ZVFHMIN: # %bb.0: -; ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vfmv.v.f v12, fa5 -; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t -; ZVFHMIN-NEXT: ret +; CHECK-LABEL: vpmerge_vf_v16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; CHECK-NEXT: vfmv.v.f v12, fa5 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t +; CHECK-NEXT: ret %elt.head = insertelement <16 x bfloat> poison, bfloat %a, i32 0 %va = shufflevector <16 x bfloat> %elt.head, <16 x bfloat> poison, <16 x i32> zeroinitializer %v = call <16 x bfloat> @llvm.vp.merge.v16bf16(<16 x i1> %m, <16 x bfloat> %va, <16 x bfloat> %vb, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll index e33c795169fab8..d617c973dec321 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll @@ -1562,35 +1562,14 @@ define @vpmerge_vv_nxv1bf16( %va, @vpmerge_vf_nxv1bf16(bfloat %a, %vb, %m, i32 zeroext %evl) { -; RV32ZVFH-LABEL: vpmerge_vf_nxv1bf16: -; RV32ZVFH: # %bb.0: -; RV32ZVFH-NEXT: vsetvli zero, a0, e16, mf4, tu, ma -; RV32ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; RV32ZVFH-NEXT: ret -; -; RV64ZVFH-LABEL: vpmerge_vf_nxv1bf16: -; 
RV64ZVFH: # %bb.0: -; RV64ZVFH-NEXT: vsetvli zero, a0, e16, mf4, tu, ma -; RV64ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; RV64ZVFH-NEXT: ret -; -; RV32ZVFHMIN-LABEL: vpmerge_vf_nxv1bf16: -; RV32ZVFHMIN: # %bb.0: -; RV32ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; RV32ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; RV32ZVFHMIN-NEXT: vfmv.v.f v9, fa5 -; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; RV32ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t -; RV32ZVFHMIN-NEXT: ret -; -; RV64ZVFHMIN-LABEL: vpmerge_vf_nxv1bf16: -; RV64ZVFHMIN: # %bb.0: -; RV64ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; RV64ZVFHMIN-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; RV64ZVFHMIN-NEXT: vfmv.v.f v9, fa5 -; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; RV64ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t -; RV64ZVFHMIN-NEXT: ret +; CHECK-LABEL: vpmerge_vf_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa5 +; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t +; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %a, i32 0 %va = shufflevector %elt.head, poison, zeroinitializer %v = call @llvm.vp.merge.nxv1bf16( %m, %va, %vb, i32 %evl) @@ -1611,35 +1590,14 @@ define @vpmerge_vv_nxv2bf16( %va, @vpmerge_vf_nxv2bf16(bfloat %a, %vb, %m, i32 zeroext %evl) { -; RV32ZVFH-LABEL: vpmerge_vf_nxv2bf16: -; RV32ZVFH: # %bb.0: -; RV32ZVFH-NEXT: vsetvli zero, a0, e16, mf2, tu, ma -; RV32ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; RV32ZVFH-NEXT: ret -; -; RV64ZVFH-LABEL: vpmerge_vf_nxv2bf16: -; RV64ZVFH: # %bb.0: -; RV64ZVFH-NEXT: vsetvli zero, a0, e16, mf2, tu, ma -; RV64ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; RV64ZVFH-NEXT: ret -; -; RV32ZVFHMIN-LABEL: vpmerge_vf_nxv2bf16: -; RV32ZVFHMIN: # %bb.0: -; RV32ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; RV32ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; RV32ZVFHMIN-NEXT: vfmv.v.f v9, fa5 -; RV32ZVFHMIN-NEXT: vsetvli zero, 
a0, e16, mf2, tu, mu -; RV32ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t -; RV32ZVFHMIN-NEXT: ret -; -; RV64ZVFHMIN-LABEL: vpmerge_vf_nxv2bf16: -; RV64ZVFHMIN: # %bb.0: -; RV64ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; RV64ZVFHMIN-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; RV64ZVFHMIN-NEXT: vfmv.v.f v9, fa5 -; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; RV64ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t -; RV64ZVFHMIN-NEXT: ret +; CHECK-LABEL: vpmerge_vf_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa5 +; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t +; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %a, i32 0 %va = shufflevector %elt.head, poison, zeroinitializer %v = call @llvm.vp.merge.nxv2bf16( %m, %va, %vb, i32 %evl) @@ -1660,35 +1618,14 @@ define @vpmerge_vv_nxv4bf16( %va, @vpmerge_vf_nxv4bf16(bfloat %a, %vb, %m, i32 zeroext %evl) { -; RV32ZVFH-LABEL: vpmerge_vf_nxv4bf16: -; RV32ZVFH: # %bb.0: -; RV32ZVFH-NEXT: vsetvli zero, a0, e16, m1, tu, ma -; RV32ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; RV32ZVFH-NEXT: ret -; -; RV64ZVFH-LABEL: vpmerge_vf_nxv4bf16: -; RV64ZVFH: # %bb.0: -; RV64ZVFH-NEXT: vsetvli zero, a0, e16, m1, tu, ma -; RV64ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; RV64ZVFH-NEXT: ret -; -; RV32ZVFHMIN-LABEL: vpmerge_vf_nxv4bf16: -; RV32ZVFHMIN: # %bb.0: -; RV32ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; RV32ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; RV32ZVFHMIN-NEXT: vfmv.v.f v10, fa5 -; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; RV32ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t -; RV32ZVFHMIN-NEXT: ret -; -; RV64ZVFHMIN-LABEL: vpmerge_vf_nxv4bf16: -; RV64ZVFHMIN: # %bb.0: -; RV64ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; RV64ZVFHMIN-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; RV64ZVFHMIN-NEXT: vfmv.v.f v10, fa5 -; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; RV64ZVFHMIN-NEXT: 
vfncvtbf16.f.f.w v8, v10, v0.t -; RV64ZVFHMIN-NEXT: ret +; CHECK-LABEL: vpmerge_vf_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; CHECK-NEXT: vfmv.v.f v10, fa5 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t +; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %a, i32 0 %va = shufflevector %elt.head, poison, zeroinitializer %v = call @llvm.vp.merge.nxv4bf16( %m, %va, %vb, i32 %evl) @@ -1709,35 +1646,14 @@ define @vpmerge_vv_nxv8bf16( %va, @vpmerge_vf_nxv8bf16(bfloat %a, %vb, %m, i32 zeroext %evl) { -; RV32ZVFH-LABEL: vpmerge_vf_nxv8bf16: -; RV32ZVFH: # %bb.0: -; RV32ZVFH-NEXT: vsetvli zero, a0, e16, m2, tu, ma -; RV32ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; RV32ZVFH-NEXT: ret -; -; RV64ZVFH-LABEL: vpmerge_vf_nxv8bf16: -; RV64ZVFH: # %bb.0: -; RV64ZVFH-NEXT: vsetvli zero, a0, e16, m2, tu, ma -; RV64ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; RV64ZVFH-NEXT: ret -; -; RV32ZVFHMIN-LABEL: vpmerge_vf_nxv8bf16: -; RV32ZVFHMIN: # %bb.0: -; RV32ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; RV32ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV32ZVFHMIN-NEXT: vfmv.v.f v12, fa5 -; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; RV32ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t -; RV32ZVFHMIN-NEXT: ret -; -; RV64ZVFHMIN-LABEL: vpmerge_vf_nxv8bf16: -; RV64ZVFHMIN: # %bb.0: -; RV64ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; RV64ZVFHMIN-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; RV64ZVFHMIN-NEXT: vfmv.v.f v12, fa5 -; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; RV64ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t -; RV64ZVFHMIN-NEXT: ret +; CHECK-LABEL: vpmerge_vf_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; CHECK-NEXT: vfmv.v.f v12, fa5 +; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t +; CHECK-NEXT: ret %elt.head = insertelement poison, 
bfloat %a, i32 0 %va = shufflevector %elt.head, poison, zeroinitializer %v = call @llvm.vp.merge.nxv8bf16( %m, %va, %vb, i32 %evl) @@ -1758,35 +1674,14 @@ define @vpmerge_vv_nxv16bf16( %va, } define @vpmerge_vf_nxv16bf16(bfloat %a, %vb, %m, i32 zeroext %evl) { -; RV32ZVFH-LABEL: vpmerge_vf_nxv16bf16: -; RV32ZVFH: # %bb.0: -; RV32ZVFH-NEXT: vsetvli zero, a0, e16, m4, tu, ma -; RV32ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; RV32ZVFH-NEXT: ret -; -; RV64ZVFH-LABEL: vpmerge_vf_nxv16bf16: -; RV64ZVFH: # %bb.0: -; RV64ZVFH-NEXT: vsetvli zero, a0, e16, m4, tu, ma -; RV64ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; RV64ZVFH-NEXT: ret -; -; RV32ZVFHMIN-LABEL: vpmerge_vf_nxv16bf16: -; RV32ZVFHMIN: # %bb.0: -; RV32ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; RV32ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV32ZVFHMIN-NEXT: vfmv.v.f v16, fa5 -; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; RV32ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t -; RV32ZVFHMIN-NEXT: ret -; -; RV64ZVFHMIN-LABEL: vpmerge_vf_nxv16bf16: -; RV64ZVFHMIN: # %bb.0: -; RV64ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; RV64ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV64ZVFHMIN-NEXT: vfmv.v.f v16, fa5 -; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; RV64ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t -; RV64ZVFHMIN-NEXT: ret +; CHECK-LABEL: vpmerge_vf_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmv.v.f v16, fa5 +; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t +; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %a, i32 0 %va = shufflevector %elt.head, poison, zeroinitializer %v = call @llvm.vp.merge.nxv16bf16( %m, %va, %vb, i32 %evl) @@ -1807,41 +1702,17 @@ define @vpmerge_vv_nxv32bf16( %va, } define @vpmerge_vf_nxv32bf16(bfloat %a, %vb, %m, i32 zeroext %evl) { -; RV32ZVFH-LABEL: vpmerge_vf_nxv32bf16: -; RV32ZVFH: # %bb.0: -; RV32ZVFH-NEXT: vsetvli zero, a0, e16, 
m8, tu, ma -; RV32ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; RV32ZVFH-NEXT: ret -; -; RV64ZVFH-LABEL: vpmerge_vf_nxv32bf16: -; RV64ZVFH: # %bb.0: -; RV64ZVFH-NEXT: vsetvli zero, a0, e16, m8, tu, ma -; RV64ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; RV64ZVFH-NEXT: ret -; -; RV32ZVFHMIN-LABEL: vpmerge_vf_nxv32bf16: -; RV32ZVFHMIN: # %bb.0: -; RV32ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; RV32ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV32ZVFHMIN-NEXT: vfmv.v.f v24, fa5 -; RV32ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; RV32ZVFHMIN-NEXT: vfncvtbf16.f.f.w v16, v24 -; RV32ZVFHMIN-NEXT: vmv.v.v v20, v16 -; RV32ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, tu, ma -; RV32ZVFHMIN-NEXT: vmerge.vvm v8, v8, v16, v0 -; RV32ZVFHMIN-NEXT: ret -; -; RV64ZVFHMIN-LABEL: vpmerge_vf_nxv32bf16: -; RV64ZVFHMIN: # %bb.0: -; RV64ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; RV64ZVFHMIN-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV64ZVFHMIN-NEXT: vfmv.v.f v24, fa5 -; RV64ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; RV64ZVFHMIN-NEXT: vfncvtbf16.f.f.w v16, v24 -; RV64ZVFHMIN-NEXT: vmv.v.v v20, v16 -; RV64ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, tu, ma -; RV64ZVFHMIN-NEXT: vmerge.vvm v8, v8, v16, v0 -; RV64ZVFHMIN-NEXT: ret +; CHECK-LABEL: vpmerge_vf_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmv.v.f v24, fa5 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v16, v24 +; CHECK-NEXT: vmv.v.v v20, v16 +; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: ret %elt.head = insertelement poison, bfloat %a, i32 0 %va = shufflevector %elt.head, poison, zeroinitializer %v = call @llvm.vp.merge.nxv32bf16( %m, %va, %vb, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll index ee7a94cf5e4971..ec4b9721824c7f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll @@ -524,20 +524,14 @@ define @vfmerge_vv_nxv1bf16( %va, @vfmerge_fv_nxv1bf16( %va, bfloat %b, %cond) { -; CHECK-ZVFH-LABEL: vfmerge_fv_nxv1bf16: -; CHECK-ZVFH: # %bb.0: -; CHECK-ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; CHECK-ZVFH-NEXT: ret -; -; CHECK-ZVFHMIN-LABEL: vfmerge_fv_nxv1bf16: -; CHECK-ZVFHMIN: # %bb.0: -; CHECK-ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-ZVFHMIN-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; CHECK-ZVFHMIN-NEXT: vfmv.v.f v9, fa5 -; CHECK-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t -; CHECK-ZVFHMIN-NEXT: ret +; CHECK-LABEL: vfmerge_fv_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t +; CHECK-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = select %cond, %splat, %va @@ -555,20 +549,14 @@ define @vfmerge_vv_nxv2bf16( %va, @vfmerge_fv_nxv2bf16( %va, bfloat %b, %cond) { -; CHECK-ZVFH-LABEL: vfmerge_fv_nxv2bf16: -; CHECK-ZVFH: # %bb.0: -; CHECK-ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; CHECK-ZVFH-NEXT: ret -; -; CHECK-ZVFHMIN-LABEL: vfmerge_fv_nxv2bf16: -; CHECK-ZVFHMIN: # %bb.0: -; CHECK-ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-ZVFHMIN-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-ZVFHMIN-NEXT: vfmv.v.f v9, fa5 -; CHECK-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t -; CHECK-ZVFHMIN-NEXT: ret +; CHECK-LABEL: vfmerge_fv_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma +; CHECK-NEXT: vfmv.v.f v9, fa5 +; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu +; 
CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t +; CHECK-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = select %cond, %splat, %va @@ -586,20 +574,14 @@ define @vfmerge_vv_nxv4bf16( %va, @vfmerge_fv_nxv4bf16( %va, bfloat %b, %cond) { -; CHECK-ZVFH-LABEL: vfmerge_fv_nxv4bf16: -; CHECK-ZVFH: # %bb.0: -; CHECK-ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; CHECK-ZVFH-NEXT: ret -; -; CHECK-ZVFHMIN-LABEL: vfmerge_fv_nxv4bf16: -; CHECK-ZVFHMIN: # %bb.0: -; CHECK-ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-ZVFHMIN-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-ZVFHMIN-NEXT: vfmv.v.f v10, fa5 -; CHECK-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t -; CHECK-ZVFHMIN-NEXT: ret +; CHECK-LABEL: vfmerge_fv_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vfmv.v.f v10, fa5 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t +; CHECK-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = select %cond, %splat, %va @@ -617,20 +599,14 @@ define @vfmerge_vv_nxv8bf16( %va, @vfmerge_fv_nxv8bf16( %va, bfloat %b, %cond) { -; CHECK-ZVFH-LABEL: vfmerge_fv_nxv8bf16: -; CHECK-ZVFH: # %bb.0: -; CHECK-ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; CHECK-ZVFH-NEXT: ret -; -; CHECK-ZVFHMIN-LABEL: vfmerge_fv_nxv8bf16: -; CHECK-ZVFHMIN: # %bb.0: -; CHECK-ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-ZVFHMIN-NEXT: vfmv.v.f v12, fa5 -; CHECK-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t -; CHECK-ZVFHMIN-NEXT: ret +; CHECK-LABEL: vfmerge_fv_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: 
fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vfmv.v.f v12, fa5 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t +; CHECK-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = select %cond, %splat, %va @@ -675,20 +651,14 @@ define @vfmerge_vv_nxv16bf16( %va, } define @vfmerge_fv_nxv16bf16( %va, bfloat %b, %cond) { -; CHECK-ZVFH-LABEL: vfmerge_fv_nxv16bf16: -; CHECK-ZVFH: # %bb.0: -; CHECK-ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; CHECK-ZVFH-NEXT: ret -; -; CHECK-ZVFHMIN-LABEL: vfmerge_fv_nxv16bf16: -; CHECK-ZVFHMIN: # %bb.0: -; CHECK-ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-ZVFHMIN-NEXT: vfmv.v.f v16, fa5 -; CHECK-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t -; CHECK-ZVFHMIN-NEXT: ret +; CHECK-LABEL: vfmerge_fv_nxv16bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmv.v.f v16, fa5 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t +; CHECK-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = select %cond, %splat, %va @@ -706,23 +676,17 @@ define @vfmerge_vv_nxv32bf16( %va, } define @vfmerge_fv_nxv32bf16( %va, bfloat %b, %cond) { -; CHECK-ZVFH-LABEL: vfmerge_fv_nxv32bf16: -; CHECK-ZVFH: # %bb.0: -; CHECK-ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-ZVFH-NEXT: vfmerge.vfm v8, v8, fa0, v0 -; CHECK-ZVFH-NEXT: ret -; -; CHECK-ZVFHMIN-LABEL: vfmerge_fv_nxv32bf16: -; CHECK-ZVFHMIN: # %bb.0: -; CHECK-ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-ZVFHMIN-NEXT: vfmv.v.f v16, fa5 -; CHECK-ZVFHMIN-NEXT: vsetvli 
zero, zero, e16, m4, ta, ma -; CHECK-ZVFHMIN-NEXT: vfncvtbf16.f.f.w v24, v16 -; CHECK-ZVFHMIN-NEXT: vmv.v.v v28, v24 -; CHECK-ZVFHMIN-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-ZVFHMIN-NEXT: vmerge.vvm v8, v8, v24, v0 -; CHECK-ZVFHMIN-NEXT: ret +; CHECK-LABEL: vfmerge_fv_nxv32bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vfmv.v.f v16, fa5 +; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma +; CHECK-NEXT: vfncvtbf16.f.f.w v24, v16 +; CHECK-NEXT: vmv.v.v v28, v24 +; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 +; CHECK-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = select %cond, %splat, %va From 836ca5bbf7d6366df7c35ec9d1f235b1ebc9744e Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 13 Jun 2024 18:21:09 -0700 Subject: [PATCH 034/155] [Transforms] Migrate to a new version of getValueProfDataFromInst (#95485) Note that the version of getValueProfDataFromInst that returns bool has been "deprecated" since: commit 1e15371dd8843dfc52b9435afaa133997c1773d8 Author: Mingming Liu Date: Mon Apr 1 15:14:49 2024 -0700 --- llvm/lib/Transforms/IPO/SampleProfile.cpp | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 7e6a8817b7a67a..61078c4194b811 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -790,14 +790,12 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const { static bool doesHistoryAllowICP(const Instruction &Inst, StringRef Candidate) { uint32_t NumVals = 0; uint64_t TotalCount = 0; - std::unique_ptr ValueData = - std::make_unique(MaxNumPromotions); - bool Valid = + auto ValueData = getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions, - ValueData.get(), 
NumVals, TotalCount, true); + NumVals, TotalCount, true); // No valid value profile so no promoted targets have been recorded // before. Ok to do ICP. - if (!Valid) + if (!ValueData) return true; unsigned NumPromoted = 0; @@ -837,11 +835,8 @@ updateIDTMetaData(Instruction &Inst, uint32_t NumVals = 0; // OldSum is the existing total count in the value profile data. uint64_t OldSum = 0; - std::unique_ptr ValueData = - std::make_unique(MaxNumPromotions); - bool Valid = - getValueProfDataFromInst(Inst, IPVK_IndirectCallTarget, MaxNumPromotions, - ValueData.get(), NumVals, OldSum, true); + auto ValueData = getValueProfDataFromInst( + Inst, IPVK_IndirectCallTarget, MaxNumPromotions, NumVals, OldSum, true); DenseMap ValueCountMap; if (Sum == 0) { @@ -850,7 +845,7 @@ updateIDTMetaData(Instruction &Inst, "If sum is 0, assume only one element in CallTargets " "with count being NOMORE_ICP_MAGICNUM"); // Initialize ValueCountMap with existing value profile data. - if (Valid) { + if (ValueData) { for (uint32_t I = 0; I < NumVals; I++) ValueCountMap[ValueData[I].Value] = ValueData[I].Count; } @@ -866,7 +861,7 @@ updateIDTMetaData(Instruction &Inst, } else { // Initialize ValueCountMap with existing NOMORE_ICP_MAGICNUM // counts in the value profile. - if (Valid) { + if (ValueData) { for (uint32_t I = 0; I < NumVals; I++) { if (ValueData[I].Count == NOMORE_ICP_MAGICNUM) ValueCountMap[ValueData[I].Value] = ValueData[I].Count; From 75882ed4c7126b33b1dabb08775af5ee0b2c6e12 Mon Sep 17 00:00:00 2001 From: William Junda Huang Date: Thu, 13 Jun 2024 22:13:38 -0400 Subject: [PATCH 035/155] [Codegen] (NFC) Faster algorithm for MachineBlockPlacement (#91843) In MachineBlockPlacement, the function getFirstUnplacedBlock is inefficient because in most cases (for usual loop CFG), this function fails to find a candidate, and its complexity becomes O(#(loops in function) * #(blocks in function)). This makes the compilation of very long functions slow. 
This update reduces it to O(k * #(blocks in function)) where k is the maximum loop nesting depth, by iterating through the BlockFilter instead. --- llvm/lib/CodeGen/MachineBlockPlacement.cpp | 116 +++++++++++++++------ 1 file changed, 85 insertions(+), 31 deletions(-) diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index d250981117c8f9..1cb71f39efbe18 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -480,14 +480,16 @@ class MachineBlockPlacement : public MachineFunctionPass { BlockFilterSet *BlockFilter); bool repeatedlyTailDuplicateBlock( MachineBasicBlock *BB, MachineBasicBlock *&LPred, - const MachineBasicBlock *LoopHeaderBB, - BlockChain &Chain, BlockFilterSet *BlockFilter, - MachineFunction::iterator &PrevUnplacedBlockIt); - bool maybeTailDuplicateBlock( - MachineBasicBlock *BB, MachineBasicBlock *LPred, - BlockChain &Chain, BlockFilterSet *BlockFilter, + const MachineBasicBlock *LoopHeaderBB, BlockChain &Chain, + BlockFilterSet *BlockFilter, MachineFunction::iterator &PrevUnplacedBlockIt, - bool &DuplicatedToLPred); + BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt); + bool + maybeTailDuplicateBlock(MachineBasicBlock *BB, MachineBasicBlock *LPred, + BlockChain &Chain, BlockFilterSet *BlockFilter, + MachineFunction::iterator &PrevUnplacedBlockIt, + BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt, + bool &DuplicatedToLPred); bool hasBetterLayoutPredecessor( const MachineBasicBlock *BB, const MachineBasicBlock *Succ, const BlockChain &SuccChain, BranchProbability SuccProb, @@ -498,10 +500,13 @@ class MachineBlockPlacement : public MachineFunctionPass { const BlockFilterSet *BlockFilter); MachineBasicBlock *selectBestCandidateBlock( const BlockChain &Chain, SmallVectorImpl &WorkList); - MachineBasicBlock *getFirstUnplacedBlock( - const BlockChain &PlacedChain, - MachineFunction::iterator &PrevUnplacedBlockIt, - const BlockFilterSet 
*BlockFilter); + MachineBasicBlock * + getFirstUnplacedBlock(const BlockChain &PlacedChain, + MachineFunction::iterator &PrevUnplacedBlockIt); + MachineBasicBlock * + getFirstUnplacedBlock(const BlockChain &PlacedChain, + BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt, + const BlockFilterSet *BlockFilter); /// Add a basic block to the work list if it is appropriate. /// @@ -1761,7 +1766,7 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( return BestBlock; } -/// Retrieve the first unplaced basic block. +/// Retrieve the first unplaced basic block in the entire function. /// /// This routine is called when we are unable to use the CFG to walk through /// all of the basic blocks and form a chain due to unnatural loops in the CFG. @@ -1770,12 +1775,10 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( /// re-scanning the entire sequence on repeated calls to this routine. MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock( const BlockChain &PlacedChain, - MachineFunction::iterator &PrevUnplacedBlockIt, - const BlockFilterSet *BlockFilter) { + MachineFunction::iterator &PrevUnplacedBlockIt) { + for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F->end(); I != E; ++I) { - if (BlockFilter && !BlockFilter->count(&*I)) - continue; if (BlockToChain[&*I] != &PlacedChain) { PrevUnplacedBlockIt = I; // Now select the head of the chain to which the unplaced block belongs @@ -1787,6 +1790,31 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock( return nullptr; } +/// Retrieve the first unplaced basic block among the blocks in BlockFilter. +/// +/// This is similar to getFirstUnplacedBlock for the entire function, but since +/// the size of BlockFilter is typically far less than the number of blocks in +/// the entire function, iterating through the BlockFilter is more efficient. 
+/// When processing the entire function, using the version without BlockFilter +/// has a complexity of #(loops in function) * #(blocks in function), while this +/// version has a complexity of sum(#(loops in block) foreach block in function) +/// which is always smaller. For long functions mostly sequential in structure, +/// the complexity is amortized to 1 * #(blocks in function). +MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock( + const BlockChain &PlacedChain, + BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt, + const BlockFilterSet *BlockFilter) { + assert(BlockFilter); + for (; PrevUnplacedBlockInFilterIt != BlockFilter->end(); + ++PrevUnplacedBlockInFilterIt) { + BlockChain *C = BlockToChain[*PrevUnplacedBlockInFilterIt]; + if (C != &PlacedChain) { + return *C->begin(); + } + } + return nullptr; +} + void MachineBlockPlacement::fillWorkLists( const MachineBasicBlock *MBB, SmallPtrSetImpl &UpdatedPreds, @@ -1826,6 +1854,9 @@ void MachineBlockPlacement::buildChain( assert(HeadBB && "BB must not be null.\n"); assert(BlockToChain[HeadBB] == &Chain && "BlockToChainMap mis-match.\n"); MachineFunction::iterator PrevUnplacedBlockIt = F->begin(); + BlockFilterSet::iterator PrevUnplacedBlockInFilterIt; + if (BlockFilter) + PrevUnplacedBlockInFilterIt = BlockFilter->begin(); const MachineBasicBlock *LoopHeaderBB = HeadBB; markChainSuccessors(Chain, LoopHeaderBB, BlockFilter); @@ -1855,7 +1886,11 @@ void MachineBlockPlacement::buildChain( BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList); if (!BestSucc) { - BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt, BlockFilter); + if (BlockFilter) + BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockInFilterIt, + BlockFilter); + else + BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt); if (!BestSucc) break; @@ -1867,7 +1902,8 @@ void MachineBlockPlacement::buildChain( // Check for that now. 
if (allowTailDupPlacement() && BestSucc && ShouldTailDup) { repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain, - BlockFilter, PrevUnplacedBlockIt); + BlockFilter, PrevUnplacedBlockIt, + PrevUnplacedBlockInFilterIt); // If the chosen successor was duplicated into BB, don't bother laying // it out, just go round the loop again with BB as the chain end. if (!BB->isSuccessor(BestSucc)) @@ -3017,14 +3053,14 @@ void MachineBlockPlacement::alignBlocks() { /// @return true if \p BB was removed. bool MachineBlockPlacement::repeatedlyTailDuplicateBlock( MachineBasicBlock *BB, MachineBasicBlock *&LPred, - const MachineBasicBlock *LoopHeaderBB, - BlockChain &Chain, BlockFilterSet *BlockFilter, - MachineFunction::iterator &PrevUnplacedBlockIt) { + const MachineBasicBlock *LoopHeaderBB, BlockChain &Chain, + BlockFilterSet *BlockFilter, MachineFunction::iterator &PrevUnplacedBlockIt, + BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt) { bool Removed, DuplicatedToLPred; bool DuplicatedToOriginalLPred; - Removed = maybeTailDuplicateBlock(BB, LPred, Chain, BlockFilter, - PrevUnplacedBlockIt, - DuplicatedToLPred); + Removed = maybeTailDuplicateBlock( + BB, LPred, Chain, BlockFilter, PrevUnplacedBlockIt, + PrevUnplacedBlockInFilterIt, DuplicatedToLPred); if (!Removed) return false; DuplicatedToOriginalLPred = DuplicatedToLPred; @@ -3045,9 +3081,9 @@ bool MachineBlockPlacement::repeatedlyTailDuplicateBlock( if (ChainEnd == Chain.begin()) break; DupPred = *std::prev(ChainEnd); - Removed = maybeTailDuplicateBlock(DupBB, DupPred, Chain, BlockFilter, - PrevUnplacedBlockIt, - DuplicatedToLPred); + Removed = maybeTailDuplicateBlock( + DupBB, DupPred, Chain, BlockFilter, PrevUnplacedBlockIt, + PrevUnplacedBlockInFilterIt, DuplicatedToLPred); } // If BB was duplicated into LPred, it is now scheduled. But because it was // removed, markChainSuccessors won't be called for its chain. 
Instead we @@ -3074,9 +3110,9 @@ bool MachineBlockPlacement::repeatedlyTailDuplicateBlock( /// \p DuplicatedToLPred - True if the block was duplicated into LPred. /// \return - True if the block was duplicated into all preds and removed. bool MachineBlockPlacement::maybeTailDuplicateBlock( - MachineBasicBlock *BB, MachineBasicBlock *LPred, - BlockChain &Chain, BlockFilterSet *BlockFilter, - MachineFunction::iterator &PrevUnplacedBlockIt, + MachineBasicBlock *BB, MachineBasicBlock *LPred, BlockChain &Chain, + BlockFilterSet *BlockFilter, MachineFunction::iterator &PrevUnplacedBlockIt, + BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt, bool &DuplicatedToLPred) { DuplicatedToLPred = false; if (!shouldTailDuplicate(BB)) @@ -3118,7 +3154,25 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock( // Handle the filter set if (BlockFilter) { - BlockFilter->remove(RemBB); + auto It = llvm::find(*BlockFilter, RemBB); + // Erase RemBB from BlockFilter, and keep PrevUnplacedBlockInFilterIt + // pointing to the same element as before. + if (It != BlockFilter->end()) { + if (It < PrevUnplacedBlockInFilterIt) { + const MachineBasicBlock *PrevBB = *PrevUnplacedBlockInFilterIt; + // BlockFilter is a SmallVector so all elements after RemBB are + // shifted to the front by 1 after its deletion. + auto Distance = PrevUnplacedBlockInFilterIt - It - 1; + PrevUnplacedBlockInFilterIt = BlockFilter->erase(It) + Distance; + assert(*PrevUnplacedBlockInFilterIt == PrevBB); + (void)PrevBB; + } else if (It == PrevUnplacedBlockInFilterIt) + // The block pointed by PrevUnplacedBlockInFilterIt is erased, we + // have to set it to the next element. + PrevUnplacedBlockInFilterIt = BlockFilter->erase(It); + else + BlockFilter->erase(It); + } } // Remove the block from loop info. 
From eb1248f20a86eb1bc8a7cc61d4ce71293a6caa75 Mon Sep 17 00:00:00 2001 From: Shengchen Kan Date: Fri, 14 Jun 2024 10:05:50 +0800 Subject: [PATCH 036/155] [X86][MC] Add missing support for pseudo rex/rex2 prefix in assembler This fixes https://github.com/llvm/llvm-project/issues/95417 --- .../lib/Target/X86/AsmParser/X86AsmParser.cpp | 52 +++++++++++++------ .../lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 14 ++--- .../X86/MCTargetDesc/X86MCCodeEmitter.cpp | 5 +- llvm/test/MC/X86/apx/pseudo-rex2.s | 9 ++++ llvm/test/MC/X86/pseudo-rex.s | 9 ++++ llvm/test/MC/X86/x86_errors.s | 8 +++ 6 files changed, 74 insertions(+), 23 deletions(-) create mode 100644 llvm/test/MC/X86/apx/pseudo-rex2.s create mode 100644 llvm/test/MC/X86/pseudo-rex.s diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index ffd66aa800584b..c0f54b223877cf 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -90,6 +90,8 @@ class X86AsmParser : public MCTargetAsmParser { enum OpcodePrefix { OpcodePrefix_Default, + OpcodePrefix_REX, + OpcodePrefix_REX2, OpcodePrefix_VEX, OpcodePrefix_VEX2, OpcodePrefix_VEX3, @@ -3201,7 +3203,11 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, return Error(Parser.getTok().getLoc(), "Expected '}'"); Parser.Lex(); // Eat curly. - if (Prefix == "vex") + if (Prefix == "rex") + ForcedOpcodePrefix = OpcodePrefix_REX; + else if (Prefix == "rex2") + ForcedOpcodePrefix = OpcodePrefix_REX2; + else if (Prefix == "vex") ForcedOpcodePrefix = OpcodePrefix_VEX; else if (Prefix == "vex2") ForcedOpcodePrefix = OpcodePrefix_VEX2; @@ -4025,9 +4031,13 @@ bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, MCInst Inst; - // If VEX/EVEX encoding is forced, we need to pass the USE_* flag to the - // encoder and printer. 
- if (ForcedOpcodePrefix == OpcodePrefix_VEX) + // If REX/REX2/VEX/EVEX encoding is forced, we need to pass the USE_* flag to + // the encoder and printer. + if (ForcedOpcodePrefix == OpcodePrefix_REX) + Prefixes |= X86::IP_USE_REX; + else if (ForcedOpcodePrefix == OpcodePrefix_REX2) + Prefixes |= X86::IP_USE_REX2; + else if (ForcedOpcodePrefix == OpcodePrefix_VEX) Prefixes |= X86::IP_USE_VEX; else if (ForcedOpcodePrefix == OpcodePrefix_VEX2) Prefixes |= X86::IP_USE_VEX2; @@ -4095,24 +4105,34 @@ bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) { unsigned Opc = Inst.getOpcode(); const MCInstrDesc &MCID = MII.get(Opc); + uint64_t TSFlags = MCID.TSFlags; if (UseApxExtendedReg && !X86II::canUseApxExtendedReg(MCID)) return Match_Unsupported; - if (ForcedNoFlag == !(MCID.TSFlags & X86II::EVEX_NF) && !X86::isCFCMOVCC(Opc)) + if (ForcedNoFlag == !(TSFlags & X86II::EVEX_NF) && !X86::isCFCMOVCC(Opc)) return Match_Unsupported; - if (ForcedOpcodePrefix == OpcodePrefix_EVEX && - (MCID.TSFlags & X86II::EncodingMask) != X86II::EVEX) - return Match_Unsupported; - - if ((ForcedOpcodePrefix == OpcodePrefix_VEX || - ForcedOpcodePrefix == OpcodePrefix_VEX2 || - ForcedOpcodePrefix == OpcodePrefix_VEX3) && - (MCID.TSFlags & X86II::EncodingMask) != X86II::VEX) - return Match_Unsupported; + switch (ForcedOpcodePrefix) { + case OpcodePrefix_Default: + break; + case OpcodePrefix_REX: + case OpcodePrefix_REX2: + if (TSFlags & X86II::EncodingMask) + return Match_Unsupported; + break; + case OpcodePrefix_VEX: + case OpcodePrefix_VEX2: + case OpcodePrefix_VEX3: + if ((TSFlags & X86II::EncodingMask) != X86II::VEX) + return Match_Unsupported; + break; + case OpcodePrefix_EVEX: + if ((TSFlags & X86II::EncodingMask) != X86II::EVEX) + return Match_Unsupported; + break; + } - if ((MCID.TSFlags & X86II::ExplicitOpPrefixMask) == - X86II::ExplicitVEXPrefix && + if ((TSFlags & X86II::ExplicitOpPrefixMask) == X86II::ExplicitVEXPrefix && 
(ForcedOpcodePrefix != OpcodePrefix_VEX && ForcedOpcodePrefix != OpcodePrefix_VEX2 && ForcedOpcodePrefix != OpcodePrefix_VEX3)) diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index 8e401578364157..a89408bb79b065 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -56,12 +56,14 @@ enum IPREFIXES { IP_HAS_REPEAT = 1U << 3, IP_HAS_LOCK = 1U << 4, IP_HAS_NOTRACK = 1U << 5, - IP_USE_VEX = 1U << 6, - IP_USE_VEX2 = 1U << 7, - IP_USE_VEX3 = 1U << 8, - IP_USE_EVEX = 1U << 9, - IP_USE_DISP8 = 1U << 10, - IP_USE_DISP32 = 1U << 11, + IP_USE_REX = 1U << 6, + IP_USE_REX2 = 1U << 7, + IP_USE_VEX = 1U << 8, + IP_USE_VEX2 = 1U << 9, + IP_USE_VEX3 = 1U << 10, + IP_USE_EVEX = 1U << 11, + IP_USE_DISP8 = 1U << 12, + IP_USE_DISP32 = 1U << 13, }; enum OperandType : unsigned { diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index b4633b91bee322..72219c136c7e17 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -1365,7 +1365,10 @@ PrefixKind X86MCCodeEmitter::emitREXPrefix(int MemOperand, const MCInst &MI, } } } - if ((TSFlags & X86II::ExplicitOpPrefixMask) == X86II::ExplicitREX2Prefix) + if (MI.getFlags() & X86::IP_USE_REX) + Prefix.setLowerBound(REX); + if ((TSFlags & X86II::ExplicitOpPrefixMask) == X86II::ExplicitREX2Prefix || + MI.getFlags() & X86::IP_USE_REX2) Prefix.setLowerBound(REX2); switch (TSFlags & X86II::FormMask) { default: diff --git a/llvm/test/MC/X86/apx/pseudo-rex2.s b/llvm/test/MC/X86/apx/pseudo-rex2.s new file mode 100644 index 00000000000000..bbe15085243811 --- /dev/null +++ b/llvm/test/MC/X86/apx/pseudo-rex2.s @@ -0,0 +1,9 @@ +# RUN: llvm-mc -triple x86_64 -show-encoding %s | FileCheck %s + +# CHECK: addl %ebx, %ecx +# CHECK: encoding: [0xd5,0x00,0x01,0xd9] +{rex2} addl %ebx, %ecx + +# CHECK: 
popcntl %edi, %esi +# CHECK: encoding: [0xf3,0xd5,0x80,0xb8,0xf7] +{rex2} popcnt %edi,%esi diff --git a/llvm/test/MC/X86/pseudo-rex.s b/llvm/test/MC/X86/pseudo-rex.s new file mode 100644 index 00000000000000..fc4c4dad54343b --- /dev/null +++ b/llvm/test/MC/X86/pseudo-rex.s @@ -0,0 +1,9 @@ +# RUN: llvm-mc -triple x86_64 -show-encoding %s | FileCheck %s + +# CHECK: addl %ebx, %ecx +# CHECK: encoding: [0x40,0x01,0xd9] +{rex} addl %ebx, %ecx + +# CHECK: popcntl %edi, %esi +# CHECK: encoding: [0xf3,0x40,0x0f,0xb8,0xf7] +{rex} popcnt %edi,%esi diff --git a/llvm/test/MC/X86/x86_errors.s b/llvm/test/MC/X86/x86_errors.s index da8659f3621e36..543575f38815a6 100644 --- a/llvm/test/MC/X86/x86_errors.s +++ b/llvm/test/MC/X86/x86_errors.s @@ -168,6 +168,14 @@ cltq // X86: error: instruction requires: 64-bit mode cmpxchg16b (%eax) +// X86: error: unsupported instruction +// X64: error: unsupported instruction +{rex} vmovdqu32 %xmm0, %xmm0 + +// X86: error: unsupported instruction +// X64: error: unsupported instruction +{rex2} vmovdqu32 %xmm0, %xmm0 + // X86: error: unsupported instruction // X64: error: unsupported instruction {vex} vmovdqu32 %xmm0, %xmm0 From 57458513a94812860f1c40faddcfc3c8f71223a4 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 13 Jun 2024 19:56:10 -0700 Subject: [PATCH 037/155] [RISCV] Remove unused check prefixes. 
NFC --- llvm/test/CodeGen/RISCV/rvv/vselect-bf16.ll | 57 +-------------------- 1 file changed, 2 insertions(+), 55 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-bf16.ll index a80d295744dc85..31a40a95b460f2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vselect-bf16.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-bf16.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVFH +; RUN: -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVFH +; RUN: -verify-machineinstrs < %s | FileCheck %s define @vfmerge_vv_nxv1bf16( %va, %vb, %cond) { ; CHECK-LABEL: vfmerge_vv_nxv1bf16: @@ -23,14 +23,6 @@ define @vfmerge_fv_nxv1bf16( %va, bfl ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t ; CHECK-NEXT: ret -; CHECK-ZVFHMIN-LABEL: vfmerge_fv_nxv1bf16: -; CHECK-ZVFHMIN: # %bb.0: -; CHECK-ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-ZVFHMIN-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; CHECK-ZVFHMIN-NEXT: vfmv.v.f v9, fa5 -; CHECK-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t -; CHECK-ZVFHMIN-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = select %cond, %splat, %va @@ -56,14 +48,6 @@ define @vfmerge_fv_nxv2bf16( %va, bfl ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t ; CHECK-NEXT: ret -; CHECK-ZVFHMIN-LABEL: vfmerge_fv_nxv2bf16: -; CHECK-ZVFHMIN: # %bb.0: -; CHECK-ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; 
CHECK-ZVFHMIN-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-ZVFHMIN-NEXT: vfmv.v.f v9, fa5 -; CHECK-ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t -; CHECK-ZVFHMIN-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = select %cond, %splat, %va @@ -89,14 +73,6 @@ define @vfmerge_fv_nxv4bf16( %va, bfl ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t ; CHECK-NEXT: ret -; CHECK-ZVFHMIN-LABEL: vfmerge_fv_nxv4bf16: -; CHECK-ZVFHMIN: # %bb.0: -; CHECK-ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-ZVFHMIN-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-ZVFHMIN-NEXT: vfmv.v.f v10, fa5 -; CHECK-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t -; CHECK-ZVFHMIN-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = select %cond, %splat, %va @@ -122,14 +98,6 @@ define @vfmerge_fv_nxv8bf16( %va, bfl ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t ; CHECK-NEXT: ret -; CHECK-ZVFHMIN-LABEL: vfmerge_fv_nxv8bf16: -; CHECK-ZVFHMIN: # %bb.0: -; CHECK-ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-ZVFHMIN-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; CHECK-ZVFHMIN-NEXT: vfmv.v.f v12, fa5 -; CHECK-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t -; CHECK-ZVFHMIN-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = select %cond, %splat, %va @@ -182,14 +150,6 @@ define @vfmerge_fv_nxv16bf16( %va, ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t ; CHECK-NEXT: ret -; CHECK-ZVFHMIN-LABEL: vfmerge_fv_nxv16bf16: -; CHECK-ZVFHMIN: # %bb.0: -; CHECK-ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-ZVFHMIN-NEXT: vsetvli 
a0, zero, e32, m8, ta, ma -; CHECK-ZVFHMIN-NEXT: vfmv.v.f v16, fa5 -; CHECK-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-ZVFHMIN-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t -; CHECK-ZVFHMIN-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = select %cond, %splat, %va @@ -218,21 +178,8 @@ define @vfmerge_fv_nxv32bf16( %va, ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 ; CHECK-NEXT: ret -; CHECK-ZVFHMIN-LABEL: vfmerge_fv_nxv32bf16: -; CHECK-ZVFHMIN: # %bb.0: -; CHECK-ZVFHMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-ZVFHMIN-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-ZVFHMIN-NEXT: vfmv.v.f v16, fa5 -; CHECK-ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-ZVFHMIN-NEXT: vfncvtbf16.f.f.w v24, v16 -; CHECK-ZVFHMIN-NEXT: vmv.v.v v28, v24 -; CHECK-ZVFHMIN-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-ZVFHMIN-NEXT: vmerge.vvm v8, v8, v24, v0 -; CHECK-ZVFHMIN-NEXT: ret %head = insertelement poison, bfloat %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer %vc = select %cond, %splat, %va ret %vc } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; CHECK-ZVFH: {{.*}} From a7a1195f01037e5019f671c96ef4bca9af9bb9a7 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 13 Jun 2024 20:00:12 -0700 Subject: [PATCH 038/155] [RISCV] Remove duplicate bf16 testing. NFC The bf16 test cases were copied to other files without the Zvfh/Zfvhmin options. Remove the duplication by adding a few Zvfh command lines to the bf16 files and deleting the bf16 tests from the test files for f16/f32/f64. 
--- .../RISCV/rvv/fixed-vectors-select-fp.ll | 352 +----------------- .../RISCV/rvv/fixed-vectors-vpmerge-bf16.ll | 4 + .../RISCV/rvv/fixed-vectors-vpmerge.ll | 120 +----- .../RISCV/rvv/fixed-vectors-vselect-vp.ll | 56 +-- llvm/test/CodeGen/RISCV/rvv/select-fp.ll | 188 +--------- .../CodeGen/RISCV/rvv/vpmerge-sdnode-bf16.ll | 4 + llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll | 179 +-------- llvm/test/CodeGen/RISCV/rvv/vselect-bf16.ll | 4 + llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll | 188 +--------- llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll | 80 +--- 10 files changed, 40 insertions(+), 1135 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll index 7a96aad31f0843..e75591c7d0aaf1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv64 
-mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s define <2 x half> @select_v2f16(i1 zeroext %c, <2 x half> %a, <2 x half> %b) { @@ -119,347 +119,3 @@ define <16 x half> @selectcc_v16f16(half %a, half %b, <16 x half> %c, <16 x half %v = select i1 %cmp, <16 x half> %c, <16 x half> %d ret <16 x half> %v } - -define <2 x float> @select_v2f32(i1 zeroext %c, <2 x float> %a, <2 x float> %b) { -; CHECK-LABEL: select_v2f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vmsne.vi v0, v10, 0 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: ret - %v = select i1 %c, <2 x float> %a, <2 x float> %b - ret <2 x float> %v -} - -define <2 x float> @selectcc_v2f32(float %a, float %b, <2 x float> %c, <2 x float> %d) { -; CHECK-LABEL: selectcc_v2f32: -; CHECK: # %bb.0: -; CHECK-NEXT: feq.s a0, fa0, fa1 -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vmsne.vi v0, v10, 0 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: ret - %cmp = fcmp oeq float %a, %b - %v = select i1 %cmp, <2 x float> %c, <2 x float> %d - ret <2 x float> %v -} - -define <4 x float> @select_v4f32(i1 zeroext %c, <4 x float> %a, <4 x float> %b) { -; CHECK-LABEL: select_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vmsne.vi v0, v10, 0 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: ret - %v = select i1 %c, <4 x float> %a, <4 x float> %b - ret <4 x float> %v -} - -define <4 x float> @selectcc_v4f32(float %a, float %b, <4 x float> %c, <4 x float> %d) { -; CHECK-LABEL: selectcc_v4f32: -; CHECK: # %bb.0: -; CHECK-NEXT: feq.s a0, fa0, fa1 -; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; 
CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vmsne.vi v0, v10, 0 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: ret - %cmp = fcmp oeq float %a, %b - %v = select i1 %cmp, <4 x float> %c, <4 x float> %d - ret <4 x float> %v -} - -define <8 x float> @select_v8f32(i1 zeroext %c, <8 x float> %a, <8 x float> %b) { -; CHECK-LABEL: select_v8f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vmsne.vi v0, v12, 0 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: ret - %v = select i1 %c, <8 x float> %a, <8 x float> %b - ret <8 x float> %v -} - -define <8 x float> @selectcc_v8f32(float %a, float %b, <8 x float> %c, <8 x float> %d) { -; CHECK-LABEL: selectcc_v8f32: -; CHECK: # %bb.0: -; CHECK-NEXT: feq.s a0, fa0, fa1 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vmsne.vi v0, v12, 0 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: ret - %cmp = fcmp oeq float %a, %b - %v = select i1 %cmp, <8 x float> %c, <8 x float> %d - ret <8 x float> %v -} - -define <16 x float> @select_v16f32(i1 zeroext %c, <16 x float> %a, <16 x float> %b) { -; CHECK-LABEL: select_v16f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vmsne.vi v0, v16, 0 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: ret - %v = select i1 %c, <16 x float> %a, <16 x float> %b - ret <16 x float> %v -} - -define <16 x float> @selectcc_v16f32(float %a, float %b, <16 x float> %c, <16 x float> %d) { -; CHECK-LABEL: selectcc_v16f32: -; CHECK: # %bb.0: -; CHECK-NEXT: feq.s a0, fa0, fa1 -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vmsne.vi v0, v16, 0 -; CHECK-NEXT: vsetvli 
zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: ret - %cmp = fcmp oeq float %a, %b - %v = select i1 %cmp, <16 x float> %c, <16 x float> %d - ret <16 x float> %v -} - -define <2 x double> @select_v2f64(i1 zeroext %c, <2 x double> %a, <2 x double> %b) { -; CHECK-LABEL: select_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vmsne.vi v0, v10, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: ret - %v = select i1 %c, <2 x double> %a, <2 x double> %b - ret <2 x double> %v -} - -define <2 x double> @selectcc_v2f64(double %a, double %b, <2 x double> %c, <2 x double> %d) { -; CHECK-LABEL: selectcc_v2f64: -; CHECK: # %bb.0: -; CHECK-NEXT: feq.d a0, fa0, fa1 -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vmsne.vi v0, v10, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: ret - %cmp = fcmp oeq double %a, %b - %v = select i1 %cmp, <2 x double> %c, <2 x double> %d - ret <2 x double> %v -} - -define <4 x double> @select_v4f64(i1 zeroext %c, <4 x double> %a, <4 x double> %b) { -; CHECK-LABEL: select_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vmsne.vi v0, v12, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: ret - %v = select i1 %c, <4 x double> %a, <4 x double> %b - ret <4 x double> %v -} - -define <4 x double> @selectcc_v4f64(double %a, double %b, <4 x double> %c, <4 x double> %d) { -; CHECK-LABEL: selectcc_v4f64: -; CHECK: # %bb.0: -; CHECK-NEXT: feq.d a0, fa0, fa1 -; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vmsne.vi v0, v12, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; 
CHECK-NEXT: ret - %cmp = fcmp oeq double %a, %b - %v = select i1 %cmp, <4 x double> %c, <4 x double> %d - ret <4 x double> %v -} - -define <8 x double> @select_v8f64(i1 zeroext %c, <8 x double> %a, <8 x double> %b) { -; CHECK-LABEL: select_v8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vmsne.vi v0, v16, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: ret - %v = select i1 %c, <8 x double> %a, <8 x double> %b - ret <8 x double> %v -} - -define <8 x double> @selectcc_v8f64(double %a, double %b, <8 x double> %c, <8 x double> %d) { -; CHECK-LABEL: selectcc_v8f64: -; CHECK: # %bb.0: -; CHECK-NEXT: feq.d a0, fa0, fa1 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vmsne.vi v0, v16, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: ret - %cmp = fcmp oeq double %a, %b - %v = select i1 %cmp, <8 x double> %c, <8 x double> %d - ret <8 x double> %v -} - -define <16 x double> @select_v16f64(i1 zeroext %c, <16 x double> %a, <16 x double> %b) { -; CHECK-LABEL: select_v16f64: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vmsne.vi v0, v24, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 -; CHECK-NEXT: ret - %v = select i1 %c, <16 x double> %a, <16 x double> %b - ret <16 x double> %v -} - -define <16 x double> @selectcc_v16f64(double %a, double %b, <16 x double> %c, <16 x double> %d) { -; CHECK-LABEL: selectcc_v16f64: -; CHECK: # %bb.0: -; CHECK-NEXT: feq.d a0, fa0, fa1 -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vmsne.vi v0, v24, 0 -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 -; CHECK-NEXT: ret - %cmp = fcmp oeq double %a, %b - %v = 
select i1 %cmp, <16 x double> %c, <16 x double> %d - ret <16 x double> %v -} - -define <2 x bfloat> @select_v2bf16(i1 zeroext %c, <2 x bfloat> %a, <2 x bfloat> %b) { -; CHECK-LABEL: select_v2bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vmsne.vi v0, v10, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: ret - %v = select i1 %c, <2 x bfloat> %a, <2 x bfloat> %b - ret <2 x bfloat> %v -} - -define <2 x bfloat> @selectcc_v2bf16(bfloat %a, bfloat %b, <2 x bfloat> %c, <2 x bfloat> %d) { -; CHECK-LABEL: selectcc_v2bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 -; CHECK-NEXT: fcvt.s.bf16 fa4, fa0 -; CHECK-NEXT: feq.s a0, fa4, fa5 -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vmsne.vi v0, v10, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: ret - %cmp = fcmp oeq bfloat %a, %b - %v = select i1 %cmp, <2 x bfloat> %c, <2 x bfloat> %d - ret <2 x bfloat> %v -} - -define <4 x bfloat> @select_v4bf16(i1 zeroext %c, <4 x bfloat> %a, <4 x bfloat> %b) { -; CHECK-LABEL: select_v4bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vmsne.vi v0, v10, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: ret - %v = select i1 %c, <4 x bfloat> %a, <4 x bfloat> %b - ret <4 x bfloat> %v -} - -define <4 x bfloat> @selectcc_v4bf16(bfloat %a, bfloat %b, <4 x bfloat> %c, <4 x bfloat> %d) { -; CHECK-LABEL: selectcc_v4bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 -; CHECK-NEXT: fcvt.s.bf16 fa4, fa0 -; CHECK-NEXT: feq.s a0, fa4, fa5 -; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vmsne.vi v0, v10, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; 
CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: ret - %cmp = fcmp oeq bfloat %a, %b - %v = select i1 %cmp, <4 x bfloat> %c, <4 x bfloat> %d - ret <4 x bfloat> %v -} - -define <8 x bfloat> @select_v8bf16(i1 zeroext %c, <8 x bfloat> %a, <8 x bfloat> %b) { -; CHECK-LABEL: select_v8bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vmsne.vi v0, v10, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: ret - %v = select i1 %c, <8 x bfloat> %a, <8 x bfloat> %b - ret <8 x bfloat> %v -} - -define <8 x bfloat> @selectcc_v8bf16(bfloat %a, bfloat %b, <8 x bfloat> %c, <8 x bfloat> %d) { -; CHECK-LABEL: selectcc_v8bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 -; CHECK-NEXT: fcvt.s.bf16 fa4, fa0 -; CHECK-NEXT: feq.s a0, fa4, fa5 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vmsne.vi v0, v10, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: ret - %cmp = fcmp oeq bfloat %a, %b - %v = select i1 %cmp, <8 x bfloat> %c, <8 x bfloat> %d - ret <8 x bfloat> %v -} - -define <16 x bfloat> @select_v16bf16(i1 zeroext %c, <16 x bfloat> %a, <16 x bfloat> %b) { -; CHECK-LABEL: select_v16bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vmsne.vi v0, v12, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: ret - %v = select i1 %c, <16 x bfloat> %a, <16 x bfloat> %b - ret <16 x bfloat> %v -} - -define <16 x bfloat> @selectcc_v16bf16(bfloat %a, bfloat %b, <16 x bfloat> %c, <16 x bfloat> %d) { -; CHECK-LABEL: selectcc_v16bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 -; CHECK-NEXT: fcvt.s.bf16 fa4, fa0 -; CHECK-NEXT: feq.s a0, fa4, fa5 -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.x v12, 
a0 -; CHECK-NEXT: vmsne.vi v0, v12, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: ret - %cmp = fcmp oeq bfloat %a, %b - %v = select i1 %cmp, <16 x bfloat> %c, <16 x bfloat> %d - ret <16 x bfloat> %v -} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge-bf16.ll index 7353f87764308f..f5fc06f6fa5319 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge-bf16.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge-bf16.ll @@ -3,6 +3,10 @@ ; RUN: -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=riscv64 -mattr=+d,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+v,+zvfh,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+v,+m,+zvfh,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s declare <2 x bfloat> @llvm.vp.merge.v2bf16(<2 x i1>, <2 x bfloat>, <2 x bfloat>, i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll index d6f158b0c00e2c..9f0561b394b819 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \ +; 
RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64 -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64ZVFHMIN declare <4 x i1> @llvm.vp.merge.v4i1(<4 x i1>, <4 x i1>, <4 x i1>, i32) @@ -1240,115 +1240,3 @@ define <32 x double> @vpmerge_vf_v32f64(double %a, <32 x double> %vb, <32 x i1> %v = call <32 x double> @llvm.vp.merge.v32f64(<32 x i1> %m, <32 x double> %va, <32 x double> %vb, i32 %evl) ret <32 x double> %v } - -declare <2 x bfloat> @llvm.vp.merge.v2bf16(<2 x i1>, <2 x bfloat>, <2 x bfloat>, i32) - -define <2 x bfloat> @vpmerge_vv_v2bf16(<2 x bfloat> %va, <2 x bfloat> %vb, <2 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vpmerge_vv_v2bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma -; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret - %v = call <2 x bfloat> @llvm.vp.merge.v2bf16(<2 x i1> %m, <2 x bfloat> %va, <2 x bfloat> %vb, i32 %evl) - ret <2 x bfloat> %v -} - -define <2 x bfloat> @vpmerge_vf_v2bf16(bfloat %a, <2 x bfloat> %vb, <2 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vpmerge_vf_v2bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfmv.v.f v9, fa5 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t -; CHECK-NEXT: ret - %elt.head = insertelement <2 
x bfloat> poison, bfloat %a, i32 0 - %va = shufflevector <2 x bfloat> %elt.head, <2 x bfloat> poison, <2 x i32> zeroinitializer - %v = call <2 x bfloat> @llvm.vp.merge.v2bf16(<2 x i1> %m, <2 x bfloat> %va, <2 x bfloat> %vb, i32 %evl) - ret <2 x bfloat> %v -} - -declare <4 x bfloat> @llvm.vp.merge.v4bf16(<4 x i1>, <4 x bfloat>, <4 x bfloat>, i32) - -define <4 x bfloat> @vpmerge_vv_v4bf16(<4 x bfloat> %va, <4 x bfloat> %vb, <4 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vpmerge_vv_v4bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma -; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret - %v = call <4 x bfloat> @llvm.vp.merge.v4bf16(<4 x i1> %m, <4 x bfloat> %va, <4 x bfloat> %vb, i32 %evl) - ret <4 x bfloat> %v -} - -define <4 x bfloat> @vpmerge_vf_v4bf16(bfloat %a, <4 x bfloat> %vb, <4 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vpmerge_vf_v4bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; CHECK-NEXT: vfmv.v.f v9, fa5 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t -; CHECK-NEXT: ret - %elt.head = insertelement <4 x bfloat> poison, bfloat %a, i32 0 - %va = shufflevector <4 x bfloat> %elt.head, <4 x bfloat> poison, <4 x i32> zeroinitializer - %v = call <4 x bfloat> @llvm.vp.merge.v4bf16(<4 x i1> %m, <4 x bfloat> %va, <4 x bfloat> %vb, i32 %evl) - ret <4 x bfloat> %v -} - -declare <8 x bfloat> @llvm.vp.merge.v8bf16(<8 x i1>, <8 x bfloat>, <8 x bfloat>, i32) - -define <8 x bfloat> @vpmerge_vv_v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vpmerge_vv_v8bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma -; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret - %v = call <8 x bfloat> @llvm.vp.merge.v8bf16(<8 x i1> %m, <8 x bfloat> %va, <8 x bfloat> %vb, i32 %evl) - ret <8 x bfloat> %v -} - -define <8 
x bfloat> @vpmerge_vf_v8bf16(bfloat %a, <8 x bfloat> %vb, <8 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vpmerge_vf_v8bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; CHECK-NEXT: vfmv.v.f v10, fa5 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t -; CHECK-NEXT: ret - %elt.head = insertelement <8 x bfloat> poison, bfloat %a, i32 0 - %va = shufflevector <8 x bfloat> %elt.head, <8 x bfloat> poison, <8 x i32> zeroinitializer - %v = call <8 x bfloat> @llvm.vp.merge.v8bf16(<8 x i1> %m, <8 x bfloat> %va, <8 x bfloat> %vb, i32 %evl) - ret <8 x bfloat> %v -} - -declare <16 x bfloat> @llvm.vp.merge.v16bf16(<16 x i1>, <16 x bfloat>, <16 x bfloat>, i32) - -define <16 x bfloat> @vpmerge_vv_v16bf16(<16 x bfloat> %va, <16 x bfloat> %vb, <16 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vpmerge_vv_v16bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, ma -; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 -; CHECK-NEXT: vmv2r.v v8, v10 -; CHECK-NEXT: ret - %v = call <16 x bfloat> @llvm.vp.merge.v16bf16(<16 x i1> %m, <16 x bfloat> %va, <16 x bfloat> %vb, i32 %evl) - ret <16 x bfloat> %v -} - -define <16 x bfloat> @vpmerge_vf_v16bf16(bfloat %a, <16 x bfloat> %vb, <16 x i1> %m, i32 zeroext %evl) { -; CHECK-LABEL: vpmerge_vf_v16bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; CHECK-NEXT: vfmv.v.f v12, fa5 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t -; CHECK-NEXT: ret - %elt.head = insertelement <16 x bfloat> poison, bfloat %a, i32 0 - %va = shufflevector <16 x bfloat> %elt.head, <16 x bfloat> poison, <16 x i32> zeroinitializer - %v = call <16 x bfloat> @llvm.vp.merge.v16bf16(<16 x i1> %m, <16 x bfloat> %va, <16 x bfloat> %vb, i32 %evl) - ret <16 x bfloat> %v -} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll index c5d9cdacae749f..0a2ed3eb1ffbf7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare <1 x i1> @llvm.vp.select.v1i1(<1 x i1>, <1 x i1>, <1 x i1>, i32) @@ -683,51 +683,3 @@ define <16 x double> @select_v16f64(<16 x i1> %a, <16 x double> %b, <16 x double %v = call <16 x double> @llvm.vp.select.v16f64(<16 x i1> %a, <16 x double> %b, <16 x double> %c, i32 %evl) ret <16 x double> %v } - -declare <2 x bfloat> @llvm.vp.select.v2bf16(<2 x i1>, <2 x bfloat>, <2 x bfloat>, i32) - -define <2 x bfloat> @select_v2bf16(<2 x i1> %a, <2 x bfloat> %b, <2 x bfloat> %c, i32 zeroext %evl) { -; CHECK-LABEL: select_v2bf16: -; CHECK: # %bb.0: -; 
CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: ret - %v = call <2 x bfloat> @llvm.vp.select.v2bf16(<2 x i1> %a, <2 x bfloat> %b, <2 x bfloat> %c, i32 %evl) - ret <2 x bfloat> %v -} - -declare <4 x bfloat> @llvm.vp.select.v4bf16(<4 x i1>, <4 x bfloat>, <4 x bfloat>, i32) - -define <4 x bfloat> @select_v4bf16(<4 x i1> %a, <4 x bfloat> %b, <4 x bfloat> %c, i32 zeroext %evl) { -; CHECK-LABEL: select_v4bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: ret - %v = call <4 x bfloat> @llvm.vp.select.v4bf16(<4 x i1> %a, <4 x bfloat> %b, <4 x bfloat> %c, i32 %evl) - ret <4 x bfloat> %v -} - -declare <8 x bfloat> @llvm.vp.select.v8bf16(<8 x i1>, <8 x bfloat>, <8 x bfloat>, i32) - -define <8 x bfloat> @select_v8bf16(<8 x i1> %a, <8 x bfloat> %b, <8 x bfloat> %c, i32 zeroext %evl) { -; CHECK-LABEL: select_v8bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: ret - %v = call <8 x bfloat> @llvm.vp.select.v8bf16(<8 x i1> %a, <8 x bfloat> %b, <8 x bfloat> %c, i32 %evl) - ret <8 x bfloat> %v -} - -declare <16 x bfloat> @llvm.vp.select.v16bf16(<16 x i1>, <16 x bfloat>, <16 x bfloat>, i32) - -define <16 x bfloat> @select_v16bf16(<16 x i1> %a, <16 x bfloat> %b, <16 x bfloat> %c, i32 zeroext %evl) { -; CHECK-LABEL: select_v16bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: ret - %v = call <16 x bfloat> @llvm.vp.select.v16bf16(<16 x i1> %a, <16 x bfloat> %b, <16 x bfloat> %c, i32 %evl) - ret <16 x bfloat> %v -} diff --git a/llvm/test/CodeGen/RISCV/rvv/select-fp.ll b/llvm/test/CodeGen/RISCV/rvv/select-fp.ll index 2b9d847a9e873b..f8581d8e21b390 100644 --- a/llvm/test/CodeGen/RISCV/rvv/select-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/select-fp.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s define @select_nxv1f16(i1 zeroext %c, %a, %b) { @@ -427,183 +427,3 @@ define @selectcc_nxv8f64(double %a, double %b, %c, %d ret %v } - -define @select_nxv1bf16(i1 zeroext %c, %a, %b) { -; CHECK-LABEL: select_nxv1bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vmsne.vi v0, v10, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: ret - %v = select i1 %c, %a, %b - ret %v -} - -define @selectcc_nxv1bf16(bfloat %a, bfloat %b, %c, %d) { -; CHECK-LABEL: selectcc_nxv1bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 -; CHECK-NEXT: fcvt.s.bf16 fa4, fa0 -; CHECK-NEXT: feq.s a0, fa4, fa5 -; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vmsne.vi v0, v10, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: ret - %cmp = fcmp oeq bfloat %a, %b - 
%v = select i1 %cmp, %c, %d - ret %v -} - -define @select_nxv2bf16(i1 zeroext %c, %a, %b) { -; CHECK-LABEL: select_nxv2bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vmsne.vi v0, v10, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: ret - %v = select i1 %c, %a, %b - ret %v -} - -define @selectcc_nxv2bf16(bfloat %a, bfloat %b, %c, %d) { -; CHECK-LABEL: selectcc_nxv2bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 -; CHECK-NEXT: fcvt.s.bf16 fa4, fa0 -; CHECK-NEXT: feq.s a0, fa4, fa5 -; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vmsne.vi v0, v10, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: ret - %cmp = fcmp oeq bfloat %a, %b - %v = select i1 %cmp, %c, %d - ret %v -} - -define @select_nxv4bf16(i1 zeroext %c, %a, %b) { -; CHECK-LABEL: select_nxv4bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vmsne.vi v0, v10, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: ret - %v = select i1 %c, %a, %b - ret %v -} - -define @selectcc_nxv4bf16(bfloat %a, bfloat %b, %c, %d) { -; CHECK-LABEL: selectcc_nxv4bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 -; CHECK-NEXT: fcvt.s.bf16 fa4, fa0 -; CHECK-NEXT: feq.s a0, fa4, fa5 -; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vmsne.vi v0, v10, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: ret - %cmp = fcmp oeq bfloat %a, %b - %v = select i1 %cmp, %c, %d - ret %v -} - -define @select_nxv8bf16(i1 zeroext %c, %a, %b) { -; CHECK-LABEL: select_nxv8bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.x 
v12, a0 -; CHECK-NEXT: vmsne.vi v0, v12, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: ret - %v = select i1 %c, %a, %b - ret %v -} - -define @selectcc_nxv8bf16(bfloat %a, bfloat %b, %c, %d) { -; CHECK-LABEL: selectcc_nxv8bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 -; CHECK-NEXT: fcvt.s.bf16 fa4, fa0 -; CHECK-NEXT: feq.s a0, fa4, fa5 -; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.x v12, a0 -; CHECK-NEXT: vmsne.vi v0, v12, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: ret - %cmp = fcmp oeq bfloat %a, %b - %v = select i1 %cmp, %c, %d - ret %v -} - -define @select_nxv16bf16(i1 zeroext %c, %a, %b) { -; CHECK-LABEL: select_nxv16bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vmsne.vi v0, v16, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: ret - %v = select i1 %c, %a, %b - ret %v -} - -define @selectcc_nxv16bf16(bfloat %a, bfloat %b, %c, %d) { -; CHECK-LABEL: selectcc_nxv16bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 -; CHECK-NEXT: fcvt.s.bf16 fa4, fa0 -; CHECK-NEXT: feq.s a0, fa4, fa5 -; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma -; CHECK-NEXT: vmv.v.x v16, a0 -; CHECK-NEXT: vmsne.vi v0, v16, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: ret - %cmp = fcmp oeq bfloat %a, %b - %v = select i1 %cmp, %c, %d - ret %v -} - -define @select_nxv32bf16(i1 zeroext %c, %a, %b) { -; CHECK-LABEL: select_nxv32bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vmsne.vi v0, v24, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 -; CHECK-NEXT: ret - %v = select i1 %c, %a, %b - ret %v -} - -define 
@selectcc_nxv32bf16(bfloat %a, bfloat %b, %c, %d) { -; CHECK-LABEL: selectcc_nxv32bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa1 -; CHECK-NEXT: fcvt.s.bf16 fa4, fa0 -; CHECK-NEXT: feq.s a0, fa4, fa5 -; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vmsne.vi v0, v24, 0 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 -; CHECK-NEXT: ret - %cmp = fcmp oeq bfloat %a, %b - %v = select i1 %cmp, %c, %d - ret %v -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode-bf16.ll index 3463134b768964..f6090315f38c13 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode-bf16.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode-bf16.ll @@ -3,6 +3,10 @@ ; RUN: -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=riscv64 -mattr=+d,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+d,+v,+zvfh,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+v,+zvfh,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s declare @llvm.vp.merge.nxv1bf16(, , , i32) diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll index d617c973dec321..094e6c9cc754fa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s 
--check-prefixes=CHECK,RV32,RV32ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVFHMIN declare @llvm.vp.merge.nxv1i1(, , , i32) @@ -1547,174 +1547,3 @@ define @vpmerge_vf_nxv8f64(double %a, @llvm.vp.merge.nxv8f64( %m, %va, %vb, i32 %evl) ret %v } - -declare @llvm.vp.merge.nxv1bf16(, , , i32) - -define @vpmerge_vv_nxv1bf16( %va, %vb, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpmerge_vv_nxv1bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, ma -; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret - %v = call @llvm.vp.merge.nxv1bf16( %m, %va, %vb, i32 %evl) - ret %v -} - -define @vpmerge_vf_nxv1bf16(bfloat %a, %vb, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpmerge_vf_nxv1bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfmv.v.f v9, fa5 -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, tu, mu -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t -; CHECK-NEXT: ret - %elt.head = insertelement poison, bfloat %a, i32 0 - %va = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.merge.nxv1bf16( %m, %va, %vb, i32 %evl) - ret %v -} - -declare 
@llvm.vp.merge.nxv2bf16(, , , i32) - -define @vpmerge_vv_nxv2bf16( %va, %vb, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpmerge_vv_nxv2bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, ma -; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret - %v = call @llvm.vp.merge.nxv2bf16( %m, %va, %vb, i32 %evl) - ret %v -} - -define @vpmerge_vf_nxv2bf16(bfloat %a, %vb, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpmerge_vf_nxv2bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; CHECK-NEXT: vfmv.v.f v9, fa5 -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, tu, mu -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t -; CHECK-NEXT: ret - %elt.head = insertelement poison, bfloat %a, i32 0 - %va = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.merge.nxv2bf16( %m, %va, %vb, i32 %evl) - ret %v -} - -declare @llvm.vp.merge.nxv4bf16(, , , i32) - -define @vpmerge_vv_nxv4bf16( %va, %vb, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpmerge_vv_nxv4bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, ma -; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret - %v = call @llvm.vp.merge.nxv4bf16( %m, %va, %vb, i32 %evl) - ret %v -} - -define @vpmerge_vf_nxv4bf16(bfloat %a, %vb, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpmerge_vf_nxv4bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma -; CHECK-NEXT: vfmv.v.f v10, fa5 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, tu, mu -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t -; CHECK-NEXT: ret - %elt.head = insertelement poison, bfloat %a, i32 0 - %va = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.merge.nxv4bf16( %m, %va, %vb, i32 %evl) - ret %v -} - -declare @llvm.vp.merge.nxv8bf16(, , , i32) - -define @vpmerge_vv_nxv8bf16( %va, %vb, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpmerge_vv_nxv8bf16: -; CHECK: # 
%bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, ma -; CHECK-NEXT: vmerge.vvm v10, v10, v8, v0 -; CHECK-NEXT: vmv2r.v v8, v10 -; CHECK-NEXT: ret - %v = call @llvm.vp.merge.nxv8bf16( %m, %va, %vb, i32 %evl) - ret %v -} - -define @vpmerge_vf_nxv8bf16(bfloat %a, %vb, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpmerge_vf_nxv8bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; CHECK-NEXT: vfmv.v.f v12, fa5 -; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, mu -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t -; CHECK-NEXT: ret - %elt.head = insertelement poison, bfloat %a, i32 0 - %va = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.merge.nxv8bf16( %m, %va, %vb, i32 %evl) - ret %v -} - -declare @llvm.vp.merge.nxv16bf16(, , , i32) - -define @vpmerge_vv_nxv16bf16( %va, %vb, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpmerge_vv_nxv16bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, ma -; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 -; CHECK-NEXT: vmv4r.v v8, v12 -; CHECK-NEXT: ret - %v = call @llvm.vp.merge.nxv16bf16( %m, %va, %vb, i32 %evl) - ret %v -} - -define @vpmerge_vf_nxv16bf16(bfloat %a, %vb, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpmerge_vf_nxv16bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmv.v.f v16, fa5 -; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t -; CHECK-NEXT: ret - %elt.head = insertelement poison, bfloat %a, i32 0 - %va = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.merge.nxv16bf16( %m, %va, %vb, i32 %evl) - ret %v -} - -declare @llvm.vp.merge.nxv32bf16(, , , i32) - -define @vpmerge_vv_nxv32bf16( %va, %vb, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpmerge_vv_nxv32bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 -; CHECK-NEXT: vmv8r.v v8, v16 -; 
CHECK-NEXT: ret - %v = call @llvm.vp.merge.nxv32bf16( %m, %va, %vb, i32 %evl) - ret %v -} - -define @vpmerge_vf_nxv32bf16(bfloat %a, %vb, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpmerge_vf_nxv32bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmv.v.f v24, fa5 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v16, v24 -; CHECK-NEXT: vmv.v.v v20, v16 -; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 -; CHECK-NEXT: ret - %elt.head = insertelement poison, bfloat %a, i32 0 - %va = shufflevector %elt.head, poison, zeroinitializer - %v = call @llvm.vp.merge.nxv32bf16( %m, %va, %vb, i32 %evl) - ret %v -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-bf16.ll index 31a40a95b460f2..11bec8b878cdf1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vselect-bf16.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-bf16.ll @@ -3,6 +3,10 @@ ; RUN: -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+v,+zvfh,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+v,+zvfh,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s define @vfmerge_vv_nxv1bf16( %va, %vb, %cond) { ; CHECK-LABEL: vfmerge_vv_nxv1bf16: diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll index ec4b9721824c7f..53b8e4a78b756f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc 
-mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVFH -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVFH -; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfhmin,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVFHMIN -; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfhmin,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfhmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVFHMIN define @vfmerge_vv_nxv1f16( %va, %vb, %cond) { @@ -512,183 +512,3 @@ define void @vselect_legalize_regression( %a, %sel, ptr %out ret void } - -define @vfmerge_vv_nxv1bf16( %va, %vb, %cond) { -; CHECK-LABEL: vfmerge_vv_nxv1bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: ret - %vc = select %cond, %va, %vb - ret %vc -} - -define @vfmerge_fv_nxv1bf16( %va, bfloat %b, %cond) { -; CHECK-LABEL: vfmerge_fv_nxv1bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma -; CHECK-NEXT: vfmv.v.f v9, fa5 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t -; CHECK-NEXT: ret - %head = insertelement poison, bfloat %b, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = select 
%cond, %splat, %va - ret %vc -} - -define @vfmerge_vv_nxv2bf16( %va, %vb, %cond) { -; CHECK-LABEL: vfmerge_vv_nxv2bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: ret - %vc = select %cond, %va, %vb - ret %vc -} - -define @vfmerge_fv_nxv2bf16( %va, bfloat %b, %cond) { -; CHECK-LABEL: vfmerge_fv_nxv2bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; CHECK-NEXT: vfmv.v.f v9, fa5 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t -; CHECK-NEXT: ret - %head = insertelement poison, bfloat %b, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = select %cond, %splat, %va - ret %vc -} - -define @vfmerge_vv_nxv4bf16( %va, %vb, %cond) { -; CHECK-LABEL: vfmerge_vv_nxv4bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: ret - %vc = select %cond, %va, %vb - ret %vc -} - -define @vfmerge_fv_nxv4bf16( %va, bfloat %b, %cond) { -; CHECK-LABEL: vfmerge_fv_nxv4bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; CHECK-NEXT: vfmv.v.f v10, fa5 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t -; CHECK-NEXT: ret - %head = insertelement poison, bfloat %b, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = select %cond, %splat, %va - ret %vc -} - -define @vfmerge_vv_nxv8bf16( %va, %vb, %cond) { -; CHECK-LABEL: vfmerge_vv_nxv8bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: ret - %vc = select %cond, %va, %vb - ret %vc -} - -define @vfmerge_fv_nxv8bf16( %va, bfloat %b, %cond) { -; CHECK-LABEL: vfmerge_fv_nxv8bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a0, zero, e32, 
m4, ta, ma -; CHECK-NEXT: vfmv.v.f v12, fa5 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t -; CHECK-NEXT: ret - %head = insertelement poison, bfloat %b, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = select %cond, %splat, %va - ret %vc -} - -define @vfmerge_zv_nxv8bf16( %va, %cond) { -; CHECK-LABEL: vfmerge_zv_nxv8bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma -; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 -; CHECK-NEXT: ret - %vc = select %cond, splat (bfloat zeroinitializer), %va - ret %vc -} - -define @vmerge_truelhs_nxv8bf16_0( %va, %vb) { -; CHECK-LABEL: vmerge_truelhs_nxv8bf16_0: -; CHECK: # %bb.0: -; CHECK-NEXT: ret - %vc = select splat (i1 1), %va, %vb - ret %vc -} - -define @vmerge_falselhs_nxv8bf16_0( %va, %vb) { -; CHECK-LABEL: vmerge_falselhs_nxv8bf16_0: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv2r.v v8, v10 -; CHECK-NEXT: ret - %vc = select zeroinitializer, %va, %vb - ret %vc -} - -define @vfmerge_vv_nxv16bf16( %va, %vb, %cond) { -; CHECK-LABEL: vfmerge_vv_nxv16bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: ret - %vc = select %cond, %va, %vb - ret %vc -} - -define @vfmerge_fv_nxv16bf16( %va, bfloat %b, %cond) { -; CHECK-LABEL: vfmerge_fv_nxv16bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmv.v.f v16, fa5 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu -; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t -; CHECK-NEXT: ret - %head = insertelement poison, bfloat %b, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = select %cond, %splat, %va - ret %vc -} - -define @vfmerge_vv_nxv32bf16( %va, %vb, %cond) { -; CHECK-LABEL: vfmerge_vv_nxv32bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 -; CHECK-NEXT: ret - %vc = select %cond, 
%va, %vb - ret %vc -} - -define @vfmerge_fv_nxv32bf16( %va, bfloat %b, %cond) { -; CHECK-LABEL: vfmerge_fv_nxv32bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vfmv.v.f v16, fa5 -; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v24, v16 -; CHECK-NEXT: vmv.v.v v28, v24 -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 -; CHECK-NEXT: ret - %head = insertelement poison, bfloat %b, i32 0 - %splat = shufflevector %head, poison, zeroinitializer - %vc = select %cond, %splat, %va - ret %vc -} diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll index d1049e14fa29aa..ee0617c9314801 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+m,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+m,+zfh,+zvfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+m,+zfh,+zvfh,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+m,+zfh,+zvfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv32 -mattr=+d,+m,+zfh,+zvfhmin,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+d,+m,+zfh,+zvfhmin,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+m,+zfh,+zvfhmin,+v,+experimental-zfbfmin,+experimental-zvfbfmin -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+d,+m,+zfh,+zvfhmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare 
@llvm.vp.select.nxv1i1(, , , i32) @@ -922,75 +922,3 @@ define @select_unknown_T_T( %x, @llvm.vp.select.nxv2i1( %x, %y, %y, i32 %evl) ret %a } - -declare @llvm.vp.select.nxv1bf16(, , , i32) - -define @select_nxv1bf16( %a, %b, %c, i32 zeroext %evl) { -; CHECK-LABEL: select_nxv1bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: ret - %v = call @llvm.vp.select.nxv1bf16( %a, %b, %c, i32 %evl) - ret %v -} - -declare @llvm.vp.select.nxv2bf16(, , , i32) - -define @select_nxv2bf16( %a, %b, %c, i32 zeroext %evl) { -; CHECK-LABEL: select_nxv2bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: ret - %v = call @llvm.vp.select.nxv2bf16( %a, %b, %c, i32 %evl) - ret %v -} - -declare @llvm.vp.select.nxv4bf16(, , , i32) - -define @select_nxv4bf16( %a, %b, %c, i32 zeroext %evl) { -; CHECK-LABEL: select_nxv4bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 -; CHECK-NEXT: ret - %v = call @llvm.vp.select.nxv4bf16( %a, %b, %c, i32 %evl) - ret %v -} - -declare @llvm.vp.select.nxv8bf16(, , , i32) - -define @select_nxv8bf16( %a, %b, %c, i32 zeroext %evl) { -; CHECK-LABEL: select_nxv8bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 -; CHECK-NEXT: ret - %v = call @llvm.vp.select.nxv8bf16( %a, %b, %c, i32 %evl) - ret %v -} - -declare @llvm.vp.select.nxv16bf16(, , , i32) - -define @select_nxv16bf16( %a, %b, %c, i32 zeroext %evl) { -; CHECK-LABEL: select_nxv16bf16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 -; CHECK-NEXT: ret - %v = call @llvm.vp.select.nxv16bf16( %a, %b, %c, i32 %evl) - ret %v -} - -declare @llvm.vp.select.nxv32bf16(, , , i32) - -define @select_nxv32bf16( %a, %b, %c, i32 zeroext %evl) { -; CHECK-LABEL: select_nxv32bf16: -; 
CHECK: # %bb.0: -; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 -; CHECK-NEXT: ret - %v = call @llvm.vp.select.nxv32bf16( %a, %b, %c, i32 %evl) - ret %v -} From 43bd7ae65af40ff3378d5a0395a058ba834ad8dd Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Fri, 14 Jun 2024 12:13:47 +0900 Subject: [PATCH 039/155] StreamChecker.cpp: Use isa<> (for #93408) [-Wunused-but-set-variable] --- clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp index 74ee607849a5bf..613c221de7b4cd 100644 --- a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp @@ -969,9 +969,9 @@ static std::optional getStartIndex(SValBuilder &SVB, if (const auto *ER = dyn_cast(R)) return ER->getIndex(); - if (const auto *TR = dyn_cast(R)) + if (isa(R)) return Zero(); - if (const auto *SR = dyn_cast(R)) + if (isa(R)) return Zero(); return std::nullopt; } From 2efe3d7fc0e7f9594d91e73bef11d33e0796aa65 Mon Sep 17 00:00:00 2001 From: Schrodinger ZHU Yifan Date: Thu, 13 Jun 2024 20:34:32 -0700 Subject: [PATCH 040/155] Reland "[libc] fix aarch64 linux full build (#95358)" (#95423) Reverts llvm/llvm-project#95419 and Reland #95358. This PR is full of temporal fixes. After a discussion with @lntue, it is better to avoid further changes to the cmake infrastructure for now as a rework to the cmake utilities will be landed in the future. 
--- libc/cmake/modules/LLVMLibCTestRules.cmake | 9 +++++++++ libc/config/linux/aarch64/entrypoints.txt | 7 +++++++ libc/src/__support/threads/linux/CMakeLists.txt | 1 + libc/test/IntegrationTest/CMakeLists.txt | 5 +++++ libc/test/IntegrationTest/test.cpp | 10 ++++++++++ libc/test/UnitTest/CMakeLists.txt | 2 +- libc/test/UnitTest/HermeticTestUtils.cpp | 16 ++++++++++++++++ 7 files changed, 49 insertions(+), 1 deletion(-) diff --git a/libc/cmake/modules/LLVMLibCTestRules.cmake b/libc/cmake/modules/LLVMLibCTestRules.cmake index eb6be91b55e261..c8d7c8a2b1c7ca 100644 --- a/libc/cmake/modules/LLVMLibCTestRules.cmake +++ b/libc/cmake/modules/LLVMLibCTestRules.cmake @@ -686,6 +686,15 @@ function(add_libc_hermetic_test test_name) LibcTest.hermetic libc.test.UnitTest.ErrnoSetterMatcher ${fq_deps_list}) + # TODO: currently the dependency chain is broken such that getauxval cannot properly + # propagate to hermetic tests. This is a temporary workaround. + if (LIBC_TARGET_ARCHITECTURE_IS_AARCH64) + target_link_libraries( + ${fq_build_target_name} + PRIVATE + libc.src.sys.auxv.getauxval + ) + endif() # Tests on the GPU require an external loader utility to launch the kernel. 
if(TARGET libc.utils.gpu.loader) diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index 2b2d0985a89922..ee865fdec05c1d 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -644,6 +644,12 @@ if(LLVM_LIBC_FULL_BUILD) libc.src.pthread.pthread_mutexattr_setrobust libc.src.pthread.pthread_mutexattr_settype libc.src.pthread.pthread_once + libc.src.pthread.pthread_rwlockattr_destroy + libc.src.pthread.pthread_rwlockattr_getkind_np + libc.src.pthread.pthread_rwlockattr_getpshared + libc.src.pthread.pthread_rwlockattr_init + libc.src.pthread.pthread_rwlockattr_setkind_np + libc.src.pthread.pthread_rwlockattr_setpshared libc.src.pthread.pthread_setspecific # sched.h entrypoints @@ -754,6 +760,7 @@ if(LLVM_LIBC_FULL_BUILD) libc.src.unistd._exit libc.src.unistd.environ libc.src.unistd.execv + libc.src.unistd.fork libc.src.unistd.getopt libc.src.unistd.optarg libc.src.unistd.optind diff --git a/libc/src/__support/threads/linux/CMakeLists.txt b/libc/src/__support/threads/linux/CMakeLists.txt index 9bf88ccc84557d..8e6cd7227b2c81 100644 --- a/libc/src/__support/threads/linux/CMakeLists.txt +++ b/libc/src/__support/threads/linux/CMakeLists.txt @@ -64,6 +64,7 @@ add_object_library( .futex_utils libc.config.linux.app_h libc.include.sys_syscall + libc.include.fcntl libc.src.errno.errno libc.src.__support.CPP.atomic libc.src.__support.CPP.stringstream diff --git a/libc/test/IntegrationTest/CMakeLists.txt b/libc/test/IntegrationTest/CMakeLists.txt index 4f31f10b29f0b8..4a999407d48d77 100644 --- a/libc/test/IntegrationTest/CMakeLists.txt +++ b/libc/test/IntegrationTest/CMakeLists.txt @@ -1,3 +1,7 @@ +set(arch_specific_deps) +if(LIBC_TARGET_ARCHITECTURE_IS_AARCH64) + set(arch_specific_deps libc.src.sys.auxv.getauxval) +endif() add_object_library( test SRCS @@ -8,4 +12,5 @@ add_object_library( test.h DEPENDS libc.src.__support.OSUtil.osutil + ${arch_specific_deps} ) diff --git 
a/libc/test/IntegrationTest/test.cpp b/libc/test/IntegrationTest/test.cpp index 3bdbe89a3fb62d..a8b2f2911fd8e7 100644 --- a/libc/test/IntegrationTest/test.cpp +++ b/libc/test/IntegrationTest/test.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +#include "src/__support/common.h" +#include "src/sys/auxv/getauxval.h" #include #include @@ -79,4 +81,12 @@ void *realloc(void *ptr, size_t s) { // Integration tests are linked with -nostdlib. BFD linker expects // __dso_handle when -nostdlib is used. void *__dso_handle = nullptr; + +#ifdef LIBC_TARGET_ARCH_IS_AARCH64 +// Due to historical reasons, libgcc on aarch64 may expect __getauxval to be +// defined. See also https://gcc.gnu.org/pipermail/gcc-cvs/2020-June/300635.html +unsigned long __getauxval(unsigned long id) { + return LIBC_NAMESPACE::getauxval(id); +} +#endif } // extern "C" diff --git a/libc/test/UnitTest/CMakeLists.txt b/libc/test/UnitTest/CMakeLists.txt index 302af3044ca3d6..4adc2f5c725f79 100644 --- a/libc/test/UnitTest/CMakeLists.txt +++ b/libc/test/UnitTest/CMakeLists.txt @@ -41,7 +41,7 @@ function(add_unittest_framework_library name) target_compile_options(${name}.hermetic PRIVATE ${compile_options}) if(TEST_LIB_DEPENDS) - foreach(dep IN LISTS ${TEST_LIB_DEPENDS}) + foreach(dep IN ITEMS ${TEST_LIB_DEPENDS}) if(TARGET ${dep}.unit) add_dependencies(${name}.unit ${dep}.unit) else() diff --git a/libc/test/UnitTest/HermeticTestUtils.cpp b/libc/test/UnitTest/HermeticTestUtils.cpp index 349c182ff2379f..6e815e6c8aab04 100644 --- a/libc/test/UnitTest/HermeticTestUtils.cpp +++ b/libc/test/UnitTest/HermeticTestUtils.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +#include "src/__support/common.h" +#include "src/sys/auxv/getauxval.h" #include #include @@ -19,6 +21,12 @@ void *memmove(void *dst, const void *src, size_t count); void *memset(void *ptr, int value, size_t count); int atexit(void (*func)(void)); 
+// TODO: It seems that some old test frameworks does not use +// add_libc_hermetic_test properly. Such that they won't get correct linkage +// against the object containing this function. We create a dummy function that +// always returns 0 to indicate a failure. +[[gnu::weak]] unsigned long getauxval(unsigned long id) { return 0; } + } // namespace LIBC_NAMESPACE namespace { @@ -102,6 +110,14 @@ void __cxa_pure_virtual() { // __dso_handle when -nostdlib is used. void *__dso_handle = nullptr; +#ifdef LIBC_TARGET_ARCH_IS_AARCH64 +// Due to historical reasons, libgcc on aarch64 may expect __getauxval to be +// defined. See also https://gcc.gnu.org/pipermail/gcc-cvs/2020-June/300635.html +unsigned long __getauxval(unsigned long id) { + return LIBC_NAMESPACE::getauxval(id); +} +#endif + } // extern "C" void *operator new(unsigned long size, void *ptr) { return ptr; } From 85e8d6275839df5b7a939c0c34c69ed39702ef7f Mon Sep 17 00:00:00 2001 From: Aviad Cohen Date: Fri, 14 Jun 2024 06:49:43 +0300 Subject: [PATCH 041/155] [mlir][scf]: Expose emitNormalizedLoopBounds/denormalizeInductionVariable util functions (#94429) Also adjusted `LoopParams` to use OpFoldResult instead of Value. --- mlir/include/mlir/Dialect/Arith/Utils/Utils.h | 12 ++- mlir/include/mlir/Dialect/SCF/Utils/Utils.h | 26 ++++++ mlir/lib/Dialect/Arith/Utils/Utils.cpp | 21 ++++- mlir/lib/Dialect/SCF/Utils/Utils.cpp | 85 +++++++++---------- mlir/test/Dialect/Affine/loop-coalescing.mlir | 15 ++-- mlir/test/Dialect/SCF/transform-ops.mlir | 15 +++- 6 files changed, 111 insertions(+), 63 deletions(-) diff --git a/mlir/include/mlir/Dialect/Arith/Utils/Utils.h b/mlir/include/mlir/Dialect/Arith/Utils/Utils.h index 5e7945d9b04928..76f5825025739b 100644 --- a/mlir/include/mlir/Dialect/Arith/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Arith/Utils/Utils.h @@ -54,7 +54,13 @@ llvm::SmallBitVector getPositionsOfShapeOne(unsigned rank, ArrayRef shape); /// Converts an OpFoldResult to a Value. 
Returns the fold result if it casts to -/// a Value or creates a ConstantIndexOp if it casts to an IntegerAttribute. +/// a Value or creates a ConstantOp if it casts to an Integer Attribute. +/// Other attribute types are not supported. +Value getValueOrCreateConstantIntOp(OpBuilder &b, Location loc, + OpFoldResult ofr); + +/// Converts an OpFoldResult to a Value. Returns the fold result if it casts to +/// a Value or creates a ConstantIndexOp if it casts to an Integer Attribute. /// Other attribute types are not supported. Value getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr); @@ -88,6 +94,10 @@ Value createScalarOrSplatConstant(OpBuilder &builder, Location loc, Type type, Value createScalarOrSplatConstant(OpBuilder &builder, Location loc, Type type, const APFloat &value); +/// Returns the int type of the integer in ofr. +/// Other attribute types are not supported. +Type getType(OpFoldResult ofr); + /// Helper struct to build simple arithmetic quantities with minimal type /// inference support. struct ArithBuilder { diff --git a/mlir/include/mlir/Dialect/SCF/Utils/Utils.h b/mlir/include/mlir/Dialect/SCF/Utils/Utils.h index bc09cc7f7fa5e0..f719c002139875 100644 --- a/mlir/include/mlir/Dialect/SCF/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/SCF/Utils/Utils.h @@ -120,6 +120,32 @@ LogicalResult loopUnrollByFactor( scf::ForOp forOp, uint64_t unrollFactor, function_ref annotateFn = nullptr); +/// This structure is to pass and return sets of loop parameters without +/// confusing the order. 
+struct LoopParams { + OpFoldResult lowerBound; + OpFoldResult upperBound; + OpFoldResult step; +}; + +/// Transform a loop with a strictly positive step +/// for %i = %lb to %ub step %s +/// into a 0-based loop with step 1 +/// for %ii = 0 to ceildiv(%ub - %lb, %s) step 1 { +/// %i = %ii * %s + %lb +/// Insert the induction variable remapping in the body of `inner`, which is +/// expected to be either `loop` or another loop perfectly nested under `loop`. +/// Insert the definition of new bounds immediate before `outer`, which is +/// expected to be either `loop` or its parent in the loop nest. +LoopParams emitNormalizedLoopBounds(RewriterBase &rewriter, Location loc, + OpFoldResult lb, OpFoldResult ub, + OpFoldResult step); + +/// Get back the original induction variable values after loop normalization. +void denormalizeInductionVariable(RewriterBase &rewriter, Location loc, + Value normalizedIv, OpFoldResult origLb, + OpFoldResult origStep); + /// Tile a nest of standard for loops rooted at `rootForOp` by finding such /// parametric tile sizes that the outer loops have a fixed number of iterations /// as defined in `sizes`. 
diff --git a/mlir/lib/Dialect/Arith/Utils/Utils.cpp b/mlir/lib/Dialect/Arith/Utils/Utils.cpp index 4ce55a23820cf7..e75db84b75e280 100644 --- a/mlir/lib/Dialect/Arith/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Arith/Utils/Utils.cpp @@ -100,12 +100,20 @@ llvm::SmallBitVector mlir::getPositionsOfShapeOne(unsigned rank, return dimsToProject; } +Value mlir::getValueOrCreateConstantIntOp(OpBuilder &b, Location loc, + OpFoldResult ofr) { + if (auto value = dyn_cast_if_present(ofr)) + return value; + auto attr = cast(cast(ofr)); + return b.create( + loc, b.getIntegerAttr(attr.getType(), attr.getValue().getSExtValue())); +} + Value mlir::getValueOrCreateConstantIndexOp(OpBuilder &b, Location loc, OpFoldResult ofr) { - if (auto value = llvm::dyn_cast_if_present(ofr)) + if (auto value = dyn_cast_if_present(ofr)) return value; - auto attr = dyn_cast(llvm::dyn_cast_if_present(ofr)); - assert(attr && "expect the op fold result casts to an integer attribute"); + auto attr = cast(cast(ofr)); return b.create(loc, attr.getValue().getSExtValue()); } @@ -294,6 +302,13 @@ Value mlir::createScalarOrSplatConstant(OpBuilder &builder, Location loc, return builder.createOrFold(loc, type, splat); } +Type mlir::getType(OpFoldResult ofr) { + if (auto value = dyn_cast_if_present(ofr)) + return value.getType(); + auto attr = cast(cast(ofr)); + return attr.getType(); +} + Value ArithBuilder::_and(Value lhs, Value rhs) { return b.create(loc, lhs, rhs); } diff --git a/mlir/lib/Dialect/SCF/Utils/Utils.cpp b/mlir/lib/Dialect/SCF/Utils/Utils.cpp index 8bdbef15521b61..a031e53fe0ffbb 100644 --- a/mlir/lib/Dialect/SCF/Utils/Utils.cpp +++ b/mlir/lib/Dialect/SCF/Utils/Utils.cpp @@ -18,6 +18,7 @@ #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/IRMapping.h" +#include "mlir/IR/OpDefinition.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Interfaces/SideEffectInterfaces.h" #include "mlir/Transforms/RegionUtils.h" @@ -29,16 +30,6 @@ using namespace mlir; -namespace { -// 
This structure is to pass and return sets of loop parameters without -// confusing the order. -struct LoopParams { - Value lowerBound; - Value upperBound; - Value step; -}; -} // namespace - SmallVector mlir::replaceLoopNestWithNewYields( RewriterBase &rewriter, MutableArrayRef loopNest, ValueRange newIterOperands, const NewYieldValuesFn &newYieldValuesFn, @@ -473,17 +464,9 @@ LogicalResult mlir::loopUnrollByFactor( return success(); } -/// Transform a loop with a strictly positive step -/// for %i = %lb to %ub step %s -/// into a 0-based loop with step 1 -/// for %ii = 0 to ceildiv(%ub - %lb, %s) step 1 { -/// %i = %ii * %s + %lb -/// Insert the induction variable remapping in the body of `inner`, which is -/// expected to be either `loop` or another loop perfectly nested under `loop`. -/// Insert the definition of new bounds immediate before `outer`, which is -/// expected to be either `loop` or its parent in the loop nest. -static LoopParams emitNormalizedLoopBounds(RewriterBase &rewriter, Location loc, - Value lb, Value ub, Value step) { +LoopParams mlir::emitNormalizedLoopBounds(RewriterBase &rewriter, Location loc, + OpFoldResult lb, OpFoldResult ub, + OpFoldResult step) { // For non-index types, generate `arith` instructions // Check if the loop is already known to have a constant zero lower bound or // a constant one step. @@ -495,32 +478,38 @@ static LoopParams emitNormalizedLoopBounds(RewriterBase &rewriter, Location loc, if (auto stepCst = getConstantIntValue(step)) isStepOne = stepCst.value() == 1; + Type loopParamsType = getType(lb); + assert(loopParamsType == getType(ub) && loopParamsType == getType(step) && + "expected matching types"); + // Compute the number of iterations the loop executes: ceildiv(ub - lb, step) // assuming the step is strictly positive. Update the bounds and the step // of the loop to go from 0 to the number of iterations, if necessary. if (isZeroBased && isStepOne) return {lb, ub, step}; - Value diff = isZeroBased ? 
ub : rewriter.create(loc, ub, lb); - Value newUpperBound = - isStepOne ? diff : rewriter.create(loc, diff, step); + OpFoldResult diff = ub; + if (!isZeroBased) { + diff = rewriter.createOrFold( + loc, getValueOrCreateConstantIntOp(rewriter, loc, ub), + getValueOrCreateConstantIntOp(rewriter, loc, lb)); + } + OpFoldResult newUpperBound = diff; + if (!isStepOne) { + newUpperBound = rewriter.createOrFold( + loc, getValueOrCreateConstantIntOp(rewriter, loc, diff), + getValueOrCreateConstantIntOp(rewriter, loc, step)); + } - Value newLowerBound = isZeroBased - ? lb - : rewriter.create( - loc, rewriter.getZeroAttr(lb.getType())); - Value newStep = isStepOne - ? step - : rewriter.create( - loc, rewriter.getIntegerAttr(step.getType(), 1)); + OpFoldResult newLowerBound = rewriter.getZeroAttr(loopParamsType); + OpFoldResult newStep = rewriter.getOneAttr(loopParamsType); return {newLowerBound, newUpperBound, newStep}; } -/// Get back the original induction variable values after loop normalization -static void denormalizeInductionVariable(RewriterBase &rewriter, Location loc, - Value normalizedIv, Value origLb, - Value origStep) { +void mlir::denormalizeInductionVariable(RewriterBase &rewriter, Location loc, + Value normalizedIv, OpFoldResult origLb, + OpFoldResult origStep) { Value denormalizedIv; SmallPtrSet preserve; bool isStepOne = isConstantIntValue(origStep, 1); @@ -528,12 +517,15 @@ static void denormalizeInductionVariable(RewriterBase &rewriter, Location loc, Value scaled = normalizedIv; if (!isStepOne) { - scaled = rewriter.create(loc, normalizedIv, origStep); + Value origStepValue = + getValueOrCreateConstantIntOp(rewriter, loc, origStep); + scaled = rewriter.create(loc, normalizedIv, origStepValue); preserve.insert(scaled.getDefiningOp()); } denormalizedIv = scaled; if (!isZeroBased) { - denormalizedIv = rewriter.create(loc, scaled, origLb); + Value origLbValue = getValueOrCreateConstantIntOp(rewriter, loc, origLb); + denormalizedIv = rewriter.create(loc, scaled, 
origLbValue); preserve.insert(denormalizedIv.getDefiningOp()); } @@ -638,9 +630,12 @@ LogicalResult mlir::coalesceLoops(RewriterBase &rewriter, emitNormalizedLoopBounds(rewriter, loop.getLoc(), lb, ub, step); rewriter.modifyOpInPlace(loop, [&]() { - loop.setLowerBound(newLoopParams.lowerBound); - loop.setUpperBound(newLoopParams.upperBound); - loop.setStep(newLoopParams.step); + loop.setLowerBound(getValueOrCreateConstantIntOp( + rewriter, loop.getLoc(), newLoopParams.lowerBound)); + loop.setUpperBound(getValueOrCreateConstantIntOp( + rewriter, loop.getLoc(), newLoopParams.upperBound)); + loop.setStep(getValueOrCreateConstantIntOp(rewriter, loop.getLoc(), + newLoopParams.step)); }); rewriter.setInsertionPointToStart(innermost.getBody()); @@ -778,8 +773,7 @@ void mlir::collapseParallelLoops( llvm::sort(dims); // Normalize ParallelOp's iteration pattern. - SmallVector normalizedLowerBounds, normalizedSteps, - normalizedUpperBounds; + SmallVector normalizedUpperBounds; for (unsigned i = 0, e = loops.getNumLoops(); i < e; ++i) { OpBuilder::InsertionGuard g2(rewriter); rewriter.setInsertionPoint(loops); @@ -787,9 +781,8 @@ void mlir::collapseParallelLoops( Value ub = loops.getUpperBound()[i]; Value step = loops.getStep()[i]; auto newLoopParams = emitNormalizedLoopBounds(rewriter, loc, lb, ub, step); - normalizedLowerBounds.push_back(newLoopParams.lowerBound); - normalizedUpperBounds.push_back(newLoopParams.upperBound); - normalizedSteps.push_back(newLoopParams.step); + normalizedUpperBounds.push_back(getValueOrCreateConstantIntOp( + rewriter, loops.getLoc(), newLoopParams.upperBound)); rewriter.setInsertionPointToStart(loops.getBody()); denormalizeInductionVariable(rewriter, loc, loops.getInductionVars()[i], lb, diff --git a/mlir/test/Dialect/Affine/loop-coalescing.mlir b/mlir/test/Dialect/Affine/loop-coalescing.mlir index ae0adf5a0a02d4..0235000aeac538 100644 --- a/mlir/test/Dialect/Affine/loop-coalescing.mlir +++ b/mlir/test/Dialect/Affine/loop-coalescing.mlir @@ 
-74,11 +74,10 @@ func.func @multi_use() { func.func @unnormalized_loops() { // CHECK: %[[orig_step_i:.*]] = arith.constant 2 - // CHECK: %[[orig_step_j:.*]] = arith.constant 3 + + // CHECK: %[[orig_step_j_and_numiter_i:.*]] = arith.constant 3 // CHECK: %[[orig_lb_i:.*]] = arith.constant 5 // CHECK: %[[orig_lb_j:.*]] = arith.constant 7 - // CHECK: %[[orig_ub_i:.*]] = arith.constant 10 - // CHECK: %[[orig_ub_j:.*]] = arith.constant 17 %c2 = arith.constant 2 : index %c3 = arith.constant 3 : index %c5 = arith.constant 5 : index @@ -86,20 +85,16 @@ func.func @unnormalized_loops() { %c10 = arith.constant 10 : index %c17 = arith.constant 17 : index - // Number of iterations in the outer scf. - // CHECK: %[[diff_i:.*]] = arith.subi %[[orig_ub_i]], %[[orig_lb_i]] - // CHECK: %[[numiter_i:.*]] = arith.ceildivsi %[[diff_i]], %[[orig_step_i]] - // Normalized lower bound and step for the outer scf. // CHECK: %[[lb_i:.*]] = arith.constant 0 // CHECK: %[[step_i:.*]] = arith.constant 1 // Number of iterations in the inner loop, the pattern is the same as above, // only capture the final result. - // CHECK: %[[numiter_j:.*]] = arith.ceildivsi {{.*}}, %[[orig_step_j]] + // CHECK: %[[numiter_j:.*]] = arith.constant 4 // New bounds of the outer scf. - // CHECK: %[[range:.*]] = arith.muli %[[numiter_i]], %[[numiter_j]] + // CHECK: %[[range:.*]] = arith.muli %[[orig_step_j_and_numiter_i:.*]], %[[numiter_j]] // CHECK: scf.for %[[i:.*]] = %[[lb_i]] to %[[range]] step %[[step_i]] scf.for %i = %c5 to %c10 step %c2 { // The inner loop has been removed. @@ -108,7 +103,7 @@ func.func @unnormalized_loops() { // The IVs are rewritten. 
// CHECK: %[[normalized_j:.*]] = arith.remsi %[[i]], %[[numiter_j]] // CHECK: %[[normalized_i:.*]] = arith.divsi %[[i]], %[[numiter_j]] - // CHECK: %[[scaled_j:.*]] = arith.muli %[[normalized_j]], %[[orig_step_j]] + // CHECK: %[[scaled_j:.*]] = arith.muli %[[normalized_j]], %[[orig_step_j_and_numiter_i]] // CHECK: %[[orig_j:.*]] = arith.addi %[[scaled_j]], %[[orig_lb_j]] // CHECK: %[[scaled_i:.*]] = arith.muli %[[normalized_i]], %[[orig_step_i]] // CHECK: %[[orig_i:.*]] = arith.addi %[[scaled_i]], %[[orig_lb_i]] diff --git a/mlir/test/Dialect/SCF/transform-ops.mlir b/mlir/test/Dialect/SCF/transform-ops.mlir index 2d6b48fd3e57c2..b91225bf45b967 100644 --- a/mlir/test/Dialect/SCF/transform-ops.mlir +++ b/mlir/test/Dialect/SCF/transform-ops.mlir @@ -277,13 +277,22 @@ module attributes {transform.with_named_sequence} { // This test checks for loop coalescing success for non-index loop boundaries and step type func.func @coalesce_i32_loops() { + // CHECK: %[[VAL_0:.*]] = arith.constant 0 : i32 + // CHECK: %[[VAL_1:.*]] = arith.constant 128 : i32 + // CHECK: %[[VAL_2:.*]] = arith.constant 2 : i32 + // CHECK: %[[VAL_3:.*]] = arith.constant 64 : i32 %0 = arith.constant 0 : i32 %1 = arith.constant 128 : i32 %2 = arith.constant 2 : i32 %3 = arith.constant 64 : i32 - // CHECK-DAG: %[[C0_I32:.*]] = arith.constant 0 : i32 - // CHECK-DAG: %[[C1_I32:.*]] = arith.constant 1 : i32 - // CHECK: scf.for %[[ARG0:.*]] = %[[C0_I32]] to {{.*}} step %[[C1_I32]] : i32 + // CHECK: %[[VAL_4:.*]] = arith.constant 64 : i32 + // CHECK: %[[ZERO:.*]] = arith.constant 0 : i32 + // CHECK: %[[ONE:.*]] = arith.constant 1 : i32 + // CHECK: %[[VAL_7:.*]] = arith.constant 32 : i32 + // CHECK: %[[VAL_8:.*]] = arith.constant 0 : i32 + // CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 + // CHECK: %[[UB:.*]] = arith.muli %[[VAL_4]], %[[VAL_7]] : i32 + // CHECK: scf.for %[[VAL_11:.*]] = %[[ZERO]] to %[[UB]] step %[[ONE]] : i32 { scf.for %i = %0 to %1 step %2 : i32 { scf.for %j = %0 to %3 step %2 : i32 { 
arith.addi %i, %j : i32 From 7ffeaf0e187b41994f63ae82e73e123b942cd16b Mon Sep 17 00:00:00 2001 From: harishch4 Date: Fri, 14 Jun 2024 09:37:38 +0530 Subject: [PATCH 042/155] [MLIR][Flang][OpenMP] Implement lowering simd aligned to MLIR (#95198) Rebased @DominikAdamski patch: https://reviews.llvm.org/D142722 --------- Co-authored-by: Dominik Adamski Co-authored-by: Tom Eccles --- flang/lib/Lower/OpenMP/ClauseProcessor.cpp | 60 ++++++++++++++++++++++ flang/lib/Lower/OpenMP/ClauseProcessor.h | 2 +- flang/lib/Lower/OpenMP/OpenMP.cpp | 8 +-- flang/test/Lower/OpenMP/simd.f90 | 41 +++++++++++++++ flang/test/Lower/OpenMP/simd_aarch64.f90 | 16 ++++++ flang/test/Lower/OpenMP/simd_x86_64.f90 | 48 +++++++++++++++++ 6 files changed, 170 insertions(+), 5 deletions(-) create mode 100644 flang/test/Lower/OpenMP/simd_aarch64.f90 create mode 100644 flang/test/Lower/OpenMP/simd_x86_64.f90 diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index 371fe6db012556..27eea2b133b3cf 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -16,6 +16,7 @@ #include "flang/Lower/PFTBuilder.h" #include "flang/Parser/tools.h" #include "flang/Semantics/tools.h" +#include "llvm/Frontend/OpenMP/OMPIRBuilder.h" namespace Fortran { namespace lower { @@ -514,6 +515,65 @@ bool ClauseProcessor::processUntied(mlir::omp::UntiedClauseOps &result) const { //===----------------------------------------------------------------------===// // ClauseProcessor repeatable clauses //===----------------------------------------------------------------------===// +static llvm::StringMap getTargetFeatures(mlir::ModuleOp module) { + llvm::StringMap featuresMap; + llvm::SmallVector targetFeaturesVec; + if (mlir::LLVM::TargetFeaturesAttr features = + fir::getTargetFeatures(module)) { + llvm::ArrayRef featureAttrs = features.getFeatures(); + for (auto &featureAttr : featureAttrs) { + llvm::StringRef featureKeyString = 
featureAttr.strref(); + featuresMap[featureKeyString.substr(1)] = (featureKeyString[0] == '+'); + } + } + return featuresMap; +} + +static void +addAlignedClause(lower::AbstractConverter &converter, + const omp::clause::Aligned &clause, + llvm::SmallVectorImpl &alignedVars, + llvm::SmallVectorImpl &alignmentAttrs) { + using Aligned = omp::clause::Aligned; + lower::StatementContext stmtCtx; + mlir::IntegerAttr alignmentValueAttr; + int64_t alignment = 0; + fir::FirOpBuilder &builder = converter.getFirOpBuilder(); + + if (auto &alignmentValueParserExpr = + std::get>(clause.t)) { + mlir::Value operand = fir::getBase( + converter.genExprValue(*alignmentValueParserExpr, stmtCtx)); + alignment = *fir::getIntIfConstant(operand); + } else { + llvm::StringMap featuresMap = getTargetFeatures(builder.getModule()); + llvm::Triple triple = fir::getTargetTriple(builder.getModule()); + alignment = + llvm::OpenMPIRBuilder::getOpenMPDefaultSimdAlign(triple, featuresMap); + } + + // The default alignment for some targets is equal to 0. + // Do not generate alignment assumption if alignment is less than or equal to + // 0. 
+ if (alignment > 0) { + auto &objects = std::get(clause.t); + if (!objects.empty()) + genObjectList(objects, converter, alignedVars); + alignmentValueAttr = builder.getI64IntegerAttr(alignment); + // All the list items in a aligned clause will have same alignment + for (std::size_t i = 0; i < objects.size(); i++) + alignmentAttrs.push_back(alignmentValueAttr); + } +} + +bool ClauseProcessor::processAligned( + mlir::omp::AlignedClauseOps &result) const { + return findRepeatableClause( + [&](const omp::clause::Aligned &clause, const parser::CharBlock &) { + addAlignedClause(converter, clause, result.alignedVars, + result.alignmentAttrs); + }); +} bool ClauseProcessor::processAllocate( mlir::omp::AllocateClauseOps &result) const { diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h index e8b06a703fc03b..5c9ab8baf82dd5 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.h +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h @@ -90,6 +90,7 @@ class ClauseProcessor { bool processUntied(mlir::omp::UntiedClauseOps &result) const; // 'Repeatable' clauses: They can appear multiple times in the clause list. + bool processAligned(mlir::omp::AlignedClauseOps &result) const; bool processAllocate(mlir::omp::AllocateClauseOps &result) const; bool processCopyin() const; bool processCopyprivate(mlir::Location currentLocation, @@ -140,7 +141,6 @@ class ClauseProcessor { template bool processMotionClauses(lower::StatementContext &stmtCtx, mlir::omp::MapClauseOps &result); - // Call this method for these clauses that should be supported but are not // implemented yet. It triggers a compilation error if any of the given // clauses is found. 
diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 9a8211711123ee..aac22f0faad373 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -1070,15 +1070,15 @@ static void genSimdClauses(lower::AbstractConverter &converter, const List &clauses, mlir::Location loc, mlir::omp::SimdClauseOps &clauseOps) { ClauseProcessor cp(converter, semaCtx, clauses); + cp.processAligned(clauseOps); cp.processIf(llvm::omp::Directive::OMPD_simd, clauseOps); cp.processReduction(loc, clauseOps); cp.processSafelen(clauseOps); cp.processSimdlen(clauseOps); - // TODO Support delayed privatization. - cp.processTODO( - loc, llvm::omp::Directive::OMPD_simd); + // TODO Support delayed privatization. + cp.processTODO(loc, llvm::omp::Directive::OMPD_simd); } static void genSingleClauses(lower::AbstractConverter &converter, diff --git a/flang/test/Lower/OpenMP/simd.f90 b/flang/test/Lower/OpenMP/simd.f90 index 223b248b793483..e98136dd57b0a7 100644 --- a/flang/test/Lower/OpenMP/simd.f90 +++ b/flang/test/Lower/OpenMP/simd.f90 @@ -182,3 +182,44 @@ subroutine simd_with_collapse_clause(n) end do !$OMP END SIMD end subroutine + + +!CHECK: func.func @_QPsimdloop_aligned_cptr(%[[ARG_A:.*]]: !fir.ref +!CHECK-SAME: > {fir.bindc_name = "a"}) { +!CHECK: %[[A_DECL:.*]]:2 = hlfir.declare %[[ARG_A]] dummy_scope %0 +!CHECK-SAME: {uniq_name = "_QFsimdloop_aligned_cptrEa"} : +!CHECK-SAME: (!fir.ref>, !fir.dscope) -> +!CHECK-SAME: (!fir.ref>, +!CHECK-SAME: !fir.ref>) +subroutine simdloop_aligned_cptr( A) + use iso_c_binding + integer :: i + type (c_ptr) :: A +!CHECK: omp.simd aligned(%[[A_DECL]]#1 : !fir.ref +!CHECK-SAME: > +!CHECK-SAME: -> 256 : i64) + !$OMP SIMD ALIGNED(A:256) + do i = 1, 10 + call c_test_call(A) + end do + !$OMP END SIMD +end subroutine + +!CHECK-LABEL: func @_QPsimdloop_aligned_allocatable +subroutine simdloop_aligned_allocatable() + integer :: i + integer, allocatable :: A(:) + allocate(A(10)) +!CHECK: %[[A_PTR:.*]] = 
fir.alloca !fir.box>> {bindc_name = "a", +!CHECK-SAME: uniq_name = "_QFsimdloop_aligned_allocatableEa"} +!CHECK: %[[A_DECL:.*]]:2 = hlfir.declare %[[A_PTR]] {fortran_attrs = #fir.var_attrs, +!CHECK-SAME: uniq_name = "_QFsimdloop_aligned_allocatableEa"} : +!CHECK-SAME: (!fir.ref>>>) -> +!CHECK-SAME: (!fir.ref>>>, !fir.ref>>>) +!CHECK: omp.simd aligned(%[[A_DECL]]#1 : !fir.ref>>> -> 256 : i64) + !$OMP SIMD ALIGNED(A:256) + do i = 1, 10 + A(i) = i + end do +end subroutine diff --git a/flang/test/Lower/OpenMP/simd_aarch64.f90 b/flang/test/Lower/OpenMP/simd_aarch64.f90 new file mode 100644 index 00000000000000..735237223bcb55 --- /dev/null +++ b/flang/test/Lower/OpenMP/simd_aarch64.f90 @@ -0,0 +1,16 @@ +! Tests for 2.9.3.1 Simd and target dependent defult alignment for AArch64 +! The default alignment for AARCH64 is 0 so we do not emit aligned clause +! REQUIRES: aarch64-registered-target +! RUN: %flang_fc1 -triple aarch64-unknown-linux-gnu -emit-hlfir -fopenmp %s -o - | FileCheck %s +subroutine simdloop_aligned_cptr(A) + use iso_c_binding + integer :: i + type (c_ptr) :: A + !CHECK: omp.simd + !CHECK-NOT: aligned( + !$OMP SIMD ALIGNED(A) + do i = 1, 10 + call c_test_call(A) + end do + !$OMP END SIMD +end subroutine diff --git a/flang/test/Lower/OpenMP/simd_x86_64.f90 b/flang/test/Lower/OpenMP/simd_x86_64.f90 new file mode 100644 index 00000000000000..c8cb7970c3222c --- /dev/null +++ b/flang/test/Lower/OpenMP/simd_x86_64.f90 @@ -0,0 +1,48 @@ +! Tests for 2.9.3.1 Simd and target dependent defult alignment for x86 +! REQUIRES: x86-registered-target +! RUN: %flang_fc1 -triple x86_64-unknown-linux-gnu -emit-hlfir -fopenmp -target-cpu x86-64 %s -o - | FileCheck --check-prefixes=DEFAULT %s +! RUN: %flang_fc1 -triple x86_64-unknown-linux-gnu -emit-hlfir -fopenmp -target-cpu x86-64 -target-feature +avx %s -o - | FileCheck --check-prefixes=AVX %s +! 
RUN: %flang_fc1 -triple x86_64-unknown-linux-gnu -emit-hlfir -fopenmp -target-cpu x86-64 -target-feature +avx512f %s -o - | FileCheck --check-prefixes=AVX512F %s +!DEFAULT: func.func @_QPsimdloop_aligned_cptr(%[[ARG_A:.*]]: !fir.ref +!DEFAULT-SAME: > {fir.bindc_name = "a"}) { +!DEFAULT: %[[A_DECL:.*]]:2 = hlfir.declare %[[ARG_A]] dummy_scope %0 +!DEFAULT-SAME: {uniq_name = "_QFsimdloop_aligned_cptrEa"} : +!DEFAULT-SAME: (!fir.ref>, !fir.dscope) -> +!DEFAULT-SAME: (!fir.ref>, +!DEFAULT-SAME: !fir.ref>) +!AVX: func.func @_QPsimdloop_aligned_cptr(%[[ARG_A:.*]]: !fir.ref +!AVX-SAME: > {fir.bindc_name = "a"}) { +!AVX: %[[A_DECL:.*]]:2 = hlfir.declare %[[ARG_A]] dummy_scope %0 +!AVX-SAME: {uniq_name = "_QFsimdloop_aligned_cptrEa"} : +!AVX-SAME: (!fir.ref>, !fir.dscope) -> +!AVX-SAME: (!fir.ref>, +!AVX-SAME: !fir.ref>) +!AVX512F: func.func @_QPsimdloop_aligned_cptr(%[[ARG_A:.*]]: !fir.ref +!AVX512F-SAME: > {fir.bindc_name = "a"}) { +!AVX512F: %[[A_DECL:.*]]:2 = hlfir.declare %[[ARG_A]] dummy_scope %0 +!AVX512F-SAME: {uniq_name = "_QFsimdloop_aligned_cptrEa"} : +!AVX512F-SAME: (!fir.ref>, !fir.dscope) -> +!AVX512F-SAME: (!fir.ref>, +!AVX512F-SAME: !fir.ref>) +subroutine simdloop_aligned_cptr(A) + use iso_c_binding + integer :: i + type (c_ptr) :: A + !DEFAULT: omp.simd aligned(%[[A_DECL]]#1 : !fir.ref + !DEFAULT-SAME: > + !DEFAULT-SAME: -> 128 : i64) + !AVX: omp.simd aligned(%[[A_DECL]]#1 : !fir.ref + !AVX-SAME: > + !AVX-SAME: -> 256 : i64) + !AVX512F: omp.simd aligned(%[[A_DECL]]#1 : !fir.ref + !AVX512F-SAME: > + !AVX512F-SAME: -> 512 : i64) + !$OMP SIMD ALIGNED(A) + do i = 1, 10 + call c_test_call(A) + end do + !$OMP END SIMD +end subroutine From d66084b9b8c924adacac4e5d32de02492c4478a6 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 13 Jun 2024 21:41:35 -0700 Subject: [PATCH 043/155] [RISCV] Remove unnecessary bf16 -mattr from vfmv.v.f.ll. 
NFC --- llvm/test/CodeGen/RISCV/rvv/vfmv.v.f.ll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f.ll b/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f.ll index 81dc78ba9e0509..237ef11d154bad 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmv.v.f.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh,+experimental-zfbfmin,+experimental-zvfbfmin \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh \ ; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+experimental-zfbfmin,+experimental-zvfbfmin \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh \ ; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfmv.v.f.nxv1f16( From bd6568c98a50a180eabc41e9df5b896b7518c587 Mon Sep 17 00:00:00 2001 From: Pradeep Kumar Date: Fri, 14 Jun 2024 10:35:35 +0530 Subject: [PATCH 044/155] [MLIR][GPU] Add gpu.cluster_dim_blocks and gpu.cluster_block_id Ops (#95245) This commit adds support for `gpu.cluster_dim_blocks` and `gpu.cluster_block_id` Ops to represent number of blocks per cluster and block id inside a cluster respectively. 
Also, fixed the description of `gpu.cluster_dim` Op and updated the `cga_cluster.mlir` test file to use `gpu.cluster_dim_blocks` Co-authored-by: pradeepku Co-authored-by: Guray Ozen --- mlir/include/mlir/Dialect/GPU/IR/GPUOps.td | 27 ++++++++++++++++++- mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td | 6 ++--- .../GPUToNVVM/LowerGpuOpsToNVVMOps.cpp | 6 +++++ .../GPU/IR/InferIntRangeInterfaceImpls.cpp | 12 +++++++++ .../GPU/CUDA/sm90/cga_cluster.mlir | 6 ++--- 5 files changed, 50 insertions(+), 7 deletions(-) diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td index eb81b6469746f4..9c5f7ecd8cbe85 100644 --- a/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td +++ b/mlir/include/mlir/Dialect/GPU/IR/GPUOps.td @@ -70,7 +70,7 @@ class GPU_IndexOp traits = []> : def GPU_ClusterDimOp : GPU_IndexOp<"cluster_dim"> { let description = [{ - Returns the number of thread blocks in the cluster along + Returns the number of cluster identifiers per grid along the x, y, or z `dimension`. Example: @@ -81,6 +81,19 @@ def GPU_ClusterDimOp : GPU_IndexOp<"cluster_dim"> { }]; } +def GPU_ClusterDimBlocksOp : GPU_IndexOp<"cluster_dim_blocks"> { + let description = [{ + Returns the number of thread blocks in the cluster along + the x, y, or z `dimension`. + + Example: + + ```mlir + %cDimBlocksX = gpu.cluster_dim_blocks x + ``` + }]; +} + def GPU_ClusterIdOp : GPU_IndexOp<"cluster_id"> { let description = [{ Returns the cluster id, i.e. the index of the current cluster within the @@ -94,6 +107,18 @@ def GPU_ClusterIdOp : GPU_IndexOp<"cluster_id"> { }]; } +def GPU_ClusterBlockIdOp : GPU_IndexOp<"cluster_block_id"> { + let description = [{ + Returns the block id within the cluster along the x, y, or z `dimension`. 
+ + Example: + + ```mlir + %cBlockIdY = gpu.cluster_block_id y + ``` + }]; +} + def GPU_BlockDimOp : GPU_IndexOp<"block_dim"> { let description = [{ Returns the number of threads in the thread block (aka the block size) along diff --git a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td index 4daeeab093863a..4d48b3de7a57ed 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/NVVMOps.td @@ -160,9 +160,9 @@ def NVVM_ClusterDimZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.nclusterid.z">; def NVVM_BlockInClusterIdXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.cluster.ctaid.x">; def NVVM_BlockInClusterIdYOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.cluster.ctaid.y">; def NVVM_BlockInClusterIdZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.cluster.ctaid.z">; -def NVVM_GridInClusterDimXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.cluster.nctaid.x">; -def NVVM_GridInClusterDimYOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.cluster.nctaid.y">; -def NVVM_GridInClusterDimZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.cluster.nctaid.z">; +def NVVM_ClusterDimBlocksXOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.cluster.nctaid.x">; +def NVVM_ClusterDimBlocksYOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.cluster.nctaid.y">; +def NVVM_ClusterDimBlocksZOp : NVVM_SpecialRegisterOp<"read.ptx.sreg.cluster.nctaid.z">; //===----------------------------------------------------------------------===// // CTA index and across Cluster dimensions diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp index b95fba20a00cbe..fdd65e40e90645 100644 --- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp +++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp @@ -342,8 +342,14 @@ void mlir::populateGpuToNVVMConversionPatterns(LLVMTypeConverter &converter, NVVM::BlockDimYOp, NVVM::BlockDimZOp>, GPUIndexIntrinsicOpLowering, + GPUIndexIntrinsicOpLowering< + 
gpu::ClusterBlockIdOp, NVVM::BlockInClusterIdXOp, + NVVM::BlockInClusterIdYOp, NVVM::BlockInClusterIdZOp>, GPUIndexIntrinsicOpLowering, + GPUIndexIntrinsicOpLowering< + gpu::ClusterDimBlocksOp, NVVM::ClusterDimBlocksXOp, + NVVM::ClusterDimBlocksYOp, NVVM::ClusterDimBlocksZOp>, GPUIndexIntrinsicOpLowering, GPUIndexIntrinsicOpLowering getKnownLaunchDim(Op op, LaunchDims type) { void ClusterDimOp::inferResultRanges(ArrayRef, SetIntRangeFn setResultRange) { + uint64_t max = APInt::getMaxValue(64).getZExtValue(); + setResultRange(getResult(), getIndexRange(1, max)); +} + +void ClusterDimBlocksOp::inferResultRanges(ArrayRef, + SetIntRangeFn setResultRange) { setResultRange(getResult(), getIndexRange(1, kMaxClusterDim)); } @@ -95,6 +101,12 @@ void ClusterIdOp::inferResultRanges(ArrayRef, setResultRange(getResult(), getIndexRange(0, max - 1ULL)); } +void ClusterBlockIdOp::inferResultRanges(ArrayRef, + SetIntRangeFn setResultRange) { + uint64_t max = kMaxClusterDim; + setResultRange(getResult(), getIndexRange(0, max - 1ULL)); +} + void BlockDimOp::inferResultRanges(ArrayRef, SetIntRangeFn setResultRange) { std::optional knownVal = diff --git a/mlir/test/Integration/GPU/CUDA/sm90/cga_cluster.mlir b/mlir/test/Integration/GPU/CUDA/sm90/cga_cluster.mlir index 025282ec0d688f..5c11d80178f727 100644 --- a/mlir/test/Integration/GPU/CUDA/sm90/cga_cluster.mlir +++ b/mlir/test/Integration/GPU/CUDA/sm90/cga_cluster.mlir @@ -22,9 +22,9 @@ module attributes {gpu.container_module} { %cidX = gpu.cluster_id x %cidY = gpu.cluster_id y %cidZ = gpu.cluster_id z - %cdimX = gpu.cluster_dim x - %cdimY = gpu.cluster_dim y - %cdimZ = gpu.cluster_dim z + %cdimX = gpu.cluster_dim_blocks x + %cdimY = gpu.cluster_dim_blocks y + %cdimZ = gpu.cluster_dim_blocks z %bidX = gpu.block_id x %bidY = gpu.block_id y %bidZ = gpu.block_id z From b95446286bddc521fa92012fdb60fe0d24c63346 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 13 Jun 2024 22:08:10 -0700 Subject: [PATCH 045/155] [RISCV] Remove 
partially duplicate riscv_vfmv_v_f_vl patterns. We had specific patterns for riscv_vfmv_v_f_vl in both RISCVInstrInfoVVLPatterns.td and RISCVInstrInfoVSDPatterns.td. The RISCVInstrInfoVSDPatterns.td patterns could only match if the RISCVInstrInfoVVLPatterns.td failed. As far as I can tell this would only happen if the predicate didn't match. Tweak the predicate so the RISCVInstrInfoVVLPatterns.td can match in more cases. --- .../Target/RISCV/RISCVInstrInfoVSDPatterns.td | 20 ------------------- .../Target/RISCV/RISCVInstrInfoVVLPatterns.td | 6 ++++-- 2 files changed, 4 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td index 497c4aadf7535f..9042fe610a39fc 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -1472,26 +1472,6 @@ foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in { fvti.AVL, fvti.Log2SEW, TA_MA)>; } -//===----------------------------------------------------------------------===// -// Vector Splats -//===----------------------------------------------------------------------===// - -foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in { - let Predicates = !listconcat(GetVTypePredicates.Vti>.Predicates, - GetVTypeScalarPredicates.Predicates) in - def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl undef, fvti.ScalarRegClass:$rs1, srcvalue)), - (!cast("PseudoVFMV_V_"#fvti.ScalarSuffix#"_"#fvti.LMul.MX) - (fvti.Vector (IMPLICIT_DEF)), - (fvti.Scalar fvti.ScalarRegClass:$rs1), - fvti.AVL, fvti.Log2SEW, TA_MA)>; - defvar ivti = GetIntVTypeInfo.Vti; - let Predicates = GetVTypePredicates.Predicates in - def : Pat<(fvti.Vector (SplatFPOp (fvti.Scalar fpimm0))), - (!cast("PseudoVMV_V_I_"#fvti.LMul.MX) - (fvti.Vector (IMPLICIT_DEF)), - 0, fvti.AVL, fvti.Log2SEW, TA_MA)>; -} - //===----------------------------------------------------------------------===// // Vector Element Extracts 
//===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index 9fff89d3092b37..440b8963dfe3fd 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -2654,8 +2654,8 @@ foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in { } foreach fvti = AllFloatVectors in { - let Predicates = !listconcat(GetVTypePredicates.Predicates, - GetVTypeScalarPredicates.Predicates) in { + defvar ivti = GetIntVTypeInfo.Vti; + let Predicates = GetVTypePredicates.Predicates in { // 13.16. Vector Floating-Point Move Instruction // If we're splatting fpimm0, use vmv.v.x vd, x0. def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl @@ -2666,7 +2666,9 @@ foreach fvti = AllFloatVectors in { fvti.Vector:$passthru, (fvti.Scalar (SelectFPImm (XLenVT GPR:$imm))), VLOpFrag)), (!cast("PseudoVMV_V_X_"#fvti.LMul.MX) $passthru, GPR:$imm, GPR:$vl, fvti.Log2SEW, TU_MU)>; + } + let Predicates = GetVTypePredicates.Predicates in { def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl fvti.Vector:$passthru, (fvti.Scalar fvti.ScalarRegClass:$rs2), VLOpFrag)), (!cast("PseudoVFMV_V_" # fvti.ScalarSuffix # "_" # From f83d5d293dced17de175ad69de6b81503716d3ce Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 13 Jun 2024 22:16:28 -0700 Subject: [PATCH 046/155] [RISCV] Remove vfmerge.vf patterns with bf16 types. These patterns are no longer used because we don't generate bf16 to vector splats except for constants that can be handled with vmerge.vi. 
--- llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td | 16 ---------------- .../Target/RISCV/RISCVInstrInfoVSDPatterns.td | 5 +++-- .../Target/RISCV/RISCVInstrInfoVVLPatterns.td | 5 +++-- 3 files changed, 6 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td index a206974e53e510..45a57d1170814c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -382,17 +382,6 @@ class GetIntVTypeInfo { !cast(vti)))); } -// This functor is used to obtain the fp vector type that has the same SEW and -// multiplier as the input parameter type. -class GetFpVTypeInfo { - // Equivalent integer vector type. Eg. - // VF16M1 → VF16M1 (identity) - // VBF16M1 → VF16M1 - VTypeInfo Vti = !cast(!subst("VBF", "VF", - !subst("VI", "VF", - !cast(vti)))); -} - class MTypeInfo { ValueType Mask = Mas; // {SEW, VLMul} values set a valid VType to deal with this mask type. @@ -769,11 +758,6 @@ class GetVTypePredicates { true : [HasVInstructions]); } -class GetVTypeScalarPredicates { - list Predicates = !cond(!eq(vti.Scalar, bf16) : [HasStdExtZfbfmin], - true : []); -} - class VPseudoUSLoadNoMask : Pseudo<(outs RetClass:$rd), diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td index 9042fe610a39fc..3489c62b2c5c1e 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -1412,9 +1412,10 @@ foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in { fvti.RegClass:$rs2, 0, (fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>; } +} - let Predicates = !listconcat(GetVTypePredicates.Vti>.Predicates, - GetVTypeScalarPredicates.Predicates) in +foreach fvti = AllFloatVectors in { + let Predicates = GetVTypePredicates.Predicates in def : Pat<(fvti.Vector (vselect (fvti.Mask V0), (SplatFPOp fvti.ScalarRegClass:$rs1), fvti.RegClass:$rs2)), 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index 440b8963dfe3fd..372f2c5bbf9f18 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -2638,9 +2638,10 @@ foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in { fvti.RegClass:$merge, fvti.RegClass:$rs2, 0, (fvti.Mask V0), GPR:$vl, fvti.Log2SEW)>; } +} - let Predicates = !listconcat(GetVTypePredicates.Vti>.Predicates, - GetVTypeScalarPredicates.Predicates) in { +foreach fvti = AllFloatVectors in { + let Predicates = GetVTypePredicates.Predicates in { def : Pat<(fvti.Vector (riscv_vmerge_vl (fvti.Mask V0), (SplatFPOp fvti.ScalarRegClass:$rs1), fvti.RegClass:$rs2, From 53dbc1f9f142c635e34b7fed3018f1954d0b573a Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 13 Jun 2024 22:31:27 -0700 Subject: [PATCH 047/155] [RISCV] Add vselect pattern with SelectFPImm. --- .../Target/RISCV/RISCVInstrInfoVSDPatterns.td | 8 ++- llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll | 50 +++++++++++++++++-- 2 files changed, 53 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td index 3489c62b2c5c1e..e82625f085bec5 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -1404,13 +1404,19 @@ foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in { fvti.RegClass:$rs2, fvti.RegClass:$rs1, (fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>; + def : Pat<(fvti.Vector (vselect (fvti.Mask V0), + (SplatFPOp (SelectFPImm (XLenVT GPR:$imm))), + fvti.RegClass:$rs2)), + (!cast("PseudoVMERGE_VXM_"#fvti.LMul.MX) + (fvti.Vector (IMPLICIT_DEF)), + fvti.RegClass:$rs2, GPR:$imm, (fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>; + def : Pat<(fvti.Vector (vselect (fvti.Mask V0), (SplatFPOp (fvti.Scalar fpimm0)), fvti.RegClass:$rs2)), 
(!cast("PseudoVMERGE_VIM_"#fvti.LMul.MX) (fvti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs2, 0, (fvti.Mask V0), fvti.AVL, fvti.Log2SEW)>; - } } diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll index 53b8e4a78b756f..f8274b4cf80379 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVFH +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,CHECK-ZVFH ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVFH +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,CHECK-ZVFH ; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVFHMIN +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,CHECK-ZVFHMIN ; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfhmin,+v -target-abi=lp64d \ -; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVFHMIN +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,CHECK-ZVFHMIN define @vfmerge_vv_nxv1f16( %va, %vb, %cond) { ; CHECK-LABEL: vfmerge_vv_nxv1f16: @@ -142,6 +142,17 @@ define @vfmerge_zv_nxv8f16( %va, %vc } +define @vfmerge_nzv_nxv8f16( %va, %cond) { +; CHECK-LABEL: vfmerge_nzv_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 1048568 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %vc = select %cond, splat (half -0.0), %va + ret %vc +} + define @vmerge_truelhs_nxv8f16_0( %va, %vb) { ; CHECK-LABEL: 
vmerge_truelhs_nxv8f16_0: ; CHECK: # %bb.0: @@ -322,6 +333,17 @@ define @vfmerge_zv_nxv8f32( %va, %vc } +define @vfmerge_nzv_nxv8f32( %va, %cond) { +; CHECK-LABEL: vfmerge_nzv_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: ret + %vc = select %cond, splat (float -0.0), %va + ret %vc +} + define @vfmerge_vv_nxv16f32( %va, %vb, %cond) { ; CHECK-LABEL: vfmerge_vv_nxv16f32: ; CHECK: # %bb.0: @@ -442,6 +464,26 @@ define @vfmerge_zv_nxv8f64( %va, %vc } +define @vfmerge_nzv_nxv8f64( %va, %cond) { +; RV32-LABEL: vfmerge_nzv_nxv8f64: +; RV32: # %bb.0: +; RV32-NEXT: fcvt.d.w fa5, zero +; RV32-NEXT: fneg.d fa5, fa5 +; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32-NEXT: vfmerge.vfm v8, v8, fa5, v0 +; RV32-NEXT: ret +; +; RV64-LABEL: vfmerge_nzv_nxv8f64: +; RV64: # %bb.0: +; RV64-NEXT: li a0, -1 +; RV64-NEXT: slli a0, a0, 63 +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vmerge.vxm v8, v8, a0, v0 +; RV64-NEXT: ret + %vc = select %cond, splat (double -0.0), %va + ret %vc +} + define @vselect_combine_regression( %va, %vb) { ; CHECK-LABEL: vselect_combine_regression: ; CHECK: # %bb.0: From 2e7b95e4c080426e5085c38cec01176b56798534 Mon Sep 17 00:00:00 2001 From: Ziqing Luo Date: Thu, 13 Jun 2024 22:44:24 -0700 Subject: [PATCH 048/155] [Safe Buffers] Serialize unsafe_buffer_usage pragmas (#92031) The commit adds serialization and de-serialization implementations for the stored regions. Basically, the serialized representation of the regions of a PP is a (ordered) sequence of source location encodings. For de-serialization, regions from loaded files are stored by their ASTs. When later one queries if a loaded location L is in an opt-out region, PP looks up the regions of the loaded AST where L is at. (Background if helps: a pair of `#pragma clang unsafe_buffer_usage begin/end` pragmas marks a warning-opt-out region. 
The begin and end locations (opt-out regions) are stored in preprocessor instances (PP) and will be queried by the `-Wunsafe-buffer-usage` analyzer.) The reported issue at upstream: https://github.com/llvm/llvm-project/issues/90501 rdar://124035402 --- clang/include/clang/Basic/SourceManager.h | 5 + clang/include/clang/Lex/Preprocessor.h | 52 +++++- .../include/clang/Serialization/ASTBitCodes.h | 3 + clang/lib/Basic/SourceManager.cpp | 18 +++ clang/lib/Lex/Preprocessor.cpp | 110 ++++++++++--- clang/lib/Serialization/ASTReader.cpp | 11 ++ clang/lib/Serialization/ASTWriter.cpp | 7 + clang/test/Modules/safe_buffers_optout.cpp | 151 ++++++++++++++++++ ...unsafe-buffer-usage-pragma-pch-complex.cpp | 63 ++++++++ ...-buffer-usage-pragma-pch-cross-files-2.cpp | 25 +++ ...fe-buffer-usage-pragma-pch-cross-files.cpp | 29 ++++ .../warn-unsafe-buffer-usage-pragma-pch.cpp | 27 ++++ 12 files changed, 477 insertions(+), 24 deletions(-) create mode 100644 clang/test/Modules/safe_buffers_optout.cpp create mode 100644 clang/test/PCH/unsafe-buffer-usage-pragma-pch-complex.cpp create mode 100644 clang/test/PCH/unsafe-buffer-usage-pragma-pch-cross-files-2.cpp create mode 100644 clang/test/PCH/unsafe-buffer-usage-pragma-pch-cross-files.cpp create mode 100644 clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch.cpp diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h index ce33423551039c..d2e2e914327f2b 100644 --- a/clang/include/clang/Basic/SourceManager.h +++ b/clang/include/clang/Basic/SourceManager.h @@ -1676,6 +1676,11 @@ class SourceManager : public RefCountedBase { isInTheSameTranslationUnit(std::pair &LOffs, std::pair &ROffs) const; + /// \param Loc a source location in a loaded AST (of a PCH/Module file). + /// \returns a FileID uniquely identifies the AST of a loaded + /// module/PCH where `Loc` is at. 
+ FileID getUniqueLoadedASTFileID(SourceLocation Loc) const; + /// Determines whether the two decomposed source location is in the same TU. bool isInTheSameTranslationUnitImpl( const std::pair &LOffs, diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 9b1628d2d86f9e..9d8a1aae23df3e 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -2883,11 +2883,41 @@ class Preprocessor { /// otherwise. SourceLocation CurrentSafeBufferOptOutStart; // It is used to report the start location of an never-closed region. - // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in one - // translation unit. Each region is represented by a pair of start and end - // locations. A region is "open" if its' start and end locations are - // identical. - SmallVector, 8> SafeBufferOptOutMap; + using SafeBufferOptOutRegionsTy = + SmallVector, 16>; + // An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in this + // translation unit. Each region is represented by a pair of start and + // end locations. + SafeBufferOptOutRegionsTy SafeBufferOptOutMap; + + // The "-Wunsafe-buffer-usage" opt-out regions in loaded ASTs. We use the + // following structure to manage them by their ASTs. + struct { + // A map from unique IDs to region maps of loaded ASTs. The ID identifies a + // loaded AST. See `SourceManager::getUniqueLoadedASTID`. + llvm::DenseMap LoadedRegions; + + // Returns a reference to the safe buffer opt-out regions of the loaded + // AST where `Loc` belongs to. (Construct if absent) + SafeBufferOptOutRegionsTy & + findAndConsLoadedOptOutMap(SourceLocation Loc, SourceManager &SrcMgr) { + return LoadedRegions[SrcMgr.getUniqueLoadedASTFileID(Loc)]; + } + + // Returns a reference to the safe buffer opt-out regions of the loaded + // AST where `Loc` belongs to. (This const function returns nullptr if + // absent.) 
+ const SafeBufferOptOutRegionsTy * + lookupLoadedOptOutMap(SourceLocation Loc, + const SourceManager &SrcMgr) const { + FileID FID = SrcMgr.getUniqueLoadedASTFileID(Loc); + auto Iter = LoadedRegions.find(FID); + + if (Iter == LoadedRegions.end()) + return nullptr; + return &Iter->getSecond(); + } + } LoadedSafeBufferOptOutMap; public: /// \return true iff the given `Loc` is in a "-Wunsafe-buffer-usage" opt-out @@ -2918,6 +2948,18 @@ class Preprocessor { /// opt-out region bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc); + /// \return a sequence of SourceLocations representing ordered opt-out regions + /// specified by + /// `\#pragma clang unsafe_buffer_usage begin/end`s of this translation unit. + SmallVector serializeSafeBufferOptOutMap() const; + + /// \param SrcLocSeqs a sequence of SourceLocations deserialized from a + /// record of code `PP_UNSAFE_BUFFER_USAGE`. + /// \return true iff the `Preprocessor` has been updated; false `Preprocessor` + /// is same as itself before the call. + bool setDeserializedSafeBufferOptOutMap( + const SmallVectorImpl &SrcLocSeqs); + private: /// Helper functions to forward lexing to the actual lexer. They all share the /// same signature. diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 52a6c5e10f8025..4ce6cd74dd834f 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -694,6 +694,9 @@ enum ASTRecordTypes { /// Record code for lexical and visible block for delayed namespace in /// reduced BMI. DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD = 68, + + /// Record code for \#pragma clang unsafe_buffer_usage begin/end + PP_UNSAFE_BUFFER_USAGE = 69, }; /// Record types used within a source manager block. 
diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp index 753601e01b5c30..f0af1a3e3a38b5 100644 --- a/clang/lib/Basic/SourceManager.cpp +++ b/clang/lib/Basic/SourceManager.cpp @@ -1915,6 +1915,24 @@ SourceManager::getDecomposedIncludedLoc(FileID FID) const { return DecompLoc; } +FileID SourceManager::getUniqueLoadedASTFileID(SourceLocation Loc) const { + assert(isLoadedSourceLocation(Loc) && + "Must be a source location in a loaded PCH/Module file"); + + auto [FID, Ignore] = getDecomposedLoc(Loc); + // `LoadedSLocEntryAllocBegin` stores the sorted lowest FID of each loaded + // allocation. Later allocations have lower FileIDs. The call below is to find + // the lowest FID of a loaded allocation from any FID in the same allocation. + // The lowest FID is used to identify a loaded allocation. + const FileID *FirstFID = + llvm::lower_bound(LoadedSLocEntryAllocBegin, FID, std::greater{}); + + assert(FirstFID && + "The failure to find the first FileID of a " + "loaded AST from a loaded source location was unexpected."); + return *FirstFID; +} + bool SourceManager::isInTheSameTranslationUnitImpl( const std::pair &LOffs, const std::pair &ROffs) const { diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 0b70192743a399..44b69a58f34110 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -58,6 +58,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Support/Capacity.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" @@ -1483,26 +1484,56 @@ void Preprocessor::emitFinalMacroWarning(const Token &Identifier, } bool Preprocessor::isSafeBufferOptOut(const SourceManager &SourceMgr, - const SourceLocation &Loc) const { - // Try to find a region in `SafeBufferOptOutMap` where `Loc` is in: - auto FirstRegionEndingAfterLoc = llvm::partition_point( - 
SafeBufferOptOutMap, - [&SourceMgr, - &Loc](const std::pair &Region) { - return SourceMgr.isBeforeInTranslationUnit(Region.second, Loc); - }); + const SourceLocation &Loc) const { + // The lambda that tests if a `Loc` is in an opt-out region given one opt-out + // region map: + auto TestInMap = [&SourceMgr](const SafeBufferOptOutRegionsTy &Map, + const SourceLocation &Loc) -> bool { + // Try to find a region in `SafeBufferOptOutMap` where `Loc` is in: + auto FirstRegionEndingAfterLoc = llvm::partition_point( + Map, [&SourceMgr, + &Loc](const std::pair &Region) { + return SourceMgr.isBeforeInTranslationUnit(Region.second, Loc); + }); + + if (FirstRegionEndingAfterLoc != Map.end()) { + // To test if the start location of the found region precedes `Loc`: + return SourceMgr.isBeforeInTranslationUnit( + FirstRegionEndingAfterLoc->first, Loc); + } + // If we do not find a region whose end location passes `Loc`, we want to + // check if the current region is still open: + if (!Map.empty() && Map.back().first == Map.back().second) + return SourceMgr.isBeforeInTranslationUnit(Map.back().first, Loc); + return false; + }; - if (FirstRegionEndingAfterLoc != SafeBufferOptOutMap.end()) { - // To test if the start location of the found region precedes `Loc`: - return SourceMgr.isBeforeInTranslationUnit(FirstRegionEndingAfterLoc->first, - Loc); - } - // If we do not find a region whose end location passes `Loc`, we want to - // check if the current region is still open: - if (!SafeBufferOptOutMap.empty() && - SafeBufferOptOutMap.back().first == SafeBufferOptOutMap.back().second) - return SourceMgr.isBeforeInTranslationUnit(SafeBufferOptOutMap.back().first, - Loc); + // What the following does: + // + // If `Loc` belongs to the local TU, we just look up `SafeBufferOptOutMap`. + // Otherwise, `Loc` is from a loaded AST. We look up the + // `LoadedSafeBufferOptOutMap` first to get the opt-out region map of the + // loaded AST where `Loc` is at. 
Then we find if `Loc` is in an opt-out + // region w.r.t. the region map. If the region map is absent, it means there + // is no opt-out pragma in that loaded AST. + // + // Opt-out pragmas in the local TU or a loaded AST is not visible to another + // one of them. That means if you put the pragmas around a `#include + // "module.h"`, where module.h is a module, it is not actually suppressing + // warnings in module.h. This is fine because warnings in module.h will be + // reported when module.h is compiled in isolation and nothing in module.h + // will be analyzed ever again. So you will not see warnings from the file + // that imports module.h anyway. And you can't even do the same thing for PCHs + // because they can only be included from the command line. + + if (SourceMgr.isLocalSourceLocation(Loc)) + return TestInMap(SafeBufferOptOutMap, Loc); + + const SafeBufferOptOutRegionsTy *LoadedRegions = + LoadedSafeBufferOptOutMap.lookupLoadedOptOutMap(Loc, SourceMgr); + + if (LoadedRegions) + return TestInMap(*LoadedRegions, Loc); return false; } @@ -1551,6 +1582,47 @@ bool Preprocessor::isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc) { return InSafeBufferOptOutRegion; } +SmallVector +Preprocessor::serializeSafeBufferOptOutMap() const { + assert(!InSafeBufferOptOutRegion && + "Attempt to serialize safe buffer opt-out regions before file being " + "completely preprocessed"); + + SmallVector SrcSeq; + + for (const auto &[begin, end] : SafeBufferOptOutMap) { + SrcSeq.push_back(begin); + SrcSeq.push_back(end); + } + // Only `SafeBufferOptOutMap` gets serialized. No need to serialize + // `LoadedSafeBufferOptOutMap` because if this TU loads a pch/module, every + // pch/module in the pch-chain/module-DAG will be loaded one by one in order. + // It means that for each loading pch/module m, it just needs to load m's own + // `SafeBufferOptOutMap`. 
+ return SrcSeq; +} + +bool Preprocessor::setDeserializedSafeBufferOptOutMap( + const SmallVectorImpl &SourceLocations) { + if (SourceLocations.size() == 0) + return false; + + assert(SourceLocations.size() % 2 == 0 && + "ill-formed SourceLocation sequence"); + + auto It = SourceLocations.begin(); + SafeBufferOptOutRegionsTy &Regions = + LoadedSafeBufferOptOutMap.findAndConsLoadedOptOutMap(*It, SourceMgr); + + do { + SourceLocation Begin = *It++; + SourceLocation End = *It++; + + Regions.emplace_back(Begin, End); + } while (It != SourceLocations.end()); + return true; +} + ModuleLoader::~ModuleLoader() = default; CommentHandler::~CommentHandler() = default; diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 59338b44db32f3..89bab014c86ba1 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -3583,6 +3583,17 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F, break; } + case PP_UNSAFE_BUFFER_USAGE: { + if (!Record.empty()) { + SmallVector SrcLocs; + unsigned Idx = 0; + while (Idx < Record.size()) + SrcLocs.push_back(ReadSourceLocation(F, Record, Idx)); + PP.setDeserializedSafeBufferOptOutMap(SrcLocs); + } + break; + } + case PP_CONDITIONAL_STACK: if (!Record.empty()) { unsigned Idx = 0, End = Record.size() - 1; diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index ee3e687636e6a0..ef165979f9a9e7 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -926,6 +926,7 @@ void ASTWriter::WriteBlockInfoBlock() { RECORD(PP_CONDITIONAL_STACK); RECORD(DECLS_TO_CHECK_FOR_DEFERRED_DIAGS); RECORD(PP_ASSUME_NONNULL_LOC); + RECORD(PP_UNSAFE_BUFFER_USAGE); // SourceManager Block. 
BLOCK(SOURCE_MANAGER_BLOCK); @@ -2518,6 +2519,12 @@ void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) { Record.clear(); } + // Write the safe buffer opt-out region map in PP + for (SourceLocation &S : PP.serializeSafeBufferOptOutMap()) + AddSourceLocation(S, Record); + Stream.EmitRecord(PP_UNSAFE_BUFFER_USAGE, Record); + Record.clear(); + // Enter the preprocessor block. Stream.EnterSubblock(PREPROCESSOR_BLOCK_ID, 3); diff --git a/clang/test/Modules/safe_buffers_optout.cpp b/clang/test/Modules/safe_buffers_optout.cpp new file mode 100644 index 00000000000000..2129db65da7525 --- /dev/null +++ b/clang/test/Modules/safe_buffers_optout.cpp @@ -0,0 +1,151 @@ +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t + +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -emit-module -fmodule-name=safe_buffers_test_base -x c++ %t/safe_buffers_test.modulemap -std=c++20\ +// RUN: -o %t/safe_buffers_test_base.pcm -Wunsafe-buffer-usage +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -emit-module -fmodule-name=safe_buffers_test_textual -x c++ %t/safe_buffers_test.modulemap -std=c++20\ +// RUN: -o %t/safe_buffers_test_textual.pcm -Wunsafe-buffer-usage +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -emit-module -fmodule-name=safe_buffers_test_optout -x c++ %t/safe_buffers_test.modulemap -std=c++20\ +// RUN: -fmodule-file=%t/safe_buffers_test_base.pcm -fmodule-file=%t/safe_buffers_test_textual.pcm \ +// RUN: -o %t/safe_buffers_test_optout.pcm -Wunsafe-buffer-usage +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodule-file=%t/safe_buffers_test_optout.pcm -I %t -std=c++20 -Wunsafe-buffer-usage\ +// RUN: -verify %t/safe_buffers_optout-explicit.cpp + + +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -verify -fmodules-cache-path=%t -fmodule-map-file=%t/safe_buffers_test.modulemap -I%t\ +// RUN: -x c++ -std=c++20 -Wunsafe-buffer-usage %t/safe_buffers_optout-implicit.cpp + +//--- safe_buffers_test.modulemap +module 
safe_buffers_test_base { + header "base.h" +} + +module safe_buffers_test_textual { + textual header "textual.h" +} + +module safe_buffers_test_optout { + explicit module test_sub1 { header "test_sub1.h" } + explicit module test_sub2 { header "test_sub2.h" } + use safe_buffers_test_base +} + +//--- base.h +#ifdef __cplusplus +int base(int *p) { + int x = p[5]; +#pragma clang unsafe_buffer_usage begin + int y = p[5]; +#pragma clang unsafe_buffer_usage end + return x + y; +} +#endif + +//--- test_sub1.h +#include "base.h" + +#ifdef __cplusplus +int sub1(int *p) { + int x = p[5]; +#pragma clang unsafe_buffer_usage begin + int y = p[5]; +#pragma clang unsafe_buffer_usage end + return x + y + base(p); +} + +template +T sub1_T(T *p) { + T x = p[5]; +#pragma clang unsafe_buffer_usage begin + T y = p[5]; +#pragma clang unsafe_buffer_usage end + return x + y; +} +#endif + +//--- test_sub2.h +#include "base.h" + +#ifdef __cplusplus +int sub2(int *p) { + int x = p[5]; +#pragma clang unsafe_buffer_usage begin + int y = p[5]; +#pragma clang unsafe_buffer_usage end + return x + y + base(p); +} +#endif + +//--- textual.h +#ifdef __cplusplus +int textual(int *p) { + int x = p[5]; + int y = p[5]; + return x + y; +} +#endif + +//--- safe_buffers_optout-explicit.cpp +#include "test_sub1.h" +#include "test_sub2.h" + +// Testing safe buffers opt-out region serialization with modules: this +// file loads 2 submodules from top-level module +// `safe_buffers_test_optout`, which uses another top-level module +// `safe_buffers_test_base`. (So the module dependencies form a DAG.) + +// No expected warnings from base.h because base.h is a separate +// module and in a separate TU that is not textually included. The +// explicit command that builds base.h has no `-Wunsafe-buffer-usage`. 
+ +// expected-warning@base.h:3{{unsafe buffer access}} +// expected-note@base.h:3{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} +// expected-warning@test_sub1.h:5{{unsafe buffer access}} +// expected-note@test_sub1.h:5{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} +// expected-warning@test_sub1.h:14{{unsafe buffer access}} +// expected-note@test_sub1.h:14{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} +// expected-warning@test_sub2.h:5{{unsafe buffer access}} +// expected-note@test_sub2.h:5{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} +int foo(int * p) { + int x = p[5]; // expected-warning{{unsafe buffer access}} expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} +#pragma clang unsafe_buffer_usage begin + int y = p[5]; +#pragma clang unsafe_buffer_usage end + sub1_T(p); // instantiate template + return sub1(p) + sub2(p); +} + +#pragma clang unsafe_buffer_usage begin +#include "textual.h" // This header is textually included (i.e., it is in the same TU as %s), so warnings are suppressed +#pragma clang unsafe_buffer_usage end + +//--- safe_buffers_optout-implicit.cpp +#include "test_sub1.h" +#include "test_sub2.h" + +// Testing safe buffers opt-out region serialization with modules: this +// file loads 2 submodules from top-level module +// `safe_buffers_test_optout`, which uses another top-level module +// `safe_buffers_test_base`. (So the module dependencies form a DAG.) 
+ +// expected-warning@base.h:3{{unsafe buffer access}} +// expected-note@base.h:3{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} +// expected-warning@test_sub1.h:5{{unsafe buffer access}} +// expected-note@test_sub1.h:5{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} +// expected-warning@test_sub1.h:14{{unsafe buffer access}} +// expected-note@test_sub1.h:14{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} +// expected-warning@test_sub2.h:5{{unsafe buffer access}} +// expected-note@test_sub2.h:5{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} +int foo(int * p) { + int x = p[5]; // expected-warning{{unsafe buffer access}} expected-note{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} +#pragma clang unsafe_buffer_usage begin + int y = p[5]; +#pragma clang unsafe_buffer_usage end + sub1_T(p); // instantiate template + return sub1(p) + sub2(p); +} + +#pragma clang unsafe_buffer_usage begin +#include "textual.h" // This header is textually included (i.e., it is in the same TU as %s), so warnings are suppressed +#pragma clang unsafe_buffer_usage end diff --git a/clang/test/PCH/unsafe-buffer-usage-pragma-pch-complex.cpp b/clang/test/PCH/unsafe-buffer-usage-pragma-pch-complex.cpp new file mode 100644 index 00000000000000..03bf01dc08c356 --- /dev/null +++ b/clang/test/PCH/unsafe-buffer-usage-pragma-pch-complex.cpp @@ -0,0 +1,63 @@ +// Test PCHs: +// MAIN - includes textual_1.h +// \ loads pch_1.h - includes textual_2.h +// \ loads pch_2.h + +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t + +// RUN: %clang_cc1 -Wno-unused-value -std=c++20 -emit-pch -o %t/pch_2.h.pch %t/pch_2.h -x c++ +// RUN: %clang_cc1 -Wno-unused-value -std=c++20 -include-pch %t/pch_2.h.pch -emit-pch -o %t/pch_1.h.pch %t/pch_1.h -x c++ +// RUN: %clang_cc1 -Wno-unused-value -std=c++20 -include-pch %t/pch_1.h.pch -verify %t/main.cpp 
-Wunsafe-buffer-usage + + +//--- textual_1.h +int a(int *s) { + s[2]; // <- expected warning here +#pragma clang unsafe_buffer_usage begin + return s[1]; +#pragma clang unsafe_buffer_usage end +} + +//--- textual_2.h +int b(int *s) { + s[2]; // <- expected warning here +#pragma clang unsafe_buffer_usage begin + return s[1]; +#pragma clang unsafe_buffer_usage end +} + +//--- pch_1.h +#include "textual_2.h" + +int c(int *s) { + s[2]; // <- expected warning here +#pragma clang unsafe_buffer_usage begin + return s[1]; +#pragma clang unsafe_buffer_usage end +} + +//--- pch_2.h +int d(int *s) { + s[2]; // <- expected warning here +#pragma clang unsafe_buffer_usage begin + return s[1]; +#pragma clang unsafe_buffer_usage end +} + + +//--- main.cpp +#include "textual_1.h" +// expected-warning@textual_1.h:2{{unsafe buffer access}} \ + expected-note@textual_1.h:2{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} +// expected-warning@textual_2.h:2{{unsafe buffer access}} \ + expected-note@textual_2.h:2{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} +// expected-warning@pch_1.h:4{{unsafe buffer access}} \ + expected-note@pch_1.h:4{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} +// expected-warning@pch_2.h:2{{unsafe buffer access}} \ + expected-note@pch_2.h:2{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} +int main() { + int s[] = {1, 2, 3}; + return a(s) + b(s) + c(s) + d(s); +} diff --git a/clang/test/PCH/unsafe-buffer-usage-pragma-pch-cross-files-2.cpp b/clang/test/PCH/unsafe-buffer-usage-pragma-pch-cross-files-2.cpp new file mode 100644 index 00000000000000..66b3f13c712efa --- /dev/null +++ b/clang/test/PCH/unsafe-buffer-usage-pragma-pch-cross-files-2.cpp @@ -0,0 +1,25 @@ +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t + +// RUN: %clang_cc1 -Wno-unused-value -std=c++20 -emit-pch -o %t/header.pch %t/header.h -x c++ +// RUN: %clang_cc1 
-Wno-unused-value -Wunsafe-buffer-usage -std=c++20 -include-pch %t/header.pch -verify %t/main.cpp + +//--- header.h +int foo(int *p) { + return p[5]; // This will be warned +} + +#pragma clang unsafe_buffer_usage begin // The opt-out region spans over two files of one TU +#include "header-2.h" + + +//--- header-2.h +int bar(int *p) { + return p[5]; // suppressed by the cross-file opt-out region +} +#pragma clang unsafe_buffer_usage end + +//--- main.cpp +// expected-warning@header.h:2 {{unsafe buffer access}} +// expected-note@header.h:2 {{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} diff --git a/clang/test/PCH/unsafe-buffer-usage-pragma-pch-cross-files.cpp b/clang/test/PCH/unsafe-buffer-usage-pragma-pch-cross-files.cpp new file mode 100644 index 00000000000000..ace9d0e4fe9efd --- /dev/null +++ b/clang/test/PCH/unsafe-buffer-usage-pragma-pch-cross-files.cpp @@ -0,0 +1,29 @@ +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: %clang_cc1 -Wno-unused-value -std=c++20 -emit-pch -o %t/header.pch %t/header.h -x c++ +// RUN: %clang_cc1 -Wno-unused-value -Wunsafe-buffer-usage -std=c++20 -include-pch %t/header.pch -verify %t/main.cpp + +//--- header.h +int foo(int *p) { + return p[5]; // This will be warned +} + +#pragma clang unsafe_buffer_usage begin +#include "header-2.h" +#pragma clang unsafe_buffer_usage end + +//--- header-2.h +// Included by the PCH in the traditional way. The include directive +// in the PCH is enclosed in an opt-out region, so unsafe operations +// here is suppressed. 
+ +int bar(int *p) { + return p[5]; +} + + +//--- main.cpp +// expected-warning@header.h:2 {{unsafe buffer access}} +// expected-note@header.h:2 {{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} diff --git a/clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch.cpp b/clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch.cpp new file mode 100644 index 00000000000000..abd3f0ffe95652 --- /dev/null +++ b/clang/test/SemaCXX/warn-unsafe-buffer-usage-pragma-pch.cpp @@ -0,0 +1,27 @@ +// The original example from https://github.com/llvm/llvm-project/issues/90501 + +// Test without PCH +// RUN: %clang_cc1 -Wno-unused-value -Wunsafe-buffer-usage -std=c++20 -include %s -verify %s +// Test with PCH +// RUN: %clang_cc1 -Wno-unused-value -std=c++20 -emit-pch -o %t %s +// RUN: %clang_cc1 -Wno-unused-value -Wunsafe-buffer-usage -std=c++20 -include-pch %t -verify %s + +#ifndef A_H +#define A_H + +int a(int *s) { + s[2]; // <- expected warning here +#pragma clang unsafe_buffer_usage begin + return s[1]; +#pragma clang unsafe_buffer_usage end +} + +#else +// expected-warning@-7{{unsafe buffer access}} +// expected-note@-8{{pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions}} +int main() { + int s[] = {1, 2, 3}; + return a(s); +} + +#endif From ebdea52930678a2f2e7fb94415121654100b8be6 Mon Sep 17 00:00:00 2001 From: Petr Hosek Date: Thu, 13 Jun 2024 23:02:58 -0700 Subject: [PATCH 049/155] [libc] Provide vprintf for baremetal (#95363) This is similar to baremetal printf that was implemented in #94078. 
--- libc/src/stdio/CMakeLists.txt | 2 +- libc/src/stdio/baremetal/CMakeLists.txt | 13 +++++++ libc/src/stdio/baremetal/vprintf.cpp | 49 +++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 1 deletion(-) create mode 100644 libc/src/stdio/baremetal/vprintf.cpp diff --git a/libc/src/stdio/CMakeLists.txt b/libc/src/stdio/CMakeLists.txt index ee48e441d1c59f..7cf3278b3061c1 100644 --- a/libc/src/stdio/CMakeLists.txt +++ b/libc/src/stdio/CMakeLists.txt @@ -22,7 +22,7 @@ if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS}) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS}) endif() -if(NOT LIBC_TARGET_OS_IS_GPU) +if(NOT LIBC_TARGET_OS_IS_BAREMETAL AND NOT LIBC_TARGET_OS_IS_GPU) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/generic) endif() diff --git a/libc/src/stdio/baremetal/CMakeLists.txt b/libc/src/stdio/baremetal/CMakeLists.txt index e43f6bdcfef6f5..45196ffc9de248 100644 --- a/libc/src/stdio/baremetal/CMakeLists.txt +++ b/libc/src/stdio/baremetal/CMakeLists.txt @@ -31,3 +31,16 @@ add_entrypoint_object( libc.src.__support.OSUtil.osutil libc.src.__support.CPP.string_view ) + +add_entrypoint_object( + vprintf + SRCS + vprintf.cpp + HDRS + ../vprintf.h + DEPENDS + libc.src.stdio.printf_core.printf_main + libc.src.stdio.printf_core.writer + libc.src.__support.arg_list + libc.src.__support.OSUtil.osutil +) diff --git a/libc/src/stdio/baremetal/vprintf.cpp b/libc/src/stdio/baremetal/vprintf.cpp new file mode 100644 index 00000000000000..cd1541297f3b6b --- /dev/null +++ b/libc/src/stdio/baremetal/vprintf.cpp @@ -0,0 +1,49 @@ +//===-- Implementation of vprintf -------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/vprintf.h" +#include "src/__support/OSUtil/io.h" +#include "src/__support/arg_list.h" +#include "src/stdio/printf_core/core_structs.h" +#include "src/stdio/printf_core/printf_main.h" +#include "src/stdio/printf_core/writer.h" + +#include + +namespace LIBC_NAMESPACE { + +namespace { + +LIBC_INLINE int raw_write_hook(cpp::string_view new_str, void *) { + write_to_stderr(new_str); + return printf_core::WRITE_OK; +} + +} // namespace + +LLVM_LIBC_FUNCTION(int, vprintf, + (const char *__restrict format, va_list vlist)) { + internal::ArgList args(vlist); // This holder class allows for easier copying + // and pointer semantics, as well as handling + // destruction automatically. + constexpr size_t BUFF_SIZE = 1024; + char buffer[BUFF_SIZE]; + + printf_core::WriteBuffer wb(buffer, BUFF_SIZE, &raw_write_hook, nullptr); + printf_core::Writer writer(&wb); + + int retval = printf_core::printf_main(&writer, format, args); + + int flushval = wb.overflow_write(""); + if (flushval != printf_core::WRITE_OK) + retval = flushval; + + return retval; +} + +} // namespace LIBC_NAMESPACE From b1de42a81d838bb0c6dea7d2436820a2456c730b Mon Sep 17 00:00:00 2001 From: Petr Hosek Date: Thu, 13 Jun 2024 23:04:34 -0700 Subject: [PATCH 050/155] [libc] printf, putchar and vprintf in bareemetal entrypoints (#95436) We now have baremetal implementations of these entrypoints. 
--- libc/config/baremetal/arm/entrypoints.txt | 3 +++ libc/config/baremetal/riscv/entrypoints.txt | 3 +++ 2 files changed, 6 insertions(+) diff --git a/libc/config/baremetal/arm/entrypoints.txt b/libc/config/baremetal/arm/entrypoints.txt index 7fb82c60a1bb85..2930d718fdb210 100644 --- a/libc/config/baremetal/arm/entrypoints.txt +++ b/libc/config/baremetal/arm/entrypoints.txt @@ -80,8 +80,11 @@ set(TARGET_LIBC_ENTRYPOINTS # stdio.h entrypoints libc.src.stdio.remove + libc.src.stdio.printf + libc.src.stdio.putchar libc.src.stdio.sprintf libc.src.stdio.snprintf + libc.src.stdio.vprintf libc.src.stdio.vsprintf libc.src.stdio.vsnprintf diff --git a/libc/config/baremetal/riscv/entrypoints.txt b/libc/config/baremetal/riscv/entrypoints.txt index b769b43f03a2c6..6d38676b1e8a81 100644 --- a/libc/config/baremetal/riscv/entrypoints.txt +++ b/libc/config/baremetal/riscv/entrypoints.txt @@ -80,8 +80,11 @@ set(TARGET_LIBC_ENTRYPOINTS # stdio.h entrypoints libc.src.stdio.remove + libc.src.stdio.printf + libc.src.stdio.putchar libc.src.stdio.sprintf libc.src.stdio.snprintf + libc.src.stdio.vprintf libc.src.stdio.vsprintf libc.src.stdio.vsnprintf From f2d215f572affc9ad73da07763ce1831de7f2d4d Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Fri, 14 Jun 2024 07:20:50 +0100 Subject: [PATCH 051/155] [lldb][TypeSystemClang][NFCI] Factor completion logic for individual types out of GetCompleteQualType (#95402) This patch factors out the completion logic for individual clang::Type's into their own helper functions. During the process I cleaned up a few assumptions (e.g., unnecessary if-guards that could be asserts because these conditions are guaranteed by the `clang::Type::TypeClass` switch in `GetCompleteQualType`). This is mainly motivated by the type-completion rework proposed in https://github.com/llvm/llvm-project/pull/95100. 
--- .../TypeSystem/Clang/TypeSystemClang.cpp | 210 +++++++++++------- 1 file changed, 133 insertions(+), 77 deletions(-) diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index 369ae46cf264a2..dbe6238d4fe5a8 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -2574,6 +2574,128 @@ TypeSystemClang::GetDeclContextForType(clang::QualType type) { return nullptr; } +/// Returns the clang::RecordType of the specified \ref qual_type. This +/// function will try to complete the type if necessary (and allowed +/// by the specified \ref allow_completion). If we fail to return a *complete* +/// type, returns nullptr. +static const clang::RecordType *GetCompleteRecordType(clang::ASTContext *ast, + clang::QualType qual_type, + bool allow_completion) { + assert(qual_type->isRecordType()); + + const auto *tag_type = llvm::cast(qual_type.getTypePtr()); + + clang::CXXRecordDecl *cxx_record_decl = qual_type->getAsCXXRecordDecl(); + + // RecordType with no way of completing it, return the plain + // TagType. + if (!cxx_record_decl || !cxx_record_decl->hasExternalLexicalStorage()) + return tag_type; + + const bool is_complete = cxx_record_decl->isCompleteDefinition(); + const bool fields_loaded = + cxx_record_decl->hasLoadedFieldsFromExternalStorage(); + + // Already completed this type, nothing to be done. + if (is_complete && fields_loaded) + return tag_type; + + if (!allow_completion) + return nullptr; + + // Call the field_begin() accessor to for it to use the external source + // to load the fields... + // + // TODO: if we need to complete the type but have no external source, + // shouldn't we error out instead? 
+ clang::ExternalASTSource *external_ast_source = ast->getExternalSource(); + if (external_ast_source) { + external_ast_source->CompleteType(cxx_record_decl); + if (cxx_record_decl->isCompleteDefinition()) { + cxx_record_decl->field_begin(); + cxx_record_decl->setHasLoadedFieldsFromExternalStorage(true); + } + } + + return tag_type; +} + +/// Returns the clang::EnumType of the specified \ref qual_type. This +/// function will try to complete the type if necessary (and allowed +/// by the specified \ref allow_completion). If we fail to return a *complete* +/// type, returns nullptr. +static const clang::EnumType *GetCompleteEnumType(clang::ASTContext *ast, + clang::QualType qual_type, + bool allow_completion) { + assert(qual_type->isEnumeralType()); + assert(ast); + + const clang::EnumType *enum_type = + llvm::cast(qual_type.getTypePtr()); + + auto *tag_decl = enum_type->getAsTagDecl(); + assert(tag_decl); + + // Already completed, nothing to be done. + if (tag_decl->getDefinition()) + return enum_type; + + if (!allow_completion) + return nullptr; + + // No definition but can't complete it, error out. + if (!tag_decl->hasExternalLexicalStorage()) + return nullptr; + + // We can't complete the type without an external source. + clang::ExternalASTSource *external_ast_source = ast->getExternalSource(); + if (!external_ast_source) + return nullptr; + + external_ast_source->CompleteType(tag_decl); + return enum_type; +} + +/// Returns the clang::ObjCObjectType of the specified \ref qual_type. This +/// function will try to complete the type if necessary (and allowed +/// by the specified \ref allow_completion). If we fail to return a *complete* +/// type, returns nullptr. 
+static const clang::ObjCObjectType * +GetCompleteObjCObjectType(clang::ASTContext *ast, QualType qual_type, + bool allow_completion) { + assert(qual_type->isObjCObjectType()); + assert(ast); + + const clang::ObjCObjectType *objc_class_type = + llvm::cast(qual_type); + + clang::ObjCInterfaceDecl *class_interface_decl = + objc_class_type->getInterface(); + // We currently can't complete objective C types through the newly added + // ASTContext because it only supports TagDecl objects right now... + if (!class_interface_decl) + return objc_class_type; + + // Already complete, nothing to be done. + if (class_interface_decl->getDefinition()) + return objc_class_type; + + if (!allow_completion) + return nullptr; + + // No definition but can't complete it, error out. + if (!class_interface_decl->hasExternalLexicalStorage()) + return nullptr; + + // We can't complete the type without an external source. + clang::ExternalASTSource *external_ast_source = ast->getExternalSource(); + if (!external_ast_source) + return nullptr; + + external_ast_source->CompleteType(class_interface_decl); + return objc_class_type; +} + static bool GetCompleteQualType(clang::ASTContext *ast, clang::QualType qual_type, bool allow_completion = true) { @@ -2591,92 +2713,26 @@ static bool GetCompleteQualType(clang::ASTContext *ast, allow_completion); } break; case clang::Type::Record: { - clang::CXXRecordDecl *cxx_record_decl = qual_type->getAsCXXRecordDecl(); - if (cxx_record_decl) { - if (cxx_record_decl->hasExternalLexicalStorage()) { - const bool is_complete = cxx_record_decl->isCompleteDefinition(); - const bool fields_loaded = - cxx_record_decl->hasLoadedFieldsFromExternalStorage(); - if (is_complete && fields_loaded) - return true; + if (const auto *RT = + GetCompleteRecordType(ast, qual_type, allow_completion)) + return !RT->isIncompleteType(); - if (!allow_completion) - return false; - - // Call the field_begin() accessor to for it to use the external source - // to load the fields... 
- clang::ExternalASTSource *external_ast_source = - ast->getExternalSource(); - if (external_ast_source) { - external_ast_source->CompleteType(cxx_record_decl); - if (cxx_record_decl->isCompleteDefinition()) { - cxx_record_decl->field_begin(); - cxx_record_decl->setHasLoadedFieldsFromExternalStorage(true); - } - } - } - } - const clang::TagType *tag_type = - llvm::cast(qual_type.getTypePtr()); - return !tag_type->isIncompleteType(); + return false; } break; case clang::Type::Enum: { - const clang::TagType *tag_type = - llvm::dyn_cast(qual_type.getTypePtr()); - if (tag_type) { - clang::TagDecl *tag_decl = tag_type->getDecl(); - if (tag_decl) { - if (tag_decl->getDefinition()) - return true; - - if (!allow_completion) - return false; - - if (tag_decl->hasExternalLexicalStorage()) { - if (ast) { - clang::ExternalASTSource *external_ast_source = - ast->getExternalSource(); - if (external_ast_source) { - external_ast_source->CompleteType(tag_decl); - return !tag_type->isIncompleteType(); - } - } - } - return false; - } - } + if (const auto *ET = GetCompleteEnumType(ast, qual_type, allow_completion)) + return !ET->isIncompleteType(); + return false; } break; case clang::Type::ObjCObject: case clang::Type::ObjCInterface: { - const clang::ObjCObjectType *objc_class_type = - llvm::dyn_cast(qual_type); - if (objc_class_type) { - clang::ObjCInterfaceDecl *class_interface_decl = - objc_class_type->getInterface(); - // We currently can't complete objective C types through the newly added - // ASTContext because it only supports TagDecl objects right now... 
- if (class_interface_decl) { - if (class_interface_decl->getDefinition()) - return true; - - if (!allow_completion) - return false; + if (const auto *OT = + GetCompleteObjCObjectType(ast, qual_type, allow_completion)) + return !OT->isIncompleteType(); - if (class_interface_decl->hasExternalLexicalStorage()) { - if (ast) { - clang::ExternalASTSource *external_ast_source = - ast->getExternalSource(); - if (external_ast_source) { - external_ast_source->CompleteType(class_interface_decl); - return !objc_class_type->isIncompleteType(); - } - } - } - return false; - } - } + return false; } break; case clang::Type::Attributed: From d890dda16bf65bc36b783194afbe2ebc3e709afb Mon Sep 17 00:00:00 2001 From: Pierre van Houtryve Date: Fri, 14 Jun 2024 08:34:53 +0200 Subject: [PATCH 052/155] [NFCI][AMDGPU] Try to use PressureDiff to Calculate RegPressure. (#94221) PressureDiff is reliable most of the time, and it's pretty much free compared to RPTracker. We can use it whenever there is no subregister definitions, or physregs invovled. No subregs because PDiff doesn't take into account lane liveness, and no Physreg because it seems to get PhysReg liveness completely wrong. Sometimes it adds a diff, sometimes itt doesn't - I didn't look at that one for long so maybe there is something we can eventually do to make it better. This allows us to save a ton of calls to RPTracker and LIS too. On a huge IR module (100+MB), it went from about 20M calls to RPTracker in this function down to 3.4, with the rest being PressureDiffs. I also added an expensive check to verify correctness of PressureDiff. 
--- llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 129 ++++++++++++++++---- llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 10 +- 2 files changed, 113 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index 94d93390d0916f..217279211531b4 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -116,31 +116,112 @@ void GCNSchedStrategy::initialize(ScheduleDAGMI *DAG) { << ", SGPRExcessLimit = " << SGPRExcessLimit << "\n\n"); } +/// Checks whether \p SU can use the cached DAG pressure diffs to compute the +/// current register pressure. +/// +/// This works for the common case, but it has a few exceptions that have been +/// observed through trial and error: +/// - Explicit physical register operands +/// - Subregister definitions +/// +/// In both of those cases, PressureDiff doesn't represent the actual pressure, +/// and querying LiveIntervals through the RegPressureTracker is needed to get +/// an accurate value. +/// +/// We should eventually only use PressureDiff for maximum performance, but this +/// already allows 80% of SUs to take the fast path without changing scheduling +/// at all. Further changes would either change scheduling, or require a lot +/// more logic to recover an accurate pressure estimate from the PressureDiffs. +static bool canUsePressureDiffs(const SUnit &SU) { + if (!SU.isInstr()) + return false; + + // Cannot use pressure diffs for subregister defs or with physregs, it's + // imprecise in both cases. 
+ for (const auto &Op : SU.getInstr()->operands()) { + if (!Op.isReg() || Op.isImplicit()) + continue; + if (Op.getReg().isPhysical() || + (Op.isDef() && Op.getSubReg() != AMDGPU::NoSubRegister)) + return false; + } + return true; +} + +static void getRegisterPressures(bool AtTop, + const RegPressureTracker &RPTracker, SUnit *SU, + std::vector &Pressure, + std::vector &MaxPressure) { + // getDownwardPressure() and getUpwardPressure() make temporary changes to + // the tracker, so we need to pass those function a non-const copy. + RegPressureTracker &TempTracker = const_cast(RPTracker); + if (AtTop) + TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure); + else + TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure); +} + void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, const SIRegisterInfo *SRI, unsigned SGPRPressure, - unsigned VGPRPressure) { + unsigned VGPRPressure, bool IsBottomUp) { Cand.SU = SU; Cand.AtTop = AtTop; if (!DAG->isTrackingPressure()) return; - // getDownwardPressure() and getUpwardPressure() make temporary changes to - // the tracker, so we need to pass those function a non-const copy. - RegPressureTracker &TempTracker = const_cast(RPTracker); - Pressure.clear(); MaxPressure.clear(); - if (AtTop) - TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure); - else { - // FIXME: I think for bottom up scheduling, the register pressure is cached - // and can be retrieved by DAG->getPressureDif(SU). - TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure); + // We try to use the cached PressureDiffs in the ScheduleDAG whenever + // possible over querying the RegPressureTracker. + // + // RegPressureTracker will make a lot of LIS queries which are very + // expensive, it is considered a slow function in this context. 
+ // + // PressureDiffs are precomputed and cached, and getPressureDiff is just a + // trivial lookup into an array. It is pretty much free. + // + // In EXPENSIVE_CHECKS, we always query RPTracker to verify the results of + // PressureDiffs. + if (AtTop || !canUsePressureDiffs(*SU)) { + getRegisterPressures(AtTop, RPTracker, SU, Pressure, MaxPressure); + } else { + // Reserve 4 slots. + Pressure.resize(4, 0); + Pressure[AMDGPU::RegisterPressureSets::SReg_32] = SGPRPressure; + Pressure[AMDGPU::RegisterPressureSets::VGPR_32] = VGPRPressure; + + for (const auto &Diff : DAG->getPressureDiff(SU)) { + if (!Diff.isValid()) + continue; + // PressureDiffs is always bottom-up so if we're working top-down we need + // to invert its sign. + Pressure[Diff.getPSet()] += + (IsBottomUp ? Diff.getUnitInc() : -Diff.getUnitInc()); + } + +#ifdef EXPENSIVE_CHECKS + std::vector CheckPressure, CheckMaxPressure; + getRegisterPressures(AtTop, RPTracker, SU, CheckPressure, CheckMaxPressure); + if (Pressure[AMDGPU::RegisterPressureSets::SReg_32] != + CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] || + Pressure[AMDGPU::RegisterPressureSets::VGPR_32] != + CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32]) { + errs() << "Register Pressure is inaccurate when calculated through " + "PressureDiff\n" + << "SGPR got " << Pressure[AMDGPU::RegisterPressureSets::SReg_32] + << ", expected " + << CheckPressure[AMDGPU::RegisterPressureSets::SReg_32] << "\n" + << "VGPR got " << Pressure[AMDGPU::RegisterPressureSets::VGPR_32] + << ", expected " + << CheckPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n"; + report_fatal_error("inaccurate register pressure calculation"); + } +#endif } unsigned NewSGPRPressure = Pressure[AMDGPU::RegisterPressureSets::SReg_32]; @@ -158,7 +239,6 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU, bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit; bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= 
SGPRExcessLimit; - // FIXME: We have to enter REG-EXCESS before we reach the actual threshold // to increase the likelihood we don't go over the limits. We should improve // the analysis to look through dependencies to find the path with the least @@ -207,7 +287,8 @@ void GCNSchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU, void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, - SchedCandidate &Cand) { + SchedCandidate &Cand, + bool IsBottomUp) { const SIRegisterInfo *SRI = static_cast(TRI); ArrayRef Pressure = RPTracker.getRegSetPressureAtPos(); unsigned SGPRPressure = 0; @@ -220,8 +301,8 @@ void GCNSchedStrategy::pickNodeFromQueue(SchedBoundary &Zone, for (SUnit *SU : Q) { SchedCandidate TryCand(ZonePolicy); - initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, - SGPRPressure, VGPRPressure); + initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI, SGPRPressure, + VGPRPressure, IsBottomUp); // Pass SchedBoundary only when comparing nodes from the same boundary. SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? 
&Zone : nullptr; tryCandidate(Cand, TryCand, ZoneArg); @@ -262,7 +343,8 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) { if (!BotCand.isValid() || BotCand.SU->isScheduled || BotCand.Policy != BotPolicy) { BotCand.reset(CandPolicy()); - pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand); + pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand, + /*IsBottomUp=*/true); assert(BotCand.Reason != NoCand && "failed to find the first candidate"); } else { LLVM_DEBUG(traceCandidate(BotCand)); @@ -270,7 +352,8 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) { if (VerifyScheduling) { SchedCandidate TCand; TCand.reset(CandPolicy()); - pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand); + pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand, + /*IsBottomUp=*/true); assert(TCand.SU == BotCand.SU && "Last pick result should correspond to re-picking right now"); } @@ -282,7 +365,8 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) { if (!TopCand.isValid() || TopCand.SU->isScheduled || TopCand.Policy != TopPolicy) { TopCand.reset(CandPolicy()); - pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand); + pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand, + /*IsBottomUp=*/false); assert(TopCand.Reason != NoCand && "failed to find the first candidate"); } else { LLVM_DEBUG(traceCandidate(TopCand)); @@ -290,7 +374,8 @@ SUnit *GCNSchedStrategy::pickNodeBidirectional(bool &IsTopNode) { if (VerifyScheduling) { SchedCandidate TCand; TCand.reset(CandPolicy()); - pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand); + pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand, + /*IsBottomUp=*/false); assert(TCand.SU == TopCand.SU && "Last pick result should correspond to re-picking right now"); } @@ -327,7 +412,8 @@ SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) { if (!SU) { CandPolicy NoPolicy; TopCand.reset(NoPolicy); - 
pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand); + pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand, + /*IsBottomUp=*/false); assert(TopCand.Reason != NoCand && "failed to find a candidate"); SU = TopCand.SU; } @@ -337,7 +423,8 @@ SUnit *GCNSchedStrategy::pickNode(bool &IsTopNode) { if (!SU) { CandPolicy NoPolicy; BotCand.reset(NoPolicy); - pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand); + pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand, + /*IsBottomUp=*/true); assert(BotCand.Reason != NoCand && "failed to find a candidate"); SU = BotCand.SU; } diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h index 2084aae4128ff3..f0aea2bc4ab865 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h @@ -45,12 +45,12 @@ class GCNSchedStrategy : public GenericScheduler { void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, - SchedCandidate &Cand); + SchedCandidate &Cand, bool IsBottomUp); - void initCandidate(SchedCandidate &Cand, SUnit *SU, - bool AtTop, const RegPressureTracker &RPTracker, - const SIRegisterInfo *SRI, - unsigned SGPRPressure, unsigned VGPRPressure); + void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, + const RegPressureTracker &RPTracker, + const SIRegisterInfo *SRI, unsigned SGPRPressure, + unsigned VGPRPressure, bool IsBottomUp); std::vector Pressure; From 4ab37e430d960b975bfdaf95516a39ea3468f7a1 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Thu, 13 Jun 2024 23:40:58 -0700 Subject: [PATCH 053/155] [clang][Fuchsia] Use unsigned int for wint_t on *-fuchsia targets (#95499) This aligns Fuchsia targets with other similar OS targets such as Linux. 
Fuchsia's libc already uses unsigned rather than the compiler-provided __WINT_TYPE__ macro for its wint_t typedef, so this just makes the compiler consistent with the OS's actual ABI. The only known manifestation of the mismatch is -Wformat warnings for %lc no matching wint_t arguments. The closest thing I could see to existing tests for each target's wint_t type setting was the predefine tests that check various macros including __WINT_TYPE__ on a per-machine and/or per-OS basis. While the setting is done per-OS in most of the target implementations rather than actually varying by machine, the only existing tests for __WINT_TYPE__ are in per-machine checks that are also wholly or partly tagged as per-OS. x86_64 and riscv64 tests for respective *-linux-gnu targets now check for the same definitions in the respective *-fuchsia targets. __WINT_TYPE__ is not among the type checked in the aarch64 tests and those lack a section that's specifically tested for aarch64-linux-gnu; if such is added then it can similarly be made to check for most or all of the same value on aarch64-fuchsia as aarch64-linux-gnu. But since the actual implementation of choosing the type is done per-OS and not per-machine for the three machines with Fuchsia target support, the x86 and riscv64 tests are already redundantly testing that same code and seem sufficient. 
--- clang/lib/Basic/Targets/OSTargets.h | 1 + clang/test/Preprocessor/init-x86.c | 1 + clang/test/Preprocessor/init.c | 2 ++ 3 files changed, 4 insertions(+) diff --git a/clang/lib/Basic/Targets/OSTargets.h b/clang/lib/Basic/Targets/OSTargets.h index 4366c1149e4053..5f27c3469f861d 100644 --- a/clang/lib/Basic/Targets/OSTargets.h +++ b/clang/lib/Basic/Targets/OSTargets.h @@ -868,6 +868,7 @@ class LLVM_LIBRARY_VISIBILITY FuchsiaTargetInfo : public OSTargetInfo { public: FuchsiaTargetInfo(const llvm::Triple &Triple, const TargetOptions &Opts) : OSTargetInfo(Triple, Opts) { + this->WIntType = TargetInfo::UnsignedInt; this->MCountName = "__mcount"; this->TheCXXABI.set(TargetCXXABI::Fuchsia); } diff --git a/clang/test/Preprocessor/init-x86.c b/clang/test/Preprocessor/init-x86.c index 1268419e18a5c4..6f5aa5674e48ea 100644 --- a/clang/test/Preprocessor/init-x86.c +++ b/clang/test/Preprocessor/init-x86.c @@ -999,6 +999,7 @@ // X32:#define __x86_64__ 1 // RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=x86_64-pc-linux-gnu < /dev/null | FileCheck -match-full-lines -check-prefix X86_64-LINUX %s +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=x86_64-unknown-fuchsia < /dev/null | FileCheck -match-full-lines -check-prefix X86_64-LINUX %s // // X86_64-LINUX:#define _LP64 1 // X86_64-LINUX:#define __BIGGEST_ALIGNMENT__ 16 diff --git a/clang/test/Preprocessor/init.c b/clang/test/Preprocessor/init.c index 2641fee940231f..6e7c0ea5c730b1 100644 --- a/clang/test/Preprocessor/init.c +++ b/clang/test/Preprocessor/init.c @@ -2527,6 +2527,8 @@ // RUN: | FileCheck -match-full-lines -check-prefix=RISCV64 %s // RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=riscv64-unknown-linux < /dev/null \ // RUN: | FileCheck -match-full-lines -check-prefixes=RISCV64,RISCV64-LINUX %s +// RUN: %clang_cc1 -E -dM -ffreestanding -fgnuc-version=4.2.1 -triple=riscv64-unknown-fuchsia < /dev/null \ +// RUN: | FileCheck -match-full-lines 
-check-prefixes=RISCV64 %s // RISCV64: #define _LP64 1 // RISCV64: #define __ATOMIC_ACQUIRE 2 // RISCV64: #define __ATOMIC_ACQ_REL 4 From c4a1440c149d3ea03f14fd6858b6be3a2faf9af6 Mon Sep 17 00:00:00 2001 From: wanglei Date: Fri, 14 Jun 2024 15:11:59 +0800 Subject: [PATCH 054/155] [lld][ELF] Add basic TLSDESC support for LoongArch LoongArch does not yet implement transition from TLSDESC to LE/IE, so TLSDESC dynamic relocation needs to be generated for each desc, which is ultimately handled by the dynamic linker. The test cases reference RISC-V: #79239 Reviewed By: MaskRay, SixWeining Pull Request: https://github.com/llvm/llvm-project/pull/94451 --- lld/ELF/Arch/LoongArch.cpp | 40 +++++++ lld/ELF/InputSection.cpp | 2 + lld/ELF/Relocations.cpp | 12 ++ lld/ELF/Relocations.h | 1 + lld/test/ELF/loongarch-tlsdesc-gd-mixed.s | 23 ++++ lld/test/ELF/loongarch-tlsdesc.s | 132 ++++++++++++++++++++++ 6 files changed, 210 insertions(+) create mode 100644 lld/test/ELF/loongarch-tlsdesc-gd-mixed.s create mode 100644 lld/test/ELF/loongarch-tlsdesc.s diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp index 2c5d5df922c0f6..c6ee73f23d471a 100644 --- a/lld/ELF/Arch/LoongArch.cpp +++ b/lld/ELF/Arch/LoongArch.cpp @@ -98,11 +98,13 @@ uint64_t elf::getLoongArchPageDelta(uint64_t dest, uint64_t pc, RelType type) { case R_LARCH_PCALA64_LO20: case R_LARCH_GOT64_PC_LO20: case R_LARCH_TLS_IE64_PC_LO20: + case R_LARCH_TLS_DESC64_PC_LO20: pcalau12i_pc = pc - 8; break; case R_LARCH_PCALA64_HI12: case R_LARCH_GOT64_PC_HI12: case R_LARCH_TLS_IE64_PC_HI12: + case R_LARCH_TLS_DESC64_PC_HI12: pcalau12i_pc = pc - 12; break; default: @@ -190,11 +192,13 @@ LoongArch::LoongArch() { tlsModuleIndexRel = R_LARCH_TLS_DTPMOD64; tlsOffsetRel = R_LARCH_TLS_DTPREL64; tlsGotRel = R_LARCH_TLS_TPREL64; + tlsDescRel = R_LARCH_TLS_DESC64; } else { symbolicRel = R_LARCH_32; tlsModuleIndexRel = R_LARCH_TLS_DTPMOD32; tlsOffsetRel = R_LARCH_TLS_DTPREL32; tlsGotRel = R_LARCH_TLS_TPREL32; + tlsDescRel = 
R_LARCH_TLS_DESC32; } gotRel = symbolicRel; @@ -294,6 +298,10 @@ int64_t LoongArch::getImplicitAddend(const uint8_t *buf, RelType type) const { case R_LARCH_JUMP_SLOT: // These relocations are defined as not having an implicit addend. return 0; + case R_LARCH_TLS_DESC32: + return read32le(buf + 4); + case R_LARCH_TLS_DESC64: + return read64le(buf + 8); } } @@ -486,6 +494,19 @@ RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s, return config->relax ? R_RELAX_HINT : R_NONE; case R_LARCH_ALIGN: return R_RELAX_HINT; + case R_LARCH_TLS_DESC_PC_HI20: + case R_LARCH_TLS_DESC64_PC_LO20: + case R_LARCH_TLS_DESC64_PC_HI12: + return R_LOONGARCH_TLSDESC_PAGE_PC; + case R_LARCH_TLS_DESC_PC_LO12: + case R_LARCH_TLS_DESC_LD: + case R_LARCH_TLS_DESC_HI20: + case R_LARCH_TLS_DESC_LO12: + case R_LARCH_TLS_DESC64_LO20: + case R_LARCH_TLS_DESC64_HI12: + return R_TLSDESC; + case R_LARCH_TLS_DESC_CALL: + return R_TLSDESC_CALL; // Other known relocs that are explicitly unimplemented: // @@ -510,6 +531,8 @@ bool LoongArch::usesOnlyLowPageBits(RelType type) const { case R_LARCH_GOT_LO12: case R_LARCH_GOT_PC_LO12: case R_LARCH_TLS_IE_PC_LO12: + case R_LARCH_TLS_DESC_LO12: + case R_LARCH_TLS_DESC_PC_LO12: return true; } } @@ -594,6 +617,8 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel, case R_LARCH_TLS_LE_LO12: case R_LARCH_TLS_IE_PC_LO12: case R_LARCH_TLS_IE_LO12: + case R_LARCH_TLS_DESC_PC_LO12: + case R_LARCH_TLS_DESC_LO12: write32le(loc, setK12(read32le(loc), extractBits(val, 11, 0))); return; @@ -609,6 +634,8 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel, case R_LARCH_TLS_LD_HI20: case R_LARCH_TLS_GD_PC_HI20: case R_LARCH_TLS_GD_HI20: + case R_LARCH_TLS_DESC_PC_HI20: + case R_LARCH_TLS_DESC_HI20: write32le(loc, setJ20(read32le(loc), extractBits(val, 31, 12))); return; @@ -620,6 +647,8 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel, case R_LARCH_TLS_LE64_LO20: case R_LARCH_TLS_IE64_PC_LO20: case R_LARCH_TLS_IE64_LO20: + 
case R_LARCH_TLS_DESC64_PC_LO20: + case R_LARCH_TLS_DESC64_LO20: write32le(loc, setJ20(read32le(loc), extractBits(val, 51, 32))); return; @@ -631,6 +660,8 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel, case R_LARCH_TLS_LE64_HI12: case R_LARCH_TLS_IE64_PC_HI12: case R_LARCH_TLS_IE64_HI12: + case R_LARCH_TLS_DESC64_PC_HI12: + case R_LARCH_TLS_DESC64_HI12: write32le(loc, setK12(read32le(loc), extractBits(val, 63, 52))); return; @@ -679,6 +710,15 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel, case R_LARCH_RELAX: return; // Ignored (for now) + case R_LARCH_TLS_DESC_LD: + return; // nothing to do. + case R_LARCH_TLS_DESC32: + write32le(loc + 4, val); + return; + case R_LARCH_TLS_DESC64: + write64le(loc + 8, val); + return; + default: llvm_unreachable("unknown relocation"); } diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index f4287bc94ee5ea..be12218d9948c4 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -877,6 +877,8 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, return in.got->getTlsDescAddr(sym) + a - in.gotPlt->getVA(); case R_AARCH64_TLSDESC_PAGE: return getAArch64Page(in.got->getTlsDescAddr(sym) + a) - getAArch64Page(p); + case R_LOONGARCH_TLSDESC_PAGE_PC: + return getLoongArchPageDelta(in.got->getTlsDescAddr(sym) + a, p, type); case R_TLSGD_GOT: return in.got->getGlobalDynOffset(sym) + a; case R_TLSGD_GOTPLT: diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 04db413a6609fd..1b08339e3996ae 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -1297,6 +1297,18 @@ static unsigned handleTlsRelocation(RelType type, Symbol &sym, if (config->emachine == EM_MIPS) return handleMipsTlsRelocation(type, sym, c, offset, addend, expr); + + // LoongArch does not yet implement transition from TLSDESC to LE/IE, so + // generate TLSDESC dynamic relocation for the dynamic linker to handle. 
+ if (config->emachine == EM_LOONGARCH && + oneof(expr)) { + if (expr != R_TLSDESC_CALL) { + sym.setFlags(NEEDS_TLSDESC); + c.addReloc({expr, type, offset, addend, &sym}); + } + return 1; + } + bool isRISCV = config->emachine == EM_RISCV; if (oneof Date: Fri, 14 Jun 2024 09:29:21 +0200 Subject: [PATCH 055/155] [ARM][AArch64] Bail out if CandidatesWithoutStackFixups is empty (#95410) The following code assumes that RepeatedSequenceLocs is non-empty. Bail out if there are less than 2 candidates left, as no outlining is possible in that case. The same check is already present in all the other places where elements from RepeatedSequenceLocs may be dropped. This fixes the issue reported at: https://github.com/llvm/llvm-project/pull/93965#issuecomment-2151989716 --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 2 ++ llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 2 ++ ...liner-no-candidates-without-stack-fixup.ll | 30 +++++++++++++++++++ 3 files changed, 34 insertions(+) create mode 100644 llvm/test/CodeGen/ARM/machine-outliner-no-candidates-without-stack-fixup.ll diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 1a795b58319c92..f4b5fd7a003c24 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -8556,6 +8556,8 @@ AArch64InstrInfo::getOutliningCandidateInfo( NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) { RepeatedSequenceLocs = CandidatesWithoutStackFixups; FrameID = MachineOutlinerNoLRSave; + if (RepeatedSequenceLocs.size() < 2) + return std::nullopt; } else { SetCandidateCallInfo(MachineOutlinerDefault, 12); diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 627148b73c4f57..e81e6bb6975886 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -6060,6 +6060,8 @@ ARMBaseInstrInfo::getOutliningCandidateInfo( RepeatedSequenceLocs.size() * 
Costs.CallDefault) { RepeatedSequenceLocs = CandidatesWithoutStackFixups; FrameID = MachineOutlinerNoLRSave; + if (RepeatedSequenceLocs.size() < 2) + return std::nullopt; } else SetCandidateCallInfo(MachineOutlinerDefault, Costs.CallDefault); } diff --git a/llvm/test/CodeGen/ARM/machine-outliner-no-candidates-without-stack-fixup.ll b/llvm/test/CodeGen/ARM/machine-outliner-no-candidates-without-stack-fixup.ll new file mode 100644 index 00000000000000..f50d92b8160d52 --- /dev/null +++ b/llvm/test/CodeGen/ARM/machine-outliner-no-candidates-without-stack-fixup.ll @@ -0,0 +1,30 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=thumbv8.1m.main-unknown-unknown < %s | FileCheck %s + +; Make sure this does not assert during machine outlining. + +declare void @a(...) + +define void @b(i32 %a, i32 %b, i32 %c, ptr %d, ptr %e) minsize { +; CHECK-LABEL: b: +; CHECK: @ %bb.0: +; CHECK-NEXT: ldr r3, [sp] +; CHECK-NEXT: mov r2, r1 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: movs r0, #2 +; CHECK-NEXT: b a + tail call void @a(i32 2, i32 %a, i32 %b, ptr %e) + ret void +} + +define void @c(i32 %a, i32 %b, i32 %c, ptr %d, ptr %e) minsize { +; CHECK-LABEL: c: +; CHECK: @ %bb.0: +; CHECK-NEXT: ldr r3, [sp] +; CHECK-NEXT: mov r2, r1 +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: movs r0, #4 +; CHECK-NEXT: b a + tail call void @a(i32 4, i32 %a, i32 %b, ptr %e) + ret void +} From c947709df7859bb7285873593adab70349a5ab3e Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 14 Jun 2024 00:30:03 -0700 Subject: [PATCH 056/155] [Driver] Support -Wa,--defsym similar to -Wa,-defsym When the integrated assembler is enabled, clangDriver implements a small set of popular -Wa, options. "-defsym" is implemented (https://reviews.llvm.org/D26213), but the more common "--defsym" is not. Support "--defsym". 
Close #95386 --- clang/lib/Driver/ToolChains/Clang.cpp | 2 +- clang/test/Driver/defsym.s | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index b8d8ff3db5d1fd..acd32bddea6b65 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2647,7 +2647,7 @@ static void CollectArgsForIntegratedAssembler(Compilation &C, } else if (Value.starts_with("-mcpu") || Value.starts_with("-mfpu") || Value.starts_with("-mhwdiv") || Value.starts_with("-march")) { // Do nothing, we'll validate it later. - } else if (Value == "-defsym") { + } else if (Value == "-defsym" || Value == "--defsym") { if (A->getNumValues() != 2) { D.Diag(diag::err_drv_defsym_invalid_format) << Value; break; diff --git a/clang/test/Driver/defsym.s b/clang/test/Driver/defsym.s index 165c71b2eae97c..d152b326ae8b18 100644 --- a/clang/test/Driver/defsym.s +++ b/clang/test/Driver/defsym.s @@ -1,17 +1,14 @@ // XFAIL: target={{.*}}-aix{{.*}} // RUN: %clang -### -c -integrated-as %s \ -// RUN: -Wa,-defsym,abc=5 -Wa,-defsym,xyz=0xa \ +// RUN: -Wa,-defsym,abc=5 -Wa,--defsym,xyz=0xa \ // RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-DEFSYM1 // RUN: %clang -### -c -no-integrated-as -target x86_64-unknown-unknown %s \ -// RUN: -Wa,-defsym,abc=5 -Wa,-defsym,xyz=0xa \ +// RUN: -Wa,-defsym,abc=5 -Wa,--defsym,xyz=0xa \ // RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-DEFSYM1 -// CHECK-DEFSYM1: "-defsym" -// CHECK-DEFSYM1: "abc=5" -// CHECK-DEFSYM1: "-defsym" -// CHECK-DEFSYM1: "xyz=0xa" +// CHECK-DEFSYM1: "-defsym" "abc=5" "--defsym" "xyz=0xa" // RUN: not %clang -c -integrated-as -o /dev/null %s \ // RUN: -Wa,-defsym,abc= \ From 4942e78271e73d45e971196dc5a9769f94b30060 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 14 Jun 2024 09:39:15 +0200 Subject: [PATCH 057/155] [llvm-exegesis] Only link/initialize supported targets (NFC) (#95421) llvm-exegesis currently links and 
initializes all targets, even though most of them are not supported by llvm-exegesis. This is particularly unfortunate because llvm-exegesis does not support the LLVM dylib, so llvm-exegesis essentially ends up doing a complete relink of all of LLVM, which is not fun if you use LTO. Instead, only link and initialize the targets that are part of LLVM_EXEGESIS_TARGETS. --- llvm/tools/llvm-exegesis/CMakeLists.txt | 20 ++++++++++++-------- llvm/tools/llvm-exegesis/llvm-exegesis.cpp | 20 +++++++++++++------- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/llvm/tools/llvm-exegesis/CMakeLists.txt b/llvm/tools/llvm-exegesis/CMakeLists.txt index ec418a7d51ecc0..c3c4058cf65255 100644 --- a/llvm/tools/llvm-exegesis/CMakeLists.txt +++ b/llvm/tools/llvm-exegesis/CMakeLists.txt @@ -1,9 +1,7 @@ +# Has side effect of defining LLVM_EXEGESIS_TARGETS +add_subdirectory(lib) + set(LLVM_LINK_COMPONENTS - AllTargetsAsmParsers - AllTargetsCodeGens - AllTargetsDescs - AllTargetsDisassemblers - AllTargetsInfos CodeGenTypes MC MCParser @@ -11,6 +9,15 @@ set(LLVM_LINK_COMPONENTS TargetParser ) +foreach(t ${LLVM_EXEGESIS_TARGETS}) + string(STRIP ${t} t) + list(APPEND LLVM_LINK_COMPONTENTS "LLVM${t}AsmParser") + list(APPEND LLVM_LINK_COMPONTENTS "LLVM${t}CodeGen") + list(APPEND LLVM_LINK_COMPONTENTS "LLVM${t}Desc") + list(APPEND LLVM_LINK_COMPONTENTS "LLVM${t}Disassembler") + list(APPEND LLVM_LINK_COMPONTENTS "LLVM${t}Info") +endforeach() + add_llvm_tool(llvm-exegesis DISABLE_LLVM_LINK_LLVM_DYLIB llvm-exegesis.cpp @@ -19,9 +26,6 @@ add_llvm_tool(llvm-exegesis intrinsics_gen ) -# Has side effect of defining LLVM_EXEGESIS_TARGETS -add_subdirectory(lib) - # Link all enabled exegesis targets set(libs) foreach(t ${LLVM_EXEGESIS_TARGETS}) diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp index 06e1c7f3c1bbed..e6a43cfc6db51c 100644 --- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp +++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp @@ 
-470,9 +470,11 @@ void benchmarkMain() { #endif } - InitializeAllAsmPrinters(); - InitializeAllAsmParsers(); InitializeAllExegesisTargets(); +#define LLVM_EXEGESIS(TargetName) \ + LLVMInitialize##TargetName##AsmPrinter(); \ + LLVMInitialize##TargetName##AsmParser(); +#include "llvm/Config/TargetExegesis.def" const LLVMState State = ExitOnErr(LLVMState::Create(TripleName, MCPU, "", UseDummyPerfCounters)); @@ -621,9 +623,11 @@ static void analysisMain() { "and --analysis-inconsistencies-output-file must be specified"); } - InitializeAllAsmPrinters(); - InitializeAllDisassemblers(); InitializeAllExegesisTargets(); +#define LLVM_EXEGESIS(TargetName) \ + LLVMInitialize##TargetName##AsmPrinter(); \ + LLVMInitialize##TargetName##Disassembler(); +#include "llvm/Config/TargetExegesis.def" auto MemoryBuffer = ExitOnFileError( BenchmarkFile, @@ -690,9 +694,11 @@ int main(int Argc, char **Argv) { InitLLVM X(Argc, Argv); // Initialize targets so we can print them when flag --version is specified. - InitializeAllTargetInfos(); - InitializeAllTargets(); - InitializeAllTargetMCs(); +#define LLVM_EXEGESIS(TargetName) \ + LLVMInitialize##TargetName##Target(); \ + LLVMInitialize##TargetName##TargetInfo(); \ + LLVMInitialize##TargetName##TargetMC(); +#include "llvm/Config/TargetExegesis.def" // Register the Target and CPU printer for --version. 
cl::AddExtraVersionPrinter(sys::printDefaultTargetAndDetectedCPU); From d712ae4a21822a51817941e7007e0dd41343cde3 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 14 Jun 2024 00:40:33 -0700 Subject: [PATCH 058/155] [Driver] Support -Wa,--defsym similar to -Wa,-defsym Missing part in c947709df7859bb7285873593adab70349a5ab3e --- clang/include/clang/Driver/Options.td | 2 +- clang/lib/Driver/ToolChains/Clang.cpp | 2 +- clang/test/Driver/defsym.s | 5 +++-- clang/test/Misc/cc1as-defsym.s | 12 ++++++++++++ 4 files changed, 17 insertions(+), 4 deletions(-) create mode 100644 clang/test/Misc/cc1as-defsym.s diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 880221176027e9..062fbd2e4c0a44 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -8124,7 +8124,7 @@ def show_inst : Flag<["-"], "show-inst">, def dwarf_debug_producer : Separate<["-"], "dwarf-debug-producer">, HelpText<"The string to embed in the Dwarf debug AT_producer record.">; -def defsym : Separate<["-"], "defsym">, +def defsym : Separate<["--"], "defsym">, HelpText<"Define a value for a symbol">; } // let Visibility = [CC1AsOption] diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index acd32bddea6b65..331cf6e713d890 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2666,7 +2666,7 @@ static void CollectArgsForIntegratedAssembler(Compilation &C, D.Diag(diag::err_drv_defsym_invalid_symval) << SVal; break; } - CmdArgs.push_back(Value.data()); + CmdArgs.push_back("--defsym"); TakeNextArg = true; } else if (Value == "-fdebug-compilation-dir") { CmdArgs.push_back("-fdebug-compilation-dir"); diff --git a/clang/test/Driver/defsym.s b/clang/test/Driver/defsym.s index d152b326ae8b18..af66656cce15e1 100644 --- a/clang/test/Driver/defsym.s +++ b/clang/test/Driver/defsym.s @@ -6,9 +6,10 @@ // RUN: %clang -### -c -no-integrated-as -target 
x86_64-unknown-unknown %s \ // RUN: -Wa,-defsym,abc=5 -Wa,--defsym,xyz=0xa \ -// RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-DEFSYM1 +// RUN: 2>&1 | FileCheck %s --check-prefix=CHECK-DEFSYM1-GAS -// CHECK-DEFSYM1: "-defsym" "abc=5" "--defsym" "xyz=0xa" +// CHECK-DEFSYM1: "--defsym" "abc=5" "--defsym" "xyz=0xa" +// CHECK-DEFSYM1-GAS: "-defsym" "abc=5" "--defsym" "xyz=0xa" // RUN: not %clang -c -integrated-as -o /dev/null %s \ // RUN: -Wa,-defsym,abc= \ diff --git a/clang/test/Misc/cc1as-defsym.s b/clang/test/Misc/cc1as-defsym.s new file mode 100644 index 00000000000000..0dc7f24dac2c0b --- /dev/null +++ b/clang/test/Misc/cc1as-defsym.s @@ -0,0 +1,12 @@ +// REQUIRES: x86-registered-target +// RUN: %clang -cc1as -triple x86_64 -filetype obj --defsym A=1 %s -o %t.o +// RUN: llvm-nm %t.o | FileCheck %s + +// CHECK: 0000000000000001 A foo + +.globl foo +.ifdef A +foo = 1 +.else +foo = 0 +.endif From 706e1975400b3f30bd406b694bb711a7c7dbe1c4 Mon Sep 17 00:00:00 2001 From: David Green Date: Fri, 14 Jun 2024 08:51:56 +0100 Subject: [PATCH 059/155] [CodeGen] Remove target SubRegLiveness flags (#95437) This removes the uses of target flags to disable subreg liveness, relying on the `-enable-subreg-liveness` flag instead. The `-enable-subreg-liveness` flag has been changed to take precedence over the subtarget if set, and one use of `Subtarget->enableSubRegLiveness()` has been changed to `MRI->subRegLivenessEnabled()` to make sure the option properly applies. 
--- llvm/lib/CodeGen/InitUndef.cpp | 2 +- llvm/lib/CodeGen/MachineRegisterInfo.cpp | 6 ++++-- llvm/lib/Target/ARM/ARMSubtarget.cpp | 5 ----- llvm/lib/Target/Hexagon/HexagonSubtarget.cpp | 8 +------- llvm/lib/Target/PowerPC/PPCSubtarget.cpp | 9 +-------- llvm/lib/Target/RISCV/RISCVSubtarget.cpp | 9 +-------- .../CodeGen/ARM/regcoal-invalid-subrange-update.mir | 2 +- llvm/test/CodeGen/Hexagon/bit-gen-rseq.ll | 2 +- llvm/test/CodeGen/Hexagon/regalloc-bad-undef.mir | 2 +- llvm/test/CodeGen/Hexagon/verify-liveness-at-def.mir | 4 ++-- llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll | 12 ++++++------ llvm/test/CodeGen/PowerPC/atomics-i128.ll | 12 ++++++------ llvm/test/CodeGen/PowerPC/mma-outer-product.ll | 4 ++-- llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc.ll | 2 +- llvm/test/CodeGen/PowerPC/subreg-killed.mir | 2 +- .../RISCV/early-clobber-tied-def-subreg-liveness.ll | 2 +- .../RISCV/regalloc-last-chance-recoloring-failure.ll | 4 ++-- .../RISCV/rvv/subregister-undef-early-clobber.mir | 2 +- .../CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll | 2 +- .../CodeGen/RISCV/rvv/undef-earlyclobber-chain.mir | 2 +- .../RISCV/rvv/vrgatherei16-subreg-liveness.ll | 2 +- .../CodeGen/Thumb2/LowOverheadLoops/spillingmove.mir | 2 +- .../Thumb2/LowOverheadLoops/subreg-liveness.mir | 2 +- 23 files changed, 38 insertions(+), 61 deletions(-) diff --git a/llvm/lib/CodeGen/InitUndef.cpp b/llvm/lib/CodeGen/InitUndef.cpp index 96ac385b6abf82..51c50ff872ef21 100644 --- a/llvm/lib/CodeGen/InitUndef.cpp +++ b/llvm/lib/CodeGen/InitUndef.cpp @@ -238,7 +238,7 @@ bool InitUndef::processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB, } if (isEarlyClobberMI(MI)) { - if (ST->enableSubRegLiveness()) + if (MRI->subRegLivenessEnabled()) Changed |= handleSubReg(MF, MI, DLD); Changed |= handleReg(&MI); } diff --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/llvm/lib/CodeGen/MachineRegisterInfo.cpp index b0c1838b3ff0ec..3caa96cd5e55d4 100644 --- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp +++ 
b/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -41,8 +41,10 @@ static cl::opt EnableSubRegLiveness("enable-subreg-liveness", cl::Hidden, void MachineRegisterInfo::Delegate::anchor() {} MachineRegisterInfo::MachineRegisterInfo(MachineFunction *MF) - : MF(MF), TracksSubRegLiveness(MF->getSubtarget().enableSubRegLiveness() && - EnableSubRegLiveness) { + : MF(MF), + TracksSubRegLiveness(EnableSubRegLiveness.getNumOccurrences() + ? EnableSubRegLiveness + : MF->getSubtarget().enableSubRegLiveness()) { unsigned NumRegs = getTargetRegisterInfo()->getNumRegs(); VRegInfo.reserve(256); RegAllocHints.reserve(256); diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp index a8c6cd99633f3e..b66a41d06062ff 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -68,9 +68,6 @@ static cl::opt ForceFastISel("arm-force-fast-isel", cl::init(false), cl::Hidden); -static cl::opt EnableSubRegLiveness("arm-enable-subreg-liveness", - cl::init(false), cl::Hidden); - /// initializeSubtargetDependencies - Initializes using a CPU and feature string /// so that we can use initializer lists for subtarget initialization. ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU, @@ -385,8 +382,6 @@ bool ARMSubtarget::enableMachineScheduler() const { } bool ARMSubtarget::enableSubRegLiveness() const { - if (EnableSubRegLiveness.getNumOccurrences()) - return EnableSubRegLiveness; // Enable SubRegLiveness for MVE to better optimize s subregs for mqpr regs // and q subregs for qqqqpr regs. 
return hasMVEIntegerOps(); diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp index da8ab5c4b21bbf..5e713842ff674a 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -55,10 +55,6 @@ static cl::opt DisableHexagonMISched("disable-hexagon-misched", cl::Hidden, cl::desc("Disable Hexagon MI Scheduling")); -static cl::opt EnableSubregLiveness( - "hexagon-subreg-liveness", cl::Hidden, cl::init(true), - cl::desc("Enable subregister liveness tracking for Hexagon")); - static cl::opt OverrideLongCalls( "hexagon-long-calls", cl::Hidden, cl::desc("If present, forces/disables the use of long calls")); @@ -726,9 +722,7 @@ unsigned HexagonSubtarget::getL1PrefetchDistance() const { return 32; } -bool HexagonSubtarget::enableSubRegLiveness() const { - return EnableSubregLiveness; -} +bool HexagonSubtarget::enableSubRegLiveness() const { return true; } Intrinsic::ID HexagonSubtarget::getIntrinsicId(unsigned Opc) const { struct Scalar { diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp index 0628fbb26245ce..bd9af12b30f5ec 100644 --- a/llvm/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.cpp @@ -37,11 +37,6 @@ using namespace llvm; #define GET_SUBTARGETINFO_CTOR #include "PPCGenSubtargetInfo.inc" -static cl::opt - UseSubRegLiveness("ppc-track-subreg-liveness", - cl::desc("Enable subregister liveness tracking for PPC"), - cl::init(true), cl::Hidden); - static cl::opt EnableMachinePipeliner("ppc-enable-pipeliner", cl::desc("Enable Machine Pipeliner for PPC"), @@ -186,9 +181,7 @@ bool PPCSubtarget::useAA() const { return true; } -bool PPCSubtarget::enableSubRegLiveness() const { - return UseSubRegLiveness; -} +bool PPCSubtarget::enableSubRegLiveness() const { return true; } bool PPCSubtarget::isGVIndirectSymbol(const GlobalValue *GV) const { if (isAIXABI()) { diff --git 
a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp index d3236bb07d56d5..e84ddc65e2b703 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp @@ -39,9 +39,6 @@ namespace llvm::RISCVTuneInfoTable { #include "RISCVGenSearchableTables.inc" } // namespace llvm::RISCVTuneInfoTable -static cl::opt EnableSubRegLiveness("riscv-enable-subreg-liveness", - cl::init(true), cl::Hidden); - static cl::opt RVVVectorLMULMax( "riscv-v-fixed-length-vector-lmul-max", cl::desc("The maximum LMUL value to use for fixed length vectors. " @@ -183,11 +180,7 @@ bool RISCVSubtarget::useRVVForFixedLengthVectors() const { return hasVInstructions() && getMinRVVVectorSizeInBits() != 0; } -bool RISCVSubtarget::enableSubRegLiveness() const { - // FIXME: Enable subregister liveness by default for RVV to better handle - // LMUL>1 and segment load/store. - return EnableSubRegLiveness; -} +bool RISCVSubtarget::enableSubRegLiveness() const { return true; } void RISCVSubtarget::getPostRAMutations( std::vector> &Mutations) const { diff --git a/llvm/test/CodeGen/ARM/regcoal-invalid-subrange-update.mir b/llvm/test/CodeGen/ARM/regcoal-invalid-subrange-update.mir index bf08af763357b3..2387d8b4338efe 100644 --- a/llvm/test/CodeGen/ARM/regcoal-invalid-subrange-update.mir +++ b/llvm/test/CodeGen/ARM/regcoal-invalid-subrange-update.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc %s -start-before register-coalescer -mtriple=arm-apple-ios -stop-after machine-scheduler -o - -arm-enable-subreg-liveness -verify-machineinstrs | FileCheck %s +# RUN: llc %s -start-before register-coalescer -mtriple=arm-apple-ios -stop-after machine-scheduler -o - -enable-subreg-liveness -verify-machineinstrs | FileCheck %s # Check that when we merge live-ranges that imply offseting # the definition of a subregister by some other subreg index, diff --git a/llvm/test/CodeGen/Hexagon/bit-gen-rseq.ll 
b/llvm/test/CodeGen/Hexagon/bit-gen-rseq.ll index 2857c05db764f8..5d36cae76987b7 100644 --- a/llvm/test/CodeGen/Hexagon/bit-gen-rseq.ll +++ b/llvm/test/CodeGen/Hexagon/bit-gen-rseq.ll @@ -1,4 +1,4 @@ -; RUN: llc -march=hexagon -disable-hsdr -hexagon-subreg-liveness < %s | FileCheck %s +; RUN: llc -march=hexagon -disable-hsdr -enable-subreg-liveness < %s | FileCheck %s ; Check that we don't generate any bitwise operations. ; CHECK-NOT: = or( diff --git a/llvm/test/CodeGen/Hexagon/regalloc-bad-undef.mir b/llvm/test/CodeGen/Hexagon/regalloc-bad-undef.mir index 9468b18bf8e47e..d291af97d3afd9 100644 --- a/llvm/test/CodeGen/Hexagon/regalloc-bad-undef.mir +++ b/llvm/test/CodeGen/Hexagon/regalloc-bad-undef.mir @@ -1,4 +1,4 @@ -# RUN: llc -march=hexagon -hexagon-subreg-liveness -start-after machine-scheduler -stop-after stack-slot-coloring -o - %s | FileCheck %s +# RUN: llc -march=hexagon -enable-subreg-liveness -start-after machine-scheduler -stop-after stack-slot-coloring -o - %s | FileCheck %s --- | target triple = "hexagon" diff --git a/llvm/test/CodeGen/Hexagon/verify-liveness-at-def.mir b/llvm/test/CodeGen/Hexagon/verify-liveness-at-def.mir index f4a623d74436fe..8205a6c025c1b9 100644 --- a/llvm/test/CodeGen/Hexagon/verify-liveness-at-def.mir +++ b/llvm/test/CodeGen/Hexagon/verify-liveness-at-def.mir @@ -1,8 +1,8 @@ # Using a trick to run register-coalescer twice, that way # liveintervals should be preserved while running the machine verifier. 
# -# RUN: not --crash llc -o - %s -march=hexagon -hexagon-subreg-liveness=false -run-pass register-coalescer -verify-machineinstrs -run-pass register-coalescer 2>&1 | FileCheck -check-prefix=CHECK-NOSUB %s -# RUN: not --crash llc -o - %s -march=hexagon -hexagon-subreg-liveness=true -run-pass register-coalescer -verify-machineinstrs -run-pass register-coalescer 2>&1 | FileCheck -check-prefix=CHECK-SUB %s +# RUN: not --crash llc -o - %s -march=hexagon -enable-subreg-liveness=false -run-pass register-coalescer -verify-machineinstrs -run-pass register-coalescer 2>&1 | FileCheck -check-prefix=CHECK-NOSUB %s +# RUN: not --crash llc -o - %s -march=hexagon -enable-subreg-liveness=true -run-pass register-coalescer -verify-machineinstrs -run-pass register-coalescer 2>&1 | FileCheck -check-prefix=CHECK-SUB %s --- name: test_pass diff --git a/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll b/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll index 98d00b0e01b4bc..8967eac223caa1 100644 --- a/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll +++ b/llvm/test/CodeGen/PowerPC/atomics-i128-ldst.ll @@ -1,21 +1,21 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown \ -; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-track-subreg-liveness \ +; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names -enable-subreg-liveness \ ; RUN: < %s | FileCheck --check-prefix=P8 %s ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown -mcpu=pwr7 \ ; RUN: -ppc-asm-full-reg-names \ -; RUN: -ppc-track-subreg-liveness < %s | FileCheck --check-prefix=PWR7 %s +; RUN: -enable-subreg-liveness < %s | FileCheck --check-prefix=PWR7 %s ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 \ -; RUN: -ppc-asm-full-reg-names -ppc-track-subreg-liveness < %s | FileCheck \ +; RUN: -ppc-asm-full-reg-names -enable-subreg-liveness < %s | FileCheck \ ; RUN: --check-prefix=LE-PWR8 %s ; RUN: llc 
-verify-machineinstrs -mtriple=powerpc64le-unknown-freebsd -mcpu=pwr8 \ -; RUN: -ppc-asm-full-reg-names -ppc-track-subreg-liveness < %s | FileCheck \ +; RUN: -ppc-asm-full-reg-names -enable-subreg-liveness < %s | FileCheck \ ; RUN: --check-prefix=LE-PWR8 %s ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix -mcpu=pwr8 \ -; RUN: -ppc-asm-full-reg-names -ppc-track-subreg-liveness < %s | FileCheck \ +; RUN: -ppc-asm-full-reg-names -enable-subreg-liveness < %s | FileCheck \ ; RUN: --check-prefix=AIX64-PWR8 %s ; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-unknown -mcpu=pwr8 \ -; RUN: -ppc-asm-full-reg-names -ppc-track-subreg-liveness < %s \ +; RUN: -ppc-asm-full-reg-names -enable-subreg-liveness < %s \ ; RUN: | FileCheck --check-prefix=PPC-PWR8 %s define dso_local i128 @lq_unordered(ptr %src) { diff --git a/llvm/test/CodeGen/PowerPC/atomics-i128.ll b/llvm/test/CodeGen/PowerPC/atomics-i128.ll index f5422a9b7b5428..0d231769ac505c 100644 --- a/llvm/test/CodeGen/PowerPC/atomics-i128.ll +++ b/llvm/test/CodeGen/PowerPC/atomics-i128.ll @@ -1,25 +1,25 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown -mcpu=pwr8 \ ; RUN: -ppc-asm-full-reg-names \ -; RUN: -ppc-track-subreg-liveness < %s | FileCheck %s +; RUN: -enable-subreg-liveness < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown -mcpu=pwr7 \ ; RUN: -ppc-asm-full-reg-names \ -; RUN: -ppc-track-subreg-liveness < %s | FileCheck --check-prefix=PWR7 %s +; RUN: -enable-subreg-liveness < %s | FileCheck --check-prefix=PWR7 %s ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 \ -; RUN: -ppc-asm-full-reg-names -ppc-track-subreg-liveness < %s | FileCheck \ +; RUN: -ppc-asm-full-reg-names -enable-subreg-liveness < %s | FileCheck \ ; RUN: --check-prefix=LE-PWR8 %s ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-freebsd -mcpu=pwr8 \ -; 
RUN: -ppc-asm-full-reg-names -ppc-track-subreg-liveness < %s | FileCheck \ +; RUN: -ppc-asm-full-reg-names -enable-subreg-liveness < %s | FileCheck \ ; RUN: --check-prefix=LE-PWR8 %s ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix -mcpu=pwr8 \ -; RUN: -ppc-asm-full-reg-names -ppc-track-subreg-liveness < %s | FileCheck \ +; RUN: -ppc-asm-full-reg-names -enable-subreg-liveness < %s | FileCheck \ ; RUN: --check-prefix=AIX64-PWR8 %s ; On 32-bit PPC platform, 16-byte lock free atomic instructions are not available, ; it's expected not to generate inlined lock-free code on such platforms, even arch level ; is pwr8+ and `-ppc-quadword-atomics` is on. ; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-unknown -mcpu=pwr8 \ -; RUN: -ppc-asm-full-reg-names -ppc-track-subreg-liveness < %s \ +; RUN: -ppc-asm-full-reg-names -enable-subreg-liveness < %s \ ; RUN: | FileCheck --check-prefix=PPC-PWR8 %s diff --git a/llvm/test/CodeGen/PowerPC/mma-outer-product.ll b/llvm/test/CodeGen/PowerPC/mma-outer-product.ll index 33a8260c7bf528..085b81a4c6278a 100644 --- a/llvm/test/CodeGen/PowerPC/mma-outer-product.ll +++ b/llvm/test/CodeGen/PowerPC/mma-outer-product.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ -; RUN: -mcpu=pwr10 -ppc-track-subreg-liveness -ppc-asm-full-reg-names \ +; RUN: -mcpu=pwr10 -enable-subreg-liveness -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \ -; RUN: -mcpu=pwr10 -ppc-track-subreg-liveness -ppc-asm-full-reg-names \ +; RUN: -mcpu=pwr10 -enable-subreg-liveness -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) diff --git a/llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc.ll 
b/llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc.ll index 3823cce4214403..8ba45643d5a01f 100644 --- a/llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-acc-regalloc.ll @@ -4,7 +4,7 @@ ; RUN: | FileCheck %s ; RUN: llc -verify-machineinstrs -mtriple powerpc64le-unknown-linux-gnu \ ; RUN: -mcpu=pwr10 -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \ -; RUN: -ppc-track-subreg-liveness < %s | FileCheck %s --check-prefix=TRACKLIVE +; RUN: -enable-subreg-liveness < %s | FileCheck %s --check-prefix=TRACKLIVE %0 = type <{ double }> %1 = type <{ double }> diff --git a/llvm/test/CodeGen/PowerPC/subreg-killed.mir b/llvm/test/CodeGen/PowerPC/subreg-killed.mir index 433b9549f8a61a..ae12ab995af143 100644 --- a/llvm/test/CodeGen/PowerPC/subreg-killed.mir +++ b/llvm/test/CodeGen/PowerPC/subreg-killed.mir @@ -1,4 +1,4 @@ -# RUN: llc -mcpu=pwr10 -O3 -ppc-track-subreg-liveness -verify-machineinstrs \ +# RUN: llc -mcpu=pwr10 -O3 -enable-subreg-liveness -verify-machineinstrs \ # RUN: -mtriple=powerpc64le-unknown-linux-gnu -run-pass=greedy,virtregrewriter \ # RUN: -o - %s | FileCheck %s diff --git a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll index 478d2eae9dca2c..a8e99ddf32d63d 100644 --- a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll +++ b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -O2 -mtriple riscv64 -mattr=+v,+m,+zbb -riscv-enable-subreg-liveness \ +; RUN: llc -O2 -mtriple riscv64 -mattr=+v,+m,+zbb -enable-subreg-liveness \ ; RUN: -verify-machineinstrs < %s \ ; RUN: | FileCheck %s diff --git a/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll b/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll index c92ba98dcc3385..9971cb7821ad12 100644 --- 
a/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll +++ b/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+f,+m,+zfh,+zvfh \ -; RUN: -riscv-enable-subreg-liveness=false < %s | FileCheck %s +; RUN: -enable-subreg-liveness=false < %s | FileCheck %s ; RUN: llc -mtriple=riscv64 -mattr=+f,+m,+zfh,+zvfh < %s \ -; RUN: -riscv-enable-subreg-liveness=true| FileCheck %s --check-prefix=SUBREGLIVENESS +; RUN: -enable-subreg-liveness=true| FileCheck %s --check-prefix=SUBREGLIVENESS ; This testcase failed to compile after ; c46aab01c002b7a04135b8b7f1f52d8c9ae23a58, which was reverted. diff --git a/llvm/test/CodeGen/RISCV/rvv/subregister-undef-early-clobber.mir b/llvm/test/CodeGen/RISCV/rvv/subregister-undef-early-clobber.mir index 9cafb323dc65c8..539d319f3426dd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/subregister-undef-early-clobber.mir +++ b/llvm/test/CodeGen/RISCV/rvv/subregister-undef-early-clobber.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc %s -mtriple=riscv64 -mattr=+v -riscv-enable-subreg-liveness -run-pass=init-undef -o - | FileCheck %s +# RUN: llc %s -mtriple=riscv64 -mattr=+v -enable-subreg-liveness -run-pass=init-undef -o - | FileCheck %s ... 
--- diff --git a/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll b/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll index 48c30596ad5183..5345bec22b9144 100644 --- a/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll +++ b/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple riscv64 -mattr=+v -riscv-enable-subreg-liveness < %s | FileCheck %s +; RUN: llc -mtriple riscv64 -mattr=+v -enable-subreg-liveness < %s | FileCheck %s define @vrgather_all_undef(ptr %p) { ; CHECK-LABEL: vrgather_all_undef: diff --git a/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.mir b/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.mir index dcf61c048ff0e7..8df2c60c926c38 100644 --- a/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.mir +++ b/llvm/test/CodeGen/RISCV/rvv/undef-earlyclobber-chain.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=riscv32 -mattr=+v -riscv-enable-subreg-liveness -run-pass init-undef -run-pass machineverifier %s -o - | FileCheck %s +# RUN: llc -mtriple=riscv32 -mattr=+v -enable-subreg-liveness -run-pass init-undef -run-pass machineverifier %s -o - | FileCheck %s --- | source_filename = "" diff --git a/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll b/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll index e95e9fabe93422..1779fc12095e88 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefix NOSUBREG -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s -riscv-enable-subreg-liveness=true | FileCheck %s --check-prefix SUBREG +; RUN: llc 
-mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s -enable-subreg-liveness=true | FileCheck %s --check-prefix SUBREG ; This test checks that vrgatherei16 instructions are correctly ; register-allocated. The LMUL=1 destination register groups may not overlap diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.mir index 15aa62d5cff6b5..0995521957b984 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=thumbv8.1m.main-none-unknown-eabi -mattr=+mve -run-pass=arm-low-overhead-loops -arm-enable-subreg-liveness %s -o - --verify-machineinstrs | FileCheck %s +# RUN: llc -mtriple=thumbv8.1m.main-none-unknown-eabi -mattr=+mve -run-pass=arm-low-overhead-loops -enable-subreg-liveness %s -o - --verify-machineinstrs | FileCheck %s --- | %struct.arm_2d_size_t = type { i16, i16 } diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/subreg-liveness.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/subreg-liveness.mir index 898e6b15e18f13..571002cc8e8c24 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/subreg-liveness.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/subreg-liveness.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops -arm-enable-subreg-liveness %s -o - --verify-machineinstrs | FileCheck %s +# RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -run-pass=arm-low-overhead-loops -enable-subreg-liveness %s -o - --verify-machineinstrs | FileCheck %s --- | target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" From ae71609e91ec9f38df7e92ba3c50a1f9cebb772e Mon Sep 17 00:00:00 2001 From: Andreas Jonson Date: Fri, 14 Jun 2024 10:04:33 +0200 Subject: 
[PATCH 060/155] [SDAG] Lower range attribute to AssertZext (#95450) Add support for range attributes on calls, in addition to range metadata. --- .../SelectionDAG/SelectionDAGBuilder.cpp | 22 +++++++++++++------ .../AArch64/lower-range-metadata-func-call.ll | 22 +++++++++++++++++++ .../CodeGen/X86/legalize-vec-assertzext.ll | 18 +++++++++++++++ 3 files changed, 55 insertions(+), 7 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 2b82d874293910..98555b39db03c5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4487,6 +4487,17 @@ static const MDNode *getRangeMetadata(const Instruction &I) { return I.getMetadata(LLVMContext::MD_range); } +static std::optional getRange(const Instruction &I) { + if (const auto *CB = dyn_cast(&I)) { + // see comment in getRangeMetadata about this check + if (CB->hasRetAttr(Attribute::NoUndef)) + return CB->getRange(); + } + if (const MDNode *Range = getRangeMetadata(I)) + return getConstantRangeFromMetadata(*Range); + return std::nullopt; +} + void SelectionDAGBuilder::visitLoad(const LoadInst &I) { if (I.isAtomic()) return visitAtomicLoad(I); @@ -10230,19 +10241,16 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) { SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG, const Instruction &I, SDValue Op) { - const MDNode *Range = getRangeMetadata(I); - if (!Range) - return Op; + std::optional CR = getRange(I); - ConstantRange CR = getConstantRangeFromMetadata(*Range); - if (CR.isFullSet() || CR.isEmptySet() || CR.isUpperWrapped()) + if (!CR || CR->isFullSet() || CR->isEmptySet() || CR->isUpperWrapped()) return Op; - APInt Lo = CR.getUnsignedMin(); + APInt Lo = CR->getUnsignedMin(); if (!Lo.isMinValue()) return Op; - APInt Hi = CR.getUnsignedMax(); + APInt Hi = CR->getUnsignedMax(); unsigned Bits = std::max(Hi.getActiveBits(), 
static_cast(IntegerType::MIN_INT_BITS)); diff --git a/llvm/test/CodeGen/AArch64/lower-range-metadata-func-call.ll b/llvm/test/CodeGen/AArch64/lower-range-metadata-func-call.ll index 5b9a734c49825a..5af8189b4e0d79 100644 --- a/llvm/test/CodeGen/AArch64/lower-range-metadata-func-call.ll +++ b/llvm/test/CodeGen/AArch64/lower-range-metadata-func-call.ll @@ -34,6 +34,28 @@ entry: ret i32 %and } +; and can be eliminated +; CHECK-LABEL: {{^}}test_call_known_max_range_attr: +; CHECK: bl foo +; CHECK-NOT: and +; CHECK: ret +define i32 @test_call_known_max_range_attr() #0 { +entry: + %id = tail call noundef range(i32 0, 1024) i32 @foo() + %and = and i32 %id, 1023 + ret i32 %and +} + +; CHECK-LABEL: {{^}}test_call_known_max_range_attr_no_noundef: +; CHECK: bl foo +; CHECK: and w{{[0-9]+}}, w0, #0x3ff +; CHECK: ret +define i32 @test_call_known_max_range_attr_no_noundef() #0 { +entry: + %id = tail call range(i32 0, 1024) i32 @foo() + %and = and i32 %id, 1023 + ret i32 %and +} declare i32 @foo() diff --git a/llvm/test/CodeGen/X86/legalize-vec-assertzext.ll b/llvm/test/CodeGen/X86/legalize-vec-assertzext.ll index 1c595b7fb5e1e3..2cf37c68b8b40f 100644 --- a/llvm/test/CodeGen/X86/legalize-vec-assertzext.ll +++ b/llvm/test/CodeGen/X86/legalize-vec-assertzext.ll @@ -34,6 +34,24 @@ define i64 @widen_assertzext(ptr %x) nounwind { ret i64 %d } +define i64 @widen_assertzext_range_attr(ptr %x) nounwind { +; CHECK-LABEL: widen_assertzext_range_attr: +; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: callq test2@PLT +; CHECK-NEXT: movb $127, %al +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vpexpandq %zmm0, %zmm0 {%k1} {z} +; CHECK-NEXT: vextracti32x4 $3, %zmm0, %xmm0 +; CHECK-NEXT: vmovq %xmm0, %rax +; CHECK-NEXT: popq %rcx +; CHECK-NEXT: vzeroupper +; CHECK-NEXT: retq + %e = call noundef range(i64 0, 2) <7 x i64> @test2() + %d = extractelement <7 x i64> %e, i32 6 + ret i64 %d +} + declare <16 x i64> @test() declare <7 x i64> @test2() !0 = !{ i64 0, i64 2 } From 
ebb5385c6ed7d6610f36b92090209c114568d796 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Fri, 14 Jun 2024 09:18:21 +0100 Subject: [PATCH 061/155] MathExtras/test: increase coverage (#95425) Increase test coverage, and cover possible overflow cases in preparation for another patch optimizing for bitwidth. --- llvm/unittests/Support/MathExtrasTest.cpp | 46 ++++++++++++++++++++++- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/llvm/unittests/Support/MathExtrasTest.cpp b/llvm/unittests/Support/MathExtrasTest.cpp index e75700df67e691..bcccb963c96aeb 100644 --- a/llvm/unittests/Support/MathExtrasTest.cpp +++ b/llvm/unittests/Support/MathExtrasTest.cpp @@ -8,6 +8,7 @@ #include "llvm/Support/MathExtras.h" #include "gtest/gtest.h" +#include using namespace llvm; @@ -175,6 +176,7 @@ TEST(MathExtras, MinAlign) { EXPECT_EQ(2u, MinAlign(2, 4)); EXPECT_EQ(1u, MinAlign(17, 64)); EXPECT_EQ(256u, MinAlign(256, 512)); + EXPECT_EQ(2u, MinAlign(0, 2)); } TEST(MathExtras, NextPowerOf2) { @@ -183,15 +185,34 @@ TEST(MathExtras, NextPowerOf2) { EXPECT_EQ(256u, NextPowerOf2(128)); } -TEST(MathExtras, alignTo) { +TEST(MathExtras, AlignTo) { EXPECT_EQ(8u, alignTo(5, 8)); EXPECT_EQ(24u, alignTo(17, 8)); EXPECT_EQ(0u, alignTo(~0LL, 8)); + EXPECT_EQ(static_cast(std::numeric_limits::max()) + 1, + alignTo(std::numeric_limits::max(), 2)); EXPECT_EQ(7u, alignTo(5, 8, 7)); EXPECT_EQ(17u, alignTo(17, 8, 1)); EXPECT_EQ(3u, alignTo(~0LL, 8, 3)); EXPECT_EQ(552u, alignTo(321, 255, 42)); + EXPECT_EQ(std::numeric_limits::max(), + alignTo(std::numeric_limits::max(), 2, 1)); +} + +TEST(MathExtras, AlignToPowerOf2) { + EXPECT_EQ(8u, alignToPowerOf2(5, 8)); + EXPECT_EQ(24u, alignToPowerOf2(17, 8)); + EXPECT_EQ(0u, alignToPowerOf2(~0LL, 8)); + EXPECT_EQ(static_cast(std::numeric_limits::max()) + 1, + alignToPowerOf2(std::numeric_limits::max(), 2)); +} + +TEST(MathExtras, AlignDown) { + EXPECT_EQ(0u, alignDown(5, 8)); + EXPECT_EQ(16u, alignDown(17, 8)); + 
EXPECT_EQ(std::numeric_limits::max() - 1, + alignDown(std::numeric_limits::max(), 2)); } template void SaturatingAddTestHelper() { @@ -434,7 +455,20 @@ TEST(MathExtras, IsShiftedInt) { EXPECT_FALSE((isShiftedInt<6, 10>(int64_t(1) << 15))); } -TEST(MathExtras, DivideCeilSigned) { +TEST(MathExtras, DivideNearest) { + EXPECT_EQ(divideNearest(14, 3), 5u); + EXPECT_EQ(divideNearest(15, 3), 5u); + EXPECT_EQ(divideNearest(0, 3), 0u); + EXPECT_EQ(divideNearest(std::numeric_limits::max(), 2), + 2147483648u); +} + +TEST(MathExtras, DivideCeil) { + EXPECT_EQ(divideCeil(14, 3), 5u); + EXPECT_EQ(divideCeil(15, 3), 5u); + EXPECT_EQ(divideCeil(0, 3), 0u); + EXPECT_EQ(divideCeil(std::numeric_limits::max(), 2), 2147483648u); + EXPECT_EQ(divideCeilSigned(14, 3), 5); EXPECT_EQ(divideCeilSigned(15, 3), 5); EXPECT_EQ(divideCeilSigned(14, -3), -4); @@ -443,6 +477,10 @@ TEST(MathExtras, DivideCeilSigned) { EXPECT_EQ(divideCeilSigned(-15, 3), -5); EXPECT_EQ(divideCeilSigned(0, 3), 0); EXPECT_EQ(divideCeilSigned(0, -3), 0); + EXPECT_EQ(divideCeilSigned(std::numeric_limits::max(), 2), + std::numeric_limits::max() / 2 + 1); + EXPECT_EQ(divideCeilSigned(std::numeric_limits::max(), -2), + std::numeric_limits::min() / 2 + 1); } TEST(MathExtras, DivideFloorSigned) { @@ -454,6 +492,10 @@ TEST(MathExtras, DivideFloorSigned) { EXPECT_EQ(divideFloorSigned(-15, 3), -5); EXPECT_EQ(divideFloorSigned(0, 3), 0); EXPECT_EQ(divideFloorSigned(0, -3), 0); + EXPECT_EQ(divideFloorSigned(std::numeric_limits::max(), 2), + std::numeric_limits::max() / 2); + EXPECT_EQ(divideFloorSigned(std::numeric_limits::max(), -2), + std::numeric_limits::min() / 2); } TEST(MathExtras, Mod) { From da249cad8d398939e0c608d38d0c038954941316 Mon Sep 17 00:00:00 2001 From: Hans Date: Fri, 14 Jun 2024 10:23:33 +0200 Subject: [PATCH 062/155] [clang-cl] Map /Ot to -O3 instead of -O2 (#95406) /Ot (which is also implied by /O2) is supposed to optimize for maximum speed, so -O3 seems like a better match. 
--- clang/docs/ReleaseNotes.rst | 4 ++++ clang/docs/UsersManual.rst | 4 ++-- clang/include/clang/Driver/Options.td | 4 ++-- clang/lib/Driver/ToolChains/MSVC.cpp | 4 ++-- clang/test/Driver/cl-options.c | 10 +++++----- 5 files changed, 15 insertions(+), 11 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 8c2f737836a9dc..68355dbb5861b7 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -921,6 +921,10 @@ Android Support Windows Support ^^^^^^^^^^^^^^^ +- The clang-cl ``/Ot`` compiler option ("optimize for speed", also implied by + ``/O2``) now maps to clang's ``-O3`` optimization level instead of ``-O2``. + Users who prefer the old behavior can use ``clang-cl /Ot /clang:-O2 ...``. + - Clang-cl now supports function targets with intrinsic headers. This allows for runtime feature detection of intrinsics. Previously under clang-cl ``immintrin.h`` and similar intrinsic headers would only include the intrinsics diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index f954857b0235af..ee30e4eff9ea08 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -4636,8 +4636,8 @@ Execute ``clang-cl /?`` to see a list of supported options: /Og No effect /Oi- Disable use of builtin functions /Oi Enable use of builtin functions - /Os Optimize for size - /Ot Optimize for speed + /Os Optimize for size (like clang -Os) + /Ot Optimize for speed (like clang -O3) /Ox Deprecated (same as /Og /Oi /Ot /Oy /Ob2); use /O2 instead /Oy- Disable frame pointer omission (x86 only, default) /Oy Enable frame pointer omission (x86 only) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 062fbd2e4c0a44..d523e8c611f0fe 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -8279,9 +8279,9 @@ def : CLFlag<"Oi">, Alias<_SLASH_O>, AliasArgs<["i"]>, def : CLFlag<"Oi-">, Alias<_SLASH_O>, AliasArgs<["i-"]>,
HelpText<"Disable use of builtin functions">; def : CLFlag<"Os">, Alias<_SLASH_O>, AliasArgs<["s"]>, - HelpText<"Optimize for size">; + HelpText<"Optimize for size (like clang -Os)">; def : CLFlag<"Ot">, Alias<_SLASH_O>, AliasArgs<["t"]>, - HelpText<"Optimize for speed">; + HelpText<"Optimize for speed (like clang -O3)">; def : CLFlag<"Ox">, Alias<_SLASH_O>, AliasArgs<["x"]>, HelpText<"Deprecated (like /Og /Oi /Ot /Oy /Ob2); use /O2">; def : CLFlag<"Oy">, Alias<_SLASH_O>, AliasArgs<["y"]>, diff --git a/clang/lib/Driver/ToolChains/MSVC.cpp b/clang/lib/Driver/ToolChains/MSVC.cpp index b7021d4b996ddd..d03687208c5c6a 100644 --- a/clang/lib/Driver/ToolChains/MSVC.cpp +++ b/clang/lib/Driver/ToolChains/MSVC.cpp @@ -861,7 +861,7 @@ static void TranslateOptArg(Arg *A, llvm::opt::DerivedArgList &DAL, DAL.AddJoinedArg(A, Opts.getOption(options::OPT_O), "s"); } else if (OptChar == '2' || OptChar == 'x') { DAL.AddFlagArg(A, Opts.getOption(options::OPT_fbuiltin)); - DAL.AddJoinedArg(A, Opts.getOption(options::OPT_O), "2"); + DAL.AddJoinedArg(A, Opts.getOption(options::OPT_O), "3"); } if (SupportsForcingFramePointer && !DAL.hasArgNoClaim(options::OPT_fno_omit_frame_pointer)) @@ -901,7 +901,7 @@ static void TranslateOptArg(Arg *A, llvm::opt::DerivedArgList &DAL, DAL.AddJoinedArg(A, Opts.getOption(options::OPT_O), "s"); break; case 't': - DAL.AddJoinedArg(A, Opts.getOption(options::OPT_O), "2"); + DAL.AddJoinedArg(A, Opts.getOption(options::OPT_O), "3"); break; case 'y': { bool OmitFramePointer = true; diff --git a/clang/test/Driver/cl-options.c b/clang/test/Driver/cl-options.c index 2c17459dde656a..e77ec364170d1c 100644 --- a/clang/test/Driver/cl-options.c +++ b/clang/test/Driver/cl-options.c @@ -189,12 +189,12 @@ // RUN: %clang_cl /Ot --target=i686-pc-windows-msvc -### -- %s 2>&1 | FileCheck -check-prefix=Ot %s // RUN: %clang_cl /Ot --target=x86_64-pc-windows-msvc -### -- %s 2>&1 | FileCheck -check-prefix=Ot %s // Ot: -mframe-pointer=none -// Ot: -O2 +// Ot: -O3 // RUN: %clang_cl 
/Ox --target=i686-pc-windows-msvc -### -- %s 2>&1 | FileCheck -check-prefix=Ox %s // RUN: %clang_cl /Ox --target=x86_64-pc-windows-msvc -### -- %s 2>&1 | FileCheck -check-prefix=Ox %s // Ox: -mframe-pointer=none -// Ox: -O2 +// Ox: -O3 // RUN: %clang_cl --target=i686-pc-win32 /O2sy- -### -- %s 2>&1 | FileCheck -check-prefix=PR24003 %s // PR24003: -mframe-pointer=all @@ -202,14 +202,14 @@ // RUN: %clang_cl --target=i686-pc-win32 -Werror -Wno-msvc-not-found /Oy- /O2 -### -- %s 2>&1 | FileCheck -check-prefix=Oy_2 %s // Oy_2: -mframe-pointer=all -// Oy_2: -O2 +// Oy_2: -O3 // RUN: %clang_cl --target=aarch64-pc-windows-msvc -Werror -Wno-msvc-not-found /Oy- /O2 -### -- %s 2>&1 | FileCheck -check-prefix=Oy_aarch64 %s // Oy_aarch64: -mframe-pointer=non-leaf -// Oy_aarch64: -O2 +// Oy_aarch64: -O3 // RUN: %clang_cl --target=i686-pc-win32 -Werror -Wno-msvc-not-found /O2 /O2 -### -- %s 2>&1 | FileCheck -check-prefix=O2O2 %s -// O2O2: "-O2" +// O2O2: "-O3" // RUN: %clang_cl /Zs -Werror /Oy -- %s 2>&1 From dfde0773fdee9301e5f2181e1dbcbb0dc3602e08 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 14 Jun 2024 10:38:50 +0200 Subject: [PATCH 063/155] [SimplifyCFG] More accurate use legality check for sinking (#94462) When sinking instructions, we have to make sure that the uses of that instruction are consistent: If used in a phi node in the sink target, then the phi operands have to match the sink candidates. This allows the phi to be removed when the instruction is sunk. This case is already handled accurately. However, what the current code doesn't handle are uses in the same block. These are just unconditionally accepted, even though this needs the same consistency check for the phi node that sinking the using instruction would introduce. Instead, the code has another check when actually performing the sinking, which repeats the phi check (just at a later time, where all the later instructions have already been sunk and any new phis introduced). 
This is problematic, because it messes up the profitability heuristic. The code will think that certain instructions will get sunk, but they actually won't. This may result in more phi nodes being created than is considered profitable. See the changed test for a case where we no longer do this after this patch. The new approach makes sure that the uses are consistent during the initial legality check. This is based on PhiOperands, which we already collect. The primary motivation for this is to generalize sinking to support more than one use, and doing that generalization is hard with the current split checking approach. --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 86 ++++++++----------- .../SimplifyCFG/X86/sink-common-code.ll | 8 +- 2 files changed, 40 insertions(+), 54 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 107c8bb6c027fa..9c7f90b0613a0b 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1932,7 +1932,7 @@ static bool replacingOperandWithVariableIsCheap(const Instruction *I, // PHI node (because an operand varies in each input block), add to PHIOperands. static bool canSinkInstructions( ArrayRef Insts, - DenseMap> &PHIOperands) { + DenseMap> &PHIOperands) { // Prune out obviously bad instructions to move. Each instruction must have // exactly zero or one use, and we check later that use is by a single, common // PHI instruction in the successor. @@ -1983,20 +1983,17 @@ static bool canSinkInstructions( return false; } - // All instructions in Insts are known to be the same opcode. If they have a - // use, check that the only user is a PHI or in the same block as the - // instruction, because if a user is in the same block as an instruction we're - // contemplating sinking, it must already be determined to be sinkable. 
+ // Uses must be consistent: If I0 is used in a phi node in the sink target, + // then the other phi operands must match the instructions from Insts. This + // also has to hold true for any phi nodes that would be created as a result + // of sinking. Both of these cases are represented by PhiOperands. if (HasUse) { - auto *PNUse = dyn_cast(*I0->user_begin()); - auto *Succ = I0->getParent()->getTerminator()->getSuccessor(0); - if (!all_of(Insts, [&PNUse,&Succ](const Instruction *I) -> bool { - auto *U = cast(*I->user_begin()); - return (PNUse && - PNUse->getParent() == Succ && - PNUse->getIncomingValueForBlock(I->getParent()) == I) || - U->getParent() == I->getParent(); - })) + const Use &U = *I0->use_begin(); + auto It = PHIOperands.find(&U); + if (It == PHIOperands.end()) + // There may be uses in other blocks when sinking into a loop header. + return false; + if (!equal(Insts, It->second)) return false; } @@ -2063,8 +2060,9 @@ static bool canSinkInstructions( !canReplaceOperandWithVariable(I0, OI)) // We can't create a PHI from this GEP. return false; + auto &Ops = PHIOperands[&I0->getOperandUse(OI)]; for (auto *I : Insts) - PHIOperands[I].push_back(I->getOperand(OI)); + Ops.push_back(I->getOperand(OI)); } } return true; @@ -2073,7 +2071,7 @@ static bool canSinkInstructions( // Assuming canSinkInstructions(Blocks) has returned true, sink the last // instruction of every block in Blocks to their common successor, commoning // into one instruction. -static bool sinkLastInstruction(ArrayRef Blocks) { +static void sinkLastInstruction(ArrayRef Blocks) { auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0); // canSinkInstructions returning true guarantees that every block has at @@ -2088,23 +2086,10 @@ static bool sinkLastInstruction(ArrayRef Blocks) { Insts.push_back(I); } - // The only checking we need to do now is that all users of all instructions - // are the same PHI node. 
canSinkInstructions should have checked this but - // it is slightly over-aggressive - it gets confused by commutative - // instructions so double-check it here. - Instruction *I0 = Insts.front(); - if (!I0->user_empty()) { - auto *PNUse = dyn_cast(*I0->user_begin()); - if (!all_of(Insts, [&PNUse](const Instruction *I) -> bool { - auto *U = cast(*I->user_begin()); - return U == PNUse; - })) - return false; - } - // We don't need to do any more checking here; canSinkInstructions should // have done it all for us. SmallVector NewOperands; + Instruction *I0 = Insts.front(); for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) { // This check is different to that in canSinkInstructions. There, we // cared about the global view once simplifycfg (and instcombine) have @@ -2172,8 +2157,6 @@ static bool sinkLastInstruction(ArrayRef Blocks) { I->replaceAllUsesWith(I0); I->eraseFromParent(); } - - return true; } namespace { @@ -2314,9 +2297,19 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB, // carry on. If we can sink an instruction but need to PHI-merge some operands // (because they're not identical in each instruction) we add these to // PHIOperands. + // We prepopulate PHIOperands with the phis that already exist in BB. + DenseMap> PHIOperands; + for (PHINode &PN : BB->phis()) { + SmallDenseMap IncomingVals; + for (const Use &U : PN.incoming_values()) + IncomingVals.insert({PN.getIncomingBlock(U), &U}); + auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]]; + for (BasicBlock *Pred : UnconditionalPreds) + Ops.push_back(*IncomingVals[Pred]); + } + int ScanIdx = 0; SmallPtrSet InstructionsToSink; - DenseMap> PHIOperands; LockstepReverseIterator LRI(UnconditionalPreds); while (LRI.isValid() && canSinkInstructions(*LRI, PHIOperands)) { @@ -2338,20 +2331,19 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB, // actually sink before encountering instruction that is unprofitable to // sink? 
auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) { - unsigned NumPHIdValues = 0; - for (auto *I : *LRI) - for (auto *V : PHIOperands[I]) { - if (!InstructionsToSink.contains(V)) - ++NumPHIdValues; + unsigned NumPHIInsts = 0; + for (Use &U : (*LRI)[0]->operands()) { + auto It = PHIOperands.find(&U); + if (It != PHIOperands.end() && !all_of(It->second, [&](Value *V) { + return InstructionsToSink.contains(V); + })) { + ++NumPHIInsts; // FIXME: this check is overly optimistic. We may end up not sinking // said instruction, due to the very same profitability check. // See @creating_too_many_phis in sink-common-code.ll. } - LLVM_DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n"); - unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size(); - if ((NumPHIdValues % UnconditionalPreds.size()) != 0) - NumPHIInsts++; - + } + LLVM_DEBUG(dbgs() << "SINK: #phi insts: " << NumPHIInsts << "\n"); return NumPHIInsts <= 1; }; @@ -2476,13 +2468,7 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB, // sink is always at index 0. 
LRI.reset(); - if (!sinkLastInstruction(UnconditionalPreds)) { - LLVM_DEBUG( - dbgs() - << "SINK: stopping here, failed to actually sink instruction!\n"); - break; - } - + sinkLastInstruction(UnconditionalPreds); NumSinkCommonInstrs++; Changed = true; } diff --git a/llvm/test/Transforms/SimplifyCFG/X86/sink-common-code.ll b/llvm/test/Transforms/SimplifyCFG/X86/sink-common-code.ll index 118372164c1f92..b67ee630368480 100644 --- a/llvm/test/Transforms/SimplifyCFG/X86/sink-common-code.ll +++ b/llvm/test/Transforms/SimplifyCFG/X86/sink-common-code.ll @@ -568,16 +568,16 @@ define zeroext i1 @test_crash(i1 zeroext %flag, ptr %i4, ptr %m, ptr %n) { ; CHECK-NEXT: br i1 [[FLAG:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] ; CHECK: if.then: ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I4:%.*]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[TMP1]], -1 ; CHECK-NEXT: br label [[IF_END:%.*]] ; CHECK: if.else: ; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[M:%.*]], align 4 ; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[N:%.*]], align 4 +; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP3]], [[TMP4]] ; CHECK-NEXT: br label [[IF_END]] ; CHECK: if.end: -; CHECK-NEXT: [[TMP4_SINK:%.*]] = phi i32 [ [[TMP4]], [[IF_ELSE]] ], [ -1, [[IF_THEN]] ] -; CHECK-NEXT: [[TMP3_SINK:%.*]] = phi i32 [ [[TMP3]], [[IF_ELSE]] ], [ [[TMP1]], [[IF_THEN]] ] -; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP3_SINK]], [[TMP4_SINK]] -; CHECK-NEXT: store i32 [[TMP5]], ptr [[I4]], align 4 +; CHECK-NEXT: [[TMP5_SINK:%.*]] = phi i32 [ [[TMP5]], [[IF_ELSE]] ], [ [[TMP2]], [[IF_THEN]] ] +; CHECK-NEXT: store i32 [[TMP5_SINK]], ptr [[I4]], align 4 ; CHECK-NEXT: ret i1 true ; entry: From e83adfe59632d2e2f8ff26db33087ba7fb754485 Mon Sep 17 00:00:00 2001 From: Chris B Date: Fri, 14 Jun 2024 03:44:24 -0500 Subject: [PATCH 064/155] [SPIRV] Silence unused variable warnings (#95492) This change marks a few variable declarations as [[maybe_unused]] to silence unused variable warnings. 
--- .../Target/SPIRV/Analysis/SPIRVConvergenceRegionAnalysis.cpp | 2 +- llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/SPIRV/Analysis/SPIRVConvergenceRegionAnalysis.cpp b/llvm/lib/Target/SPIRV/Analysis/SPIRVConvergenceRegionAnalysis.cpp index 7f5f7d0b1e4dc5..25e285e35f9336 100644 --- a/llvm/lib/Target/SPIRV/Analysis/SPIRVConvergenceRegionAnalysis.cpp +++ b/llvm/lib/Target/SPIRV/Analysis/SPIRVConvergenceRegionAnalysis.cpp @@ -138,7 +138,7 @@ ConvergenceRegion::ConvergenceRegion( SmallPtrSet &&Blocks, SmallPtrSet &&Exits) : DT(DT), LI(LI), ConvergenceToken(ConvergenceToken), Entry(Entry), Exits(std::move(Exits)), Blocks(std::move(Blocks)) { - for (auto *BB : this->Exits) + for ([[maybe_unused]] auto *BB : this->Exits) assert(this->Blocks.count(BB) != 0); assert(this->Blocks.count(this->Entry) != 0); } diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp index 2a515359bdd4fc..c14e5098be7111 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp @@ -2418,7 +2418,7 @@ Type *parseBuiltinCallArgumentBaseType(const StringRef DemangledCall, if (hasBuiltinTypePrefix(TypeStr)) { // OpenCL builtin types in demangled call strings have the following format: // e.g. ocl_image2d_ro - bool IsOCLBuiltinType = TypeStr.consume_front("ocl_"); + [[maybe_unused]] bool IsOCLBuiltinType = TypeStr.consume_front("ocl_"); assert(IsOCLBuiltinType && "Invalid OpenCL builtin prefix"); // Check if this is pointer to a builtin type and not just pointer From 880d37038c7bbff53ef02c9d6b01cbbc87875243 Mon Sep 17 00:00:00 2001 From: Durgadoss R Date: Fri, 14 Jun 2024 14:17:37 +0530 Subject: [PATCH 065/155] [APFloat] Add APFloat support for FP4 data type (#95392) This patch adds APFloat type support for the E2M1 FP4 datatype. 
The definitions for this format are detailed in section 5.3.3 of the OCP specification, which can be accessed here: https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf Signed-off-by: Durgadoss R --- clang/lib/AST/MicrosoftMangle.cpp | 1 + llvm/include/llvm/ADT/APFloat.h | 8 + llvm/lib/Support/APFloat.cpp | 25 ++- llvm/unittests/ADT/APFloatTest.cpp | 256 ++++++++++++++++++++++++++++- 4 files changed, 283 insertions(+), 7 deletions(-) diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index ffc5d2d4cd8fc3..a863ec7a529b97 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -901,6 +901,7 @@ void MicrosoftCXXNameMangler::mangleFloat(llvm::APFloat Number) { case APFloat::S_FloatTF32: case APFloat::S_Float6E3M2FN: case APFloat::S_Float6E2M3FN: + case APFloat::S_Float4E2M1FN: llvm_unreachable("Tried to mangle unexpected APFloat semantics"); } diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h index a9bb6cc9999b1e..c24eae8da3797b 100644 --- a/llvm/include/llvm/ADT/APFloat.h +++ b/llvm/include/llvm/ADT/APFloat.h @@ -197,6 +197,10 @@ struct APFloatBase { // types, there are no infinity or NaN values. The format is detailed in // https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf S_Float6E2M3FN, + // 4-bit floating point number with bit layout S1E2M1. Unlike IEEE-754 + // types, there are no infinity or NaN values. 
The format is detailed in + // https://www.opencompute.org/documents/ocp-microscaling-formats-mx-v1-0-spec-final-pdf + S_Float4E2M1FN, S_x87DoubleExtended, S_MaxSemantics = S_x87DoubleExtended, @@ -219,6 +223,7 @@ struct APFloatBase { static const fltSemantics &FloatTF32() LLVM_READNONE; static const fltSemantics &Float6E3M2FN() LLVM_READNONE; static const fltSemantics &Float6E2M3FN() LLVM_READNONE; + static const fltSemantics &Float4E2M1FN() LLVM_READNONE; static const fltSemantics &x87DoubleExtended() LLVM_READNONE; /// A Pseudo fltsemantic used to construct APFloats that cannot conflict with @@ -639,6 +644,7 @@ class IEEEFloat final : public APFloatBase { APInt convertFloatTF32APFloatToAPInt() const; APInt convertFloat6E3M2FNAPFloatToAPInt() const; APInt convertFloat6E2M3FNAPFloatToAPInt() const; + APInt convertFloat4E2M1FNAPFloatToAPInt() const; void initFromAPInt(const fltSemantics *Sem, const APInt &api); template void initFromIEEEAPInt(const APInt &api); void initFromHalfAPInt(const APInt &api); @@ -656,6 +662,7 @@ class IEEEFloat final : public APFloatBase { void initFromFloatTF32APInt(const APInt &api); void initFromFloat6E3M2FNAPInt(const APInt &api); void initFromFloat6E2M3FNAPInt(const APInt &api); + void initFromFloat4E2M1FNAPInt(const APInt &api); void assign(const IEEEFloat &); void copySignificand(const IEEEFloat &); @@ -1067,6 +1074,7 @@ class APFloat : public APFloatBase { // Below Semantics do not support {NaN or Inf} case APFloat::S_Float6E3M2FN: case APFloat::S_Float6E2M3FN: + case APFloat::S_Float4E2M1FN: return false; } } diff --git a/llvm/lib/Support/APFloat.cpp b/llvm/lib/Support/APFloat.cpp index 1209bf71a287d7..47618bc3259512 100644 --- a/llvm/lib/Support/APFloat.cpp +++ b/llvm/lib/Support/APFloat.cpp @@ -69,8 +69,8 @@ enum class fltNonfiniteBehavior { // encodings do not distinguish between signalling and quiet NaN. NanOnly, - // This behavior is present in Float6E3M2FN and Float6E2M3FN types, - // which do not support Inf or NaN values. 
+ // This behavior is present in Float6E3M2FN, Float6E2M3FN, and + // Float4E2M1FN types, which do not support Inf or NaN values. FiniteOnly, }; @@ -147,6 +147,8 @@ static constexpr fltSemantics semFloat6E3M2FN = { 4, -2, 3, 6, fltNonfiniteBehavior::FiniteOnly}; static constexpr fltSemantics semFloat6E2M3FN = { 2, 0, 4, 6, fltNonfiniteBehavior::FiniteOnly}; +static constexpr fltSemantics semFloat4E2M1FN = { + 2, 0, 2, 4, fltNonfiniteBehavior::FiniteOnly}; static constexpr fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80}; static constexpr fltSemantics semBogus = {0, 0, 0, 0}; @@ -218,6 +220,8 @@ const llvm::fltSemantics &APFloatBase::EnumToSemantics(Semantics S) { return Float6E3M2FN(); case S_Float6E2M3FN: return Float6E2M3FN(); + case S_Float4E2M1FN: + return Float4E2M1FN(); case S_x87DoubleExtended: return x87DoubleExtended(); } @@ -254,6 +258,8 @@ APFloatBase::SemanticsToEnum(const llvm::fltSemantics &Sem) { return S_Float6E3M2FN; else if (&Sem == &llvm::APFloat::Float6E2M3FN()) return S_Float6E2M3FN; + else if (&Sem == &llvm::APFloat::Float4E2M1FN()) + return S_Float4E2M1FN; else if (&Sem == &llvm::APFloat::x87DoubleExtended()) return S_x87DoubleExtended; else @@ -278,6 +284,7 @@ const fltSemantics &APFloatBase::Float8E4M3B11FNUZ() { const fltSemantics &APFloatBase::FloatTF32() { return semFloatTF32; } const fltSemantics &APFloatBase::Float6E3M2FN() { return semFloat6E3M2FN; } const fltSemantics &APFloatBase::Float6E2M3FN() { return semFloat6E2M3FN; } +const fltSemantics &APFloatBase::Float4E2M1FN() { return semFloat4E2M1FN; } const fltSemantics &APFloatBase::x87DoubleExtended() { return semX87DoubleExtended; } @@ -3640,6 +3647,11 @@ APInt IEEEFloat::convertFloat6E2M3FNAPFloatToAPInt() const { return convertIEEEFloatToAPInt(); } +APInt IEEEFloat::convertFloat4E2M1FNAPFloatToAPInt() const { + assert(partCount() == 1); + return convertIEEEFloatToAPInt(); +} + // This function creates an APInt that is just a bit map of the floating // point constant as 
it would appear in memory. It is not a conversion, // and treating the result as a normal integer is unlikely to be useful. @@ -3687,6 +3699,9 @@ APInt IEEEFloat::bitcastToAPInt() const { if (semantics == (const llvm::fltSemantics *)&semFloat6E2M3FN) return convertFloat6E2M3FNAPFloatToAPInt(); + if (semantics == (const llvm::fltSemantics *)&semFloat4E2M1FN) + return convertFloat4E2M1FNAPFloatToAPInt(); + assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended && "unknown format!"); return convertF80LongDoubleAPFloatToAPInt(); @@ -3911,6 +3926,10 @@ void IEEEFloat::initFromFloat6E2M3FNAPInt(const APInt &api) { initFromIEEEAPInt(api); } +void IEEEFloat::initFromFloat4E2M1FNAPInt(const APInt &api) { + initFromIEEEAPInt(api); +} + /// Treat api as containing the bits of a floating point number. void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) { assert(api.getBitWidth() == Sem->sizeInBits); @@ -3944,6 +3963,8 @@ void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) { return initFromFloat6E3M2FNAPInt(api); if (Sem == &semFloat6E2M3FN) return initFromFloat6E2M3FNAPInt(api); + if (Sem == &semFloat4E2M1FN) + return initFromFloat4E2M1FNAPInt(api); llvm_unreachable(nullptr); } diff --git a/llvm/unittests/ADT/APFloatTest.cpp b/llvm/unittests/ADT/APFloatTest.cpp index 7007d944801a75..f6af4b0e5f6512 100644 --- a/llvm/unittests/ADT/APFloatTest.cpp +++ b/llvm/unittests/ADT/APFloatTest.cpp @@ -1828,6 +1828,7 @@ TEST(APFloatTest, getLargest) { EXPECT_EQ(28, APFloat::getLargest(APFloat::Float6E3M2FN()).convertToDouble()); EXPECT_EQ(7.5, APFloat::getLargest(APFloat::Float6E2M3FN()).convertToDouble()); + EXPECT_EQ(6, APFloat::getLargest(APFloat::Float4E2M1FN()).convertToDouble()); } TEST(APFloatTest, getSmallest) { @@ -1900,6 +1901,13 @@ TEST(APFloatTest, getSmallest) { EXPECT_TRUE(test.isFiniteNonZero()); EXPECT_TRUE(test.isDenormal()); EXPECT_TRUE(test.bitwiseIsEqual(expected)); + + test = 
APFloat::getSmallest(APFloat::Float4E2M1FN(), false); + expected = APFloat(APFloat::Float4E2M1FN(), "0x0.8p0"); + EXPECT_FALSE(test.isNegative()); + EXPECT_TRUE(test.isFiniteNonZero()); + EXPECT_TRUE(test.isDenormal()); + EXPECT_TRUE(test.bitwiseIsEqual(expected)); } TEST(APFloatTest, getSmallestNormalized) { @@ -1984,6 +1992,14 @@ TEST(APFloatTest, getSmallestNormalized) { EXPECT_TRUE(test.isSmallestNormalized()); test = APFloat::getSmallestNormalized(APFloat::Float6E3M2FN(), false); expected = APFloat(APFloat::Float6E3M2FN(), "0x1p-2"); + + test = APFloat::getSmallestNormalized(APFloat::Float4E2M1FN(), false); + expected = APFloat(APFloat::Float4E2M1FN(), "0x1p0"); + EXPECT_FALSE(test.isNegative()); + EXPECT_TRUE(test.isFiniteNonZero()); + EXPECT_FALSE(test.isDenormal()); + EXPECT_TRUE(test.bitwiseIsEqual(expected)); + EXPECT_TRUE(test.isSmallestNormalized()); EXPECT_FALSE(test.isNegative()); EXPECT_TRUE(test.isFiniteNonZero()); EXPECT_FALSE(test.isDenormal()); @@ -2034,7 +2050,9 @@ TEST(APFloatTest, getZero) { {&APFloat::Float6E3M2FN(), false, true, {0, 0}, 1}, {&APFloat::Float6E3M2FN(), true, true, {0x20ULL, 0}, 1}, {&APFloat::Float6E2M3FN(), false, true, {0, 0}, 1}, - {&APFloat::Float6E2M3FN(), true, true, {0x20ULL, 0}, 1}}; + {&APFloat::Float6E2M3FN(), true, true, {0x20ULL, 0}, 1}, + {&APFloat::Float4E2M1FN(), false, true, {0, 0}, 1}, + {&APFloat::Float4E2M1FN(), true, true, {0x8ULL, 0}, 1}}; const unsigned NumGetZeroTests = std::size(GetZeroTest); for (unsigned i = 0; i < NumGetZeroTests; ++i) { APFloat test = APFloat::getZero(*GetZeroTest[i].semantics, @@ -5283,6 +5301,89 @@ TEST(APFloatTest, Float6ExhaustivePair) { } } +TEST(APFloatTest, Float4ExhaustivePair) { + // Test each pair of 4-bit floats with non-standard semantics + for (APFloat::Semantics Sem : {APFloat::S_Float4E2M1FN}) { + const llvm::fltSemantics &S = APFloat::EnumToSemantics(Sem); + for (int i = 0; i < 16; i++) { + for (int j = 0; j < 16; j++) { + SCOPED_TRACE("sem=" + std::to_string(Sem) + 
",i=" + std::to_string(i) + + ",j=" + std::to_string(j)); + APFloat x(S, APInt(4, i)); + APFloat y(S, APInt(4, j)); + + bool losesInfo; + APFloat x16 = x; + x16.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, + &losesInfo); + EXPECT_FALSE(losesInfo); + APFloat y16 = y; + y16.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, + &losesInfo); + EXPECT_FALSE(losesInfo); + + // Add + APFloat z = x; + z.add(y, APFloat::rmNearestTiesToEven); + APFloat z16 = x16; + z16.add(y16, APFloat::rmNearestTiesToEven); + z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); + EXPECT_TRUE(z.bitwiseIsEqual(z16)) + << "sem=" << Sem << ", i=" << i << ", j=" << j; + + // Subtract + z = x; + z.subtract(y, APFloat::rmNearestTiesToEven); + z16 = x16; + z16.subtract(y16, APFloat::rmNearestTiesToEven); + z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); + EXPECT_TRUE(z.bitwiseIsEqual(z16)) + << "sem=" << Sem << ", i=" << i << ", j=" << j; + + // Multiply + z = x; + z.multiply(y, APFloat::rmNearestTiesToEven); + z16 = x16; + z16.multiply(y16, APFloat::rmNearestTiesToEven); + z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); + EXPECT_TRUE(z.bitwiseIsEqual(z16)) + << "sem=" << Sem << ", i=" << i << ", j=" << j; + + // Skip divide by 0 + if (j == 0 || j == 8) + continue; + + // Divide + z = x; + z.divide(y, APFloat::rmNearestTiesToEven); + z16 = x16; + z16.divide(y16, APFloat::rmNearestTiesToEven); + z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); + EXPECT_TRUE(z.bitwiseIsEqual(z16)) + << "sem=" << Sem << ", i=" << i << ", j=" << j; + + // Mod + z = x; + z.mod(y); + z16 = x16; + z16.mod(y16); + z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); + EXPECT_TRUE(z.bitwiseIsEqual(z16)) + << "sem=" << Sem << ", i=" << i << ", j=" << j; + + // Remainder + z = x; + z.remainder(y); + z16 = x16; + z16.remainder(y16); + z16.convert(S, APFloat::rmNearestTiesToEven, &losesInfo); + EXPECT_TRUE(z.bitwiseIsEqual(z16)) + << "sem=" << Sem << ", i=" << i << ", 
j=" << j; + } + } + } +} + TEST(APFloatTest, ConvertE4M3FNToE5M2) { bool losesInfo; APFloat test(APFloat::Float8E4M3FN(), "1.0"); @@ -6743,7 +6844,7 @@ TEST(APFloatTest, getExactLog2) { EXPECT_EQ(INT_MIN, APFloat(Semantics, "3.0").getExactLog2Abs()); EXPECT_EQ(INT_MIN, APFloat(Semantics, "-3.0").getExactLog2Abs()); - if (I == APFloat::S_Float6E2M3FN) { + if (I == APFloat::S_Float6E2M3FN || I == APFloat::S_Float4E2M1FN) { EXPECT_EQ(2, APFloat(Semantics, "4.0").getExactLog2()); EXPECT_EQ(INT_MIN, APFloat(Semantics, "-4.0").getExactLog2()); EXPECT_EQ(2, APFloat(Semantics, "4.0").getExactLog2Abs()); @@ -6831,6 +6932,25 @@ TEST(APFloatTest, Float6E2M3FNFromString) { EXPECT_TRUE(APFloat(APFloat::Float6E2M3FN(), "-0").isNegZero()); } +TEST(APFloatTest, Float4E2M1FNFromString) { + // Exactly representable + EXPECT_EQ(6, APFloat(APFloat::Float4E2M1FN(), "6").convertToDouble()); + // Round down to maximum value + EXPECT_EQ(6, APFloat(APFloat::Float4E2M1FN(), "32").convertToDouble()); + +#ifdef GTEST_HAS_DEATH_TEST +#ifndef NDEBUG + EXPECT_DEATH(APFloat(APFloat::Float4E2M1FN(), "inf"), + "This floating point format does not support Inf"); + EXPECT_DEATH(APFloat(APFloat::Float4E2M1FN(), "nan"), + "This floating point format does not support NaN"); +#endif +#endif + + EXPECT_TRUE(APFloat(APFloat::Float4E2M1FN(), "0").isPosZero()); + EXPECT_TRUE(APFloat(APFloat::Float4E2M1FN(), "-0").isNegZero()); +} + TEST(APFloatTest, ConvertE3M2FToE2M3F) { bool losesInfo; APFloat test(APFloat::Float6E3M2FN(), "1.0"); @@ -6848,7 +6968,6 @@ TEST(APFloatTest, ConvertE3M2FToE2M3F) { EXPECT_EQ(status, APFloat::opOK); // Test overflow - losesInfo = false; test = APFloat(APFloat::Float6E3M2FN(), "28"); status = test.convert(APFloat::Float6E2M3FN(), APFloat::rmNearestTiesToEven, &losesInfo); @@ -6865,7 +6984,6 @@ TEST(APFloatTest, ConvertE3M2FToE2M3F) { EXPECT_EQ(status, APFloat::opUnderflow | APFloat::opInexact); // Testing inexact rounding to denormal number - losesInfo = false; test = 
APFloat(APFloat::Float6E3M2FN(), "0.1875"); status = test.convert(APFloat::Float6E2M3FN(), APFloat::rmNearestTiesToEven, &losesInfo); @@ -6898,7 +7016,6 @@ TEST(APFloatTest, ConvertE2M3FToE3M2F) { EXPECT_EQ(status, APFloat::opOK); // Test inexact rounding - losesInfo = false; test = APFloat(APFloat::Float6E2M3FN(), "7.5"); status = test.convert(APFloat::Float6E3M2FN(), APFloat::rmNearestTiesToEven, &losesInfo); @@ -6907,6 +7024,40 @@ TEST(APFloatTest, ConvertE2M3FToE3M2F) { EXPECT_EQ(status, APFloat::opInexact); } +TEST(APFloatTest, ConvertDoubleToE2M1F) { + bool losesInfo; + APFloat test(APFloat::IEEEdouble(), "1.0"); + APFloat::opStatus status = test.convert( + APFloat::Float4E2M1FN(), APFloat::rmNearestTiesToEven, &losesInfo); + EXPECT_EQ(1.0, test.convertToDouble()); + EXPECT_FALSE(losesInfo); + EXPECT_EQ(status, APFloat::opOK); + + test = APFloat(APFloat::IEEEdouble(), "0.0"); + status = test.convert(APFloat::Float4E2M1FN(), APFloat::rmNearestTiesToEven, + &losesInfo); + EXPECT_EQ(0.0f, test.convertToDouble()); + EXPECT_FALSE(losesInfo); + EXPECT_EQ(status, APFloat::opOK); + + // Test overflow + test = APFloat(APFloat::IEEEdouble(), "8"); + status = test.convert(APFloat::Float4E2M1FN(), APFloat::rmNearestTiesToEven, + &losesInfo); + EXPECT_EQ(6, test.convertToDouble()); + EXPECT_TRUE(losesInfo); + EXPECT_EQ(status, APFloat::opInexact); + + // Test underflow + test = APFloat(APFloat::IEEEdouble(), "0.25"); + status = test.convert(APFloat::Float4E2M1FN(), APFloat::rmNearestTiesToEven, + &losesInfo); + EXPECT_EQ(0., test.convertToDouble()); + EXPECT_TRUE(losesInfo); + EXPECT_FALSE(test.isDenormal()); + EXPECT_EQ(status, APFloat::opUnderflow | APFloat::opInexact); +} + TEST(APFloatTest, Float6E3M2FNNext) { APFloat test(APFloat::Float6E3M2FN(), APFloat::uninitialized); APFloat expected(APFloat::Float6E3M2FN(), APFloat::uninitialized); @@ -6983,6 +7134,44 @@ TEST(APFloatTest, Float6E2M3FNNext) { EXPECT_TRUE(test.bitwiseIsEqual(expected)); } +TEST(APFloatTest, 
Float4E2M1FNNext) { + APFloat test(APFloat::Float4E2M1FN(), APFloat::uninitialized); + APFloat expected(APFloat::Float4E2M1FN(), APFloat::uninitialized); + + // 1. NextUp of largest bit pattern is the same + test = APFloat::getLargest(APFloat::Float4E2M1FN()); + expected = APFloat::getLargest(APFloat::Float4E2M1FN()); + EXPECT_EQ(test.next(false), APFloat::opOK); + EXPECT_FALSE(test.isInfinity()); + EXPECT_FALSE(test.isZero()); + EXPECT_TRUE(test.bitwiseIsEqual(expected)); + + // 2. NextUp of smallest negative denormal is -0 + test = APFloat::getSmallest(APFloat::Float4E2M1FN(), true); + expected = APFloat::getZero(APFloat::Float4E2M1FN(), true); + EXPECT_EQ(test.next(false), APFloat::opOK); + EXPECT_TRUE(test.isNegZero()); + EXPECT_FALSE(test.isPosZero()); + EXPECT_TRUE(test.bitwiseIsEqual(expected)); + + // 3. nextDown of negative of largest value is the same + test = APFloat::getLargest(APFloat::Float4E2M1FN(), true); + expected = test; + EXPECT_EQ(test.next(true), APFloat::opOK); + EXPECT_FALSE(test.isInfinity()); + EXPECT_FALSE(test.isZero()); + EXPECT_FALSE(test.isNaN()); + EXPECT_TRUE(test.bitwiseIsEqual(expected)); + + // 4. 
nextDown of +0 is smallest negative denormal + test = APFloat::getZero(APFloat::Float4E2M1FN(), false); + expected = APFloat::getSmallest(APFloat::Float4E2M1FN(), true); + EXPECT_EQ(test.next(true), APFloat::opOK); + EXPECT_FALSE(test.isZero()); + EXPECT_TRUE(test.isDenormal()); + EXPECT_TRUE(test.bitwiseIsEqual(expected)); +} + #ifdef GTEST_HAS_DEATH_TEST #ifndef NDEBUG TEST(APFloatTest, Float6E3M2FNGetInfNaN) { @@ -6998,6 +7187,13 @@ TEST(APFloatTest, Float6E2M3FNGetInfNaN) { EXPECT_DEATH(APFloat::getNaN(APFloat::Float6E2M3FN()), "This floating point format does not support NaN"); } + +TEST(APFloatTest, Float4E2M1FNGetInfNaN) { + EXPECT_DEATH(APFloat::getInf(APFloat::Float4E2M1FN()), + "This floating point format does not support Inf"); + EXPECT_DEATH(APFloat::getNaN(APFloat::Float4E2M1FN()), + "This floating point format does not support NaN"); +} #endif #endif @@ -7043,6 +7239,27 @@ TEST(APFloatTest, Float6E2M3FNToDouble) { EXPECT_EQ(0x0.2p0, SmallestDenorm.convertToDouble()); } +TEST(APFloatTest, Float4E2M1FNToDouble) { + APFloat One(APFloat::Float4E2M1FN(), "1.0"); + EXPECT_EQ(1.0, One.convertToDouble()); + APFloat Two(APFloat::Float4E2M1FN(), "2.0"); + EXPECT_EQ(2.0, Two.convertToDouble()); + APFloat PosLargest = APFloat::getLargest(APFloat::Float4E2M1FN(), false); + EXPECT_EQ(6, PosLargest.convertToDouble()); + APFloat NegLargest = APFloat::getLargest(APFloat::Float4E2M1FN(), true); + EXPECT_EQ(-6, NegLargest.convertToDouble()); + APFloat PosSmallest = + APFloat::getSmallestNormalized(APFloat::Float4E2M1FN(), false); + EXPECT_EQ(0x1p0, PosSmallest.convertToDouble()); + APFloat NegSmallest = + APFloat::getSmallestNormalized(APFloat::Float4E2M1FN(), true); + EXPECT_EQ(-0x1p0, NegSmallest.convertToDouble()); + + APFloat SmallestDenorm = APFloat::getSmallest(APFloat::Float4E2M1FN(), false); + EXPECT_TRUE(SmallestDenorm.isDenormal()); + EXPECT_EQ(0x0.8p0, SmallestDenorm.convertToDouble()); +} + TEST(APFloatTest, Float6E3M2FNToFloat) { APFloat PosZero = 
APFloat::getZero(APFloat::Float6E3M2FN()); APFloat PosZeroToFloat(PosZero.convertToFloat()); @@ -7100,4 +7317,33 @@ TEST(APFloatTest, Float6E2M3FNToFloat) { EXPECT_TRUE(SmallestDenorm.isDenormal()); EXPECT_EQ(0x0.2p0, SmallestDenorm.convertToFloat()); } + +TEST(APFloatTest, Float4E2M1FNToFloat) { + APFloat PosZero = APFloat::getZero(APFloat::Float4E2M1FN()); + APFloat PosZeroToFloat(PosZero.convertToFloat()); + EXPECT_TRUE(PosZeroToFloat.isPosZero()); + APFloat NegZero = APFloat::getZero(APFloat::Float4E2M1FN(), true); + APFloat NegZeroToFloat(NegZero.convertToFloat()); + EXPECT_TRUE(NegZeroToFloat.isNegZero()); + + APFloat One(APFloat::Float4E2M1FN(), "1.0"); + EXPECT_EQ(1.0F, One.convertToFloat()); + APFloat Two(APFloat::Float4E2M1FN(), "2.0"); + EXPECT_EQ(2.0F, Two.convertToFloat()); + + APFloat PosLargest = APFloat::getLargest(APFloat::Float4E2M1FN(), false); + EXPECT_EQ(6, PosLargest.convertToFloat()); + APFloat NegLargest = APFloat::getLargest(APFloat::Float4E2M1FN(), true); + EXPECT_EQ(-6, NegLargest.convertToFloat()); + APFloat PosSmallest = + APFloat::getSmallestNormalized(APFloat::Float4E2M1FN(), false); + EXPECT_EQ(0x1p0, PosSmallest.convertToFloat()); + APFloat NegSmallest = + APFloat::getSmallestNormalized(APFloat::Float4E2M1FN(), true); + EXPECT_EQ(-0x1p0, NegSmallest.convertToFloat()); + + APFloat SmallestDenorm = APFloat::getSmallest(APFloat::Float4E2M1FN(), false); + EXPECT_TRUE(SmallestDenorm.isDenormal()); + EXPECT_EQ(0x0.8p0, SmallestDenorm.convertToFloat()); +} } // namespace From b422fa6b62160f5eeb038d816d05e039182dde56 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 14 Jun 2024 10:32:39 +0200 Subject: [PATCH 066/155] Revert "[MC/DC][Coverage] Loosen the limit of NumConds from 6 (#82448)" This broke the lit tests on Mac: https://green.lab.llvm.org/job/llvm.org/job/clang-stage1-RA/1096/ > By storing possible test vectors instead of combinations of conditions, > the restriction is dramatically relaxed. 
> > This introduces two options to `cc1`: > > * `-fmcdc-max-conditions=32767` > * `-fmcdc-max-test-vectors=2147483646` > > This change makes coverage mapping, profraw, and profdata incompatible > with Clang-18. > > - Bitmap semantics changed. It is incompatible with previous format. > - `BitmapIdx` in `Decision` points to the end of the bitmap. > - Bitmap is packed per function. > - `llvm-cov` can understand `profdata` generated by `llvm-profdata-18`. > > RFC: > https://discourse.llvm.org/t/rfc-coverage-new-algorithm-and-file-format-for-mc-dc/76798 This reverts commit 7ead2d8c7e9114b3f23666209a1654939987cb30. --- clang/docs/SourceBasedCodeCoverage.rst | 29 +---- clang/include/clang/Basic/CodeGenOptions.def | 2 - clang/include/clang/Driver/Options.td | 8 -- clang/lib/CodeGen/CodeGenPGO.cpp | 50 ++++----- clang/lib/CodeGen/CoverageMappingGen.cpp | 77 +------------ clang/lib/CodeGen/MCDCState.h | 4 +- .../CoverageMapping/branch-constfolded.cpp | 34 +++--- clang/test/CoverageMapping/logical.cpp | 8 +- clang/test/CoverageMapping/mcdc-class.cpp | 4 +- .../CoverageMapping/mcdc-error-conditions.cpp | 105 +----------------- .../mcdc-logical-scalar-ids.cpp | 30 ++--- .../mcdc-logical-stmt-ids-all.cpp | 32 +++--- .../CoverageMapping/mcdc-logical-stmt-ids.cpp | 30 ++--- .../test/CoverageMapping/mcdc-scratch-space.c | 12 +- .../CoverageMapping/mcdc-system-headers.cpp | 8 +- clang/test/Profile/c-mcdc-class.cpp | 38 +++---- clang/test/Profile/c-mcdc-logicalop-ternary.c | 18 ++- clang/test/Profile/c-mcdc-nested-ternary.c | 35 +++--- clang/test/Profile/c-mcdc-not.c | 53 +++++---- clang/test/Profile/c-mcdc.c | 63 ++++++----- llvm/docs/CoverageMappingFormat.rst | 2 +- llvm/docs/LangRef.rst | 18 +-- llvm/include/llvm/IR/IntrinsicInst.h | 11 +- .../ProfileData/Coverage/CoverageMapping.h | 3 +- llvm/include/llvm/ProfileData/InstrProf.h | 2 +- .../ProfileData/Coverage/CoverageMapping.cpp | 34 ++---- .../Instrumentation/InstrProfiling.cpp | 15 +-- 
.../InstrProfiling/inline-data-var-create.ll | 6 +- .../Instrumentation/InstrProfiling/mcdc.ll | 3 +- .../llvm-cov/Inputs/mcdc-const-folding.o | Bin 34528 -> 34504 bytes .../Inputs/mcdc-const-folding.proftext | 36 +++--- llvm/test/tools/llvm-cov/Inputs/mcdc-const.o | Bin 5208 -> 5208 bytes .../tools/llvm-cov/Inputs/mcdc-const.proftext | 6 +- .../tools/llvm-cov/Inputs/mcdc-general-18.o | Bin 6456 -> 0 bytes .../llvm-cov/Inputs/mcdc-general-18.profdata | Bin 888 -> 0 bytes .../test/tools/llvm-cov/Inputs/mcdc-general.o | Bin 6544 -> 6456 bytes .../llvm-cov/Inputs/mcdc-general.proftext | 11 +- llvm/test/tools/llvm-cov/Inputs/mcdc-macro.o | Bin 6408 -> 6480 bytes .../tools/llvm-cov/Inputs/mcdc-macro.proftext | 15 ++- llvm/test/tools/llvm-cov/Inputs/mcdc-maxbs.o | Bin 4112 -> 4112 bytes llvm/test/tools/llvm-cov/mcdc-general-18.test | 20 ---- .../ProfileData/CoverageMappingTest.cpp | 4 +- 42 files changed, 297 insertions(+), 529 deletions(-) delete mode 100644 llvm/test/tools/llvm-cov/Inputs/mcdc-general-18.o delete mode 100644 llvm/test/tools/llvm-cov/Inputs/mcdc-general-18.profdata delete mode 100644 llvm/test/tools/llvm-cov/mcdc-general-18.test diff --git a/clang/docs/SourceBasedCodeCoverage.rst b/clang/docs/SourceBasedCodeCoverage.rst index 73910e134a5891..cee706289284db 100644 --- a/clang/docs/SourceBasedCodeCoverage.rst +++ b/clang/docs/SourceBasedCodeCoverage.rst @@ -484,31 +484,10 @@ MC/DC Instrumentation --------------------- When instrumenting for Modified Condition/Decision Coverage (MC/DC) using the -clang option ``-fcoverage-mcdc``, there are two hard limits. - -The maximum number of terms is limited to 32767, which is practical for -handwritten expressions. To be more restrictive in order to enforce coding rules, -use ``-Xclang -fmcdc-max-conditions=n``. Expressions with exceeded condition -counts ``n`` will generate warnings and will be excluded in the MC/DC coverage. 
- -The number of test vectors (the maximum number of possible combinations of -expressions) is limited to 2,147,483,646. In this case, approximately -256MiB (==2GiB/8) is used to record test vectors. - -To reduce memory usage, users can limit the maximum number of test vectors per -expression with ``-Xclang -fmcdc-max-test-vectors=m``. -If the number of test vectors resulting from the analysis of an expression -exceeds ``m``, a warning will be issued and the expression will be excluded -from the MC/DC coverage. - -The number of test vectors ``m``, for ``n`` terms in an expression, can be -``m <= 2^n`` in the theoretical worst case, but is usually much smaller. -In simple cases, such as expressions consisting of a sequence of single -operators, ``m == n+1``. For example, ``(a && b && c && d && e && f && g)`` -requires 8 test vectors. - -Expressions such as ``((a0 && b0) || (a1 && b1) || ...)`` can cause the -number of test vectors to increase exponentially. +clang option ``-fcoverage-mcdc``, users are limited to at most **six** leaf-level +conditions in a boolean expression. A warning will be generated for boolean +expressions that contain more than six, and they will not be instrumented for +MC/DC. Also, if a boolean expression is embedded in the nest of another boolean expression but separated by a non-logical operator, this is also not supported. diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index e3f6da4a84f694..7ffc40a00504fb 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -223,8 +223,6 @@ CODEGENOPT(CoverageMapping , 1, 0) ///< Generate coverage mapping regions to CODEGENOPT(DumpCoverageMapping , 1, 0) ///< Dump the generated coverage mapping ///< regions. CODEGENOPT(MCDCCoverage , 1, 0) ///< Enable MC/DC code coverage criteria. -VALUE_CODEGENOPT(MCDCMaxConds, 16, 32767) ///< MC/DC Maximum conditions. 
-VALUE_CODEGENOPT(MCDCMaxTVs, 32, 0x7FFFFFFE) ///< MC/DC Maximum test vectors. /// If -fpcc-struct-return or -freg-struct-return is specified. ENUM_CODEGENOPT(StructReturnConvention, StructReturnConventionKind, 2, SRCK_Default) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index d523e8c611f0fe..96e522720cec87 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1790,14 +1790,6 @@ defm mcdc_coverage : BoolFOption<"coverage-mcdc", "Enable MC/DC criteria when generating code coverage">, NegFlag, BothFlags<[], [ClangOption, CLOption]>>; -def fmcdc_max_conditions_EQ : Joined<["-"], "fmcdc-max-conditions=">, - Group, Visibility<[CC1Option]>, - HelpText<"Maximum number of conditions in MC/DC coverage">, - MarshallingInfoInt, "32767">; -def fmcdc_max_test_vectors_EQ : Joined<["-"], "fmcdc-max-test-vectors=">, - Group, Visibility<[CC1Option]>, - HelpText<"Maximum number of test vectors in MC/DC coverage">, - MarshallingInfoInt, "0x7FFFFFFE">; def fprofile_generate : Flag<["-"], "fprofile-generate">, Group, Visibility<[ClangOption, CLOption]>, HelpText<"Generate instrumented code to collect execution counts into default.profraw (overridden by LLVM_PROFILE_FILE env var)">; diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index 59139e342de886..db8e6f55302adc 100644 --- a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -167,6 +167,8 @@ struct MapRegionCounters : public RecursiveASTVisitor { PGOHash Hash; /// The map of statements to counters. llvm::DenseMap &CounterMap; + /// The next bitmap byte index to assign. + unsigned NextMCDCBitmapIdx; /// The state of MC/DC Coverage in this function. MCDC::State &MCDCState; /// Maximum number of supported MC/DC conditions in a boolean expression. 
@@ -181,7 +183,7 @@ struct MapRegionCounters : public RecursiveASTVisitor { MCDC::State &MCDCState, unsigned MCDCMaxCond, DiagnosticsEngine &Diag) : NextCounter(0), Hash(HashVersion), CounterMap(CounterMap), - MCDCState(MCDCState), MCDCMaxCond(MCDCMaxCond), + NextMCDCBitmapIdx(0), MCDCState(MCDCState), MCDCMaxCond(MCDCMaxCond), ProfileVersion(ProfileVersion), Diag(Diag) {} // Blocks and lambdas are handled as separate functions, so we need not @@ -312,8 +314,11 @@ struct MapRegionCounters : public RecursiveASTVisitor { return true; } - // Otherwise, allocate the Decision. - MCDCState.DecisionByStmt[BinOp].BitmapIdx = 0; + // Otherwise, allocate the number of bytes required for the bitmap + // based on the number of conditions. Must be at least 1-byte long. + MCDCState.DecisionByStmt[BinOp].BitmapIdx = NextMCDCBitmapIdx; + unsigned SizeInBits = std::max(1L << NumCond, CHAR_BIT); + NextMCDCBitmapIdx += SizeInBits / CHAR_BIT; } return true; } @@ -1078,9 +1083,7 @@ void CodeGenPGO::mapRegionCounters(const Decl *D) { // for most embedded applications. Setting a maximum value prevents the // bitmap footprint from growing too large without the user's knowledge. In // the future, this value could be adjusted with a command-line option. - unsigned MCDCMaxConditions = - (CGM.getCodeGenOpts().MCDCCoverage ? CGM.getCodeGenOpts().MCDCMaxConds - : 0); + unsigned MCDCMaxConditions = (CGM.getCodeGenOpts().MCDCCoverage) ? 6 : 0; RegionCounterMap.reset(new llvm::DenseMap); RegionMCDCState.reset(new MCDC::State); @@ -1096,6 +1099,7 @@ void CodeGenPGO::mapRegionCounters(const Decl *D) { Walker.TraverseDecl(const_cast(CD)); assert(Walker.NextCounter > 0 && "no entry counter mapped for decl"); NumRegionCounters = Walker.NextCounter; + RegionMCDCState->BitmapBytes = Walker.NextMCDCBitmapIdx; FunctionHash = Walker.Hash.finalize(); } @@ -1228,7 +1232,7 @@ void CodeGenPGO::emitMCDCParameters(CGBuilderTy &Builder) { // anything. 
llvm::Value *Args[3] = {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), Builder.getInt64(FunctionHash), - Builder.getInt32(RegionMCDCState->BitmapBits)}; + Builder.getInt32(RegionMCDCState->BitmapBytes)}; Builder.CreateCall( CGM.getIntrinsic(llvm::Intrinsic::instrprof_mcdc_parameters), Args); } @@ -1246,11 +1250,6 @@ void CodeGenPGO::emitMCDCTestVectorBitmapUpdate(CGBuilderTy &Builder, if (DecisionStateIter == RegionMCDCState->DecisionByStmt.end()) return; - // Don't create tvbitmap_update if the record is allocated but excluded. - // Or `bitmap |= (1 << 0)` would be wrongly executed to the next bitmap. - if (DecisionStateIter->second.Indices.size() == 0) - return; - // Extract the offset of the global bitmap associated with this expression. unsigned MCDCTestVectorBitmapOffset = DecisionStateIter->second.BitmapIdx; auto *I8PtrTy = llvm::PointerType::getUnqual(CGM.getLLVMContext()); @@ -1262,7 +1261,7 @@ void CodeGenPGO::emitMCDCTestVectorBitmapUpdate(CGBuilderTy &Builder, // index represents an executed test vector. llvm::Value *Args[5] = {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), Builder.getInt64(FunctionHash), - Builder.getInt32(0), // Unused + Builder.getInt32(RegionMCDCState->BitmapBytes), Builder.getInt32(MCDCTestVectorBitmapOffset), MCDCCondBitmapAddr.emitRawPointer(CGF)}; Builder.CreateCall( @@ -1306,22 +1305,19 @@ void CodeGenPGO::emitMCDCCondBitmapUpdate(CGBuilderTy &Builder, const Expr *S, // Extract the ID of the condition we are setting in the bitmap. const auto &Branch = BranchStateIter->second; assert(Branch.ID >= 0 && "Condition has no ID!"); - assert(Branch.DecisionStmt); - - // Cancel the emission if the Decision is erased after the allocation. 
- const auto DecisionIter = - RegionMCDCState->DecisionByStmt.find(Branch.DecisionStmt); - if (DecisionIter == RegionMCDCState->DecisionByStmt.end()) - return; - const auto &TVIdxs = DecisionIter->second.Indices[Branch.ID]; + auto *I8PtrTy = llvm::PointerType::getUnqual(CGM.getLLVMContext()); - auto *CurTV = Builder.CreateLoad(MCDCCondBitmapAddr, - "mcdc." + Twine(Branch.ID + 1) + ".cur"); - auto *NewTV = Builder.CreateAdd(CurTV, Builder.getInt32(TVIdxs[true])); - NewTV = Builder.CreateSelect( - Val, NewTV, Builder.CreateAdd(CurTV, Builder.getInt32(TVIdxs[false]))); - Builder.CreateStore(NewTV, MCDCCondBitmapAddr); + // Emit intrinsic that updates a dedicated temporary value on the stack after + // a condition is evaluated. After the set of conditions has been updated, + // the resulting value is used to update the boolean expression's bitmap. + llvm::Value *Args[5] = {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), + Builder.getInt64(FunctionHash), + Builder.getInt32(Branch.ID), + MCDCCondBitmapAddr.emitRawPointer(CGF), Val}; + Builder.CreateCall( + CGM.getIntrinsic(llvm::Intrinsic::instrprof_mcdc_condbitmap_update), + Args); } void CodeGenPGO::setValueProfilingFlag(llvm::Module &M) { diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp index ba483d857d5f46..6ce2d32dd292ed 100644 --- a/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -195,10 +195,6 @@ class SourceMappingRegion { return std::holds_alternative(MCDCParams); } - const auto &getMCDCBranchParams() const { - return mcdc::getParams(MCDCParams); - } - bool isMCDCDecision() const { return std::holds_alternative(MCDCParams); } @@ -208,8 +204,6 @@ class SourceMappingRegion { } const mcdc::Parameters &getMCDCParams() const { return MCDCParams; } - - void resetMCDCParams() { MCDCParams = mcdc::Parameters(); } }; /// Spelling locations for the start and end of a source region. 
@@ -754,7 +748,6 @@ struct MCDCCoverageBuilder { llvm::SmallVector DecisionStack; MCDC::State &MCDCState; - const Stmt *DecisionStmt = nullptr; mcdc::ConditionID NextID = 0; bool NotMapped = false; @@ -784,8 +777,7 @@ struct MCDCCoverageBuilder { /// Set the given condition's ID. void setCondID(const Expr *Cond, mcdc::ConditionID ID) { - MCDCState.BranchByStmt[CodeGenFunction::stripCond(Cond)] = {ID, - DecisionStmt}; + MCDCState.BranchByStmt[CodeGenFunction::stripCond(Cond)].ID = ID; } /// Return the ID of a given condition. @@ -816,11 +808,6 @@ struct MCDCCoverageBuilder { if (NotMapped) return; - if (NextID == 0) { - DecisionStmt = E; - assert(MCDCState.DecisionByStmt.contains(E)); - } - const mcdc::ConditionIDs &ParentDecision = DecisionStack.back(); // If the operator itself has an assigned ID, this means it represents a @@ -2135,41 +2122,13 @@ struct CounterCoverageMappingBuilder subtractCounters(ParentCount, TrueCount)); } - void createOrCancelDecision(const BinaryOperator *E, unsigned Since) { + void createDecision(const BinaryOperator *E) { unsigned NumConds = MCDCBuilder.getTotalConditionsAndReset(E); if (NumConds == 0) return; - // Extract [ID, Conds] to construct the graph. - llvm::SmallVector CondIDs(NumConds); - for (const auto &SR : ArrayRef(SourceRegions).slice(Since)) { - if (SR.isMCDCBranch()) { - auto [ID, Conds] = SR.getMCDCBranchParams(); - CondIDs[ID] = Conds; - } - } - - // Construct the graph and calculate `Indices`. - mcdc::TVIdxBuilder Builder(CondIDs); - unsigned NumTVs = Builder.NumTestVectors; - unsigned MaxTVs = CVM.getCodeGenModule().getCodeGenOpts().MCDCMaxTVs; - assert(MaxTVs < mcdc::TVIdxBuilder::HardMaxTVs); - - if (NumTVs > MaxTVs) { - // NumTVs exceeds MaxTVs -- warn and cancel the Decision. 
- cancelDecision(E, Since, NumTVs, MaxTVs); - return; - } - - // Update the state for CodeGenPGO - assert(MCDCState.DecisionByStmt.contains(E)); - MCDCState.DecisionByStmt[E] = { - MCDCState.BitmapBits, // Top - std::move(Builder.Indices), - }; - auto DecisionParams = mcdc::DecisionParameters{ - MCDCState.BitmapBits += NumTVs, // Tail + MCDCState.DecisionByStmt[E].BitmapIdx, NumConds, }; @@ -2177,28 +2136,6 @@ struct CounterCoverageMappingBuilder createDecisionRegion(E, DecisionParams); } - // Warn and cancel the Decision. - void cancelDecision(const BinaryOperator *E, unsigned Since, int NumTVs, - int MaxTVs) { - auto &Diag = CVM.getCodeGenModule().getDiags(); - unsigned DiagID = - Diag.getCustomDiagID(DiagnosticsEngine::Warning, - "unsupported MC/DC boolean expression; " - "number of test vectors (%0) exceeds max (%1). " - "Expression will not be covered"); - Diag.Report(E->getBeginLoc(), DiagID) << NumTVs << MaxTVs; - - // Restore MCDCBranch to Branch. - for (auto &SR : MutableArrayRef(SourceRegions).slice(Since)) { - assert(!SR.isMCDCDecision() && "Decision shouldn't be seen here"); - if (SR.isMCDCBranch()) - SR.resetMCDCParams(); - } - - // Tell CodeGenPGO not to instrument. - MCDCState.DecisionByStmt.erase(E); - } - /// Check if E belongs to system headers. bool isExprInSystemHeader(const BinaryOperator *E) const { return (!SystemHeadersCoverage && @@ -2215,8 +2152,6 @@ struct CounterCoverageMappingBuilder bool IsRootNode = MCDCBuilder.isIdle(); - unsigned SourceRegionsSince = SourceRegions.size(); - // Keep track of Binary Operator and assign MCDC condition IDs. MCDCBuilder.pushAndAssignIDs(E); @@ -2255,7 +2190,7 @@ struct CounterCoverageMappingBuilder // Create MCDC Decision Region if at top-level (root). if (IsRootNode) - createOrCancelDecision(E, SourceRegionsSince); + createDecision(E); } // Determine whether the right side of OR operation need to be visited. 
@@ -2276,8 +2211,6 @@ struct CounterCoverageMappingBuilder bool IsRootNode = MCDCBuilder.isIdle(); - unsigned SourceRegionsSince = SourceRegions.size(); - // Keep track of Binary Operator and assign MCDC condition IDs. MCDCBuilder.pushAndAssignIDs(E); @@ -2320,7 +2253,7 @@ struct CounterCoverageMappingBuilder // Create MCDC Decision Region if at top-level (root). if (IsRootNode) - createOrCancelDecision(E, SourceRegionsSince); + createDecision(E); } void VisitLambdaExpr(const LambdaExpr *LE) { diff --git a/clang/lib/CodeGen/MCDCState.h b/clang/lib/CodeGen/MCDCState.h index e0dd28ff90ed12..29b6f0fb681aa0 100644 --- a/clang/lib/CodeGen/MCDCState.h +++ b/clang/lib/CodeGen/MCDCState.h @@ -27,18 +27,16 @@ using namespace llvm::coverage::mcdc; /// Per-Function MC/DC state struct State { - unsigned BitmapBits = 0; + unsigned BitmapBytes = 0; struct Decision { unsigned BitmapIdx; - llvm::SmallVector> Indices; }; llvm::DenseMap DecisionByStmt; struct Branch { ConditionID ID; - const Stmt *DecisionStmt; }; llvm::DenseMap BranchByStmt; diff --git a/clang/test/CoverageMapping/branch-constfolded.cpp b/clang/test/CoverageMapping/branch-constfolded.cpp index 1e7e32808e8382..c8755d5d752b63 100644 --- a/clang/test/CoverageMapping/branch-constfolded.cpp +++ b/clang/test/CoverageMapping/branch-constfolded.cpp @@ -4,93 +4,93 @@ // RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++11 -fcoverage-mcdc -fprofile-instrument=clang -fcoverage-mapping -dump-coverage-mapping -emit-llvm-only -main-file-name branch-constfolded.cpp %s | FileCheck %s -check-prefix=MCDC // CHECK-LABEL: _Z6fand_0b: -bool fand_0(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:20 = M:3, C:2 +bool fand_0(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:20 = M:0, C:2 return false && a; // CHECK: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:15 = 0, 0 } // CHECK: Branch,File 0, [[@LINE-1]]:19 -> [[@LINE-1]]:20 = #2, (#1 - #2) // CHECK-LABEL: _Z6fand_1b: -bool fand_1(bool a) { // MCDC: 
Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:19 = M:3, C:2 +bool fand_1(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:19 = M:0, C:2 return a && true; // CHECK: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:11 = #1, (#0 - #1) } // CHECK: Branch,File 0, [[@LINE-1]]:15 -> [[@LINE-1]]:19 = 0, 0 // CHECK-LABEL: _Z6fand_2bb: -bool fand_2(bool a, bool b) {// MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:25 = M:4, C:3 +bool fand_2(bool a, bool b) {// MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:25 = M:0, C:3 return false && a && b; // CHECK: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:15 = 0, 0 } // CHECK: Branch,File 0, [[@LINE-1]]:19 -> [[@LINE-1]]:20 = #4, (#3 - #4) // CHECK: Branch,File 0, [[@LINE-2]]:24 -> [[@LINE-2]]:25 = #2, (#1 - #2) // CHECK-LABEL: _Z6fand_3bb: -bool fand_3(bool a, bool b) {// MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:24 = M:4, C:3 +bool fand_3(bool a, bool b) {// MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:24 = M:0, C:3 return a && true && b; // CHECK: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:11 = #3, (#0 - #3) } // CHECK: Branch,File 0, [[@LINE-1]]:15 -> [[@LINE-1]]:19 = 0, 0 // CHECK: Branch,File 0, [[@LINE-2]]:23 -> [[@LINE-2]]:24 = #2, (#1 - #2) // CHECK-LABEL: _Z6fand_4bb: -bool fand_4(bool a, bool b) {// MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:25 = M:4, C:3 +bool fand_4(bool a, bool b) {// MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:25 = M:0, C:3 return a && b && false; // CHECK: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:11 = #3, (#0 - #3) } // CHECK: Branch,File 0, [[@LINE-1]]:15 -> [[@LINE-1]]:16 = #4, (#3 - #4) // CHECK: Branch,File 0, [[@LINE-2]]:20 -> [[@LINE-2]]:25 = 0, 0 // CHECK-LABEL: _Z6fand_5b: -bool fand_5(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:23 = M:3, C:2 +bool fand_5(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:23 = M:0, C:2 return false && true; // CHECK: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:15 = 0, 0 } 
// CHECK: Branch,File 0, [[@LINE-1]]:19 -> [[@LINE-1]]:23 = 0, 0 // CHECK-LABEL: _Z6fand_6b: -bool fand_6(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:19 = M:3, C:2 +bool fand_6(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:19 = M:0, C:2 return true && a; // CHECK: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:14 = 0, 0 } // CHECK: Branch,File 0, [[@LINE-1]]:18 -> [[@LINE-1]]:19 = #2, (#1 - #2) // CHECK-LABEL: _Z6fand_7b: -bool fand_7(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:20 = M:3, C:2 +bool fand_7(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:20 = M:0, C:2 return a && false; // CHECK: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:11 = #1, (#0 - #1) } // CHECK: Branch,File 0, [[@LINE-1]]:15 -> [[@LINE-1]]:20 = 0, 0 // CHECK-LABEL: _Z5for_0b: -bool for_0(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:19 = M:3, C:2 +bool for_0(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:19 = M:0, C:2 return true || a; // CHECK: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:14 = 0, 0 } // CHECK: Branch,File 0, [[@LINE-1]]:18 -> [[@LINE-1]]:19 = (#1 - #2), #2 // CHECK-LABEL: _Z5for_1b: -bool for_1(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:20 = M:3, C:2 +bool for_1(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:20 = M:0, C:2 return a || false; // CHECK: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:11 = (#0 - #1), #1 } // CHECK: Branch,File 0, [[@LINE-1]]:15 -> [[@LINE-1]]:20 = 0, 0 // CHECK-LABEL: _Z5for_2bb: -bool for_2(bool a, bool b) {// MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:24 = M:4, C:3 +bool for_2(bool a, bool b) {// MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:24 = M:0, C:3 return true || a || b; // CHECK: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:14 = 0, 0 } // CHECK: Branch,File 0, [[@LINE-1]]:18 -> [[@LINE-1]]:19 = (#3 - #4), #4 // CHECK: Branch,File 0, [[@LINE-2]]:23 -> [[@LINE-2]]:24 = (#1 - #2), #2 // 
CHECK-LABEL: _Z5for_3bb: -bool for_3(bool a, bool b) {// MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:25 = M:4, C:3 +bool for_3(bool a, bool b) {// MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:25 = M:0, C:3 return a || false || b; // CHECK: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:11 = (#0 - #3), #3 } // CHECK: Branch,File 0, [[@LINE-1]]:15 -> [[@LINE-1]]:20 = 0, 0 // CHECK: Branch,File 0, [[@LINE-2]]:24 -> [[@LINE-2]]:25 = (#1 - #2), #2 // CHECK-LABEL: _Z5for_4bb: -bool for_4(bool a, bool b) {// MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:24 = M:4, C:3 +bool for_4(bool a, bool b) {// MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:24 = M:0, C:3 return a || b || true; // CHECK: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:11 = (#0 - #3), #3 } // CHECK: Branch,File 0, [[@LINE-1]]:15 -> [[@LINE-1]]:16 = (#3 - #4), #4 // CHECK: Branch,File 0, [[@LINE-2]]:20 -> [[@LINE-2]]:24 = 0, 0 // CHECK-LABEL: _Z5for_5b: -bool for_5(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:23 = M:3, C:2 +bool for_5(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:23 = M:0, C:2 return true || false; // CHECK: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:14 = 0, 0 } // CHECK: Branch,File 0, [[@LINE-1]]:18 -> [[@LINE-1]]:23 = 0, 0 // CHECK-LABEL: _Z5for_6b: -bool for_6(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:20 = M:3, C:2 +bool for_6(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:20 = M:0, C:2 return false || a; // CHECK: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:15 = 0, 0 } // CHECK: Branch,File 0, [[@LINE-1]]:19 -> [[@LINE-1]]:20 = (#1 - #2), #2 // CHECK-LABEL: _Z5for_7b: -bool for_7(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:19 = M:3, C:2 +bool for_7(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:19 = M:0, C:2 return a || true; // CHECK: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:11 = (#0 - #1), #1 } // CHECK: Branch,File 0, [[@LINE-1]]:15 -> 
[[@LINE-1]]:19 = 0, 0 // CHECK-LABEL: _Z5for_8b: -bool for_8(bool a) { // MCDC: Decision,File 0, [[@LINE+3]]:7 -> [[@LINE+3]]:20 = M:3, C:2 +bool for_8(bool a) { // MCDC: Decision,File 0, [[@LINE+3]]:7 -> [[@LINE+3]]:20 = M:0, C:2 // CHECK: Branch,File 0, [[@LINE+2]]:7 -> [[@LINE+2]]:11 = 0, 0 // CHECK: Branch,File 0, [[@LINE+1]]:15 -> [[@LINE+1]]:20 = 0, 0 if (true && false) diff --git a/clang/test/CoverageMapping/logical.cpp b/clang/test/CoverageMapping/logical.cpp index 2a22d6cca45189..7de59e1429808a 100644 --- a/clang/test/CoverageMapping/logical.cpp +++ b/clang/test/CoverageMapping/logical.cpp @@ -3,22 +3,22 @@ int main() { // CHECK: File 0, [[@LINE]]:12 -> [[@LINE+23]]:2 = #0 bool bt = true; - bool bf = false; // MCDC: Decision,File 0, [[@LINE+1]]:12 -> [[@LINE+1]]:20 = M:3, C:2 + bool bf = false; // MCDC: Decision,File 0, [[@LINE+1]]:12 -> [[@LINE+1]]:20 = M:0, C:2 bool a = bt && bf; // CHECK-NEXT: File 0, [[@LINE]]:12 -> [[@LINE]]:14 = #0 // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:12 -> [[@LINE-1]]:14 = #1, (#0 - #1) // CHECK-NEXT: File 0, [[@LINE-2]]:18 -> [[@LINE-2]]:20 = #1 // CHECK-NEXT: Branch,File 0, [[@LINE-3]]:18 -> [[@LINE-3]]:20 = #2, (#1 - #2) - // MCDC: Decision,File 0, [[@LINE+1]]:7 -> [[@LINE+2]]:9 = M:6, C:2 + // MCDC: Decision,File 0, [[@LINE+1]]:7 -> [[@LINE+2]]:9 = M:1, C:2 a = bt && // CHECK-NEXT: File 0, [[@LINE]]:7 -> [[@LINE]]:9 = #0 bf; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:7 -> [[@LINE-1]]:9 = #3, (#0 - #3) // CHECK-NEXT: File 0, [[@LINE-1]]:7 -> [[@LINE-1]]:9 = #3 // CHECK-NEXT: Branch,File 0, [[@LINE-2]]:7 -> [[@LINE-2]]:9 = #4, (#3 - #4) - // MCDC: Decision,File 0, [[@LINE+1]]:7 -> [[@LINE+1]]:15 = M:9, C:2 + // MCDC: Decision,File 0, [[@LINE+1]]:7 -> [[@LINE+1]]:15 = M:2, C:2 a = bf || bt; // CHECK-NEXT: File 0, [[@LINE]]:7 -> [[@LINE]]:9 = #0 // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:7 -> [[@LINE-1]]:9 = (#0 - #5), #5 // CHECK-NEXT: File 0, [[@LINE-2]]:13 -> [[@LINE-2]]:15 = #5 // CHECK-NEXT: Branch,File 0, [[@LINE-3]]:13 -> 
[[@LINE-3]]:15 = (#5 - #6), #6 - // MCDC: Decision,File 0, [[@LINE+1]]:7 -> [[@LINE+2]]:9 = M:12, C:2 + // MCDC: Decision,File 0, [[@LINE+1]]:7 -> [[@LINE+2]]:9 = M:3, C:2 a = bf || // CHECK-NEXT: File 0, [[@LINE]]:7 -> [[@LINE]]:9 = #0 bt; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:7 -> [[@LINE-1]]:9 = (#0 - #7), #7 // CHECK-NEXT: File 0, [[@LINE-1]]:7 -> [[@LINE-1]]:9 = #7 diff --git a/clang/test/CoverageMapping/mcdc-class.cpp b/clang/test/CoverageMapping/mcdc-class.cpp index 7b2937830be766..dcf6123ee0fc74 100644 --- a/clang/test/CoverageMapping/mcdc-class.cpp +++ b/clang/test/CoverageMapping/mcdc-class.cpp @@ -23,9 +23,9 @@ Value::~Value(void) { bar(); } -// CHECK-LABEL: Decision,File 0, 18:7 -> 18:31 = M:3, C:2 +// CHECK-LABEL: Decision,File 0, 18:7 -> 18:31 = M:0, C:2 // CHECK-NEXT: Branch,File 0, 18:7 -> 18:17 = (#0 - #2), #2 [1,0,2] // CHECK: Branch,File 0, 18:21 -> 18:31 = (#2 - #3), #3 [2,0,0] -// CHECK-LABEL: Decision,File 0, 22:7 -> 22:31 = M:3, C:2 +// CHECK-LABEL: Decision,File 0, 22:7 -> 22:31 = M:0, C:2 // CHECK-NEXT: Branch,File 0, 22:7 -> 22:17 = (#0 - #2), #2 [1,0,2] // CHECK: Branch,File 0, 22:21 -> 22:31 = (#2 - #3), #3 [2,0,0] diff --git a/clang/test/CoverageMapping/mcdc-error-conditions.cpp b/clang/test/CoverageMapping/mcdc-error-conditions.cpp index 8f5d6bd66897c9..d34ed693434795 100644 --- a/clang/test/CoverageMapping/mcdc-error-conditions.cpp +++ b/clang/test/CoverageMapping/mcdc-error-conditions.cpp @@ -1,108 +1,7 @@ -// RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++11 -fcoverage-mcdc -fprofile-instrument=clang -fcoverage-mapping -dump-coverage-mapping -emit-llvm-only %s 2>&1| FileCheck %s --check-prefixes=CHECK,COND7,TV - -// RUN: %clang_cc1 -fmcdc-max-test-vectors=8 -triple %itanium_abi_triple -std=c++11 -fcoverage-mcdc -fprofile-instrument=clang -fcoverage-mapping -dump-coverage-mapping -emit-llvm-only %s 2>&1| FileCheck %s --check-prefixes=CHECK,COND7,TV -// RUN: %clang_cc1 -fmcdc-max-test-vectors=7 -triple %itanium_abi_triple 
-std=c++11 -fcoverage-mcdc -fprofile-instrument=clang -fcoverage-mapping -dump-coverage-mapping -emit-llvm-only %s 2>&1| FileCheck %s --check-prefixes=CHECK,TV7,TV - -// RUN: %clang_cc1 -fmcdc-max-conditions=287 -triple %itanium_abi_triple -std=c++11 -fcoverage-mcdc -fprofile-instrument=clang -fcoverage-mapping -dump-coverage-mapping -emit-llvm-only %s 2>&1| FileCheck %s --check-prefixes=CHECK,COND7,TV -// RUN: %clang_cc1 -fmcdc-max-conditions=286 -triple %itanium_abi_triple -std=c++11 -fcoverage-mcdc -fprofile-instrument=clang -fcoverage-mapping -dump-coverage-mapping -emit-llvm-only %s 2>&1| FileCheck %s --check-prefixes=CHECK,COND7,COND -// RUN: %clang_cc1 -fmcdc-max-conditions=7 -triple %itanium_abi_triple -std=c++11 -fcoverage-mcdc -fprofile-instrument=clang -fcoverage-mapping -dump-coverage-mapping -emit-llvm-only %s 2>&1| FileCheck %s --check-prefixes=CHECK,COND7,COND -// RUN: %clang_cc1 -fmcdc-max-conditions=6 -triple %itanium_abi_triple -std=c++11 -fcoverage-mcdc -fprofile-instrument=clang -fcoverage-mapping -dump-coverage-mapping -emit-llvm-only %s 2>&1| FileCheck %s --check-prefixes=CHECK,COND6,COND +// RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++11 -fcoverage-mcdc -fprofile-instrument=clang -fcoverage-mapping -dump-coverage-mapping -emit-llvm-only %s 2>&1| FileCheck %s bool func_conditions(bool a, bool b, bool c, bool d, bool e, bool f, bool g) { - // TV7: :[[@LINE+2]]:10: warning: unsupported MC/DC boolean expression; number of test vectors (8) exceeds max - // COND6: :[[@LINE+1]]:10: warning: unsupported MC/DC boolean expression; number of conditions (7) exceeds max return a && b && c && d && e && f && g; } -// From clang-tidy/misc/MisleadingIdentifier.cpp -bool func_isR(unsigned CP) { - // TV: :[[@LINE+2]]:10: warning: unsupported MC/DC boolean expression; number of test vectors (2147483647) exceeds max - // COND: :[[@LINE+1]]:10: warning: unsupported MC/DC boolean expression; number of conditions (287) exceeds max - return (CP == 0x0590) || 
(CP == 0x05BE) || (CP == 0x05C0) || (CP == 0x05C3) || - (CP == 0x05C6) || (0x05C8 <= CP && CP <= 0x05CF) || - (0x05D0 <= CP && CP <= 0x05EA) || (0x05EB <= CP && CP <= 0x05EE) || - (0x05EF <= CP && CP <= 0x05F2) || (0x05F3 <= CP && CP <= 0x05F4) || - (0x05F5 <= CP && CP <= 0x05FF) || (0x07C0 <= CP && CP <= 0x07C9) || - (0x07CA <= CP && CP <= 0x07EA) || (0x07F4 <= CP && CP <= 0x07F5) || - (CP == 0x07FA) || (0x07FB <= CP && CP <= 0x07FC) || - (0x07FE <= CP && CP <= 0x07FF) || (0x0800 <= CP && CP <= 0x0815) || - (CP == 0x081A) || (CP == 0x0824) || (CP == 0x0828) || - (0x082E <= CP && CP <= 0x082F) || (0x0830 <= CP && CP <= 0x083E) || - (CP == 0x083F) || (0x0840 <= CP && CP <= 0x0858) || - (0x085C <= CP && CP <= 0x085D) || (CP == 0x085E) || (CP == 0x085F) || - (CP == 0x200F) || (CP == 0xFB1D) || (0xFB1F <= CP && CP <= 0xFB28) || - (0xFB2A <= CP && CP <= 0xFB36) || (CP == 0xFB37) || - (0xFB38 <= CP && CP <= 0xFB3C) || (CP == 0xFB3D) || (CP == 0xFB3E) || - (CP == 0xFB3F) || (0xFB40 <= CP && CP <= 0xFB41) || (CP == 0xFB42) || - (0xFB43 <= CP && CP <= 0xFB44) || (CP == 0xFB45) || - (0xFB46 <= CP && CP <= 0xFB4F) || (0x10800 <= CP && CP <= 0x10805) || - (0x10806 <= CP && CP <= 0x10807) || (CP == 0x10808) || - (CP == 0x10809) || (0x1080A <= CP && CP <= 0x10835) || - (CP == 0x10836) || (0x10837 <= CP && CP <= 0x10838) || - (0x10839 <= CP && CP <= 0x1083B) || (CP == 0x1083C) || - (0x1083D <= CP && CP <= 0x1083E) || (0x1083F <= CP && CP <= 0x10855) || - (CP == 0x10856) || (CP == 0x10857) || - (0x10858 <= CP && CP <= 0x1085F) || (0x10860 <= CP && CP <= 0x10876) || - (0x10877 <= CP && CP <= 0x10878) || (0x10879 <= CP && CP <= 0x1087F) || - (0x10880 <= CP && CP <= 0x1089E) || (0x1089F <= CP && CP <= 0x108A6) || - (0x108A7 <= CP && CP <= 0x108AF) || (0x108B0 <= CP && CP <= 0x108DF) || - (0x108E0 <= CP && CP <= 0x108F2) || (CP == 0x108F3) || - (0x108F4 <= CP && CP <= 0x108F5) || (0x108F6 <= CP && CP <= 0x108FA) || - (0x108FB <= CP && CP <= 0x108FF) || (0x10900 <= CP && CP <= 0x10915) 
|| - (0x10916 <= CP && CP <= 0x1091B) || (0x1091C <= CP && CP <= 0x1091E) || - (0x10920 <= CP && CP <= 0x10939) || (0x1093A <= CP && CP <= 0x1093E) || - (CP == 0x1093F) || (0x10940 <= CP && CP <= 0x1097F) || - (0x10980 <= CP && CP <= 0x109B7) || (0x109B8 <= CP && CP <= 0x109BB) || - (0x109BC <= CP && CP <= 0x109BD) || (0x109BE <= CP && CP <= 0x109BF) || - (0x109C0 <= CP && CP <= 0x109CF) || (0x109D0 <= CP && CP <= 0x109D1) || - (0x109D2 <= CP && CP <= 0x109FF) || (CP == 0x10A00) || - (CP == 0x10A04) || (0x10A07 <= CP && CP <= 0x10A0B) || - (0x10A10 <= CP && CP <= 0x10A13) || (CP == 0x10A14) || - (0x10A15 <= CP && CP <= 0x10A17) || (CP == 0x10A18) || - (0x10A19 <= CP && CP <= 0x10A35) || (0x10A36 <= CP && CP <= 0x10A37) || - (0x10A3B <= CP && CP <= 0x10A3E) || (0x10A40 <= CP && CP <= 0x10A48) || - (0x10A49 <= CP && CP <= 0x10A4F) || (0x10A50 <= CP && CP <= 0x10A58) || - (0x10A59 <= CP && CP <= 0x10A5F) || (0x10A60 <= CP && CP <= 0x10A7C) || - (0x10A7D <= CP && CP <= 0x10A7E) || (CP == 0x10A7F) || - (0x10A80 <= CP && CP <= 0x10A9C) || (0x10A9D <= CP && CP <= 0x10A9F) || - (0x10AA0 <= CP && CP <= 0x10ABF) || (0x10AC0 <= CP && CP <= 0x10AC7) || - (CP == 0x10AC8) || (0x10AC9 <= CP && CP <= 0x10AE4) || - (0x10AE7 <= CP && CP <= 0x10AEA) || (0x10AEB <= CP && CP <= 0x10AEF) || - (0x10AF0 <= CP && CP <= 0x10AF6) || (0x10AF7 <= CP && CP <= 0x10AFF) || - (0x10B00 <= CP && CP <= 0x10B35) || (0x10B36 <= CP && CP <= 0x10B38) || - (0x10B40 <= CP && CP <= 0x10B55) || (0x10B56 <= CP && CP <= 0x10B57) || - (0x10B58 <= CP && CP <= 0x10B5F) || (0x10B60 <= CP && CP <= 0x10B72) || - (0x10B73 <= CP && CP <= 0x10B77) || (0x10B78 <= CP && CP <= 0x10B7F) || - (0x10B80 <= CP && CP <= 0x10B91) || (0x10B92 <= CP && CP <= 0x10B98) || - (0x10B99 <= CP && CP <= 0x10B9C) || (0x10B9D <= CP && CP <= 0x10BA8) || - (0x10BA9 <= CP && CP <= 0x10BAF) || (0x10BB0 <= CP && CP <= 0x10BFF) || - (0x10C00 <= CP && CP <= 0x10C48) || (0x10C49 <= CP && CP <= 0x10C7F) || - (0x10C80 <= CP && CP <= 0x10CB2) || 
(0x10CB3 <= CP && CP <= 0x10CBF) || - (0x10CC0 <= CP && CP <= 0x10CF2) || (0x10CF3 <= CP && CP <= 0x10CF9) || - (0x10CFA <= CP && CP <= 0x10CFF) || (0x10D40 <= CP && CP <= 0x10E5F) || - (CP == 0x10E7F) || (0x10E80 <= CP && CP <= 0x10EA9) || - (CP == 0x10EAA) || (CP == 0x10EAD) || - (0x10EAE <= CP && CP <= 0x10EAF) || (0x10EB0 <= CP && CP <= 0x10EB1) || - (0x10EB2 <= CP && CP <= 0x10EFF) || (0x10F00 <= CP && CP <= 0x10F1C) || - (0x10F1D <= CP && CP <= 0x10F26) || (CP == 0x10F27) || - (0x10F28 <= CP && CP <= 0x10F2F) || (0x10F70 <= CP && CP <= 0x10F81) || - (0x10F86 <= CP && CP <= 0x10F89) || (0x10F8A <= CP && CP <= 0x10FAF) || - (0x10FB0 <= CP && CP <= 0x10FC4) || (0x10FC5 <= CP && CP <= 0x10FCB) || - (0x10FCC <= CP && CP <= 0x10FDF) || (0x10FE0 <= CP && CP <= 0x10FF6) || - (0x10FF7 <= CP && CP <= 0x10FFF) || (0x1E800 <= CP && CP <= 0x1E8C4) || - (0x1E8C5 <= CP && CP <= 0x1E8C6) || (0x1E8C7 <= CP && CP <= 0x1E8CF) || - (0x1E8D7 <= CP && CP <= 0x1E8FF) || (0x1E900 <= CP && CP <= 0x1E943) || - (CP == 0x1E94B) || (0x1E94C <= CP && CP <= 0x1E94F) || - (0x1E950 <= CP && CP <= 0x1E959) || (0x1E95A <= CP && CP <= 0x1E95D) || - (0x1E95E <= CP && CP <= 0x1E95F) || (0x1E960 <= CP && CP <= 0x1EC6F) || - (0x1ECC0 <= CP && CP <= 0x1ECFF) || (0x1ED50 <= CP && CP <= 0x1EDFF); -} - -// CHECK: _Z15func_conditionsbbbbbbb: -// TV8-NOT: Decision, -// COND6-NOT: Decision, -// COND7: Decision,File 0, {{[0-9]+}}:10 -> {{[0-9]+}}:41 = M:8, C:7 -// CHECK: _Z8func_isRj: -// CHECK-NOT: Decision, -// CHECK-NOT: Branch,{{.*}}] -// CHECK: Branch,File 0, [[@LINE-10]]:64 -> [[@LINE-10]]:77 = +// CHECK: warning: unsupported MC/DC boolean expression; number of conditions{{.*}} exceeds max diff --git a/clang/test/CoverageMapping/mcdc-logical-scalar-ids.cpp b/clang/test/CoverageMapping/mcdc-logical-scalar-ids.cpp index 0694f7dbc294aa..c820b5df5ad3a9 100644 --- a/clang/test/CoverageMapping/mcdc-logical-scalar-ids.cpp +++ b/clang/test/CoverageMapping/mcdc-logical-scalar-ids.cpp @@ -10,25 +10,25 @@ bool 
func_scalar_and(bool a, bool b, bool c, bool d, bool e, bool f) { return bar(res1, res2, res3, res4, res5); } -// CHECK-LABEL: Decision,File 0, 5:17 -> 5:23 = M:3, C:2 +// CHECK-LABEL: Decision,File 0, 5:17 -> 5:23 = M:0, C:2 // CHECK-NEXT: Branch,File 0, 5:17 -> 5:18 = #1, (#0 - #1) [1,2,0] // CHECK: Branch,File 0, 5:22 -> 5:23 = #2, (#1 - #2) [2,0,0] -// CHECK-LABEL: Decision,File 0, 6:17 -> 6:28 = M:7, C:3 +// CHECK-LABEL: Decision,File 0, 6:17 -> 6:28 = M:1, C:3 // CHECK-NEXT: Branch,File 0, 6:17 -> 6:18 = #5, (#0 - #5) [1,3,0] // CHECK: Branch,File 0, 6:22 -> 6:23 = #6, (#5 - #6) [3,2,0] // CHECK: Branch,File 0, 6:27 -> 6:28 = #4, (#3 - #4) [2,0,0] -// CHECK-LABEL: Decision,File 0, 7:17 -> 7:33 = M:12, C:4 +// CHECK-LABEL: Decision,File 0, 7:17 -> 7:33 = M:2, C:4 // CHECK-NEXT: Branch,File 0, 7:17 -> 7:18 = #11, (#0 - #11) [1,4,0] // CHECK: Branch,File 0, 7:22 -> 7:23 = #12, (#11 - #12) [4,3,0] // CHECK: Branch,File 0, 7:27 -> 7:28 = #10, (#9 - #10) [3,2,0] // CHECK: Branch,File 0, 7:32 -> 7:33 = #8, (#7 - #8) [2,0,0] -// CHECK-LABEL: Decision,File 0, 8:17 -> 8:38 = M:18, C:5 +// CHECK-LABEL: Decision,File 0, 8:17 -> 8:38 = M:4, C:5 // CHECK-NEXT: Branch,File 0, 8:17 -> 8:18 = #19, (#0 - #19) [1,5,0] // CHECK: Branch,File 0, 8:22 -> 8:23 = #20, (#19 - #20) [5,4,0] // CHECK: Branch,File 0, 8:27 -> 8:28 = #18, (#17 - #18) [4,3,0] // CHECK: Branch,File 0, 8:32 -> 8:33 = #16, (#15 - #16) [3,2,0] // CHECK: Branch,File 0, 8:37 -> 8:38 = #14, (#13 - #14) [2,0,0] -// CHECK-LABEL: Decision,File 0, 9:17 -> 9:43 = M:25, C:6 +// CHECK-LABEL: Decision,File 0, 9:17 -> 9:43 = M:8, C:6 // CHECK-NEXT: Branch,File 0, 9:17 -> 9:18 = #29, (#0 - #29) [1,6,0] // CHECK: Branch,File 0, 9:22 -> 9:23 = #30, (#29 - #30) [6,5,0] // CHECK: Branch,File 0, 9:27 -> 9:28 = #28, (#27 - #28) [5,4,0] @@ -45,25 +45,25 @@ bool func_scalar_or(bool a, bool b, bool c, bool d, bool e, bool f) { return bar(res1, res2, res3, res4, res5); } -// CHECK-LABEL: Decision,File 0, 40:17 -> 40:23 = M:3, C:2 +// 
CHECK-LABEL: Decision,File 0, 40:17 -> 40:23 = M:0, C:2 // CHECK-NEXT: Branch,File 0, 40:17 -> 40:18 = (#0 - #1), #1 [1,0,2] // CHECK: Branch,File 0, 40:22 -> 40:23 = (#1 - #2), #2 [2,0,0] -// CHECK-LABEL: Decision,File 0, 41:17 -> 41:28 = M:7, C:3 +// CHECK-LABEL: Decision,File 0, 41:17 -> 41:28 = M:1, C:3 // CHECK-NEXT: Branch,File 0, 41:17 -> 41:18 = (#0 - #5), #5 [1,0,3] // CHECK: Branch,File 0, 41:22 -> 41:23 = (#5 - #6), #6 [3,0,2] // CHECK: Branch,File 0, 41:27 -> 41:28 = (#3 - #4), #4 [2,0,0] -// CHECK-LABEL: Decision,File 0, 42:17 -> 42:33 = M:12, C:4 +// CHECK-LABEL: Decision,File 0, 42:17 -> 42:33 = M:2, C:4 // CHECK-NEXT: Branch,File 0, 42:17 -> 42:18 = (#0 - #11), #11 [1,0,4] // CHECK: Branch,File 0, 42:22 -> 42:23 = (#11 - #12), #12 [4,0,3] // CHECK: Branch,File 0, 42:27 -> 42:28 = (#9 - #10), #10 [3,0,2] // CHECK: Branch,File 0, 42:32 -> 42:33 = (#7 - #8), #8 [2,0,0] -// CHECK-LABEL: Decision,File 0, 43:17 -> 43:38 = M:18, C:5 +// CHECK-LABEL: Decision,File 0, 43:17 -> 43:38 = M:4, C:5 // CHECK-NEXT: Branch,File 0, 43:17 -> 43:18 = (#0 - #19), #19 [1,0,5] // CHECK: Branch,File 0, 43:22 -> 43:23 = (#19 - #20), #20 [5,0,4] // CHECK: Branch,File 0, 43:27 -> 43:28 = (#17 - #18), #18 [4,0,3] // CHECK: Branch,File 0, 43:32 -> 43:33 = (#15 - #16), #16 [3,0,2] // CHECK: Branch,File 0, 43:37 -> 43:38 = (#13 - #14), #14 [2,0,0] -// CHECK-LABEL: Decision,File 0, 44:17 -> 44:43 = M:25, C:6 +// CHECK-LABEL: Decision,File 0, 44:17 -> 44:43 = M:8, C:6 // CHECK-NEXT: Branch,File 0, 44:17 -> 44:18 = (#0 - #29), #29 [1,0,6] // CHECK: Branch,File 0, 44:22 -> 44:23 = (#29 - #30), #30 [6,0,5] // CHECK: Branch,File 0, 44:27 -> 44:28 = (#27 - #28), #28 [5,0,4] @@ -81,26 +81,26 @@ bool func_scalar_mix(bool a, bool b, bool c, bool d, bool e, bool f) { return bar(res1, res2, res3, res4, res5); } -// CHECK-LABEL: Decision,File 0, 76:17 -> 76:23 = M:3, C:2 +// CHECK-LABEL: Decision,File 0, 76:17 -> 76:23 = M:0, C:2 // CHECK-NEXT: Branch,File 0, 76:17 -> 76:18 = (#0 - #1), #1 
[1,0,2] // CHECK: Branch,File 0, 76:22 -> 76:23 = (#1 - #2), #2 [2,0,0] -// CHECK-LABEL: Decision,File 0, 77:17 -> 77:30 = M:7, C:3 +// CHECK-LABEL: Decision,File 0, 77:17 -> 77:30 = M:1, C:3 // CHECK-NEXT: Branch,File 0, 77:17 -> 77:18 = #3, (#0 - #3) [1,2,0] // CHECK: Branch,File 0, 77:23 -> 77:24 = (#3 - #4), #4 [2,0,3] // CHECK: Branch,File 0, 77:28 -> 77:29 = (#4 - #5), #5 [3,0,0] -// CHECK-LABEL: Decision,File 0, 78:17 -> 78:37 = M:14, C:4 +// CHECK-LABEL: Decision,File 0, 78:17 -> 78:37 = M:2, C:4 // CHECK-NEXT: File 0 // CHECK-NEXT: Branch,File 0, 78:18 -> 78:19 = (#0 - #7), #7 [1,2,3] // CHECK: Branch,File 0, 78:23 -> 78:24 = (#7 - #8), #8 [3,2,0] // CHECK: Branch,File 0, 78:30 -> 78:31 = (#6 - #9), #9 [2,0,4] // CHECK: Branch,File 0, 78:35 -> 78:36 = (#9 - #10), #10 [4,0,0] -// CHECK-LABEL: Decision,File 0, 79:17 -> 79:42 = M:22, C:5 +// CHECK-LABEL: Decision,File 0, 79:17 -> 79:42 = M:4, C:5 // CHECK-NEXT: Branch,File 0, 79:17 -> 79:18 = #12, (#0 - #12) [1,3,0] // CHECK: Branch,File 0, 79:23 -> 79:24 = (#12 - #13), #13 [3,2,4] // CHECK: Branch,File 0, 79:28 -> 79:29 = (#13 - #14), #14 [4,2,0] // CHECK: Branch,File 0, 79:35 -> 79:36 = (#11 - #15), #15 [2,0,5] // CHECK: Branch,File 0, 79:40 -> 79:41 = (#15 - #16), #16 [5,0,0] -// CHECK-LABEL: Decision,File 0, 80:17 -> 80:49 = M:37, C:6 +// CHECK-LABEL: Decision,File 0, 80:17 -> 80:49 = M:8, C:6 // CHECK-NEXT: File 0 // CHECK-NEXT: Branch,File 0, 80:18 -> 80:19 = (#0 - #19), #19 [1,3,4] // CHECK: Branch,File 0, 80:23 -> 80:24 = (#19 - #20), #20 [4,3,0] diff --git a/clang/test/CoverageMapping/mcdc-logical-stmt-ids-all.cpp b/clang/test/CoverageMapping/mcdc-logical-stmt-ids-all.cpp index d7436079d18105..6f47a4b901a8a7 100644 --- a/clang/test/CoverageMapping/mcdc-logical-stmt-ids-all.cpp +++ b/clang/test/CoverageMapping/mcdc-logical-stmt-ids-all.cpp @@ -6,7 +6,7 @@ bool func_if_and(bool a, bool b, bool c, bool d, bool e, bool f) { return false; } -// CHECK-LABEL: Decision,File 0, 4:7 -> 4:33 = M:7, C:6 +// 
CHECK-LABEL: Decision,File 0, 4:7 -> 4:33 = M:0, C:6 // CHECK-NEXT: Branch,File 0, 4:7 -> 4:8 = #10, (#0 - #10) [1,6,0] // CHECK: Branch,File 0, 4:12 -> 4:13 = #11, (#10 - #11) [6,5,0] // CHECK: Branch,File 0, 4:17 -> 4:18 = #9, (#8 - #9) [5,4,0] @@ -20,7 +20,7 @@ bool func_if_or(bool a, bool b, bool c, bool d, bool e, bool f) { return false; } -// CHECK-LABEL: Decision,File 0, 18:7 -> 18:33 = M:7, C:6 +// CHECK-LABEL: Decision,File 0, 18:7 -> 18:33 = M:0, C:6 // CHECK-NEXT: Branch,File 0, 18:7 -> 18:8 = (#0 - #10), #10 [1,0,6] // CHECK: Branch,File 0, 18:12 -> 18:13 = (#10 - #11), #11 [6,0,5] // CHECK: Branch,File 0, 18:17 -> 18:18 = (#8 - #9), #9 [5,0,4] @@ -33,7 +33,7 @@ bool func_while_and(bool a, bool b, bool c, bool d, bool e, bool f) { return false; } -// CHECK-LABEL: Decision,File 0, 32:10 -> 32:36 = M:7, C:6 +// CHECK-LABEL: Decision,File 0, 32:10 -> 32:36 = M:0, C:6 // CHECK-NEXT: Branch,File 0, 32:10 -> 32:11 = #10, (#0 - #10) [1,6,0] // CHECK: Branch,File 0, 32:15 -> 32:16 = #11, (#10 - #11) [6,5,0] // CHECK: Branch,File 0, 32:20 -> 32:21 = #9, (#8 - #9) [5,4,0] @@ -46,7 +46,7 @@ bool func_while_or(bool a, bool b, bool c, bool d, bool e, bool f) { return false; } -// CHECK-LABEL: Decision,File 0, 45:10 -> 45:36 = M:7, C:6 +// CHECK-LABEL: Decision,File 0, 45:10 -> 45:36 = M:0, C:6 // CHECK-NEXT: Branch,File 0, 45:10 -> 45:11 = (#0 - #10), #10 [1,0,6] // CHECK: Branch,File 0, 45:15 -> 45:16 = (#10 - #11), #11 [6,0,5] // CHECK: Branch,File 0, 45:20 -> 45:21 = (#8 - #9), #9 [5,0,4] @@ -59,7 +59,7 @@ bool func_for_and(bool a, bool b, bool c, bool d, bool e, bool f) { return false; } -// CHECK-LABEL: Decision,File 0, 58:9 -> 58:35 = M:7, C:6 +// CHECK-LABEL: Decision,File 0, 58:9 -> 58:35 = M:0, C:6 // CHECK-NEXT: Branch,File 0, 58:9 -> 58:10 = #10, (#0 - #10) [1,6,0] // CHECK: Branch,File 0, 58:14 -> 58:15 = #11, (#10 - #11) [6,5,0] // CHECK: Branch,File 0, 58:19 -> 58:20 = #9, (#8 - #9) [5,4,0] @@ -72,7 +72,7 @@ bool func_for_or(bool a, bool b, bool c, 
bool d, bool e, bool f) { return false; } -// CHECK-LABEL: Decision,File 0, 71:9 -> 71:35 = M:7, C:6 +// CHECK-LABEL: Decision,File 0, 71:9 -> 71:35 = M:0, C:6 // CHECK-NEXT: Branch,File 0, 71:9 -> 71:10 = (#0 - #10), #10 [1,0,6] // CHECK: Branch,File 0, 71:14 -> 71:15 = (#10 - #11), #11 [6,0,5] // CHECK: Branch,File 0, 71:19 -> 71:20 = (#8 - #9), #9 [5,0,4] @@ -85,7 +85,7 @@ bool func_do_and(bool a, bool b, bool c, bool d, bool e, bool f) { return false; } -// CHECK-LABEL: Decision,File 0, 84:16 -> 84:42 = M:7, C:6 +// CHECK-LABEL: Decision,File 0, 84:16 -> 84:42 = M:0, C:6 // CHECK-NEXT: Branch,File 0, 84:16 -> 84:17 = #10, ((#0 + #1) - #10) [1,6,0] // CHECK: Branch,File 0, 84:21 -> 84:22 = #11, (#10 - #11) [6,5,0] // CHECK: Branch,File 0, 84:26 -> 84:27 = #9, (#8 - #9) [5,4,0] @@ -98,7 +98,7 @@ bool func_do_or(bool a, bool b, bool c, bool d, bool e, bool f) { return false; } -// CHECK-LABEL: Decision,File 0, 97:16 -> 97:42 = M:7, C:6 +// CHECK-LABEL: Decision,File 0, 97:16 -> 97:42 = M:0, C:6 // CHECK-NEXT: Branch,File 0, 97:16 -> 97:17 = ((#0 + #1) - #10), #10 [1,0,6] // CHECK: Branch,File 0, 97:21 -> 97:22 = (#10 - #11), #11 [6,0,5] // CHECK: Branch,File 0, 97:26 -> 97:27 = (#8 - #9), #9 [5,0,4] @@ -110,7 +110,7 @@ bool func_ternary_and(bool a, bool b, bool c, bool d, bool e, bool f) { return (a && b && c && d && e && f) ? true : false; } -// CHECK-LABEL: Decision,File 0, 110:11 -> 110:37 = M:7, C:6 +// CHECK-LABEL: Decision,File 0, 110:11 -> 110:37 = M:0, C:6 // CHECK-NEXT: Branch,File 0, 110:11 -> 110:12 = #10, (#0 - #10) [1,6,0] // CHECK: Branch,File 0, 110:16 -> 110:17 = #11, (#10 - #11) [6,5,0] // CHECK: Branch,File 0, 110:21 -> 110:22 = #9, (#8 - #9) [5,4,0] @@ -122,7 +122,7 @@ bool func_ternary_or(bool a, bool b, bool c, bool d, bool e, bool f) { return (a || b || c || d || e || f) ? 
true : false; } -// CHECK-LABEL: Decision,File 0, 122:11 -> 122:37 = M:7, C:6 +// CHECK-LABEL: Decision,File 0, 122:11 -> 122:37 = M:0, C:6 // CHECK-NEXT: Branch,File 0, 122:11 -> 122:12 = (#0 - #10), #10 [1,0,6] // CHECK: Branch,File 0, 122:16 -> 122:17 = (#10 - #11), #11 [6,0,5] // CHECK: Branch,File 0, 122:21 -> 122:22 = (#8 - #9), #9 [5,0,4] @@ -137,7 +137,7 @@ bool func_if_nested_if(bool a, bool b, bool c, bool d, bool e) { return false; } -// CHECK-LABEL: Decision,File 0, 134:7 -> 134:30 = M:8, C:5 +// CHECK-LABEL: Decision,File 0, 134:7 -> 134:30 = M:0, C:5 // CHECK-NEXT: Branch,File 0, 134:7 -> 134:8 = (#0 - #6), #6 [1,0,4] // CHECK: Branch,File 0, 134:13 -> 134:14 = #7, (#6 - #7) [4,5,3] // CHECK: Branch,File 0, 134:18 -> 134:19 = #8, (#7 - #8) [5,0,3] @@ -148,7 +148,7 @@ bool func_ternary_nested_if(bool a, bool b, bool c, bool d, bool e) { return (a || (b && c) || d || e) ? true : false; } -// CHECK-LABEL: Decision,File 0, 148:11 -> 148:34 = M:8, C:5 +// CHECK-LABEL: Decision,File 0, 148:11 -> 148:34 = M:0, C:5 // CHECK-NEXT: Branch,File 0, 148:11 -> 148:12 = (#0 - #6), #6 [1,0,4] // CHECK: Branch,File 0, 148:17 -> 148:18 = #7, (#6 - #7) [4,5,3] // CHECK: Branch,File 0, 148:22 -> 148:23 = #8, (#7 - #8) [5,0,3] @@ -162,7 +162,7 @@ bool func_if_nested_if_2(bool a, bool b, bool c, bool d, bool e) { return false; } -// CHECK-LABEL: Decision,File 0, 159:7 -> 159:32 = M:9, C:5 +// CHECK-LABEL: Decision,File 0, 159:7 -> 159:32 = M:0, C:5 // CHECK-NEXT: Branch,File 0, 159:7 -> 159:8 = (#0 - #2), #2 [1,0,2] // CHECK: Branch,File 0, 159:14 -> 159:15 = #7, (#2 - #7) [2,5,4] // CHECK: Branch,File 0, 159:19 -> 159:20 = #8, (#7 - #8) [5,3,4] @@ -173,7 +173,7 @@ bool func_ternary_nested_if_2(bool a, bool b, bool c, bool d, bool e) { return (a || ((b && c) || d) && e) ? 
true : false; } -// CHECK-LABEL: Decision,File 0, 173:11 -> 173:36 = M:9, C:5 +// CHECK-LABEL: Decision,File 0, 173:11 -> 173:36 = M:0, C:5 // CHECK-NEXT: Branch,File 0, 173:11 -> 173:12 = (#0 - #2), #2 [1,0,2] // CHECK: Branch,File 0, 173:18 -> 173:19 = #7, (#2 - #7) [2,5,4] // CHECK: Branch,File 0, 173:23 -> 173:24 = #8, (#7 - #8) [5,3,4] @@ -187,7 +187,7 @@ bool func_if_nested_if_3(bool a, bool b, bool c, bool d, bool e, bool f) { return false; } -// CHECK-LABEL: Decision,File 0, 184:7 -> 184:39 = M:12, C:6 +// CHECK-LABEL: Decision,File 0, 184:7 -> 184:39 = M:0, C:6 // CHECK: Branch,File 0, 184:8 -> 184:9 = #5, (#0 - #5) [1,4,3] // CHECK: Branch,File 0, 184:14 -> 184:15 = (#5 - #6), #6 [4,2,5] // CHECK: Branch,File 0, 184:19 -> 184:20 = (#6 - #7), #7 [5,2,3] @@ -199,7 +199,7 @@ bool func_ternary_nested_if_3(bool a, bool b, bool c, bool d, bool e, bool f) { return ((a && (b || c) || (d && e)) && f) ? true : false; } -// CHECK-LABEL: Decision,File 0, 199:11 -> 199:43 = M:12, C:6 +// CHECK-LABEL: Decision,File 0, 199:11 -> 199:43 = M:0, C:6 // CHECK: Branch,File 0, 199:12 -> 199:13 = #5, (#0 - #5) [1,4,3] // CHECK: Branch,File 0, 199:18 -> 199:19 = (#5 - #6), #6 [4,2,5] // CHECK: Branch,File 0, 199:23 -> 199:24 = (#6 - #7), #7 [5,2,3] diff --git a/clang/test/CoverageMapping/mcdc-logical-stmt-ids.cpp b/clang/test/CoverageMapping/mcdc-logical-stmt-ids.cpp index 655bbf25ee1031..99854ec27a3fbd 100644 --- a/clang/test/CoverageMapping/mcdc-logical-stmt-ids.cpp +++ b/clang/test/CoverageMapping/mcdc-logical-stmt-ids.cpp @@ -10,25 +10,25 @@ bool func_if_and(bool a, bool b, bool c, bool d, bool e, bool f) { return false; } -// CHECK-LABEL: Decision,File 0, 4:7 -> 4:13 = M:3, C:2 +// CHECK-LABEL: Decision,File 0, 4:7 -> 4:13 = M:0, C:2 // CHECK-NEXT: Branch,File 0, 4:7 -> 4:8 = #2, (#0 - #2) [1,2,0] // CHECK: Branch,File 0, 4:12 -> 4:13 = #3, (#2 - #3) [2,0,0] -// CHECK-LABEL: Decision,File 0, 5:9 -> 5:20 = M:7, C:3 +// CHECK-LABEL: Decision,File 0, 5:9 -> 5:20 = M:1, C:3 // 
CHECK-NEXT: Branch,File 0, 5:9 -> 5:10 = #7, (#1 - #7) [1,3,0] // CHECK: Branch,File 0, 5:14 -> 5:15 = #8, (#7 - #8) [3,2,0] // CHECK: Branch,File 0, 5:19 -> 5:20 = #6, (#5 - #6) [2,0,0] -// CHECK-LABEL: Decision,File 0, 6:11 -> 6:27 = M:12, C:4 +// CHECK-LABEL: Decision,File 0, 6:11 -> 6:27 = M:2, C:4 // CHECK-NEXT: Branch,File 0, 6:11 -> 6:12 = #14, (#4 - #14) [1,4,0] // CHECK: Branch,File 0, 6:16 -> 6:17 = #15, (#14 - #15) [4,3,0] // CHECK: Branch,File 0, 6:21 -> 6:22 = #13, (#12 - #13) [3,2,0] // CHECK: Branch,File 0, 6:26 -> 6:27 = #11, (#10 - #11) [2,0,0] -// CHECK-LABEL: Decision,File 0, 7:13 -> 7:34 = M:18, C:5 +// CHECK-LABEL: Decision,File 0, 7:13 -> 7:34 = M:4, C:5 // CHECK-NEXT: Branch,File 0, 7:13 -> 7:14 = #23, (#9 - #23) [1,5,0] // CHECK: Branch,File 0, 7:18 -> 7:19 = #24, (#23 - #24) [5,4,0] // CHECK: Branch,File 0, 7:23 -> 7:24 = #22, (#21 - #22) [4,3,0] // CHECK: Branch,File 0, 7:28 -> 7:29 = #20, (#19 - #20) [3,2,0] // CHECK: Branch,File 0, 7:33 -> 7:34 = #18, (#17 - #18) [2,0,0] -// CHECK-LABEL: Decision,File 0, 8:16 -> 8:42 = M:25, C:6 +// CHECK-LABEL: Decision,File 0, 8:16 -> 8:42 = M:8, C:6 // CHECK-NEXT: Branch,File 0, 8:16 -> 8:17 = #34, (#16 - #34) [1,6,0] // CHECK: Branch,File 0, 8:21 -> 8:22 = #35, (#34 - #35) [6,5,0] // CHECK: Branch,File 0, 8:26 -> 8:27 = #33, (#32 - #33) [5,4,0] @@ -46,25 +46,25 @@ bool func_if_or(bool a, bool b, bool c, bool d, bool e, bool f) { return false; } -// CHECK-LABEL: Decision,File 0, 40:7 -> 40:13 = M:3, C:2 +// CHECK-LABEL: Decision,File 0, 40:7 -> 40:13 = M:0, C:2 // CHECK-NEXT: Branch,File 0, 40:7 -> 40:8 = (#0 - #2), #2 [1,0,2] // CHECK: Branch,File 0, 40:12 -> 40:13 = (#2 - #3), #3 [2,0,0] -// CHECK-LABEL: Decision,File 0, 41:9 -> 41:20 = M:7, C:3 +// CHECK-LABEL: Decision,File 0, 41:9 -> 41:20 = M:1, C:3 // CHECK-NEXT: Branch,File 0, 41:9 -> 41:10 = (#1 - #7), #7 [1,0,3] // CHECK: Branch,File 0, 41:14 -> 41:15 = (#7 - #8), #8 [3,0,2] // CHECK: Branch,File 0, 41:19 -> 41:20 = (#5 - #6), #6 [2,0,0] -// 
CHECK-LABEL: Decision,File 0, 42:11 -> 42:27 = M:12, C:4 +// CHECK-LABEL: Decision,File 0, 42:11 -> 42:27 = M:2, C:4 // CHECK-NEXT: Branch,File 0, 42:11 -> 42:12 = (#4 - #14), #14 [1,0,4] // CHECK: Branch,File 0, 42:16 -> 42:17 = (#14 - #15), #15 [4,0,3] // CHECK: Branch,File 0, 42:21 -> 42:22 = (#12 - #13), #13 [3,0,2] // CHECK: Branch,File 0, 42:26 -> 42:27 = (#10 - #11), #11 [2,0,0] -// CHECK-LABEL: Decision,File 0, 43:13 -> 43:34 = M:18, C:5 +// CHECK-LABEL: Decision,File 0, 43:13 -> 43:34 = M:4, C:5 // CHECK-NEXT: Branch,File 0, 43:13 -> 43:14 = (#9 - #23), #23 [1,0,5] // CHECK: Branch,File 0, 43:18 -> 43:19 = (#23 - #24), #24 [5,0,4] // CHECK: Branch,File 0, 43:23 -> 43:24 = (#21 - #22), #22 [4,0,3] // CHECK: Branch,File 0, 43:28 -> 43:29 = (#19 - #20), #20 [3,0,2] // CHECK: Branch,File 0, 43:33 -> 43:34 = (#17 - #18), #18 [2,0,0] -// CHECK-LABEL: Decision,File 0, 44:16 -> 44:42 = M:25, C:6 +// CHECK-LABEL: Decision,File 0, 44:16 -> 44:42 = M:8, C:6 // CHECK-NEXT: Branch,File 0, 44:16 -> 44:17 = (#16 - #34), #34 [1,0,6] // CHECK: Branch,File 0, 44:21 -> 44:22 = (#34 - #35), #35 [6,0,5] // CHECK: Branch,File 0, 44:26 -> 44:27 = (#32 - #33), #33 [5,0,4] @@ -82,26 +82,26 @@ bool func_if_mix(bool a, bool b, bool c, bool d, bool e, bool f) { return false; } -// CHECK-LABEL: Decision,File 0, 76:7 -> 76:13 = M:3, C:2 +// CHECK-LABEL: Decision,File 0, 76:7 -> 76:13 = M:0, C:2 // CHECK-NEXT: Branch,File 0, 76:7 -> 76:8 = (#0 - #2), #2 [1,0,2] // CHECK: Branch,File 0, 76:12 -> 76:13 = (#2 - #3), #3 [2,0,0] -// CHECK-LABEL: Decision,File 0, 77:9 -> 77:22 = M:7, C:3 +// CHECK-LABEL: Decision,File 0, 77:9 -> 77:22 = M:1, C:3 // CHECK-NEXT: Branch,File 0, 77:9 -> 77:10 = #5, (#1 - #5) [1,2,0] // CHECK: Branch,File 0, 77:15 -> 77:16 = (#5 - #6), #6 [2,0,3] // CHECK: Branch,File 0, 77:20 -> 77:21 = (#6 - #7), #7 [3,0,0] -// CHECK-LABEL: Decision,File 0, 78:11 -> 78:31 = M:14, C:4 +// CHECK-LABEL: Decision,File 0, 78:11 -> 78:31 = M:2, C:4 // CHECK-NEXT: File 0 // CHECK-NEXT: 
Branch,File 0, 78:12 -> 78:13 = (#4 - #10), #10 [1,2,3] // CHECK: Branch,File 0, 78:17 -> 78:18 = (#10 - #11), #11 [3,2,0] // CHECK: Branch,File 0, 78:24 -> 78:25 = (#9 - #12), #12 [2,0,4] // CHECK: Branch,File 0, 78:29 -> 78:30 = (#12 - #13), #13 [4,0,0] -// CHECK-LABEL: Decision,File 0, 79:13 -> 79:38 = M:22, C:5 +// CHECK-LABEL: Decision,File 0, 79:13 -> 79:38 = M:4, C:5 // CHECK-NEXT: Branch,File 0, 79:13 -> 79:14 = #16, (#8 - #16) [1,3,0] // CHECK: Branch,File 0, 79:19 -> 79:20 = (#16 - #17), #17 [3,2,4] // CHECK: Branch,File 0, 79:24 -> 79:25 = (#17 - #18), #18 [4,2,0] // CHECK: Branch,File 0, 79:31 -> 79:32 = (#15 - #19), #19 [2,0,5] // CHECK: Branch,File 0, 79:36 -> 79:37 = (#19 - #20), #20 [5,0,0] -// CHECK-LABEL: Decision,File 0, 80:15 -> 80:47 = M:37, C:6 +// CHECK-LABEL: Decision,File 0, 80:15 -> 80:47 = M:8, C:6 // CHECK-NEXT: File 0 // CHECK-NEXT: Branch,File 0, 80:16 -> 80:17 = (#14 - #24), #24 [1,3,4] // CHECK: Branch,File 0, 80:21 -> 80:22 = (#24 - #25), #25 [4,3,0] diff --git a/clang/test/CoverageMapping/mcdc-scratch-space.c b/clang/test/CoverageMapping/mcdc-scratch-space.c index a263e9b688faed..2b5b12d9dcad65 100644 --- a/clang/test/CoverageMapping/mcdc-scratch-space.c +++ b/clang/test/CoverageMapping/mcdc-scratch-space.c @@ -2,14 +2,14 @@ // CHECK: builtin_macro0: int builtin_macro0(int a) { - // CHECK: Decision,File 0, [[@LINE+1]]:11 -> [[@LINE+2]]:15 = M:3, C:2 + // CHECK: Decision,File 0, [[@LINE+1]]:11 -> [[@LINE+2]]:15 = M:0, C:2 return (__LINE__ // CHECK: Branch,File 0, [[@LINE]]:11 -> [[@LINE]]:11 = 0, 0 [1,2,0] && a); // CHECK: Branch,File 0, [[@LINE]]:14 -> [[@LINE]]:15 = #2, (#1 - #2) [2,0,0] } // CHECK: builtin_macro1: int builtin_macro1(int a) { - // CHECK: Decision,File 0, [[@LINE+1]]:11 -> [[@LINE+2]]:22 = M:3, C:2 + // CHECK: Decision,File 0, [[@LINE+1]]:11 -> [[@LINE+2]]:22 = M:0, C:2 return (a // CHECK: Branch,File 0, [[@LINE]]:11 -> [[@LINE]]:12 = (#0 - #1), #1 [1,0,2] || __LINE__); // CHECK: Branch,File 0, [[@LINE]]:14 -> 
[[@LINE]]:14 = 0, 0 [2,0,0] } @@ -18,7 +18,7 @@ int builtin_macro1(int a) { // CHECK: pre0: int pre0(int pre_a, int b_post) { - // CHECK: Decision,File 0, [[@LINE+2]]:11 -> [[@LINE+3]]:20 = M:3, C:2 + // CHECK: Decision,File 0, [[@LINE+2]]:11 -> [[@LINE+3]]:20 = M:0, C:2 // CHECK: Expansion,File 0, [[@LINE+1]]:11 -> [[@LINE+1]]:14 = #0 (Expanded file = 1) return (PRE(a) && b_post); @@ -30,7 +30,7 @@ int pre0(int pre_a, int b_post) { // CHECK: pre1: int pre1(int pre_a, int b_post) { - // CHECK: Decision,File 0, [[@LINE+3]]:11 -> [[@LINE+4]]:20 = M:3, C:2 + // CHECK: Decision,File 0, [[@LINE+3]]:11 -> [[@LINE+4]]:20 = M:0, C:2 // CHECK: Expansion,File 0, [[@LINE+2]]:11 -> [[@LINE+2]]:14 = #0 (Expanded file = 1) // CHECK: Branch,File 0, [[@LINE+2]]:14 -> [[@LINE+2]]:20 = #2, (#1 - #2) [2,0,0] return (PRE(foo) @@ -43,7 +43,7 @@ int pre1(int pre_a, int b_post) { // CHECK: post0: int post0(int pre_a, int b_post) { - // CHECK: Decision,File 0, [[@LINE+2]]:11 -> [[@LINE+3]]:18 = M:3, C:2 + // CHECK: Decision,File 0, [[@LINE+2]]:11 -> [[@LINE+3]]:18 = M:0, C:2 // CHECK: Branch,File 0, [[@LINE+1]]:11 -> [[@LINE+1]]:16 = (#0 - #1), #1 [1,0,2] return (pre_a || POST(b)); @@ -55,7 +55,7 @@ int post0(int pre_a, int b_post) { // CHECK: post1: int post1(int pre_a, int b_post) { - // CHECK: Decision,File 0, [[@LINE+3]]:11 -> [[@LINE+4]]:18 = M:3, C:2 + // CHECK: Decision,File 0, [[@LINE+3]]:11 -> [[@LINE+4]]:18 = M:0, C:2 // CHECK: Branch,File 0, [[@LINE+2]]:11 -> [[@LINE+2]]:16 = (#0 - #1), #1 [1,0,2] // CHECK: Expansion,File 0, [[@LINE+2]]:14 -> [[@LINE+2]]:18 = 0 (Expanded file = 1) return (pre_a diff --git a/clang/test/CoverageMapping/mcdc-system-headers.cpp b/clang/test/CoverageMapping/mcdc-system-headers.cpp index ae26ed5fe469f2..4dfbb17c2bba81 100644 --- a/clang/test/CoverageMapping/mcdc-system-headers.cpp +++ b/clang/test/CoverageMapping/mcdc-system-headers.cpp @@ -15,7 +15,7 @@ // CHECK: _Z5func0i: int func0(int a) { - // CHECK: Decision,File 0, [[@LINE+3]]:11 -> 
[[@LINE+3]]:21 = M:3, C:2 + // CHECK: Decision,File 0, [[@LINE+3]]:11 -> [[@LINE+3]]:21 = M:0, C:2 // W_SYS: Expansion,File 0, [[@LINE+2]]:11 -> [[@LINE+2]]:16 = #0 (Expanded file = 1) // X_SYS: Branch,File 0, [[@LINE+1]]:11 -> [[@LINE+1]]:11 = 0, 0 [1,2,0] return (CONST && a); @@ -25,7 +25,7 @@ int func0(int a) { // CHECK: _Z5func1ii: int func1(int a, int b) { - // CHECK: Decision,File 0, [[@LINE+2]]:11 -> [[@LINE+2]]:21 = M:3, C:2 + // CHECK: Decision,File 0, [[@LINE+2]]:11 -> [[@LINE+2]]:21 = M:0, C:2 // CHECK: Branch,File 0, [[@LINE+1]]:11 -> [[@LINE+1]]:12 = (#0 - #1), #1 [1,0,2] return (a || EXPR1(b)); // W_SYS: Expansion,File 0, [[@LINE-1]]:16 -> [[@LINE-1]]:21 = #1 (Expanded file = 1) @@ -35,8 +35,8 @@ int func1(int a, int b) { // CHECK: _Z5func2ii: int func2(int a, int b) { - // W_SYS: Decision,File 0, [[@LINE+5]]:11 -> [[@LINE+5]]:28 = M:4, C:3 - // X_SYS: Decision,File 0, [[@LINE+4]]:11 -> [[@LINE+4]]:28 = M:3, C:2 + // W_SYS: Decision,File 0, [[@LINE+5]]:11 -> [[@LINE+5]]:28 = M:0, C:3 + // X_SYS: Decision,File 0, [[@LINE+4]]:11 -> [[@LINE+4]]:28 = M:0, C:2 // W_SYS: Expansion,File 0, [[@LINE+3]]:11 -> [[@LINE+3]]:16 = #0 (Expanded file = 1) // W_SYS: Expansion,File 0, [[@LINE+2]]:23 -> [[@LINE+2]]:28 = #1 (Expanded file = 2) // X_SYS: Branch,File 0, [[@LINE+1]]:11 -> [[@LINE+1]]:11 = #1, (#0 - #1) [1,2,0] diff --git a/clang/test/Profile/c-mcdc-class.cpp b/clang/test/Profile/c-mcdc-class.cpp index 748344194ef86d..6aab55add32807 100644 --- a/clang/test/Profile/c-mcdc-class.cpp +++ b/clang/test/Profile/c-mcdc-class.cpp @@ -36,24 +36,23 @@ Value::~Value(void) { // SHIFT FIRST CONDITION WITH ID = 0. 
// MCDCCTOR: %[[LAB1:[0-9]+]] = load i32, ptr %value, align 4 // MCDCCTOR-DAG: %[[BOOL:cmp[0-9]*]] = icmp ne i32 %[[LAB1]], 2 -// MCDCCTOR-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 -// MCDCCTOR-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 0 -// MCDCCTOR-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 0 -// MCDCCTOR-DAG: %[[LAB4:[0-9]+]] = select i1 %[[BOOL]], i32 %[[LAB2]], i32 %[[LAB3]] +// MCDCCTOR-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDCCTOR-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[BOOL]] to i32 +// MCDCCTOR-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 0 +// MCDCCTOR-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] // MCDCCTOR-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // SHIFT SECOND CONDITION WITH ID = 1. // MCDCCTOR: %[[LAB1:[0-9]+]] = load i32, ptr %value2, align 4 // MCDCCTOR-DAG: %[[BOOL:cmp[0-9]*]] = icmp ne i32 %[[LAB1]], 6 -// MCDCCTOR-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 -// MCDCCTOR-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 2 -// MCDCCTOR-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 1 -// MCDCCTOR-DAG: %[[LAB4:[0-9]+]] = select i1 %[[BOOL]], i32 %[[LAB2]], i32 %[[LAB3]] +// MCDCCTOR-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDCCTOR-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[BOOL]] to i32 +// MCDCCTOR-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 1 +// MCDCCTOR-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] // MCDCCTOR-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // UPDATE FINAL BITMASK WITH RESULT. 
-// MCDCCTOR-DAG: %[[TEMP0:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 -// MCDCCTOR: %[[TEMP:[0-9]+]] = add i32 %[[TEMP0]], 0 +// MCDCCTOR-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 // MCDCCTOR: %[[LAB1:[0-9]+]] = lshr i32 %[[TEMP]], 3 // MCDCCTOR: %[[LAB4:[0-9]+]] = getelementptr inbounds i8, ptr @__profbm__ZN5ValueC2Ev, i32 %[[LAB1]] // MCDCCTOR: %[[LAB5:[0-9]+]] = and i32 %[[TEMP]], 7 @@ -74,24 +73,23 @@ Value::~Value(void) { // SHIFT FIRST CONDITION WITH ID = 0. // MCDCDTOR: %[[LAB1:[0-9]+]] = load i32, ptr %value, align 4 // MCDCDTOR-DAG: %[[BOOL:cmp[0-9]*]] = icmp ne i32 %[[LAB1]], 2 -// MCDCDTOR-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 -// MCDCDTOR-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 0 -// MCDCDTOR-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 0 -// MCDCDTOR-DAG: %[[LAB4:[0-9]+]] = select i1 %[[BOOL]], i32 %[[LAB2]], i32 %[[LAB3]] +// MCDCDTOR-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDCDTOR-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[BOOL]] to i32 +// MCDCDTOR-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 0 +// MCDCDTOR-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] // MCDCDTOR-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // SHIFT SECOND CONDITION WITH ID = 1. 
// MCDCDTOR: %[[LAB1:[0-9]+]] = load i32, ptr %value2, align 4 // MCDCDTOR-DAG: %[[BOOL:cmp[0-9]*]] = icmp ne i32 %[[LAB1]], 3 -// MCDCDTOR-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 -// MCDCDTOR-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 2 -// MCDCDTOR-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 1 -// MCDCDTOR-DAG: %[[LAB4:[0-9]+]] = select i1 %[[BOOL]], i32 %[[LAB2]], i32 %[[LAB3]] +// MCDCDTOR-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDCDTOR-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[BOOL]] to i32 +// MCDCDTOR-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 1 +// MCDCDTOR-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] // MCDCDTOR-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // UPDATE FINAL BITMASK WITH RESULT. -// MCDCDTOR-DAG: %[[TEMP0:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 -// MCDCDTOR: %[[TEMP:[0-9]+]] = add i32 %[[TEMP0]], 0 +// MCDCDTOR-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 // MCDCDTOR: %[[LAB1:[0-9]+]] = lshr i32 %[[TEMP]], 3 // MCDCDTOR: %[[LAB4:[0-9]+]] = getelementptr inbounds i8, ptr @__profbm__ZN5ValueD2Ev, i32 %[[LAB1]] // MCDCDTOR: %[[LAB5:[0-9]+]] = and i32 %[[TEMP]], 7 diff --git a/clang/test/Profile/c-mcdc-logicalop-ternary.c b/clang/test/Profile/c-mcdc-logicalop-ternary.c index 91174befb5fe54..3e6b6b1e380d1e 100644 --- a/clang/test/Profile/c-mcdc-logicalop-ternary.c +++ b/clang/test/Profile/c-mcdc-logicalop-ternary.c @@ -9,7 +9,7 @@ int test(int a, int b, int c, int d, int e, int f) { // NOMCDC-NOT: __profbm_test // MCDC BOOKKEEPING. -// MCDC: @__profbm_test = private global [2 x i8] zeroinitializer +// MCDC: @__profbm_test = private global [3 x i8] zeroinitializer // ALLOCATE MCDC TEMP AND ZERO IT. // MCDC-LABEL: @test( @@ -18,8 +18,7 @@ int test(int a, int b, int c, int d, int e, int f) { // TERNARY TRUE SHOULD UPDATE THE BITMAP WITH RESULT AT ELEMENT 0. 
// MCDC-LABEL: cond.true: -// MCDC-DAG: %[[TEMP0:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC: %[[TEMP:[0-9]+]] = add i32 %[[TEMP0]], 0 +// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 // MCDC: %[[LAB1:[0-9]+]] = lshr i32 %[[TEMP]], 3 // MCDC: %[[LAB4:[0-9]+]] = getelementptr inbounds i8, ptr @__profbm_test, i32 %[[LAB1]] // MCDC: %[[LAB5:[0-9]+]] = and i32 %[[TEMP]], 7 @@ -35,10 +34,9 @@ int test(int a, int b, int c, int d, int e, int f) { // TERNARY TRUE YIELDS TERNARY LHS LOGICAL-AND. // TERNARY LHS LOGICAL-AND SHOULD UPDATE THE BITMAP WITH RESULT AT ELEMENT 1. // MCDC-LABEL: land.end: -// MCDC-DAG: %[[TEMP0:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC: %[[TEMP:[0-9]+]] = add i32 %[[TEMP0]], 3 +// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 // MCDC: %[[LAB1:[0-9]+]] = lshr i32 %[[TEMP]], 3 -// MCDC: %[[LAB4:[0-9]+]] = getelementptr inbounds i8, ptr @__profbm_test, i32 %[[LAB1]] +// MCDC: %[[LAB4:[0-9]+]] = getelementptr inbounds i8, ptr getelementptr inbounds ([3 x i8], ptr @__profbm_test, i32 0, i32 1), i32 %[[LAB1]] // MCDC: %[[LAB5:[0-9]+]] = and i32 %[[TEMP]], 7 // MCDC: %[[LAB6:[0-9]+]] = trunc i32 %[[LAB5]] to i8 // MCDC: %[[LAB7:[0-9]+]] = shl i8 1, %[[LAB6]] @@ -48,8 +46,7 @@ int test(int a, int b, int c, int d, int e, int f) { // TERNARY FALSE SHOULD UPDATE THE BITMAP WITH RESULT AT ELEMENT 0. // MCDC-LABEL: cond.false: -// MCDC-DAG: %[[TEMP0:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC: %[[TEMP:[0-9]+]] = add i32 %[[TEMP0]], 0 +// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 // MCDC: %[[LAB1:[0-9]+]] = lshr i32 %[[TEMP]], 3 // MCDC: %[[LAB4:[0-9]+]] = getelementptr inbounds i8, ptr @__profbm_test, i32 %[[LAB1]] // MCDC: %[[LAB5:[0-9]+]] = and i32 %[[TEMP]], 7 @@ -65,10 +62,9 @@ int test(int a, int b, int c, int d, int e, int f) { // TERNARY FALSE YIELDS TERNARY RHS LOGICAL-OR. 
// TERNARY RHS LOGICAL-OR SHOULD UPDATE THE BITMAP WITH RESULT AT ELEMENT 2. // MCDC-LABEL: lor.end: -// MCDC-DAG: %[[TEMP0:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC: %[[TEMP:[0-9]+]] = add i32 %[[TEMP0]], 6 +// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 // MCDC: %[[LAB1:[0-9]+]] = lshr i32 %[[TEMP]], 3 -// MCDC: %[[LAB4:[0-9]+]] = getelementptr inbounds i8, ptr @__profbm_test, i32 %[[LAB1]] +// MCDC: %[[LAB4:[0-9]+]] = getelementptr inbounds i8, ptr getelementptr inbounds ([3 x i8], ptr @__profbm_test, i32 0, i32 2), i32 %[[LAB1]] // MCDC: %[[LAB5:[0-9]+]] = and i32 %[[TEMP]], 7 // MCDC: %[[LAB6:[0-9]+]] = trunc i32 %[[LAB5]] to i8 // MCDC: %[[LAB7:[0-9]+]] = shl i8 1, %[[LAB6]] diff --git a/clang/test/Profile/c-mcdc-nested-ternary.c b/clang/test/Profile/c-mcdc-nested-ternary.c index 8576ba39e583b4..ebea17ca146ae6 100644 --- a/clang/test/Profile/c-mcdc-nested-ternary.c +++ b/clang/test/Profile/c-mcdc-nested-ternary.c @@ -20,43 +20,42 @@ int test(int b, int c, int d, int e, int f) { // MCDC-LABEL: cond.true: // MCDC: %[[LAB1:[0-9]+]] = load i32, ptr %c.addr, align 4 // MCDC-DAG: %[[BOOL:tobool[0-9]*]] = icmp ne i32 %[[LAB1]], 0 -// MCDC-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 0 -// MCDC-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 0 -// MCDC-DAG: %[[LAB4:[0-9]+]] = select i1 %[[BOOL]], i32 %[[LAB2]], i32 %[[LAB3]] +// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[BOOL]] to i32 +// MCDC-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 0 +// MCDC-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] // MCDC-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // TERNARY FALSE SHOULD SHIFT ID = 0 FOR CONDITION 'd'. 
// MCDC-LABEL: cond.false: // MCDC: %[[LAB1:[0-9]+]] = load i32, ptr %d.addr, align 4 // MCDC-DAG: %[[BOOL:tobool[0-9]*]] = icmp ne i32 %[[LAB1]], 0 -// MCDC-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 0 -// MCDC-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 0 -// MCDC-DAG: %[[LAB4:[0-9]+]] = select i1 %[[BOOL]], i32 %[[LAB2]], i32 %[[LAB3]] +// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[BOOL]] to i32 +// MCDC-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 0 +// MCDC-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] // MCDC-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // SHIFT SECOND CONDITION WITH ID = 2. // MCDC: %[[LAB1:[0-9]+]] = load i32, ptr %e.addr, align 4 // MCDC-DAG: %[[BOOL:tobool[0-9]*]] = icmp ne i32 %[[LAB1]], 0 -// MCDC-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 0 -// MCDC-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 1 -// MCDC-DAG: %[[LAB4:[0-9]+]] = select i1 %[[BOOL]], i32 %[[LAB2]], i32 %[[LAB3]] +// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[BOOL]] to i32 +// MCDC-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 2 +// MCDC-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] // MCDC-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // SHIFT THIRD CONDITION WITH ID = 1. 
// MCDC: %[[LAB1:[0-9]+]] = load i32, ptr %f.addr, align 4 // MCDC-DAG: %[[BOOL:tobool[0-9]*]] = icmp ne i32 %[[LAB1]], 0 -// MCDC-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 3 -// MCDC-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 2 -// MCDC-DAG: %[[LAB4:[0-9]+]] = select i1 %[[BOOL]], i32 %[[LAB2]], i32 %[[LAB3]] +// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[BOOL]] to i32 +// MCDC-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 1 +// MCDC-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] // MCDC-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // UPDATE FINAL BITMASK WITH RESULT. -// MCDC-DAG: %[[TEMP0:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC: %[[TEMP:[0-9]+]] = add i32 %[[TEMP0]], 0 +// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 // MCDC: %[[LAB1:[0-9]+]] = lshr i32 %[[TEMP]], 3 // MCDC: %[[LAB4:[0-9]+]] = getelementptr inbounds i8, ptr @__profbm_test, i32 %[[LAB1]] // MCDC: %[[LAB5:[0-9]+]] = and i32 %[[TEMP]], 7 diff --git a/clang/test/Profile/c-mcdc-not.c b/clang/test/Profile/c-mcdc-not.c index ae683c3fe40fe7..165bfbae3349da 100644 --- a/clang/test/Profile/c-mcdc-not.c +++ b/clang/test/Profile/c-mcdc-not.c @@ -9,7 +9,7 @@ int test(int a, int b, int c, int d, int e, int f) { // NOMCDC-NOT: __profbm_test // MCDC BOOKKEEPING. -// MCDC: @__profbm_test = private global [2 x i8] zeroinitializer +// MCDC: @__profbm_test = private global [8 x i8] zeroinitializer // MCDC: @__profc_test = private global [9 x i64] zeroinitializer // ALLOCATE MCDC TEMP AND ZERO IT. 
@@ -21,62 +21,61 @@ int test(int a, int b, int c, int d, int e, int f) { // MCDC: %[[LAB1:[0-9]+]] = load i32, ptr %a.addr, align 4 // MCDC-DAG: %[[BOOL:tobool[0-9]*]] = icmp ne i32 %[[LAB1]], 0 // MCDC-DAG: %[[LNOT:lnot[0-9]*]] = xor i1 %[[BOOL]] -// MCDC-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 0 -// MCDC-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 0 -// MCDC-DAG: %[[LAB4:[0-9]+]] = select i1 %[[LNOT]], i32 %[[LAB2]], i32 %[[LAB3]] +// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[LNOT]] to i32 +// MCDC-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 0 +// MCDC-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] // MCDC-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // SHIFT SECOND CONDITION WITH ID = 2. // MCDC: %[[LAB1:[0-9]+]] = load i32, ptr %b.addr, align 4 // MCDC-DAG: %[[BOOL:tobool[0-9]*]] = icmp ne i32 %[[LAB1]], 0 -// MCDC-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 14 -// MCDC-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 1 -// MCDC-DAG: %[[LAB4:[0-9]+]] = select i1 %[[BOOL]], i32 %[[LAB2]], i32 %[[LAB3]] +// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[BOOL]] to i32 +// MCDC-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 2 +// MCDC-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] // MCDC-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // SHIFT THIRD CONDITION WITH ID = 1. 
// MCDC: %[[LAB1:[0-9]+]] = load i32, ptr %c.addr, align 4 // MCDC-DAG: %[[BOOL:tobool[0-9]*]] = icmp ne i32 %[[LAB1]], 0 // MCDC-DAG: %[[LNOT:lnot[0-9]*]] = xor i1 %[[BOOL]] -// MCDC-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 0 -// MCDC-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 0 -// MCDC-DAG: %[[LAB4:[0-9]+]] = select i1 %[[LNOT]], i32 %[[LAB2]], i32 %[[LAB3]] +// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[LNOT]] to i32 +// MCDC-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 1 +// MCDC-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] // MCDC-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // SHIFT FOURTH CONDITION WITH ID = 4. // MCDC: %[[LAB1:[0-9]+]] = load i32, ptr %d.addr, align 4 // MCDC-DAG: %[[BOOL:tobool[0-9]*]] = icmp ne i32 %[[LAB1]], 0 -// MCDC-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 12 -// MCDC-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 2 -// MCDC-DAG: %[[LAB4:[0-9]+]] = select i1 %[[BOOL]], i32 %[[LAB2]], i32 %[[LAB3]] +// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[BOOL]] to i32 +// MCDC-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 4 +// MCDC-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] // MCDC-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // SHIFT FIFTH CONDITION WITH ID = 3. 
// MCDC: %[[LAB1:[0-9]+]] = load i32, ptr %e.addr, align 4 // MCDC-DAG: %[[BOOL:tobool[0-9]*]] = icmp ne i32 %[[LAB1]], 0 -// MCDC-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 0 -// MCDC-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 0 -// MCDC-DAG: %[[LAB4:[0-9]+]] = select i1 %[[BOOL]], i32 %[[LAB2]], i32 %[[LAB3]] +// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[BOOL]] to i32 +// MCDC-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 3 +// MCDC-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] // MCDC-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // SHIFT SIXTH CONDITION WITH ID = 5. // MCDC: %[[LAB1:[0-9]+]] = load i32, ptr %f.addr, align 4 // MCDC-DAG: %[[BOOL:tobool[0-9]*]] = icmp ne i32 %[[LAB1]], 0 // MCDC-DAG: %[[LNOT:lnot[0-9]*]] = xor i1 %[[BOOL]] -// MCDC-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 8 -// MCDC-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 4 -// MCDC-DAG: %[[LAB4:[0-9]+]] = select i1 %[[LNOT]], i32 %[[LAB2]], i32 %[[LAB3]] +// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[LNOT]] to i32 +// MCDC-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 5 +// MCDC-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] // MCDC-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // UPDATE FINAL BITMASK WITH RESULT. 
-// MCDC-DAG: %[[TEMP0:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC: %[[TEMP:[0-9]+]] = add i32 %[[TEMP0]], 0 +// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 // MCDC: %[[LAB1:[0-9]+]] = lshr i32 %[[TEMP]], 3 // MCDC: %[[LAB4:[0-9]+]] = getelementptr inbounds i8, ptr @__profbm_test, i32 %[[LAB1]] // MCDC: %[[LAB5:[0-9]+]] = and i32 %[[TEMP]], 7 diff --git a/clang/test/Profile/c-mcdc.c b/clang/test/Profile/c-mcdc.c index 251c18baa861dd..823160329b31f8 100644 --- a/clang/test/Profile/c-mcdc.c +++ b/clang/test/Profile/c-mcdc.c @@ -11,80 +11,85 @@ int test(int a, int b, int c, int d, int e, int f) { // NOPROFPASS-NOT: __profbm_test // MCDC BOOKKEEPING. -// MCDC: @__profbm_test = private global [2 x i8] zeroinitializer +// MCDC: @__profbm_test = private global [8 x i8] zeroinitializer // MCDC: @__profc_test = private global [9 x i64] zeroinitializer // ALLOCATE MCDC TEMP AND ZERO IT. // NOPROFPASS-LABEL: @test( -// NOPROFPASS: call void @llvm.instrprof.mcdc.parameters(ptr @__profn_test, i64 [[HASH:[0-9]+]], i32 15) +// NOPROFPASS: call void @llvm.instrprof.mcdc.parameters(ptr @__profn_test, i64 [[HASH:[0-9]+]], i32 8) // MCDC-LABEL: @test( // MCDC: %mcdc.addr = alloca i32, align 4 // MCDC: store i32 0, ptr %mcdc.addr, align 4 // SHIFT FIRST CONDITION WITH ID = 0. 
+// NOPROFPASS: call void @llvm.instrprof.mcdc.condbitmap.update(ptr @__profn_test, i64 [[HASH]], i32 0, ptr %mcdc.addr, i1 %tobool{{[0-9]*}}) // MCDC: %[[LAB1:[0-9]+]] = load i32, ptr %a.addr, align 4 // MCDC-DAG: %[[BOOL:tobool[0-9]*]] = icmp ne i32 %[[LAB1]], 0 -// MCDC-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 0 -// MCDC-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 0 -// MCDC-DAG: %[[LAB4:[0-9]+]] = select i1 %[[BOOL]], i32 %[[LAB2]], i32 %[[LAB3]] +// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[BOOL]] to i32 +// MCDC-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 0 +// MCDC-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] // MCDC-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // SHIFT SECOND CONDITION WITH ID = 2. // NOPROFPASS-LABEL: land.lhs.true: +// NOPROFPASS: call void @llvm.instrprof.mcdc.condbitmap.update(ptr @__profn_test, i64 [[HASH]], i32 2, ptr %mcdc.addr, i1 %tobool{{[0-9]*}}) // MCDC: %[[LAB1:[0-9]+]] = load i32, ptr %b.addr, align 4 // MCDC-DAG: %[[BOOL:tobool[0-9]*]] = icmp ne i32 %[[LAB1]], 0 -// MCDC-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 14 -// MCDC-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 1 -// MCDC-DAG: %[[LAB4:[0-9]+]] = select i1 %[[BOOL]], i32 %[[LAB2]], i32 %[[LAB3]] +// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[BOOL]] to i32 +// MCDC-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 2 +// MCDC-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] // MCDC-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // SHIFT THIRD CONDITION WITH ID = 1. 
// NOPROFPASS-LABEL: lor.rhs: +// NOPROFPASS: call void @llvm.instrprof.mcdc.condbitmap.update(ptr @__profn_test, i64 [[HASH]], i32 1, ptr %mcdc.addr, i1 %tobool{{[0-9]*}}) // MCDC: %[[LAB1:[0-9]+]] = load i32, ptr %c.addr, align 4 // MCDC-DAG: %[[BOOL:tobool[0-9]*]] = icmp ne i32 %[[LAB1]], 0 -// MCDC-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 0 -// MCDC-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 0 -// MCDC-DAG: %[[LAB4:[0-9]+]] = select i1 %[[BOOL]], i32 %[[LAB2]], i32 %[[LAB3]] +// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[BOOL]] to i32 +// MCDC-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 1 +// MCDC-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] // MCDC-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // SHIFT FOURTH CONDITION WITH ID = 4. // NOPROFPASS-LABEL: land.lhs.true3: +// NOPROFPASS: call void @llvm.instrprof.mcdc.condbitmap.update(ptr @__profn_test, i64 [[HASH]], i32 4, ptr %mcdc.addr, i1 %tobool{{[0-9]*}}) // MCDC: %[[LAB1:[0-9]+]] = load i32, ptr %d.addr, align 4 // MCDC-DAG: %[[BOOL:tobool[0-9]*]] = icmp ne i32 %[[LAB1]], 0 -// MCDC-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 12 -// MCDC-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 2 -// MCDC-DAG: %[[LAB4:[0-9]+]] = select i1 %[[BOOL]], i32 %[[LAB2]], i32 %[[LAB3]] +// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[BOOL]] to i32 +// MCDC-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 4 +// MCDC-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] // MCDC-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // SHIFT FIFTH CONDITION WITH ID = 3. 
// NOPROFPASS-LABEL: lor.rhs6: +// NOPROFPASS: call void @llvm.instrprof.mcdc.condbitmap.update(ptr @__profn_test, i64 [[HASH]], i32 3, ptr %mcdc.addr, i1 %tobool{{[0-9]*}}) // MCDC: %[[LAB1:[0-9]+]] = load i32, ptr %e.addr, align 4 // MCDC-DAG: %[[BOOL:tobool[0-9]*]] = icmp ne i32 %[[LAB1]], 0 -// MCDC-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 0 -// MCDC-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 0 -// MCDC-DAG: %[[LAB4:[0-9]+]] = select i1 %[[BOOL]], i32 %[[LAB2]], i32 %[[LAB3]] +// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[BOOL]] to i32 +// MCDC-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 3 +// MCDC-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] // MCDC-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // SHIFT SIXTH CONDITION WITH ID = 5. // NOPROFPASS-LABEL: land.rhs: +// NOPROFPASS: call void @llvm.instrprof.mcdc.condbitmap.update(ptr @__profn_test, i64 [[HASH]], i32 5, ptr %mcdc.addr, i1 %tobool{{[0-9]*}}) // MCDC: %[[LAB1:[0-9]+]] = load i32, ptr %f.addr, align 4 // MCDC-DAG: %[[BOOL:tobool[0-9]*]] = icmp ne i32 %[[LAB1]], 0 -// MCDC-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 8 -// MCDC-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 4 -// MCDC-DAG: %[[LAB4:[0-9]+]] = select i1 %[[BOOL]], i32 %[[LAB2]], i32 %[[LAB3]] +// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[BOOL]] to i32 +// MCDC-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 5 +// MCDC-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] // MCDC-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // UPDATE FINAL BITMASK WITH RESULT. 
// NOPROFPASS-LABEL: lor.end: -// NOPROFPASS: call void @llvm.instrprof.mcdc.tvbitmap.update(ptr @__profn_test, i64 [[HASH]], i32 0, i32 0, ptr %mcdc.addr) -// MCDC-DAG: %[[TEMP0:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC: %[[TEMP:[0-9]+]] = add i32 %[[TEMP0]], 0 +// NOPROFPASS: call void @llvm.instrprof.mcdc.tvbitmap.update(ptr @__profn_test, i64 [[HASH]], i32 8, i32 0, ptr %mcdc.addr) +// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 // MCDC: %[[LAB1:[0-9]+]] = lshr i32 %[[TEMP]], 3 // MCDC: %[[LAB4:[0-9]+]] = getelementptr inbounds i8, ptr @__profbm_test, i32 %[[LAB1]] // MCDC: %[[LAB5:[0-9]+]] = and i32 %[[TEMP]], 7 diff --git a/llvm/docs/CoverageMappingFormat.rst b/llvm/docs/CoverageMappingFormat.rst index 96bdf8fa71be73..f2ae8df5ad7f82 100644 --- a/llvm/docs/CoverageMappingFormat.rst +++ b/llvm/docs/CoverageMappingFormat.rst @@ -148,7 +148,7 @@ There are several kinds of mapping regions: * Decision regions associate multiple branch regions with a boolean expression in the source code. This information also includes the number of - bitmap bits needed to represent the expression's executed test vectors as + bitmap bytes needed to represent the expression's executed test vectors as well as the total number of instrumentable branch conditions that comprise the expression. Decision regions are used to visualize Modified Condition/Decision Coverage (MC/DC) in *llvm-cov* for each boolean diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 10d53bea149ef6..f39b8dc6c90d47 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -14411,7 +14411,7 @@ Syntax: :: declare void @llvm.instrprof.mcdc.parameters(ptr , i64 , - i32 ) + i32 ) Overview: """"""""" @@ -14429,7 +14429,7 @@ name of the entity being instrumented. This should generally be the The second argument is a hash value that can be used by the consumer of the profile data to detect changes to the instrumented source. 
-The third argument is the number of bitmap bits required by the function to +The third argument is the number of bitmap bytes required by the function to record the number of test vectors executed for each boolean expression. Semantics: @@ -14496,7 +14496,7 @@ Syntax: :: declare void @llvm.instrprof.mcdc.tvbitmap.update(ptr , i64 , - i32 ) + i32 ) i32 , ptr ) @@ -14506,9 +14506,10 @@ Overview: The '``llvm.instrprof.mcdc.tvbitmap.update``' intrinsic is used to track MC/DC test vector execution after each boolean expression has been fully executed. The overall value of the condition bitmap, after it has been successively -updated with the true or false evaluation of each condition, uniquely identifies -an executed MC/DC test vector and is used as a bit index into the global test -vector bitmap. +updated using the '``llvm.instrprof.mcdc.condbitmap.update``' intrinsic with +the true or false evaluation of each condition, uniquely identifies an executed +MC/DC test vector and is used as a bit index into the global test vector +bitmap. Arguments: """""""""" @@ -14520,9 +14521,10 @@ name of the entity being instrumented. This should generally be the The second argument is a hash value that can be used by the consumer of the profile data to detect changes to the instrumented source. -The third argument is not used. +The third argument is the number of bitmap bytes required by the function to +record the number of test vectors executed for each boolean expression. -The fourth argument is the bit index into the global test vector bitmap +The fourth argument is the byte index into the global test vector bitmap corresponding to the function. 
The fifth argument is the address of the condition bitmap, which contains a diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h index 1ac4a5fffb43bb..9010e1a1c896bf 100644 --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -33,7 +33,6 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/MathExtras.h" #include #include #include @@ -1581,16 +1580,10 @@ class InstrProfMCDCBitmapInstBase : public InstrProfInstBase { return isa(V) && classof(cast(V)); } - /// \return The number of bits used for the MCDC bitmaps for the instrumented - /// function. - ConstantInt *getNumBitmapBits() const { - return cast(const_cast(getArgOperand(2))); - } - /// \return The number of bytes used for the MCDC bitmaps for the instrumented /// function. - auto getNumBitmapBytes() const { - return alignTo(getNumBitmapBits()->getZExtValue(), CHAR_BIT) / CHAR_BIT; + ConstantInt *getNumBitmapBytes() const { + return cast(const_cast(getArgOperand(2))); } }; diff --git a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h index 5fc497db8df54a..da031040452491 100644 --- a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h +++ b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h @@ -677,8 +677,7 @@ class CounterMappingContext { /// pairs. Expected evaluateMCDCRegion(const CounterMappingRegion &Region, - ArrayRef Branches, - bool IsVersion11); + ArrayRef Branches); unsigned getMaxCounterID(const Counter &C) const; }; diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index 0c899e6d84965c..5cd8c3283373e3 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -1161,7 +1161,7 @@ enum ProfVersion { Version10 = 10, // An additional field is used for bitmap bytes. 
Version11 = 11, - // VTable profiling, decision record and bitmap are modified for mcdc. + // VTable profiling, Version12 = 12, // The current version is 12. CurrentVersion = INSTR_PROF_INDEX_VERSION diff --git a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp index 455124efd5b248..8c81bbe8e9c4ee 100644 --- a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp @@ -384,18 +384,15 @@ class MCDCRecordProcessor : NextIDsBuilder, mcdc::TVIdxBuilder { DenseSet TVIdxs; #endif - bool IsVersion11; - public: MCDCRecordProcessor(const BitVector &Bitmap, const CounterMappingRegion &Region, - ArrayRef Branches, - bool IsVersion11) + ArrayRef Branches) : NextIDsBuilder(Branches), TVIdxBuilder(this->NextIDs), Bitmap(Bitmap), Region(Region), DecisionParams(Region.getDecisionParams()), Branches(Branches), NumConditions(DecisionParams.NumConditions), Folded(NumConditions, false), IndependencePairs(NumConditions), - ExecVectors(ExecVectorsByCond[false]), IsVersion11(IsVersion11) {} + ExecVectors(ExecVectorsByCond[false]) {} private: // Walk the binary decision diagram and try assigning both false and true to @@ -418,9 +415,7 @@ class MCDCRecordProcessor : NextIDsBuilder, mcdc::TVIdxBuilder { assert(TVIdx < SavedNodes[ID].Width); assert(TVIdxs.insert(NextTVIdx).second && "Duplicate TVIdx"); - if (!Bitmap[IsVersion11 - ? DecisionParams.BitmapIdx * CHAR_BIT + TV.getIndex() - : DecisionParams.BitmapIdx - NumTestVectors + NextTVIdx]) + if (!Bitmap[DecisionParams.BitmapIdx * CHAR_BIT + TV.getIndex()]) continue; // Copy the completed test vector to the vector of testvectors. 
@@ -526,9 +521,9 @@ class MCDCRecordProcessor : NextIDsBuilder, mcdc::TVIdxBuilder { Expected CounterMappingContext::evaluateMCDCRegion( const CounterMappingRegion &Region, - ArrayRef Branches, bool IsVersion11) { + ArrayRef Branches) { - MCDCRecordProcessor MCDCProcessor(Bitmap, Region, Branches, IsVersion11); + MCDCRecordProcessor MCDCProcessor(Bitmap, Region, Branches); return MCDCProcessor.processMCDCRecord(); } @@ -615,8 +610,8 @@ static unsigned getMaxCounterID(const CounterMappingContext &Ctx, } /// Returns the bit count -static unsigned getMaxBitmapSize(const CoverageMappingRecord &Record, - bool IsVersion11) { +static unsigned getMaxBitmapSize(const CounterMappingContext &Ctx, + const CoverageMappingRecord &Record) { unsigned MaxBitmapIdx = 0; unsigned NumConditions = 0; // Scan max(BitmapIdx). @@ -631,12 +626,8 @@ static unsigned getMaxBitmapSize(const CoverageMappingRecord &Record, NumConditions = DecisionParams.NumConditions; } } - - if (IsVersion11) - MaxBitmapIdx = MaxBitmapIdx * CHAR_BIT + - llvm::alignTo(uint64_t(1) << NumConditions, CHAR_BIT); - - return MaxBitmapIdx; + unsigned SizeInBits = llvm::alignTo(uint64_t(1) << NumConditions, CHAR_BIT); + return MaxBitmapIdx * CHAR_BIT + SizeInBits; } namespace { @@ -824,9 +815,6 @@ Error CoverageMapping::loadFunctionRecord( } Ctx.setCounts(Counts); - bool IsVersion11 = - ProfileReader.getVersion() < IndexedInstrProf::ProfVersion::Version12; - BitVector Bitmap; if (Error E = ProfileReader.getFunctionBitmap(Record.FunctionName, Record.FunctionHash, Bitmap)) { @@ -838,7 +826,7 @@ Error CoverageMapping::loadFunctionRecord( } if (IPE != instrprof_error::unknown_function) return make_error(IPE); - Bitmap = BitVector(getMaxBitmapSize(Record, IsVersion11)); + Bitmap = BitVector(getMaxBitmapSize(Ctx, Record)); } Ctx.setBitmap(std::move(Bitmap)); @@ -896,7 +884,7 @@ Error CoverageMapping::loadFunctionRecord( // DecisionRegion, all of the information is now available to process. 
// This is where the bulk of the MC/DC progressing takes place. Expected Record = - Ctx.evaluateMCDCRegion(*MCDCDecision, MCDCBranches, IsVersion11); + Ctx.evaluateMCDCRegion(*MCDCDecision, MCDCBranches); if (auto E = Record.takeError()) { consumeError(std::move(E)); return Error::success(); diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index 0c79eaa812b5fc..f9b58d9f278214 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -936,6 +936,9 @@ Value *InstrLowerer::getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I) { auto *Bitmaps = getOrCreateRegionBitmaps(I); IRBuilder<> Builder(I); + auto *Addr = Builder.CreateConstInBoundsGEP2_32( + Bitmaps->getValueType(), Bitmaps, 0, I->getBitmapIndex()->getZExtValue()); + if (isRuntimeCounterRelocationEnabled()) { LLVMContext &Ctx = M.getContext(); Ctx.diagnose(DiagnosticInfoPGOProfile( @@ -945,7 +948,7 @@ Value *InstrLowerer::getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I) { DS_Warning)); } - return Bitmaps; + return Addr; } void InstrLowerer::lowerCover(InstrProfCoverInst *CoverInstruction) { @@ -1015,11 +1018,9 @@ void InstrLowerer::lowerMCDCTestVectorBitmapUpdate( auto *MCDCCondBitmapAddr = Update->getMCDCCondBitmapAddr(); auto *BitmapAddr = getBitmapAddress(Update); - // Load Temp Val + BitmapIdx. + // Load Temp Val. // %mcdc.temp = load i32, ptr %mcdc.addr, align 4 - auto *Temp = Builder.CreateAdd( - Builder.CreateLoad(Int32Ty, MCDCCondBitmapAddr, "mcdc.temp"), - Update->getBitmapIndex()); + auto *Temp = Builder.CreateLoad(Int32Ty, MCDCCondBitmapAddr, "mcdc.temp"); // Calculate byte offset using div8. 
// %1 = lshr i32 %mcdc.temp, 3 @@ -1414,7 +1415,7 @@ GlobalVariable * InstrLowerer::createRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc, StringRef Name, GlobalValue::LinkageTypes Linkage) { - uint64_t NumBytes = Inc->getNumBitmapBytes(); + uint64_t NumBytes = Inc->getNumBitmapBytes()->getZExtValue(); auto *BitmapTy = ArrayType::get(Type::getInt8Ty(M.getContext()), NumBytes); auto GV = new GlobalVariable(M, BitmapTy, false, Linkage, Constant::getNullValue(BitmapTy), Name); @@ -1433,7 +1434,7 @@ InstrLowerer::getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc) { // the corresponding profile section. auto *BitmapPtr = setupProfileSection(Inc, IPSK_bitmap); PD.RegionBitmaps = BitmapPtr; - PD.NumBitmapBytes = Inc->getNumBitmapBytes(); + PD.NumBitmapBytes = Inc->getNumBitmapBytes()->getZExtValue(); return PD.RegionBitmaps; } diff --git a/llvm/test/Instrumentation/InstrProfiling/inline-data-var-create.ll b/llvm/test/Instrumentation/InstrProfiling/inline-data-var-create.ll index 456103164378e9..7c064f547141f3 100644 --- a/llvm/test/Instrumentation/InstrProfiling/inline-data-var-create.ll +++ b/llvm/test/Instrumentation/InstrProfiling/inline-data-var-create.ll @@ -27,21 +27,21 @@ declare void @llvm.instrprof.mcdc.parameters(ptr %0, i64 %1, i32 %2) define internal void @foobar() { call void @llvm.instrprof.increment(ptr @__profn_foobar, i64 123456, i32 32, i32 0) - call void @llvm.instrprof.mcdc.parameters(ptr @__profn_foobar, i64 123456, i32 792) + call void @llvm.instrprof.mcdc.parameters(ptr @__profn_foobar, i64 123456, i32 99) ret void } define void @foo() { call void @llvm.instrprof.increment(ptr @__profn_foo, i64 123456, i32 32, i32 0) - call void @llvm.instrprof.mcdc.parameters(ptr @__profn_foo, i64 123456, i32 168) + call void @llvm.instrprof.mcdc.parameters(ptr @__profn_foo, i64 123456, i32 21) call void @foobar() ret void } define void @bar() { call void @llvm.instrprof.increment(ptr @__profn_bar, i64 123456, i32 32, i32 0) - call void 
@llvm.instrprof.mcdc.parameters(ptr @__profn_bar, i64 123456, i32 184) + call void @llvm.instrprof.mcdc.parameters(ptr @__profn_bar, i64 123456, i32 23) call void @foobar() ret void } diff --git a/llvm/test/Instrumentation/InstrProfiling/mcdc.ll b/llvm/test/Instrumentation/InstrProfiling/mcdc.ll index e9ae80891ea6e6..a7f1e606e35fa0 100644 --- a/llvm/test/Instrumentation/InstrProfiling/mcdc.ll +++ b/llvm/test/Instrumentation/InstrProfiling/mcdc.ll @@ -30,8 +30,7 @@ entry: ; CHECK-NEXT: store i32 %[[LAB3]], ptr %mcdc.addr, align 4 call void @llvm.instrprof.mcdc.tvbitmap.update(ptr @__profn_test, i64 99278, i32 1, i32 0, ptr %mcdc.addr) - ; CHECK: %[[TEMP0:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 - ; CHECK-NEXT: %[[TEMP:[0-9]+]] = add i32 %[[TEMP0]], 0 + ; CHECK: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 ; CHECK-NEXT: %[[LAB4:[0-9]+]] = lshr i32 %[[TEMP]], 3 ; CHECK-NEXT: %[[LAB7:[0-9]+]] = getelementptr inbounds i8, ptr @__profbm_test, i32 %[[LAB4]] ; CHECK-NEXT: %[[LAB8:[0-9]+]] = and i32 %[[TEMP]], 7 diff --git a/llvm/test/tools/llvm-cov/Inputs/mcdc-const-folding.o b/llvm/test/tools/llvm-cov/Inputs/mcdc-const-folding.o index 7a16162f29c00777d9a05f5fed1cc4cbf333c56c..4f54fa7b8a1a1492a9a455bbf6fa651844345c59 100644 GIT binary patch delta 6068 zcmai2dr(x@8Q*gSVHI6K;BrwBf|+1?Q(YBVFtTid5u!Fcd_Te?%0qmBg0dKetzsb6 z8OYb9WG0zNr)kr)X;@7<(SNc`G~&c*HBB;UO{daHGj@oRFt!X!?9ku2dk&m#)AY>T zZ_oYxzVrLO^WAgKy>RP{Hh4zs&SZl2Hnfh3rYOu2u?qM!L$$3(MT-1;?|$(BYqubyK)eF<7~GBrjfFFnV)20r|2bRrj(1KVfuJLU|$ z_igdWGc6%oXr4oWrZjOQe)3O_Hq8-zq!2Yc!x`|i;Rt`EGe9G(C^Cxo$*rZdeMhnp zAKr?#W;!*W$(a6#em=4L{xhy0YKYxDQ^3xv&OrFg&Y>jM*kyMe2O_CCa(>)H*GGCYQKB#8X405p5MI!q4aqn@1l_ zbGBIpat6+^@bs-GI@+8n#QhPFYK{%dMw59=RZfhXTnXP&Tv3_P=4FM6p(Vzv%=I&- zQceAinqnjtRC;V^(9}a9d*p^TS%Z$7j^{D2sU`P*!9s@(&%#=9)sDk~YR5N&cO7lk zeaFqFEHSzKXS4X4%y0i)bSH)s{%94yofNcyp0yHfzZwzB${IMlO$dd&Bc%5NyYyys zdisL0(59Qr;(f901kh$vkB@jP!qwxDZ-FE~Wb%R-1d z-xk7or{iY69tXp12gd-Lu2Pp}~ zAnWOu7JegjWul9(QM6cG0cLe=8I-N~tyC<-6+PNT|J$}AzL60LY 
zT@1Q3-DZooX||=5zF0S^1-e4~whHzoDgpb3Kr_Ve62V?4AJ_;HW~pF5#)-?7SCD_P zUbIHAGVU{itrhqhF{(n~ZwkCr;5hp!!nTtHIh1?pOc`!8Y8HbB|^HxcXVCzFS z7Mq0ry@^meLVZwp#VfBk;>jx~^Rd$aJQHaI_I_>+r&tGkUYUw#4yCWq4Y$HR1oj&-uXR)ccC*4R2b+M4 zw@UJXtykC_pfZPpp_icT~2!KagM!1i)$yrOvWQ&{HzD`G2k<6VW_rRZECC)iIE zmhW_#&*xORQZE_~W2N#x6}(Q-oKWxv1)og=h?Ojc^SbR<*!yS%>@tOI1}igpl+qv8 z*;*=r-OjC1qj(-tbPkXYK0flp_Hb+TDxR(I6t}k5Xb9|E3frRSd_ZxlboLQ>VE@8x z(HDwml;ZiNg5xDKd4~K$!TG+FD`YzLt(ZEms{3NEKg^TQDvTP zY*hT!6L=@}z^Bf#O_t}0|Ka%@4M68bg;mezpVA1}*A+HR$;xjjzoW2f4j)juTW5cy z64<|UTlAIUnW5JpwQM;x=Ht6W8MPRrWVH`aAJ`)byIf(9S(+y;&0b<2yuTDr{+%Lq0^|hy zd%^PT&nPV3Umm^aGi9s?tbEseuHfpadoK)^dm)7e5Ni=%f@nNeH!hipi$=h%QP}4c z_6L;yh;BS7Sb26y<#s9o+pUcKu`>1*@`3%0!oHxeZs8d)`M5`;A`M<*-uMB_8&8;D)#NPHjh8rtb~5xc$-Y6AV6O}G*I{e~^?+R=Ua=pA zv1@1mto*3F7skFxBVZX%0d1kskXgl3dJztH!LAEqTd1T+FS;z)rZ851RCb2pY2q+_ zDGbL)2Grz7<)tuIepG%MhRcu2FT?P?6!*B!E}UQVxFedqNI9izSTgxav+eE6LR0Og zfzoVNOTkh%+d?^IZq`6v*w?In3kA_1v-<7i-Qd;=?UY-VhZj^3H@%^oZt`w)YqzZ% z8Y@kpm`$eUngPdBI*o28@8;218JNwc{ZoD_qE(S+!&*8GB!8wv27ttDG3~ESBA$)# z0TL_DgDM&T60_B`w@f0QP4EG7YYHjLrm;b{a znLz6*5hk9-%kx+3m!| zU#DO-gm*!>#zZjhE(q7)$xZn-OJ5s#YazTF!nLMmOQ7-H5Uw-r(c&0E7JFSR!OCI@ z1?wPO1K~ZUR)K$J)s)h;J*K^O%K4~tt{`tclxv|}Z?XUdYs+ZeUei7(d^3sFO5bh@ z?uGCk2=6nsC5hC!2g2yO&^_+P9~Zp%0rKvLVm%c1o7&k#8m~vL2Qab3P!Tf8_mnH- ztbJWT;|KD{wGWyNrWTt-t^1(aVA_-UYZMI$RAp=VNtSjRc^jdCo$z9w%23Vg`a#t>w$o9UKQJ9(R+*a*cY6icV^MkqF; zSh|ewt9A+JehM~2@emXbo9sGy4?*#;seP(b>j5_o1|Iend5=J`35rLs2$E^M35qRP z1j$m7Rk?(7ECpMjco>Rall7DLFciJmCCSu!P%7q=w-t&}NKnTAyK z)w!5qg>RtXQ7C$$__V2Y8`SDWLDB6+x7jVc&XV^SBwHbQ3=1lS##eVE%Bm8zj-)g6>i2RYBepkZOn22~!KqrS$kDiuFBA>rR^X0RK2c xi?&wAU`V%{f+yW9nsPea%t77`x0X5I8g;!RlW%&!0m?bWOX`K~qzk7W_&3g0Q!xMl delta 5899 zcmai04Qx}_74|hDF*Lz(34QTOe!MP_Zf1!EB!NI3^V208h=$*kUq2ece}2FTA!&#M zH4AGy)jC3}v@*q-rh(Q;F-g~IQmY_3+D@BV5TmH9T0C_t6J;YJqNS~tedqeVC}BCqJF|7qV=1TLsie*3uUQLzlcw{l@_=AhRzH6yQs-f z9_GydABzD*`|Pvl;x{susvn=yFEB^zc$IdFrR7XV>(YJi^ov{bN!Sazzsm9rjkiKo 
z7r7TI9A6NxBxmwU;&$GGN$a1;9G_n9Q!0sGnc|$G$fxeJb`gcs3esgm?DUH%tGmY= z^EGAh*5<49I95B>nw?S_nlQ27<9l~6H0e;Fe_th?7g%??Z-(sE_#AXJZH_&zTFLl$ zD<4d@a!hadKhxIdl&Rcs79TJBr4`?D+PL-EXxp4*P$YvmXa2aX<-AB9QI#6G;XFO$ zJdMmy?pc1Gqm_FxB1N3nD9u$c<2iHiP)O+#ktPAO3wBXH`GD zkc&iD>n2s5Cgr{^UC}HRJ5;DAeYO|7@v5l0@^O`8DJxcwT~bVhRY@w)JBwaVH|9(} zCo6sTBDVw1)0bBI#v(T`cq^$zchXC#bgE;CzQ)OW`PJ0Gptiv)dh02evH1Va7r64# z3P%wQWjt0Ixt(%c#;5Szj@+nnp8j3G_5$f$LEmxAllx+$I>$UQg-$xwz+RJf&NLpk zNqb4!ho*C+LE7I;-zd^3>w(>{tBN_HWXzp23ku7u!xQ;4#R{60Ux{&T{#>z!j=~mEEWdKr zV%|es*|>*<9NLO#B!A|kYm$ycZYsoztpN6=iZ#)^0*}}{u%;mDbQP(jH`8sl2W@s+ z8I63`aPL#mGWFY`&_42%81A@=TDhePy+*Y_r>kkK+&`iJtXxGsB}PrHRkG6fp+ehK zoD1bEsN%m>@wLgYZIOy+QUY`3*^}WSh2Ba_&b5qOrG`7AP^$@N6nd2cKxa&uoV!M$ z^QooO=#D7VnpY_FeHFL1;RE=*Iu~%htV#w{yiCQd8lF?}NJ@N>ieF8}?Y5?YX}(|D zMGnR18}5RnIOeh0GI6V9TL~rR8)6B$78qg`TlW@?=~8SC?VaN-;2ht}!3hnCIv+&_N9?)Xq%H8fvC|4hDxhWJF<`(%PSRqZaX>k*CV zq$rpLbOE-6t-DfV7HiDqGy=3nL+1hgLal2(dCCm;4h>xZGzT|dO;iiCQ$rUaYW3te z^#J{G3OZY6+H9vO4m6g6^4{?E3MGKPuAz@1YAL@*u5v>ROZzbe%8kI?6sDz{Tv~jq7N1RV%;nY4%^F%p382*)Y67)-wUJzl4Y8d9u#If9 z!Wwf2buE6F!iyaOPZ=1$V(UJyF&dy{)%^xt26{z9>$N&QAW>t(h&9JSz?F~TX$Gv z9@cbvsTQ1wZ;Za#yXY-k>PZH#O9)RrLqD0Q4^!%I{)prMJoBH^hBu$EX(L zEOJ#DBA0snhC3hs=zMYC^eivM!K{$hPYEzrYRpH#wrX1^)wWOon!DM$+cc*B1`ksU z&{Gp$kxhb@Hx~2;iNeP(CcrX%*fjU%=@8T+3aqq4K!E z=MDsLTYYozF~Y@h{ry^)61QpyXz|C9WA&#t8K<`bE_YCIxWcH$X`&I--=-<(Z>f{y zS!TG;YN-B}dW~vWVuq4+Yxm+e3TE?%krwCAfhSNkGvYen9H z`^z$>iZT+_6CaQ)%NL9A-A6v4)(0e~^JSse$OLMAKn_W40Fa;av5i?N349r#MjSD=0K8s3mYqiemQmptqI z6x^6Yp>#M!aFOy;8tKydPO#Q zHUon<3a?#y!EL}cd+}`~F?gf!s>!nj^KA#V#Vh)yHw_i zi+3OauW&Ci8n~0pdqMJUQlbIKJ|Mfj&}UvByg_)^CGU>pL3q|qkm9R}Kr-xjbBucEp^#pqsC{D*~3pk;y6Fku^xFMd%!&Dpkij6 zkW8G+7#OG@wqNqv$vnZmioxEY1ngPzJi}uX_A|*jO9_lC$P@7kKbf#~6q7aqd!0Nd Nd0$LeygN?L{SRp}gY^Ia diff --git a/llvm/test/tools/llvm-cov/Inputs/mcdc-const-folding.proftext b/llvm/test/tools/llvm-cov/Inputs/mcdc-const-folding.proftext index 97d78a39f43562..82335a8195faa4 100644 --- 
a/llvm/test/tools/llvm-cov/Inputs/mcdc-const-folding.proftext +++ b/llvm/test/tools/llvm-cov/Inputs/mcdc-const-folding.proftext @@ -40,7 +40,7 @@ _Z5case2b # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0x6 +0xa _Z5case3b @@ -55,7 +55,7 @@ _Z5case3b # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0x5 +0x9 _Z5case4b @@ -70,7 +70,7 @@ _Z5case4b # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0x1 +0x2 _Z5case5b @@ -85,7 +85,7 @@ _Z5case5b # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0x5 +0x6 _Z5case6b @@ -100,7 +100,7 @@ _Z5case6b # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0x6 +0x5 _Z5case7b @@ -166,7 +166,7 @@ _Z5caseabb # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0xe +0xa2 _Z5casebbb @@ -183,7 +183,7 @@ _Z5casebbb # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0xd +0xa1 _Z5casecbb @@ -200,7 +200,7 @@ _Z5casecbb # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0x1 +0x2 _Z5casedbb @@ -217,7 +217,7 @@ _Z5casedbb # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0x3 +0x12 _Z5caseebb @@ -234,7 +234,7 @@ _Z5caseebb # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0xa +0x14 _Z5casefbb @@ -251,7 +251,7 @@ _Z5casefbb # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0x9 +0x6 _Z5casegbb @@ -268,7 +268,7 @@ _Z5casegbb # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0x7 +0x23 _Z5casehbb @@ -302,7 +302,7 @@ _Z5caseibb # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0xb +0x83 _Z5casejbb @@ -319,7 +319,7 @@ _Z5casejbb # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0xd +0xa1 _Z5casekbb @@ -336,7 +336,7 @@ _Z5casekbb # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0x3 +0x12 _Z5caselbb @@ -353,7 +353,7 @@ _Z5caselbb # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0x3 +0x12 _Z5casembb @@ -370,7 +370,7 @@ _Z5casembb # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0x3 +0x12 _Z5casenbb @@ -387,7 +387,7 @@ _Z5casenbb # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0x9 +0x6 main diff --git a/llvm/test/tools/llvm-cov/Inputs/mcdc-const.o b/llvm/test/tools/llvm-cov/Inputs/mcdc-const.o index 
bc38b71b5de072cedeb2172d160d6705d4855c47..1145fcc6f7125452262d0745b9de0591c1e48cda 100644 GIT binary patch delta 61 zcmcbiaYJK5A|unpq;w#0%9@dBG8>}_khEqD2a=N+yBQfa%QHDLPVV3p*j&Ku!NbTh Nc_Op?<{SJCEC3mc5Xk@l delta 59 zcmcbiaYJK5A|uPhq;w#0%9@d7G8>}_khEqD2a=N+yBWDR%QHDLGBRyWWDe(HWSTsY LS$^{k{stBR?>Y~s diff --git a/llvm/test/tools/llvm-cov/Inputs/mcdc-const.proftext b/llvm/test/tools/llvm-cov/Inputs/mcdc-const.proftext index 16eba3a0180803..10253f26e0a680 100644 --- a/llvm/test/tools/llvm-cov/Inputs/mcdc-const.proftext +++ b/llvm/test/tools/llvm-cov/Inputs/mcdc-const.proftext @@ -14,10 +14,12 @@ _Z4testbbbb 0 0 # Num Bitmap Bytes: -$2 +$4 # Bitmap Byte Values: 0x1 -0x4 +0x2 +0x0 +0x0 main diff --git a/llvm/test/tools/llvm-cov/Inputs/mcdc-general-18.o b/llvm/test/tools/llvm-cov/Inputs/mcdc-general-18.o deleted file mode 100644 index e802f51132c068f0f4e2921c952710cb5c5665e5..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6456 zcmd5=eQaA-6~FJjWU*VvcC)5U(xq}rH?p!k>?X~c(jYsHvs7eLwzAQ|z;zubjTV2% zd5H!L7F4%e5a~8hCH_Df|1qHN&o~k&z2nn6Lm{Z zC|`2!B`h8~_tr|&n}B?okPBBWG95Qwd)~P8d!JeLIE)^z;#c{y(@$`+Tyb`+r=+UP zu6#LEehzeKf2kD>%(d^rx4SlCt6&s5^&LRgc0w<_nDKK){L+=UZy=0b{s0Ob+^yVo z;H!qd?i`^tRPs}3!C1Wfmr(fy&(O<@J*{|U5VU?nySdHy7yV z>nnjI)|@O~caG97wB~MFHi|ud@+ZE}CBBeIeSQs$xhw0NHt5UH3R<YH`(dXikSd zxIn%+Fo~WU&3~*SolBJb$Id@^5hev&0)(NFzP|diT;!cyZicL#_(`rD49GQH8y>0vl%uFVHie>2pOV9?C2Fu z$PD2V{%EUdSSGGoMXLx#x7=hwY*_il&0>dNY!&U%_DyD!z*W2G6kXBIAV|1s7PlCu zmX?-$%|^4>T@B*HAg#Toi7eXFW{6fR=0~TH|Xs(L9sF;)Y}A=8L-C6hIip{1FKiVEjH@AR z#E?`;?!n4HB`H-7+gKT>>=R)($UY1LnE2;gD}NfCI(zwMnFS(ns#N(?-mnM>zqB8%CQ zw2ckfQ9C*_>)@2TWPIs9VLTExb_7~{PeC%{0N+A;a)cAX@JO&@Bp68qdlSLv$>uXn zPgsJQK5xEym1bxY1G*^qj6bo^JeEk+V_%h!P`f!PLWABhl0QhG3AmL9)5I!Lk zfcU2pdyaYYyy2csc%V0l_bNJ$GQROTeGLFZY_G!Hml;S5ah92<2=65e#=nY%K;(L4 z3I7JzlKPuA;0RdNPI%iLQa|7U%F2=*Qvh08vWvC&52YBc1*`hpX>WT~F5#uFLlzuc z^~kDrt#;QE+iC%jB2ZVG@?7N10sblY)XVZsz)_#|r(Uc(!mNaPIz9aj6G4ff(oK+wa0voZpcH3XV^i{Uj9}{qvK1P{ZY+3)sUNeh0}5 
z3aucVLxXy{88yuh_f0#NBDCZ zUMBoS4d*$#py1da62%(|j`>WHpI<6CURk+6zaw1l&mR?ib*|piaDFcDE4bQ+7HlLC z_tTYx=u&VzZ#bVG1xG(0P=Gxe9w0}16&z0?_H#hN(GTk?;_}5l|htu6!ee#Na{YWRT=kD5APKo*(7TTXm;mHe~9gj|+SLi#f@$2};3JX(tWN3&`yZ+?NO4>g)mH{ncIY z6tiOhADTQIOGW81{m<+UICt>(R;N8QK#$Y8#QT~ke@7AXZZNK9oDBY|4i{iJ|0>-H z0%CC$g!QvH+(1|5{|AKVhBdn48xha(y$~GZ>o>h96~*rXs7PX&`Qw0Yygdrh+v4hT zJdBUGK+c~z-Zx}V%m}PJic@^P&+!iXW4TfMXr1_Y+vm8<8^u3Q3Vd(mczXP2U_<6F zBCI0$yJX*6gL?gExKaN5>*SBIIdA620K@qBx4%jq^xXBia&z@G8^w=Pe16XO{P3?9 zj>~)!uzK+$6#v^^D7n)6mjL5>_Ailr{6p*yg0aj+tN$sw=ts#O^T+d>HRAJfY@HoRIctLd;U9xx7nWg2yEb*=Y;jn zkbNfu#MzGd+ko?0RsEj+;%WA*`w!Q?sEx09mh6{UkvR8{;lIHGXCcl2@ptR=zYF%* vp8J0VFl(Yx0Ja35Dn-KXlK)eI?+=n*>i__H?x=Ty<~L58XovSsJ^p_HjJhJT diff --git a/llvm/test/tools/llvm-cov/Inputs/mcdc-general-18.profdata b/llvm/test/tools/llvm-cov/Inputs/mcdc-general-18.profdata deleted file mode 100644 index d351096967dbf315b7bedf94441be0f19a936ba3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 888 zcmeyLQ&5zjmf6V7fE`3Iqly=x@!6o#0#KR>N{c{gb~Kt5$~T140_so(8nvK}5iA1J z58*K|IPgLo05b>XoClLx!0OS(8>ApEgvc^5z-Wbq5R+m0VfGwQ=LDOBE}pQC3oH)v zA1{~(Busg*iZ9p=Q2}!|EIdBc@qyK&iyKJuV-=6E!!CZu5xcmV6LxWX9O6t2zpnN^ zH%pm%8x}GeV0SPuPiEWlv_4U0G!4GUgLxCihtV*7umr6SbtlY= cFt0(92LnShRK5XC9h3_*4rV+9oB>e)05F*>P5=M^ diff --git a/llvm/test/tools/llvm-cov/Inputs/mcdc-general.o b/llvm/test/tools/llvm-cov/Inputs/mcdc-general.o index 8bed601221a1459b613300a7386e5251a3fdce1d..e802f51132c068f0f4e2921c952710cb5c5665e5 100644 GIT binary patch delta 1785 zcmZ`(eP|PB7{B)}jk%cIrEQWoH^u1`P0IdgTDK1EdUvwS2-{e7NQZN!AhUHFqwF?> zF{qQKC{`&8RlTApewwyYWY-X6K0uQo_{M()}p|79-?SDnW=qEDW)3y~x4v=ie< zzK~$j`OcAx9I^Gu>xD>@T6SlhZjM>{U$uMp zo`bjKCz2|U3^5G=&nBGZI)j_RocK(_7Vv`RrV1Xlt2ejU~8b0 z1WE@39}*G-61)=4W8LxWQGXynoSlw^5*DewRSK{NB-2&w>B{?8rYP?*h*}(6w zLpDs|U^oKbg(GAb`cV6kpjFWSr&>EjC}O_juz>a}FZUVdXU__H75#s?{_-+-g~euN z_TlmPU3b>5l3Is9!*LyEB9u^=SV@ibb()vBW-Hu8HYc#GAKp&&IcSVfW0$}NkoBkQ 
z0UOy~fn~dNc;@|H^>?5zN{#oI(Tu_u*x^waM0*OdQM%)sI;FqavVgM=`Bl0o;H>&S zUi}Ibqm=x@IwZfdPWl!Er8wiFfQ$XF;J+C5VtW5PBH%Fu{cWs+ZjBl(0?V>Rcvu@8 zMs~|GtPK+yCEH<8r^X9{hSl|d8@pivH~C5(>wl@Yp;4#gh-V96W1gdeQtae2R%ORY zffcKi1YC?dE#Ttze();Zf*Jke#y9w^Ov;{QX)t!5kQNwd+D`t0iKdS53BJj>2DsDo zq|&NLFcC_C)tn}27-%+$1>=@F5Az^Eq!ctoZsT~F-(zGU}HAd04s15YNR4`jZ z`T}hi!y2pLd9a%J!;?=YAhE)OmQ8u+)-`ph1?ix+X5q<2DEr-9cvo|`{$|aQSUv_pil^A>5kzJzXJyiPqwF zac$46r4NX^cInE6kbOQUXJ*r9V1U&QQc#P>^Woyl6Kr8I$P0~djV@Yd4hsmkp)fVU zmS#{p_n}lCt7yZQ5Ls%*Uj8=)LqenwpEt_~Db;`FZ>RgjJ@bQ>`Ei{J=}s$d_=U$P z?iQ&P?N#1okM?@LN4uL_(FVD_j;~{>+2X;_kdrEvUwTK9o{~cT4XKA#7%nL@d-`d& zmb3F28Eds?Zl{k4oEFzB?w`xq+`=zPB`~?BJ)0|Taf=j{P;f(f{!kOu3yTVGvg7=m z^1OY_K7RDd1j~Ns!o}a<8F{ys73rNNcf+{6_aPe5vCp}aI%YMj%8fp35_bBIvrX`} z?*t3Nim!=r&Xa`gZ@Bos?jkd0xR&iW%&!D z?cyQfw;yVPhIv$2F1th6y|5fG%u^+7nXpg8S&Tg+EJCT@duxuo0h2+4y+;|?hg^Tg zktv?@vWTLJ0X%m_)c4$Tk?TLm-@N1Tq=}vo_I-#}7-kv$6j9vHA9YMA0^`VTEn!~} zY#sC?yR(F)XCm1REM)9&5p3yPZ6#6e`y74%-UVYJgY}R$U-S6T)b6g%Qa@}2Qt0_U z_x!};NmIL3CLxD;zA9ngqP9`;@FKGFCF}`@lx{#x*hu{<*wVTF^h9}kyT^O;LBDFG zbXqft`)c>at8FX6R|UQcCc}o=;$dkP_pKj?pOHP_Vg1OuJ2}GHE)PqwwRlLn!B!1+ zlDaZad3?N`55qY6T;%#=DB%55%E`Xqv0wB2a}S99iAODyhEZ)G+UJzzlT%3jlo`3h z*g1&mPqKgDIlU=3O|?-P{P2OkhqXdMx8P7b4z;lz>=1OuESQfga52`*=AjU?f)f8z zlBA>rU9ml^13KeYASpl})cCi;bY(T%j5o6eh*nzcDs-a01!JhYp@3R}Xrd1O))Nph z6?k@Yb3maTq9|ZiHn9j4Hd}Hc0=lt}dN3^Z5{#k#2nwjb#BhsELubO0J0kFIVjmlT L0zv~2HS7KbEcK>Y diff --git a/llvm/test/tools/llvm-cov/Inputs/mcdc-general.proftext b/llvm/test/tools/llvm-cov/Inputs/mcdc-general.proftext index 2faca9da1b1e0b..a83f036ddf2c94 100644 --- a/llvm/test/tools/llvm-cov/Inputs/mcdc-general.proftext +++ b/llvm/test/tools/llvm-cov/Inputs/mcdc-general.proftext @@ -24,11 +24,14 @@ _Z4testbbbb 2 1 # Num Bitmap Bytes: -$3 +$6 # Bitmap Byte Values: -0xef -0x77 -0x3 +0x2f +0x8 +0xb +0x9 +0x83 +0x80 main diff --git a/llvm/test/tools/llvm-cov/Inputs/mcdc-macro.o b/llvm/test/tools/llvm-cov/Inputs/mcdc-macro.o index 
5b3934816bd222383c5e63188c29bf390a1f19fa..c0a9d6044553030ed1cc28bce2e764bcf3877869 100644 GIT binary patch delta 1412 zcmZ8hOKcNY6!m-d7*FDi$Dc5ECEBElL;_aji4aNDD#J??b=nQkRuu7fRdrFJevlU| z5HXmbSgK2+t8`a^yMt~*LItd9OI2iV+5l2j6&6euRU#J&M8!w_xc83Vq$XE-_xap& z?&n>NJUzHPIMdH{eRf;V%O8k+@9inx5yhu=Q}Lk{|CunKOf^MhZYlTf?2%U{&B{n4 zGFK6$@g+UCU8>||){(ndErq8~M`UP0Y4n+u1~1muH4{_K$Xu1x)#}0~`hD&K_NlJ= zdu}bT+beddi~5EVA(^mB(AAyAAG7yfn>_wF@|io_$C1C>`*CE_z0JzhT`Zm3Eebyc zWV&5)skJgD%MkX;JDXl?mUp|dsRubts%|pOXmpwNhB{pnrSMXr+NwFZUP$hudS6j% zqPT=z`A8#o!0X)FpewpqsooEas}DySr(gen3i(!>)$Bq=5&7S7sPW>X+bbd6t5Ntd zbWj`-M-Q*zG_)m=-?A?AcJ+Qpu{|L?WdqCyDgF)m>s+t^Jm-Ta&$t|$F$^_)+4ljp=AonSAh%u^W1-j5$0G7!$|EX8Jefu z(Sp9CAUX{5ghU?OCvjmzJS_1QC}YiQ8`yxvH{lkt^$qMHiGPM_r@?*&6aA{|ACR6s zhis>jV)U7y(IkF6-iuV|4I6K7X=CZV=kSpAwZ%ITQ#M(qElzscDw%*s*z27(ma^pX zn)|3%>Wem(a*tCY+DX_OHrUs21~vFmxaRHc%NF!~4_8t119f4)frH95siYlsyH-{K~RPxPH?e+NluzRUU$%Y&m^ zY!^7)ao7<}vl+-nEoQs;8sZz^Al}pBV`+8?vN4O*pp1UM8~qcbe?V{CV#mNq#32_? zvxiVd{0toQm!LOcvCmy?E|FG;-9HBW4q+fZ#7<(4rP?w!50^3a46@x8TZS_FY0R_O zFgV6m2t|`L-)4&s2jScJDBl%?6})^JylRZ%Q?iD|W}%G!3MN?Ue6aP@85l?o@r(u^ PCP%SjIZ0DF$*unZ3}9jl delta 1373 zcmZuuUr19?9NzET?Y5ilZf?tahe0EPwh_@rwDz*=49ln>5u*PViC9EotjBUuMuv*= z3+v5-f;5OtU-BVSdeNuMpo|{Uo}8lx5hUo3{m$K4UPeE-=bZ2Necw6XId>*@FV+`g zQ||sq(0{C{>qDw*wd;4on!`rbxt2cE-?pVW#}b8fNHoQilM;oSegU%3(BtBcLNGcl zdi9;F?mM@rmzlO_VRl|9oFMo`t|w8t9c9?racaeV)jYJ-{nMOR+#SUbU()-^kI4^^ z_g`~%-f1c`W(3yxJ*^6E3`kc{qSFP(7+bdpHc_}U=H8r^?bz~McjKJ)-QcAzG za9qg56rD&E+=CN6_S&AqAm5CLHu*Bk|GnU;)Y&t6I@GoO-ex}o{(Y?810r?82)hV1v^IN7friKyNhp0mE(6ysh&DjPpQG%_Fm~Ollnl_ z>r_ulb=IrF#?soRleCSSsDgYT-&xhkSEI&;Tg+>2MrejUuZ`Kp0B4meP>rdL8f?XO z8e@D6B)Er)r{>SSg9EDl_=MOaMevW^EbvKuzfp@`u&@=h{=HVLE0#Xrvr^zrgbFRt zTgKW2MvM|WSjL_f_!;-YOYEm{+#KI>J-ow7a(%M6BwGIxNxquwl7;1Y?g+es=gGTP z#`*j3}GAU#H4^frg>T(g<5||@p6ITYB%QR1grWF?|wSzax*aw2`we*jR>LKqHg{|nzTFlzt+ diff --git a/llvm/test/tools/llvm-cov/Inputs/mcdc-macro.proftext b/llvm/test/tools/llvm-cov/Inputs/mcdc-macro.proftext index 
44057ffd7d885e..35ecc42b5802a6 100644 --- a/llvm/test/tools/llvm-cov/Inputs/mcdc-macro.proftext +++ b/llvm/test/tools/llvm-cov/Inputs/mcdc-macro.proftext @@ -27,12 +27,21 @@ func 0 0 # Num Bitmap Bytes: -$4 +$13 # Bitmap Byte Values: 0x0 -0x9 0x0 -0x1 +0x0 +0x20 +0x8 +0x0 +0x20 +0x0 +0x0 +0x0 +0x0 +0x0 +0x0 func1 diff --git a/llvm/test/tools/llvm-cov/Inputs/mcdc-maxbs.o b/llvm/test/tools/llvm-cov/Inputs/mcdc-maxbs.o index 21f9d3a17dad0d1531e38ec1fbffb7dca3085535..bc993b19e84733fdf7670a5aec58219519837ec2 100644 GIT binary patch delta 377 zcmbQBFhOBLxSp@ae^v${Xy65r98lg4Mj+=fM2yk!q(?7Ujp3z<9WwPLT#Qf|rsn^S zMJJ(Jc7e1s|Bo)_+X+$()$)2PRNC->;mPLziTslf_WE!#7A=Mee{23WFf2JB9DElSQPzJ*ThbJpAMl&)_u3-#fWY~P3QIL_5dGbT1#f(Qcw=!R5 z6g&!21_WDyScrj%;ppT{*7J<^lQ*)7Ge%Di1d{2KKeCE5mP~GB6PN4)t7QPHV_;wh zVjU=dGDyATDl}OWD1ZOthd|jANU}UY_MiU{aAmR}yEx;`$(ro*fsQ)K{+yBV!RC#e N|A7wOypcPe1pps?Vvqm; delta 404 zcmbQBFhOBLc)h>Je^v${Xy65rEKuGKMj+=fM2yk!(oSX|*P|D#(BnTt=Y`Hw2VXHa zJY;v+#^`YAnMW^Jvjc|-7gP^Z^MA*p<50tPfedT@A6?A1lM`Xi>un%uFQ9!gyqJEAwSW!6P7*K(G~vg&3F^j!e#EJ Date: Thu, 13 Jun 2024 17:21:24 +0100 Subject: [PATCH 067/155] Reapply#4 "[RemoveDIs] Load into new debug info format by default in LLVM (#89799)" Reapplies commit c5aeca73 (and its followup commit 21396be8), which were reverted due to missing functionality in MLIR and Flang regarding printing debug records. This has now been added in commit 08aa511, along with support for printing debug records in flang. This reverts commit 2dc2290860355dd2bac3b655eea895fe30fde257. 
--- clang/test/CodeGen/instrument-objc-method.m | 8 +- flang/test/Transforms/debug-local-var-2.f90 | 16 +- llvm/docs/ReleaseNotes.rst | 7 + llvm/include/llvm/AsmParser/LLParser.h | 1 - llvm/lib/AsmParser/LLParser.cpp | 34 ++- llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 2 +- llvm/lib/IR/BasicBlock.cpp | 2 +- llvm/lib/IR/DebugProgramInstruction.cpp | 4 +- llvm/lib/IR/Function.cpp | 4 +- llvm/lib/IR/Module.cpp | 4 +- llvm/tools/llvm-as/llvm-as.cpp | 7 +- llvm/tools/llvm-dis/llvm-dis.cpp | 2 +- llvm/tools/llvm-link/llvm-link.cpp | 8 +- .../Analysis/IRSimilarityIdentifierTest.cpp | 22 +- llvm/unittests/IR/BasicBlockDbgInfoTest.cpp | 68 ------ llvm/unittests/IR/DebugInfoTest.cpp | 73 +++--- llvm/unittests/IR/IRBuilderTest.cpp | 12 +- llvm/unittests/IR/InstructionsTest.cpp | 6 + llvm/unittests/IR/ValueTest.cpp | 9 +- .../Transforms/Utils/CloningTest.cpp | 5 +- llvm/unittests/Transforms/Utils/LocalTest.cpp | 211 ++++++++++-------- 21 files changed, 245 insertions(+), 260 deletions(-) diff --git a/clang/test/CodeGen/instrument-objc-method.m b/clang/test/CodeGen/instrument-objc-method.m index cfc0a0a98bec6b..2c9d1fc88554bd 100644 --- a/clang/test/CodeGen/instrument-objc-method.m +++ b/clang/test/CodeGen/instrument-objc-method.m @@ -11,16 +11,16 @@ @implementation ObjCClass + (void)initialize { } -// PREINLINE: declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 -// BARE: @"\01+[ObjCClass load]"{{\(.*\)}} #2 +// BARE: @"\01+[ObjCClass load]"{{\(.*\)}} #1 + (void)load __attribute__((no_instrument_function)) { } -// PREINLINE: @"\01-[ObjCClass dealloc]"{{\(.*\)}} #2 -// BARE: @"\01-[ObjCClass dealloc]"{{\(.*\)}} #2 +// PREINLINE: @"\01-[ObjCClass dealloc]"{{\(.*\)}} #1 +// BARE: @"\01-[ObjCClass dealloc]"{{\(.*\)}} #1 - (void)dealloc __attribute__((no_instrument_function)) { } +// PREINLINE: declare void @llvm.dbg.declare(metadata, metadata, metadata) #2 // PREINLINE: attributes #0 = { {{.*}}"instrument-function-entry"="__cyg_profile_func_enter" // PREINLINE-NOT: 
attributes #0 = { {{.*}}"instrument-function-entry"="__cyg_profile_func_enter_bare" // PREINLINE-NOT: attributes #2 = { {{.*}}"__cyg_profile_func_enter" diff --git a/flang/test/Transforms/debug-local-var-2.f90 b/flang/test/Transforms/debug-local-var-2.f90 index 3b2873a1edaafe..ce78bfd0050569 100644 --- a/flang/test/Transforms/debug-local-var-2.f90 +++ b/flang/test/Transforms/debug-local-var-2.f90 @@ -28,27 +28,27 @@ ! BOTH-LABEL: define {{.*}}i64 @_QFPfn1 ! BOTH-SAME: (ptr %[[ARG1:.*]], ptr %[[ARG2:.*]], ptr %[[ARG3:.*]]) -! INTRINSICS-DAG: tail call void @llvm.dbg.declare(metadata ptr %[[ARG1]], metadata ![[A1:.*]], metadata !DIExpression()) -! INTRINSICS-DAG: tail call void @llvm.dbg.declare(metadata ptr %[[ARG2]], metadata ![[B1:.*]], metadata !DIExpression()) -! INTRINSICS-DAG: tail call void @llvm.dbg.declare(metadata ptr %[[ARG3]], metadata ![[C1:.*]], metadata !DIExpression()) +! INTRINSICS-DAG: call void @llvm.dbg.declare(metadata ptr %[[ARG1]], metadata ![[A1:.*]], metadata !DIExpression()) +! INTRINSICS-DAG: call void @llvm.dbg.declare(metadata ptr %[[ARG2]], metadata ![[B1:.*]], metadata !DIExpression()) +! INTRINSICS-DAG: call void @llvm.dbg.declare(metadata ptr %[[ARG3]], metadata ![[C1:.*]], metadata !DIExpression()) ! RECORDS-DAG: #dbg_declare(ptr %[[ARG1]], ![[A1:.*]], !DIExpression(), !{{.*}}) ! RECORDS-DAG: #dbg_declare(ptr %[[ARG2]], ![[B1:.*]], !DIExpression(), !{{.*}}) ! RECORDS-DAG: #dbg_declare(ptr %[[ARG3]], ![[C1:.*]], !DIExpression(), !{{.*}}) ! BOTH-DAG: %[[AL2:.*]] = alloca i64 -! INTRINSICS-DAG: tail call void @llvm.dbg.declare(metadata ptr %[[AL2]], metadata ![[RES1:.*]], metadata !DIExpression()) +! INTRINSICS-DAG: call void @llvm.dbg.declare(metadata ptr %[[AL2]], metadata ![[RES1:.*]], metadata !DIExpression()) ! RECORDS-DAG: #dbg_declare(ptr %[[AL2]], ![[RES1:.*]], !DIExpression(), !{{.*}}) ! BOTH-LABEL: } ! BOTH-LABEL: define {{.*}}i32 @_QFPfn2 ! BOTH-SAME: (ptr %[[FN2ARG1:.*]], ptr %[[FN2ARG2:.*]], ptr %[[FN2ARG3:.*]]) -! 
INTRINSICS-DAG: tail call void @llvm.dbg.declare(metadata ptr %[[FN2ARG1]], metadata ![[A2:.*]], metadata !DIExpression()) -! INTRINSICS-DAG: tail call void @llvm.dbg.declare(metadata ptr %[[FN2ARG2]], metadata ![[B2:.*]], metadata !DIExpression()) -! INTRINSICS-DAG: tail call void @llvm.dbg.declare(metadata ptr %[[FN2ARG3]], metadata ![[C2:.*]], metadata !DIExpression()) +! INTRINSICS-DAG: call void @llvm.dbg.declare(metadata ptr %[[FN2ARG1]], metadata ![[A2:.*]], metadata !DIExpression()) +! INTRINSICS-DAG: call void @llvm.dbg.declare(metadata ptr %[[FN2ARG2]], metadata ![[B2:.*]], metadata !DIExpression()) +! INTRINSICS-DAG: call void @llvm.dbg.declare(metadata ptr %[[FN2ARG3]], metadata ![[C2:.*]], metadata !DIExpression()) ! RECORDS-DAG: #dbg_declare(ptr %[[FN2ARG1]], ![[A2:.*]], !DIExpression(), !{{.*}}) ! RECORDS-DAG: #dbg_declare(ptr %[[FN2ARG2]], ![[B2:.*]], !DIExpression(), !{{.*}}) ! RECORDS-DAG: #dbg_declare(ptr %[[FN2ARG3]], ![[C2:.*]], !DIExpression(), !{{.*}}) ! BOTH-DAG: %[[AL3:.*]] = alloca i32 -! INTRINSICS-DAG: tail call void @llvm.dbg.declare(metadata ptr %[[AL3]], metadata ![[RES2:.*]], metadata !DIExpression()) +! INTRINSICS-DAG: call void @llvm.dbg.declare(metadata ptr %[[AL3]], metadata ![[RES2:.*]], metadata !DIExpression()) ! RECORDS-DAG: #dbg_declare(ptr %[[AL3]], ![[RES2:.*]], !DIExpression(), !{{.*}}) ! BOTH-LABEL: } diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index d52459b53caf9f..5fdbc9f349af43 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -231,6 +231,13 @@ Changes to the Metadata Info Changes to the Debug Info --------------------------------- +* LLVM has switched from using debug intrinsics internally to using debug + records by default. This should happen transparently when using the DIBuilder + to construct debug variable information, but will require changes for any code + that interacts with debug intrinsics directly. 
Debug intrinsics will only be + supported on a best-effort basis from here onwards; for more information, see + the `migration docs `_. + Changes to the LLVM tools --------------------------------- * llvm-nm and llvm-objdump can now print symbol information from linked diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h index b2dcdfad0a04b4..e687254f6c4c70 100644 --- a/llvm/include/llvm/AsmParser/LLParser.h +++ b/llvm/include/llvm/AsmParser/LLParser.h @@ -337,7 +337,6 @@ namespace llvm { // Top-Level Entities bool parseTopLevelEntities(); - bool finalizeDebugInfoFormat(Module *M); void dropUnknownMetadataReferences(); bool validateEndOfModule(bool UpgradeDebugInfo); bool validateEndOfIndex(); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index f0fde9ae4df5c3..eb1e3e494a42f4 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -74,23 +74,6 @@ static std::string getTypeString(Type *T) { return Tmp.str(); } -// Whatever debug info format we parsed, we should convert to the expected debug -// info format immediately afterwards. -bool LLParser::finalizeDebugInfoFormat(Module *M) { - // We should have already returned an error if we observed both intrinsics and - // records in this IR. 
- assert(!(SeenNewDbgInfoFormat && SeenOldDbgInfoFormat) && - "Mixed debug intrinsics/records seen without a parsing error?"); - if (PreserveInputDbgFormat == cl::boolOrDefault::BOU_TRUE) { - UseNewDbgInfoFormat = SeenNewDbgInfoFormat; - WriteNewDbgInfoFormatToBitcode = SeenNewDbgInfoFormat; - WriteNewDbgInfoFormat = SeenNewDbgInfoFormat; - } else if (M) { - M->setIsNewDbgInfoFormat(false); - } - return false; -} - /// Run: module ::= toplevelentity* bool LLParser::Run(bool UpgradeDebugInfo, DataLayoutCallbackTy DataLayoutCallback) { @@ -108,7 +91,7 @@ bool LLParser::Run(bool UpgradeDebugInfo, } return parseTopLevelEntities() || validateEndOfModule(UpgradeDebugInfo) || - validateEndOfIndex() || finalizeDebugInfoFormat(M); + validateEndOfIndex(); } bool LLParser::parseStandaloneConstantValue(Constant *&C, @@ -207,6 +190,18 @@ void LLParser::dropUnknownMetadataReferences() { bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) { if (!M) return false; + + // We should have already returned an error if we observed both intrinsics and + // records in this IR. + assert(!(SeenNewDbgInfoFormat && SeenOldDbgInfoFormat) && + "Mixed debug intrinsics/records seen without a parsing error?"); + if (PreserveInputDbgFormat == cl::boolOrDefault::BOU_TRUE) { + UseNewDbgInfoFormat = SeenNewDbgInfoFormat; + WriteNewDbgInfoFormatToBitcode = SeenNewDbgInfoFormat; + WriteNewDbgInfoFormat = SeenNewDbgInfoFormat; + M->setNewDbgInfoFormatFlag(SeenNewDbgInfoFormat); + } + // Handle any function attribute group forward references. for (const auto &RAG : ForwardRefAttrGroups) { Value *V = RAG.first; @@ -439,6 +434,9 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) { UpgradeModuleFlags(*M); UpgradeSectionAttributes(*M); + if (PreserveInputDbgFormat != cl::boolOrDefault::BOU_TRUE) + M->setIsNewDbgInfoFormat(UseNewDbgInfoFormat); + if (!Slots) return false; // Initialize the slot mapping. 
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 4ad3a2eaceea95..af5d6891805a67 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -4358,7 +4358,7 @@ Error BitcodeReader::parseModule(uint64_t ResumeBit, if (PreserveInputDbgFormat != cl::boolOrDefault::BOU_TRUE) { TheModule->IsNewDbgInfoFormat = UseNewDbgInfoFormat && - LoadBitcodeIntoNewDbgInfoFormat == cl::boolOrDefault::BOU_TRUE; + LoadBitcodeIntoNewDbgInfoFormat != cl::boolOrDefault::BOU_FALSE; } this->ValueTypeCallback = std::move(Callbacks.ValueType); diff --git a/llvm/lib/IR/BasicBlock.cpp b/llvm/lib/IR/BasicBlock.cpp index 29f2cbf611fa3a..aea9425ebebaab 100644 --- a/llvm/lib/IR/BasicBlock.cpp +++ b/llvm/lib/IR/BasicBlock.cpp @@ -181,7 +181,7 @@ template class llvm::SymbolTableListTraitsisComplex()) || + return (!hasArgList() && isa(getRawLocation())) || + (getNumVariableLocationOps() == 0 && !getExpression()->isComplex()) || any_of(location_ops(), [](Value *V) { return isa(V); }); } diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp index 3f735020e87402..9360e6d7d274c8 100644 --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -83,6 +83,8 @@ static cl::opt NonGlobalValueMaxNameSize( "non-global-value-max-name-size", cl::Hidden, cl::init(1024), cl::desc("Maximum size for the name of non-global values.")); +extern cl::opt UseNewDbgInfoFormat; + void Function::convertToNewDbgValues() { IsNewDbgInfoFormat = true; for (auto &BB : *this) { @@ -441,7 +443,7 @@ Function::Function(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, : GlobalObject(Ty, Value::FunctionVal, OperandTraits::op_begin(this), 0, Linkage, name, computeAddrSpace(AddrSpace, ParentModule)), - NumArgs(Ty->getNumParams()), IsNewDbgInfoFormat(false) { + NumArgs(Ty->getNumParams()), IsNewDbgInfoFormat(UseNewDbgInfoFormat) { assert(FunctionType::isValidReturnType(getReturnType()) && "invalid return type"); 
setGlobalObjectSubClassData(0); diff --git a/llvm/lib/IR/Module.cpp b/llvm/lib/IR/Module.cpp index f97dd18c736c51..55c282cb25e793 100644 --- a/llvm/lib/IR/Module.cpp +++ b/llvm/lib/IR/Module.cpp @@ -54,6 +54,8 @@ using namespace llvm; +extern cl::opt UseNewDbgInfoFormat; + //===----------------------------------------------------------------------===// // Methods to implement the globals and functions lists. // @@ -72,7 +74,7 @@ template class llvm::SymbolTableListTraits; Module::Module(StringRef MID, LLVMContext &C) : Context(C), ValSymTab(std::make_unique(-1)), ModuleID(std::string(MID)), SourceFileName(std::string(MID)), DL(""), - IsNewDbgInfoFormat(false) { + IsNewDbgInfoFormat(UseNewDbgInfoFormat) { Context.addModule(this); } diff --git a/llvm/tools/llvm-as/llvm-as.cpp b/llvm/tools/llvm-as/llvm-as.cpp index e48e3f4d22c123..0958e16c2197ac 100644 --- a/llvm/tools/llvm-as/llvm-as.cpp +++ b/llvm/tools/llvm-as/llvm-as.cpp @@ -142,11 +142,10 @@ int main(int argc, char **argv) { } // Convert to new debug format if requested. - assert(!M->IsNewDbgInfoFormat && "Unexpectedly in new debug mode"); - if (UseNewDbgInfoFormat && WriteNewDbgInfoFormatToBitcode) { - M->convertToNewDbgValues(); + M->setIsNewDbgInfoFormat(UseNewDbgInfoFormat && + WriteNewDbgInfoFormatToBitcode); + if (M->IsNewDbgInfoFormat) M->removeDebugIntrinsicDeclarations(); - } std::unique_ptr Index = std::move(ModuleAndIndex.Index); diff --git a/llvm/tools/llvm-dis/llvm-dis.cpp b/llvm/tools/llvm-dis/llvm-dis.cpp index fbbb5506e43e05..d28af85bc739eb 100644 --- a/llvm/tools/llvm-dis/llvm-dis.cpp +++ b/llvm/tools/llvm-dis/llvm-dis.cpp @@ -258,7 +258,7 @@ int main(int argc, char **argv) { // All that llvm-dis does is write the assembly to a file. 
if (!DontPrint) { if (M) { - ScopedDbgInfoFormatSetter FormatSetter(*M, WriteNewDbgInfoFormat); + M->setIsNewDbgInfoFormat(WriteNewDbgInfoFormat); if (WriteNewDbgInfoFormat) M->removeDebugIntrinsicDeclarations(); M->print(Out->os(), Annotator.get(), PreserveAssemblyUseListOrder); diff --git a/llvm/tools/llvm-link/llvm-link.cpp b/llvm/tools/llvm-link/llvm-link.cpp index 7794f2d81ed064..b84469d1c757f8 100644 --- a/llvm/tools/llvm-link/llvm-link.cpp +++ b/llvm/tools/llvm-link/llvm-link.cpp @@ -489,12 +489,6 @@ int main(int argc, char **argv) { if (LoadBitcodeIntoNewDbgInfoFormat == cl::boolOrDefault::BOU_UNSET) LoadBitcodeIntoNewDbgInfoFormat = cl::boolOrDefault::BOU_TRUE; - // RemoveDIs debug-info transition: tests may request that we /try/ to use the - // new debug-info format. - if (TryUseNewDbgInfoFormat) { - // Turn the new debug-info format on. - UseNewDbgInfoFormat = true; - } // Since llvm-link collects multiple IR modules together, for simplicity's // sake we disable the "PreserveInputDbgFormat" flag to enforce a single // debug info format. 
@@ -556,7 +550,7 @@ int main(int argc, char **argv) { SetFormat(WriteNewDbgInfoFormat); Composite->print(Out.os(), nullptr, PreserveAssemblyUseListOrder); } else if (Force || !CheckBitcodeOutputToConsole(Out.os())) { - SetFormat(WriteNewDbgInfoFormatToBitcode); + SetFormat(UseNewDbgInfoFormat && WriteNewDbgInfoFormatToBitcode); WriteBitcodeToFile(*Composite, Out.os(), PreserveBitcodeUseListOrder); } diff --git a/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp index f6a053792f8529..24f4f11db9a8b9 100644 --- a/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp +++ b/llvm/unittests/Analysis/IRSimilarityIdentifierTest.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/IRSimilarityIdentifier.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/AsmParser/Parser.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" @@ -22,6 +23,11 @@ using namespace llvm; using namespace IRSimilarity; +extern llvm::cl::opt UseNewDbgInfoFormat; +extern cl::opt PreserveInputDbgFormat; +extern bool WriteNewDbgInfoFormatToBitcode; +extern cl::opt WriteNewDbgInfoFormat; + static std::unique_ptr makeLLVMModule(LLVMContext &Context, StringRef ModuleStr) { SMDiagnostic Err; @@ -1306,19 +1312,18 @@ TEST(IRInstructionMapper, CallBrInstIllegal) { ASSERT_GT(UnsignedVec[0], Mapper.IllegalInstrNumber); } -// Checks that an debuginfo intrinsics are mapped to be invisible. Since they +// Checks that an debuginfo records are mapped to be invisible. Since they // do not semantically change the program, they can be recognized as similar. 
TEST(IRInstructionMapper, DebugInfoInvisible) { StringRef ModuleString = R"( define i32 @f(i32 %a, i32 %b) { then: - %0 = add i32 %a, %b - call void @llvm.dbg.value(metadata !0) - %1 = add i32 %a, %b + %0 = add i32 %a, %b + #dbg_value(i32 0, !0, !0, !0) + %1 = add i32 %a, %b ret i32 0 } - declare void @llvm.dbg.value(metadata) !0 = distinct !{!"test\00", i32 10})"; LLVMContext Context; std::unique_ptr M = makeLLVMModule(Context, ModuleString); @@ -1914,19 +1919,19 @@ TEST(IRSimilarityCandidate, CheckRegionsDifferentTypes) { ASSERT_FALSE(longSimCandCompare(InstrList)); } -// Check that debug instructions do not impact similarity. They are marked as +// Check that debug records do not impact similarity. They are marked as // invisible. TEST(IRSimilarityCandidate, IdenticalWithDebug) { StringRef ModuleString = R"( define i32 @f(i32 %a, i32 %b) { bb0: %0 = add i32 %a, %b - call void @llvm.dbg.value(metadata !0) + #dbg_value(i32 0, !0, !0, !0) %1 = add i32 %b, %a ret i32 0 bb1: %2 = add i32 %a, %b - call void @llvm.dbg.value(metadata !1) + #dbg_value(i32 1, !1, !1, !1) %3 = add i32 %b, %a ret i32 0 bb2: @@ -1935,7 +1940,6 @@ TEST(IRSimilarityCandidate, IdenticalWithDebug) { ret i32 0 } - declare void @llvm.dbg.value(metadata) !0 = distinct !{!"test\00", i32 10} !1 = distinct !{!"test\00", i32 11})"; LLVMContext Context; diff --git a/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp b/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp index f873bbd4293af5..91a0745a0cc76e 100644 --- a/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp +++ b/llvm/unittests/IR/BasicBlockDbgInfoTest.cpp @@ -25,8 +25,6 @@ using namespace llvm; -extern cl::opt UseNewDbgInfoFormat; - static std::unique_ptr parseIR(LLVMContext &C, const char *IR) { SMDiagnostic Err; std::unique_ptr Mod = parseAssemblyString(IR, Err, C); @@ -44,8 +42,6 @@ namespace { // by DbgVariableRecords, the dbg.value replacement. 
TEST(BasicBlockDbgInfoTest, InsertAfterSelf) { LLVMContext C; - UseNewDbgInfoFormat = true; - std::unique_ptr M = parseIR(C, R"( define i16 @f(i16 %a) !dbg !6 { call void @llvm.dbg.value(metadata i16 %a, metadata !9, metadata !DIExpression()), !dbg !11 @@ -72,8 +68,6 @@ TEST(BasicBlockDbgInfoTest, InsertAfterSelf) { !11 = !DILocation(line: 1, column: 1, scope: !6) )"); - // Convert the module to "new" form debug-info. - M->convertToNewDbgValues(); // Fetch the entry block. BasicBlock &BB = M->getFunction("f")->getEntryBlock(); @@ -103,16 +97,10 @@ TEST(BasicBlockDbgInfoTest, InsertAfterSelf) { EXPECT_TRUE(RetInst->hasDbgRecords()); auto Range2 = RetInst->getDbgRecordRange(); EXPECT_EQ(std::distance(Range2.begin(), Range2.end()), 1u); - - M->convertFromNewDbgValues(); - - UseNewDbgInfoFormat = false; } TEST(BasicBlockDbgInfoTest, SplitBasicBlockBefore) { LLVMContext C; - UseNewDbgInfoFormat = true; - std::unique_ptr M = parseIR(C, R"---( define dso_local void @func() #0 !dbg !10 { %1 = alloca i32, align 4 @@ -150,8 +138,6 @@ TEST(BasicBlockDbgInfoTest, SplitBasicBlockBefore) { )---"); ASSERT_TRUE(M); - M->convertToNewDbgValues(); - Function *F = M->getFunction("func"); BasicBlock &BB = F->getEntryBlock(); @@ -161,14 +147,10 @@ TEST(BasicBlockDbgInfoTest, SplitBasicBlockBefore) { BasicBlock &BBBefore = F->getEntryBlock(); auto I2 = std::prev(BBBefore.end(), 2); ASSERT_TRUE(I2->hasDbgRecords()); - - UseNewDbgInfoFormat = false; } TEST(BasicBlockDbgInfoTest, MarkerOperations) { LLVMContext C; - UseNewDbgInfoFormat = true; - std::unique_ptr M = parseIR(C, R"( define i16 @f(i16 %a) !dbg !6 { call void @llvm.dbg.value(metadata i16 %a, metadata !9, metadata !DIExpression()), !dbg !11 @@ -196,8 +178,6 @@ TEST(BasicBlockDbgInfoTest, MarkerOperations) { // Fetch the entry block, BasicBlock &BB = M->getFunction("f")->getEntryBlock(); - // Convert the module to "new" form debug-info. 
- M->convertToNewDbgValues(); EXPECT_EQ(BB.size(), 2u); // Fetch out our two markers, @@ -295,14 +275,10 @@ TEST(BasicBlockDbgInfoTest, MarkerOperations) { // Teardown, Instr1->insertBefore(BB, BB.begin()); - - UseNewDbgInfoFormat = false; } TEST(BasicBlockDbgInfoTest, HeadBitOperations) { LLVMContext C; - UseNewDbgInfoFormat = true; - std::unique_ptr M = parseIR(C, R"( define i16 @f(i16 %a) !dbg !6 { %b = add i16 %a, 1, !dbg !11 @@ -332,8 +308,6 @@ TEST(BasicBlockDbgInfoTest, HeadBitOperations) { // Test that the movement of debug-data when using moveBefore etc and // insertBefore etc are governed by the "head" bit of iterators. BasicBlock &BB = M->getFunction("f")->getEntryBlock(); - // Convert the module to "new" form debug-info. - M->convertToNewDbgValues(); // Test that the head bit behaves as expected: it should be set when the // code wants the _start_ of the block, but not otherwise. @@ -404,14 +378,10 @@ TEST(BasicBlockDbgInfoTest, HeadBitOperations) { DInst->DebugMarker->StoredDbgRecords.empty()); EXPECT_FALSE(CInst->DebugMarker->StoredDbgRecords.empty()); EXPECT_EQ(&*BB.begin(), CInst); - - UseNewDbgInfoFormat = false; } TEST(BasicBlockDbgInfoTest, InstrDbgAccess) { LLVMContext C; - UseNewDbgInfoFormat = true; - std::unique_ptr M = parseIR(C, R"( define i16 @f(i16 %a) !dbg !6 { %b = add i16 %a, 1, !dbg !11 @@ -441,8 +411,6 @@ TEST(BasicBlockDbgInfoTest, InstrDbgAccess) { // Check that DbgVariableRecords can be accessed from Instructions without // digging into the depths of DbgMarkers. BasicBlock &BB = M->getFunction("f")->getEntryBlock(); - // Convert the module to "new" form debug-info. 
- M->convertToNewDbgValues(); Instruction *BInst = &*BB.begin(); Instruction *CInst = BInst->getNextNode(); @@ -483,8 +451,6 @@ TEST(BasicBlockDbgInfoTest, InstrDbgAccess) { CInst->dropOneDbgRecord(DVR1); EXPECT_FALSE(CInst->hasDbgRecords()); EXPECT_EQ(CInst->DebugMarker->StoredDbgRecords.size(), 0u); - - UseNewDbgInfoFormat = false; } /* Let's recall the big illustration from BasicBlock::spliceDebugInfo: @@ -577,9 +543,7 @@ class DbgSpliceTest : public ::testing::Test { DbgVariableRecord *DVRA, *DVRB, *DVRConst; void SetUp() override { - UseNewDbgInfoFormat = true; M = parseIR(C, SpliceTestIR.c_str()); - M->convertToNewDbgValues(); BBEntry = &M->getFunction("f")->getEntryBlock(); BBExit = BBEntry->getNextNode(); @@ -599,8 +563,6 @@ class DbgSpliceTest : public ::testing::Test { cast(&*CInst->DebugMarker->StoredDbgRecords.begin()); } - void TearDown() override { UseNewDbgInfoFormat = false; } - bool InstContainsDbgVariableRecord(Instruction *I, DbgVariableRecord *DVR) { for (DbgRecord &D : I->getDbgRecordRange()) { if (&D == DVR) { @@ -1187,8 +1149,6 @@ metadata !9, metadata !DIExpression()), !dbg !11 Dest %c = add i16 %b, 1, // then the trailing DbgVariableRecords should get flushed back out. TEST(BasicBlockDbgInfoTest, DbgSpliceTrailing) { LLVMContext C; - UseNewDbgInfoFormat = true; - std::unique_ptr M = parseIR(C, R"( define i16 @f(i16 %a) !dbg !6 { entry: @@ -1219,7 +1179,6 @@ TEST(BasicBlockDbgInfoTest, DbgSpliceTrailing) { BasicBlock &Entry = M->getFunction("f")->getEntryBlock(); BasicBlock &Exit = *Entry.getNextNode(); - M->convertToNewDbgValues(); // Begin by forcing entry block to have dangling DbgVariableRecord. 
Entry.getTerminator()->eraseFromParent(); @@ -1234,8 +1193,6 @@ TEST(BasicBlockDbgInfoTest, DbgSpliceTrailing) { Instruction *BInst = &*Entry.begin(); ASSERT_TRUE(BInst->DebugMarker); EXPECT_EQ(BInst->DebugMarker->StoredDbgRecords.size(), 1u); - - UseNewDbgInfoFormat = false; } // When we remove instructions from the program, adjacent DbgVariableRecords @@ -1244,8 +1201,6 @@ TEST(BasicBlockDbgInfoTest, DbgSpliceTrailing) { // dbg.values. Test that this can be replicated correctly by DbgVariableRecords TEST(BasicBlockDbgInfoTest, RemoveInstAndReinsert) { LLVMContext C; - UseNewDbgInfoFormat = true; - std::unique_ptr M = parseIR(C, R"( define i16 @f(i16 %a) !dbg !6 { entry: @@ -1273,7 +1228,6 @@ TEST(BasicBlockDbgInfoTest, RemoveInstAndReinsert) { )"); BasicBlock &Entry = M->getFunction("f")->getEntryBlock(); - M->convertToNewDbgValues(); // Fetch the relevant instructions from the converted function. Instruction *SubInst = &*Entry.begin(); @@ -1316,16 +1270,12 @@ TEST(BasicBlockDbgInfoTest, RemoveInstAndReinsert) { EXPECT_EQ(std::distance(R4.begin(), R4.end()), 1u); auto R5 = RetInst->getDbgRecordRange(); EXPECT_EQ(std::distance(R5.begin(), R5.end()), 1u); - - UseNewDbgInfoFormat = false; } // Test instruction removal and re-insertion, this time with one // DbgVariableRecord that should hop up one instruction. TEST(BasicBlockDbgInfoTest, RemoveInstAndReinsertForOneDbgVariableRecord) { LLVMContext C; - UseNewDbgInfoFormat = true; - std::unique_ptr M = parseIR(C, R"( define i16 @f(i16 %a) !dbg !6 { entry: @@ -1352,7 +1302,6 @@ TEST(BasicBlockDbgInfoTest, RemoveInstAndReinsertForOneDbgVariableRecord) { )"); BasicBlock &Entry = M->getFunction("f")->getEntryBlock(); - M->convertToNewDbgValues(); // Fetch the relevant instructions from the converted function. 
Instruction *SubInst = &*Entry.begin(); @@ -1391,8 +1340,6 @@ TEST(BasicBlockDbgInfoTest, RemoveInstAndReinsertForOneDbgVariableRecord) { EXPECT_FALSE(RetInst->hasDbgRecords()); auto R3 = AddInst->getDbgRecordRange(); EXPECT_EQ(std::distance(R3.begin(), R3.end()), 1u); - - UseNewDbgInfoFormat = false; } // Similar to the above, what if we splice into an empty block with debug-info, @@ -1401,8 +1348,6 @@ TEST(BasicBlockDbgInfoTest, RemoveInstAndReinsertForOneDbgVariableRecord) { // of the i16 0 dbg.value. TEST(BasicBlockDbgInfoTest, DbgSpliceToEmpty1) { LLVMContext C; - UseNewDbgInfoFormat = true; - std::unique_ptr M = parseIR(C, R"( define i16 @f(i16 %a) !dbg !6 { entry: @@ -1436,7 +1381,6 @@ TEST(BasicBlockDbgInfoTest, DbgSpliceToEmpty1) { Function &F = *M->getFunction("f"); BasicBlock &Entry = F.getEntryBlock(); BasicBlock &Exit = *Entry.getNextNode(); - M->convertToNewDbgValues(); // Begin by forcing entry block to have dangling DbgVariableRecord. Entry.getTerminator()->eraseFromParent(); @@ -1463,16 +1407,12 @@ TEST(BasicBlockDbgInfoTest, DbgSpliceToEmpty1) { // No trailing DbgVariableRecords in the entry block now. EXPECT_EQ(Entry.getTrailingDbgRecords(), nullptr); - - UseNewDbgInfoFormat = false; } // Similar test again, but this time: splice the contents of exit into entry, // with the intention of leaving the first dbg.value (i16 0) behind. TEST(BasicBlockDbgInfoTest, DbgSpliceToEmpty2) { LLVMContext C; - UseNewDbgInfoFormat = true; - std::unique_ptr M = parseIR(C, R"( define i16 @f(i16 %a) !dbg !6 { entry: @@ -1506,7 +1446,6 @@ TEST(BasicBlockDbgInfoTest, DbgSpliceToEmpty2) { Function &F = *M->getFunction("f"); BasicBlock &Entry = F.getEntryBlock(); BasicBlock &Exit = *Entry.getNextNode(); - M->convertToNewDbgValues(); // Begin by forcing entry block to have dangling DbgVariableRecord. 
Entry.getTerminator()->eraseFromParent(); @@ -1537,16 +1476,12 @@ TEST(BasicBlockDbgInfoTest, DbgSpliceToEmpty2) { EXPECT_FALSE(Exit.getTrailingDbgRecords()->empty()); Exit.getTrailingDbgRecords()->eraseFromParent(); Exit.deleteTrailingDbgRecords(); - - UseNewDbgInfoFormat = false; } // What if we moveBefore end() -- there might be no debug-info there, in which // case we shouldn't crash. TEST(BasicBlockDbgInfoTest, DbgMoveToEnd) { LLVMContext C; - UseNewDbgInfoFormat = true; - std::unique_ptr M = parseIR(C, R"( define i16 @f(i16 %a) !dbg !6 { entry: @@ -1576,7 +1511,6 @@ TEST(BasicBlockDbgInfoTest, DbgMoveToEnd) { Function &F = *M->getFunction("f"); BasicBlock &Entry = F.getEntryBlock(); BasicBlock &Exit = *Entry.getNextNode(); - M->convertToNewDbgValues(); // Move the return to the end of the entry block. Instruction *Br = Entry.getTerminator(); @@ -1589,8 +1523,6 @@ TEST(BasicBlockDbgInfoTest, DbgMoveToEnd) { EXPECT_EQ(Entry.getTrailingDbgRecords(), nullptr); EXPECT_EQ(Exit.getTrailingDbgRecords(), nullptr); EXPECT_FALSE(Ret->hasDbgRecords()); - - UseNewDbgInfoFormat = false; } } // End anonymous namespace. diff --git a/llvm/unittests/IR/DebugInfoTest.cpp b/llvm/unittests/IR/DebugInfoTest.cpp index ec3f33318f8cdb..cac8acbe15a79d 100644 --- a/llvm/unittests/IR/DebugInfoTest.cpp +++ b/llvm/unittests/IR/DebugInfoTest.cpp @@ -156,7 +156,7 @@ TEST(StripTest, LoopMetadata) { EXPECT_FALSE(BrokenDebugInfo); } -TEST(MetadataTest, DeleteInstUsedByDbgValue) { +TEST(MetadataTest, DeleteInstUsedByDbgRecord) { LLVMContext C; std::unique_ptr M = parseIR(C, R"( define i16 @f(i16 %a) !dbg !6 { @@ -187,12 +187,13 @@ TEST(MetadataTest, DeleteInstUsedByDbgValue) { // Find the dbg.value using %b. SmallVector DVIs; - findDbgValues(DVIs, &I); + SmallVector DVRs; + findDbgValues(DVIs, &I, &DVRs); // Delete %b. The dbg.value should now point to undef. 
I.eraseFromParent(); - EXPECT_EQ(DVIs[0]->getNumVariableLocationOps(), 1u); - EXPECT_TRUE(isa(DVIs[0]->getValue(0))); + EXPECT_EQ(DVRs[0]->getNumVariableLocationOps(), 1u); + EXPECT_TRUE(isa(DVRs[0]->getValue(0))); } TEST(DbgVariableIntrinsic, EmptyMDIsKillLocation) { @@ -230,8 +231,8 @@ TEST(DbgVariableIntrinsic, EmptyMDIsKillLocation) { // Get the dbg.declare. Function &F = *cast(M->getNamedValue("fun")); - DbgVariableIntrinsic *DbgDeclare = - cast(&F.front().front()); + DbgVariableRecord *DbgDeclare = + cast(&*F.front().front().getDbgRecordRange().begin()); // Check that this form counts as a "no location" marker. EXPECT_TRUE(DbgDeclare->isKillLocation()); } @@ -239,6 +240,9 @@ TEST(DbgVariableIntrinsic, EmptyMDIsKillLocation) { // Duplicate of above test, but in DbgVariableRecord representation. TEST(MetadataTest, DeleteInstUsedByDbgVariableRecord) { LLVMContext C; + bool OldDbgValueMode = UseNewDbgInfoFormat; + UseNewDbgInfoFormat = true; + std::unique_ptr M = parseIR(C, R"( define i16 @f(i16 %a) !dbg !6 { %b = add i16 %a, 1, !dbg !11 @@ -264,10 +268,7 @@ TEST(MetadataTest, DeleteInstUsedByDbgVariableRecord) { !11 = !DILocation(line: 1, column: 1, scope: !6) )"); - bool OldDbgValueMode = UseNewDbgInfoFormat; - UseNewDbgInfoFormat = true; Instruction &I = *M->getFunction("f")->getEntryBlock().getFirstNonPHI(); - M->convertToNewDbgValues(); // Find the DbgVariableRecords using %b. SmallVector DVIs; @@ -289,6 +290,8 @@ TEST(MetadataTest, DeleteInstUsedByDbgVariableRecord) { // Ensure that the order of dbg.value intrinsics returned by findDbgValues, and // their corresponding DbgVariableRecord representation, are consistent. 
TEST(MetadataTest, OrderingOfDbgVariableRecords) { + bool OldDbgValueMode = UseNewDbgInfoFormat; + UseNewDbgInfoFormat = false; LLVMContext C; std::unique_ptr M = parseIR(C, R"( define i16 @f(i16 %a) !dbg !6 { @@ -316,8 +319,6 @@ TEST(MetadataTest, OrderingOfDbgVariableRecords) { !12 = !DILocalVariable(name: "bar", scope: !6, file: !1, line: 1, type: !10) )"); - bool OldDbgValueMode = UseNewDbgInfoFormat; - UseNewDbgInfoFormat = true; Instruction &I = *M->getFunction("f")->getEntryBlock().getFirstNonPHI(); SmallVector DVIs; @@ -515,14 +516,15 @@ TEST(DbgAssignIntrinsicTest, replaceVariableLocationOp) { Value *V1 = Fun.getArg(0); Value *P1 = Fun.getArg(1); Value *P2 = Fun.getArg(2); - DbgAssignIntrinsic *DAI = cast(Fun.begin()->begin()); - ASSERT_TRUE(V1 == DAI->getVariableLocationOp(0)); - ASSERT_TRUE(P1 == DAI->getAddress()); + DbgVariableRecord *DbgAssign = cast( + &*Fun.front().front().getDbgRecordRange().begin()); + ASSERT_TRUE(V1 == DbgAssign->getVariableLocationOp(0)); + ASSERT_TRUE(P1 == DbgAssign->getAddress()); #define TEST_REPLACE(Old, New, ExpectedValue, ExpectedAddr) \ - DAI->replaceVariableLocationOp(Old, New); \ - EXPECT_EQ(DAI->getVariableLocationOp(0), ExpectedValue); \ - EXPECT_EQ(DAI->getAddress(), ExpectedAddr); + DbgAssign->replaceVariableLocationOp(Old, New); \ + EXPECT_EQ(DbgAssign->getVariableLocationOp(0), ExpectedValue); \ + EXPECT_EQ(DbgAssign->getAddress(), ExpectedAddr); // Replace address only. TEST_REPLACE(/*Old*/ P1, /*New*/ P2, /*Value*/ V1, /*Address*/ P2); @@ -533,8 +535,8 @@ TEST(DbgAssignIntrinsicTest, replaceVariableLocationOp) { // Replace address only, value uses a DIArgList. // Value = {DIArgList(V1)}, Addr = P1. 
- DAI->setRawLocation(DIArgList::get(C, ValueAsMetadata::get(V1))); - DAI->setExpression(DIExpression::get( + DbgAssign->setRawLocation(DIArgList::get(C, ValueAsMetadata::get(V1))); + DbgAssign->setExpression(DIExpression::get( C, {dwarf::DW_OP_LLVM_arg, 0, dwarf::DW_OP_stack_value})); TEST_REPLACE(/*Old*/ P1, /*New*/ P2, /*Value*/ V1, /*Address*/ P2); #undef TEST_REPLACE @@ -620,11 +622,11 @@ TEST(AssignmentTrackingTest, Utils) { // // Check there are two llvm.dbg.assign intrinsics linked to Alloca. auto CheckFun1Mapping = [&Alloca]() { - auto Markers = at::getAssignmentMarkers(&Alloca); + auto Markers = at::getDVRAssignmentMarkers(&Alloca); EXPECT_TRUE(std::distance(Markers.begin(), Markers.end()) == 2); // Check those two entries are distinct. - DbgAssignIntrinsic *First = *Markers.begin(); - DbgAssignIntrinsic *Second = *std::next(Markers.begin()); + DbgVariableRecord *First = *Markers.begin(); + DbgVariableRecord *Second = *std::next(Markers.begin()); EXPECT_NE(First, Second); // Check that we can get back to Alloca from each llvm.dbg.assign. @@ -660,7 +662,7 @@ TEST(AssignmentTrackingTest, Utils) { DIAssignID *Fun2ID = cast_or_null( Fun2Alloca.getMetadata(LLVMContext::MD_DIAssignID)); EXPECT_NE(New, Fun2ID); - auto Fun2Markers = at::getAssignmentMarkers(&Fun2Alloca); + auto Fun2Markers = at::getDVRAssignmentMarkers(&Fun2Alloca); ASSERT_TRUE(std::distance(Fun2Markers.begin(), Fun2Markers.end()) == 1); auto Fun2Insts = at::getAssignmentInsts(*Fun2Markers.begin()); ASSERT_TRUE(std::distance(Fun2Insts.begin(), Fun2Insts.end()) == 1); @@ -669,10 +671,10 @@ TEST(AssignmentTrackingTest, Utils) { // 3. Check that deleting dbg.assigns from a specific instruction works. 
Instruction &Fun3Alloca = *M->getFunction("fun3")->getEntryBlock().getFirstNonPHIOrDbg(); - auto Fun3Markers = at::getAssignmentMarkers(&Fun3Alloca); + auto Fun3Markers = at::getDVRAssignmentMarkers(&Fun3Alloca); ASSERT_TRUE(std::distance(Fun3Markers.begin(), Fun3Markers.end()) == 1); at::deleteAssignmentMarkers(&Fun3Alloca); - Fun3Markers = at::getAssignmentMarkers(&Fun3Alloca); + Fun3Markers = at::getDVRAssignmentMarkers(&Fun3Alloca); EXPECT_EQ(Fun3Markers.empty(), true); // 4. Check that deleting works and applies only to the target function. @@ -683,7 +685,7 @@ TEST(AssignmentTrackingTest, Utils) { // llvm.dbg.assign. EXPECT_EQ(Fun2ID, cast_or_null( Fun2Alloca.getMetadata(LLVMContext::MD_DIAssignID))); - EXPECT_FALSE(at::getAssignmentMarkers(&Fun2Alloca).empty()); + EXPECT_FALSE(at::getDVRAssignmentMarkers(&Fun2Alloca).empty()); } TEST(IRBuilder, GetSetInsertionPointWithEmptyBasicBlock) { @@ -769,12 +771,12 @@ TEST(AssignmentTrackingTest, InstrMethods) { // Use SetVectors to check that the attachments and markers are unique // (another test requirement). SetVector OrigIDs; - SetVector Markers; + SetVector Markers; for (const Instruction *SI : Stores) { Metadata *ID = SI->getMetadata(LLVMContext::MD_DIAssignID); ASSERT_TRUE(OrigIDs.insert(ID)); ASSERT_TRUE(ID != nullptr); - auto Range = at::getAssignmentMarkers(SI); + auto Range = at::getDVRAssignmentMarkers(SI); ASSERT_TRUE(std::distance(Range.begin(), Range.end()) == 1); ASSERT_TRUE(Markers.insert(*Range.begin())); } @@ -867,6 +869,8 @@ TEST(AssignmentTrackingTest, InstrMethods) { // dbg.values that have been converted to a non-instruction format. 
TEST(MetadataTest, ConvertDbgToDbgVariableRecord) { LLVMContext C; + bool OldDbgValueMode = UseNewDbgInfoFormat; + UseNewDbgInfoFormat = false; std::unique_ptr M = parseIR(C, R"( define i16 @f(i16 %a) !dbg !6 { call void @llvm.dbg.value(metadata i16 %a, metadata !9, metadata !DIExpression()), !dbg !11 @@ -1041,14 +1045,14 @@ TEST(MetadataTest, ConvertDbgToDbgVariableRecord) { // The record of those trailing DbgVariableRecords would dangle and cause an // assertion failure if it lived until the end of the LLVMContext. ExitBlock->deleteTrailingDbgRecords(); + UseNewDbgInfoFormat = OldDbgValueMode; } TEST(MetadataTest, DbgVariableRecordConversionRoutines) { LLVMContext C; - // For the purpose of this test, set and un-set the command line option - // corresponding to UseNewDbgInfoFormat. - UseNewDbgInfoFormat = true; + bool OldDbgValueMode = UseNewDbgInfoFormat; + UseNewDbgInfoFormat = false; std::unique_ptr M = parseIR(C, R"( define i16 @f(i16 %a) !dbg !6 { @@ -1079,6 +1083,11 @@ TEST(MetadataTest, DbgVariableRecordConversionRoutines) { !11 = !DILocation(line: 1, column: 1, scope: !6) )"); + // For the purpose of this test, set and un-set the command line option + // corresponding to UseNewDbgInfoFormat, but only after parsing, to ensure + // that the IR starts off in the old format. + UseNewDbgInfoFormat = true; + // Check that the conversion routines and utilities between dbg.value // debug-info format and DbgVariableRecords works. 
Function *F = M->getFunction("f"); @@ -1183,7 +1192,7 @@ TEST(MetadataTest, DbgVariableRecordConversionRoutines) { EXPECT_EQ(DVI2->getVariable(), DLV2); EXPECT_EQ(DVI2->getExpression(), Expr2); - UseNewDbgInfoFormat = false; + UseNewDbgInfoFormat = OldDbgValueMode; } // Test that the hashing function for DISubprograms representing methods produce diff --git a/llvm/unittests/IR/IRBuilderTest.cpp b/llvm/unittests/IR/IRBuilderTest.cpp index 2001df090aed53..ff96df85812002 100644 --- a/llvm/unittests/IR/IRBuilderTest.cpp +++ b/llvm/unittests/IR/IRBuilderTest.cpp @@ -994,17 +994,17 @@ TEST_F(IRBuilderTest, DIBuilder) { EXPECT_TRUE(verifyModule(*M)); }; - // Test in old-debug mode. - EXPECT_FALSE(M->IsNewDbgInfoFormat); + // Test in new-debug mode. + EXPECT_TRUE(M->IsNewDbgInfoFormat); RunTest(); - // Test in new-debug mode. - // Reset the test then call convertToNewDbgValues to flip the flag + // Test in old-debug mode. + // Reset the test then call convertFromNewDbgValues to flip the flag // on the test's Module, Function and BasicBlock. 
TearDown(); SetUp(); - M->convertToNewDbgValues(); - EXPECT_TRUE(M->IsNewDbgInfoFormat); + M->convertFromNewDbgValues(); + EXPECT_FALSE(M->IsNewDbgInfoFormat); RunTest(); } diff --git a/llvm/unittests/IR/InstructionsTest.cpp b/llvm/unittests/IR/InstructionsTest.cpp index b47c73f0b329ae..b6044b28629204 100644 --- a/llvm/unittests/IR/InstructionsTest.cpp +++ b/llvm/unittests/IR/InstructionsTest.cpp @@ -25,12 +25,15 @@ #include "llvm/IR/Module.h" #include "llvm/IR/NoFolder.h" #include "llvm/IR/Operator.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/SourceMgr.h" #include "llvm-c/Core.h" #include "gmock/gmock-matchers.h" #include "gtest/gtest.h" #include +extern llvm::cl::opt UseNewDbgInfoFormat; + namespace llvm { namespace { @@ -1460,6 +1463,8 @@ TEST(InstructionsTest, GetSplat) { TEST(InstructionsTest, SkipDebug) { LLVMContext C; + bool OldDbgValueMode = UseNewDbgInfoFormat; + UseNewDbgInfoFormat = false; std::unique_ptr M = parseIR(C, R"( declare void @llvm.dbg.value(metadata, metadata, metadata) @@ -1495,6 +1500,7 @@ TEST(InstructionsTest, SkipDebug) { // After the terminator, there are no non-debug instructions. EXPECT_EQ(nullptr, Term->getNextNonDebugInstruction()); + UseNewDbgInfoFormat = OldDbgValueMode; } TEST(InstructionsTest, PhiMightNotBeFPMathOperator) { diff --git a/llvm/unittests/IR/ValueTest.cpp b/llvm/unittests/IR/ValueTest.cpp index 246c2fc7fe4063..33a86d510d45cb 100644 --- a/llvm/unittests/IR/ValueTest.cpp +++ b/llvm/unittests/IR/ValueTest.cpp @@ -13,6 +13,7 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/ModuleSlotTracker.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/SourceMgr.h" #include "gtest/gtest.h" using namespace llvm; @@ -255,6 +256,8 @@ TEST(ValueTest, getLocalSlotDeath) { TEST(ValueTest, replaceUsesOutsideBlock) { // Check that Value::replaceUsesOutsideBlock(New, BB) replaces uses outside // BB, including dbg.* uses of MetadataAsValue(ValueAsMetadata(this)). 
+ bool OldDbgValueMode = UseNewDbgInfoFormat; + UseNewDbgInfoFormat = false; const auto *IR = R"( define i32 @f() !dbg !6 { entry: @@ -315,6 +318,7 @@ TEST(ValueTest, replaceUsesOutsideBlock) { // These users are outside Entry so should be changed. ASSERT_TRUE(ExitDbg->getValue(0) == cast(B)); ASSERT_TRUE(Ret->getOperand(0) == cast(B)); + UseNewDbgInfoFormat = OldDbgValueMode; } TEST(ValueTest, replaceUsesOutsideBlockDbgVariableRecord) { @@ -359,10 +363,6 @@ TEST(ValueTest, replaceUsesOutsideBlockDbgVariableRecord) { if (!M) Err.print("ValueTest", errs()); - bool OldDbgValueMode = UseNewDbgInfoFormat; - UseNewDbgInfoFormat = true; - M->convertToNewDbgValues(); - auto GetNext = [](auto *I) { return &*++I->getIterator(); }; Function *F = M->getFunction("f"); @@ -389,7 +389,6 @@ TEST(ValueTest, replaceUsesOutsideBlockDbgVariableRecord) { EXPECT_TRUE(DVR1->getVariableLocationOp(0) == cast(A)); // These users are outside Entry so should be changed. EXPECT_TRUE(DVR2->getVariableLocationOp(0) == cast(B)); - UseNewDbgInfoFormat = OldDbgValueMode; } } // end anonymous namespace diff --git a/llvm/unittests/Transforms/Utils/CloningTest.cpp b/llvm/unittests/Transforms/Utils/CloningTest.cpp index 5e302d9c0a0d3e..1d0d56a2099ceb 100644 --- a/llvm/unittests/Transforms/Utils/CloningTest.cpp +++ b/llvm/unittests/Transforms/Utils/CloningTest.cpp @@ -844,8 +844,9 @@ TEST(CloneFunction, CloneFunctionWithInlinedSubprograms) { EXPECT_FALSE(verifyModule(*ImplModule, &errs())); // Check that DILexicalBlock of inlined function was not cloned. 
- auto DbgDeclareI = Func->begin()->begin(); - auto ClonedDbgDeclareI = ClonedFunc->begin()->begin(); + auto DbgDeclareI = Func->begin()->begin()->getDbgRecordRange().begin(); + auto ClonedDbgDeclareI = + ClonedFunc->begin()->begin()->getDbgRecordRange().begin(); const DebugLoc &DbgLoc = DbgDeclareI->getDebugLoc(); const DebugLoc &ClonedDbgLoc = ClonedDbgDeclareI->getDebugLoc(); EXPECT_NE(DbgLoc.get(), ClonedDbgLoc.get()); diff --git a/llvm/unittests/Transforms/Utils/LocalTest.cpp b/llvm/unittests/Transforms/Utils/LocalTest.cpp index 9b1176765c17f1..316d59a9d22969 100644 --- a/llvm/unittests/Transforms/Utils/LocalTest.cpp +++ b/llvm/unittests/Transforms/Utils/LocalTest.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/Local.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/PostDominators.h" @@ -26,6 +27,27 @@ using namespace llvm; +extern llvm::cl::opt UseNewDbgInfoFormat; +extern cl::opt PreserveInputDbgFormat; +extern bool WriteNewDbgInfoFormatToBitcode; +extern cl::opt WriteNewDbgInfoFormat; + +// Backup all of the existing settings that may be modified when +// PreserveInputDbgFormat=true, so that when the test is finished we return them +// (and the "preserve" setting) to their original values. 
+static auto SaveDbgInfoFormat() { + return make_scope_exit( + [OldPreserveInputDbgFormat = PreserveInputDbgFormat.getValue(), + OldUseNewDbgInfoFormat = UseNewDbgInfoFormat.getValue(), + OldWriteNewDbgInfoFormatToBitcode = WriteNewDbgInfoFormatToBitcode, + OldWriteNewDbgInfoFormat = WriteNewDbgInfoFormat.getValue()] { + PreserveInputDbgFormat = OldPreserveInputDbgFormat; + UseNewDbgInfoFormat = OldUseNewDbgInfoFormat; + WriteNewDbgInfoFormatToBitcode = OldWriteNewDbgInfoFormatToBitcode; + WriteNewDbgInfoFormat = OldWriteNewDbgInfoFormat; + }); +} + TEST(Local, RecursivelyDeleteDeadPHINodes) { LLVMContext C; @@ -116,7 +138,6 @@ static std::unique_ptr parseIR(LLVMContext &C, const char *IR) { TEST(Local, ReplaceDbgDeclare) { LLVMContext C; - // Original C source to get debug info for a local variable: // void f() { int x; } std::unique_ptr M = parseIR(C, @@ -124,11 +145,11 @@ TEST(Local, ReplaceDbgDeclare) { define void @f() !dbg !8 { entry: %x = alloca i32, align 4 - call void @llvm.dbg.declare(metadata i32* %x, metadata !11, metadata !DIExpression()), !dbg !13 - call void @llvm.dbg.declare(metadata i32* %x, metadata !11, metadata !DIExpression()), !dbg !13 + #dbg_declare(ptr %x, !11, !DIExpression(), !13) + #dbg_declare(ptr %x, !11, !DIExpression(), !13) ret void, !dbg !14 } - declare void @llvm.dbg.declare(metadata, metadata, metadata) + !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3, !4} !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 6.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) @@ -151,20 +172,18 @@ TEST(Local, ReplaceDbgDeclare) { Instruction *Inst = &F->front().front(); auto *AI = dyn_cast(Inst); ASSERT_TRUE(AI); - Inst = Inst->getNextNode()->getNextNode(); - ASSERT_TRUE(Inst); - auto *DII = dyn_cast(Inst); - ASSERT_TRUE(DII); + Value *NewBase = Constant::getNullValue(PointerType::getUnqual(C)); DIBuilder DIB(*M); replaceDbgDeclare(AI, NewBase, DIB, DIExpression::ApplyOffset, 0); - 
// There should be exactly two dbg.declares. - int Declares = 0; - for (const Instruction &I : F->front()) - if (isa(I)) - Declares++; - EXPECT_EQ(2, Declares); + // There should be exactly two dbg.declares, attached to the terminator. + Inst = F->front().getTerminator(); + ASSERT_TRUE(Inst); + EXPECT_TRUE(Inst->hasDbgRecords()); + EXPECT_EQ(range_size(Inst->getDbgRecordRange()), 2u); + for (DbgVariableRecord &DVR : filterDbgVars(Inst->getDbgRecordRange())) + EXPECT_EQ(DVR.getAddress(), NewBase); } /// Build the dominator tree for the function and run the Test. @@ -499,11 +518,10 @@ struct SalvageDebugInfoTest : ::testing::Test { entry: %x = add i32 0, 1 %y = add i32 %x, 2 - call void @llvm.dbg.value(metadata i32 %x, metadata !11, metadata !DIExpression()), !dbg !13 - call void @llvm.dbg.value(metadata i32 %y, metadata !11, metadata !DIExpression()), !dbg !13 + #dbg_value(i32 %x, !11, !DIExpression(), !13) + #dbg_value(i32 %y, !11, !DIExpression(), !13) ret void, !dbg !14 } - declare void @llvm.dbg.value(metadata, metadata, metadata) !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!3, !4} !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 6.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) @@ -526,48 +544,47 @@ struct SalvageDebugInfoTest : ::testing::Test { ASSERT_TRUE(F); } - bool doesDebugValueDescribeX(const DbgValueInst &DI) { - if (DI.getNumVariableLocationOps() != 1u) + bool doesDebugValueDescribeX(const DbgVariableRecord &DVR) { + if (DVR.getNumVariableLocationOps() != 1u) return false; - const auto &CI = *cast(DI.getValue(0)); + const auto &CI = *cast(DVR.getValue(0)); if (CI.isZero()) - return DI.getExpression()->getElements().equals( + return DVR.getExpression()->getElements().equals( {dwarf::DW_OP_plus_uconst, 1, dwarf::DW_OP_stack_value}); else if (CI.isOneValue()) - return DI.getExpression()->getElements().empty(); + return DVR.getExpression()->getElements().empty(); return false; } - bool 
doesDebugValueDescribeY(const DbgValueInst &DI) { - if (DI.getNumVariableLocationOps() != 1u) + bool doesDebugValueDescribeY(const DbgVariableRecord &DVR) { + if (DVR.getNumVariableLocationOps() != 1u) return false; - const auto &CI = *cast(DI.getVariableLocationOp(0)); + const auto &CI = *cast(DVR.getVariableLocationOp(0)); if (CI.isZero()) - return DI.getExpression()->getElements().equals( + return DVR.getExpression()->getElements().equals( {dwarf::DW_OP_plus_uconst, 3, dwarf::DW_OP_stack_value}); else if (CI.isOneValue()) - return DI.getExpression()->getElements().equals( + return DVR.getExpression()->getElements().equals( {dwarf::DW_OP_plus_uconst, 2, dwarf::DW_OP_stack_value}); return false; } void verifyDebugValuesAreSalvaged() { + // The function should only contain debug values and a terminator. + EXPECT_EQ(F->size(), 1u); + EXPECT_TRUE(F->begin()->begin()->isTerminator()); + // Check that the debug values for %x and %y are preserved. bool FoundX = false; bool FoundY = false; - for (const Instruction &I : F->front()) { - auto DI = dyn_cast(&I); - if (!DI) { - // The function should only contain debug values and a terminator. - ASSERT_TRUE(I.isTerminator()); - continue; - } - EXPECT_EQ(DI->getVariable()->getName(), "x"); - FoundX |= doesDebugValueDescribeX(*DI); - FoundY |= doesDebugValueDescribeY(*DI); + for (DbgVariableRecord &DVR : + filterDbgVars(F->begin()->begin()->getDbgRecordRange())) { + EXPECT_EQ(DVR.getVariable()->getName(), "x"); + FoundX |= doesDebugValueDescribeX(DVR); + FoundY |= doesDebugValueDescribeY(DVR); } - ASSERT_TRUE(FoundX); - ASSERT_TRUE(FoundY); + EXPECT_TRUE(FoundX); + EXPECT_TRUE(FoundY); } }; @@ -590,6 +607,12 @@ TEST_F(SalvageDebugInfoTest, RecursiveBlockSimplification) { TEST(Local, wouldInstructionBeTriviallyDead) { LLVMContext Ctx; + // FIXME: PreserveInputDbgFormat is set to true because this test has + // been written to expect debug intrinsics rather than debug records. 
+ // TODO: This test doesn't have a DbgRecord equivalent form so delete + // it when debug intrinsics are removed. + auto SettingGuard = SaveDbgInfoFormat(); + PreserveInputDbgFormat = cl::boolOrDefault::BOU_TRUE; std::unique_ptr M = parseIR(Ctx, R"( define dso_local void @fun() local_unnamed_addr #0 !dbg !9 { @@ -683,12 +706,10 @@ TEST(Local, FindDbgUsers) { R"( define dso_local void @fun(ptr %a) #0 !dbg !11 { entry: - call void @llvm.dbg.assign(metadata ptr %a, metadata !16, metadata !DIExpression(), metadata !15, metadata ptr %a, metadata !DIExpression()), !dbg !19 + #dbg_assign(ptr %a, !16, !DIExpression(), !15, ptr %a, !DIExpression(), !19) ret void } - declare void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata) - !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!2, !3, !9} !llvm.ident = !{!10} @@ -715,9 +736,13 @@ TEST(Local, FindDbgUsers) { verifyModule(*M, &errs(), &BrokenDebugInfo); ASSERT_FALSE(BrokenDebugInfo); + // Convert to debug intrinsics as we want to test findDbgUsers and + // findDbgValue's debug-intrinsic-finding code here. + // TODO: Remove this test when debug intrinsics are removed. + M->convertFromNewDbgValues(); + Function &Fun = *cast(M->getNamedValue("fun")); Value *Arg = Fun.getArg(0); - SmallVector Users; // Arg (%a) is used twice by a single dbg.assign. Check findDbgUsers returns // only 1 pointer to it rather than 2. 
@@ -738,7 +763,7 @@ TEST(Local, FindDbgRecords) { R"( define dso_local void @fun(ptr %a) #0 !dbg !11 { entry: - call void @llvm.dbg.assign(metadata ptr %a, metadata !16, metadata !DIExpression(), metadata !15, metadata ptr %a, metadata !DIExpression()), !dbg !19 + #dbg_assign(ptr %a, !16, !DIExpression(), !15, ptr %a, !DIExpression(), !19) ret void } @@ -767,9 +792,6 @@ TEST(Local, FindDbgRecords) { bool BrokenDebugInfo = true; verifyModule(*M, &errs(), &BrokenDebugInfo); ASSERT_FALSE(BrokenDebugInfo); - bool NewDbgInfoFormat = UseNewDbgInfoFormat; - UseNewDbgInfoFormat = true; - M->convertToNewDbgValues(); Function &Fun = *cast(M->getNamedValue("fun")); Value *Arg = Fun.getArg(0); @@ -789,12 +811,10 @@ TEST(Local, FindDbgRecords) { findDbgValues(Vals, Arg, &Records); EXPECT_EQ(Vals.size(), 0u); EXPECT_EQ(Records.size(), 1u); - UseNewDbgInfoFormat = NewDbgInfoFormat; } TEST(Local, ReplaceAllDbgUsesWith) { using namespace llvm::dwarf; - LLVMContext Ctx; // Note: The datalayout simulates Darwin/x86_64. 
@@ -807,39 +827,36 @@ TEST(Local, ReplaceAllDbgUsesWith) { define void @f() !dbg !6 { entry: %a = add i32 0, 1, !dbg !15 - call void @llvm.dbg.value(metadata i32 %a, metadata !9, metadata !DIExpression()), !dbg !15 + #dbg_value(i32 %a, !9, !DIExpression(), !15) %b = add i64 0, 1, !dbg !16 - call void @llvm.dbg.value(metadata i64 %b, metadata !11, metadata !DIExpression()), !dbg !16 - call void @llvm.dbg.value(metadata i64 %b, metadata !11, metadata !DIExpression(DW_OP_lit0, DW_OP_mul)), !dbg !16 - call void @llvm.dbg.value(metadata i64 %b, metadata !11, metadata !DIExpression(DW_OP_lit0, DW_OP_mul, DW_OP_stack_value)), !dbg !16 - call void @llvm.dbg.value(metadata i64 %b, metadata !11, metadata !DIExpression(DW_OP_LLVM_fragment, 0, 8)), !dbg !16 - call void @llvm.dbg.value(metadata i64 %b, metadata !11, metadata !DIExpression(DW_OP_lit0, DW_OP_mul, DW_OP_LLVM_fragment, 0, 8)), !dbg !16 - call void @llvm.dbg.value(metadata i64 %b, metadata !11, metadata !DIExpression(DW_OP_lit0, DW_OP_mul, DW_OP_stack_value, DW_OP_LLVM_fragment, 0, 8)), !dbg !16 - %c = inttoptr i64 0 to i64*, !dbg !17 - call void @llvm.dbg.declare(metadata i64* %c, metadata !13, metadata !DIExpression()), !dbg !17 + #dbg_value(i64 %b, !11, !DIExpression(), !16) + #dbg_value(i64 %b, !11, !DIExpression(DW_OP_lit0, DW_OP_mul), !16) + #dbg_value(i64 %b, !11, !DIExpression(DW_OP_lit0, DW_OP_mul, DW_OP_stack_value), !16) + #dbg_value(i64 %b, !11, !DIExpression(DW_OP_LLVM_fragment, 0, 8), !16) + #dbg_value(i64 %b, !11, !DIExpression(DW_OP_lit0, DW_OP_mul, DW_OP_LLVM_fragment, 0, 8), !16) + #dbg_value(i64 %b, !11, !DIExpression(DW_OP_lit0, DW_OP_mul, DW_OP_stack_value, DW_OP_LLVM_fragment, 0, 8), !16) + %c = inttoptr i64 0 to ptr, !dbg !17 - %d = inttoptr i64 0 to i32*, !dbg !18 - call void @llvm.dbg.declare(metadata i32* %d, metadata !20, metadata !DIExpression()), !dbg !18 + #dbg_declare(ptr %c, !13, !DIExpression(), !17) + %d = inttoptr i64 0 to ptr, !dbg !18 + #dbg_declare(ptr %d, !20, !DIExpression(), 
!18) %e = add <2 x i16> zeroinitializer, zeroinitializer - call void @llvm.dbg.value(metadata <2 x i16> %e, metadata !14, metadata !DIExpression()), !dbg !18 + #dbg_value(<2 x i16> %e, !14, !DIExpression(), !18) %f = call i32 @escape(i32 0) - call void @llvm.dbg.value(metadata i32 %f, metadata !9, metadata !DIExpression()), !dbg !15 + #dbg_value(i32 %f, !9, !DIExpression(), !15) %barrier = call i32 @escape(i32 0) %g = call i32 @escape(i32 %f) - call void @llvm.dbg.value(metadata i32 %g, metadata !9, metadata !DIExpression()), !dbg !15 + #dbg_value(i32 %g, !9, !DIExpression(), !15) ret void, !dbg !19 } - declare void @llvm.dbg.declare(metadata, metadata, metadata) - declare void @llvm.dbg.value(metadata, metadata, metadata) - !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!5} @@ -894,38 +911,47 @@ TEST(Local, ReplaceAllDbgUsesWith) { EXPECT_TRUE(replaceAllDbgUsesWith(D, C, C, DT)); SmallVector CDbgVals; - findDbgUsers(CDbgVals, &C); - EXPECT_EQ(2U, CDbgVals.size()); - EXPECT_TRUE(all_of(CDbgVals, [](DbgVariableIntrinsic *DII) { - return isa(DII); - })); + SmallVector CDbgRecords; + findDbgUsers(CDbgVals, &C, &CDbgRecords); + EXPECT_EQ(0U, CDbgVals.size()); + EXPECT_EQ(2U, CDbgRecords.size()); + EXPECT_TRUE(all_of( + CDbgRecords, [](DbgVariableRecord *DVR) { return DVR->isDbgDeclare(); })); EXPECT_TRUE(replaceAllDbgUsesWith(C, D, D, DT)); SmallVector DDbgVals; - findDbgUsers(DDbgVals, &D); - EXPECT_EQ(2U, DDbgVals.size()); - EXPECT_TRUE(all_of(DDbgVals, [](DbgVariableIntrinsic *DII) { - return isa(DII); - })); + SmallVector DDbgRecords; + findDbgUsers(DDbgVals, &D, &DDbgRecords); + EXPECT_EQ(0U, DDbgVals.size()); + EXPECT_EQ(2U, DDbgRecords.size()); + EXPECT_TRUE(all_of( + DDbgRecords, [](DbgVariableRecord *DVR) { return DVR->isDbgDeclare(); })); // Introduce a use-before-def. Check that the dbg.value for %a is salvaged. 
EXPECT_TRUE(replaceAllDbgUsesWith(A, F_, F_, DT)); - auto *ADbgVal = cast(A.getNextNode()); - EXPECT_EQ(ADbgVal->getNumVariableLocationOps(), 1u); - EXPECT_EQ(ConstantInt::get(A.getType(), 0), ADbgVal->getVariableLocationOp(0)); + EXPECT_FALSE(A.hasDbgRecords()); + EXPECT_TRUE(B.hasDbgRecords()); + DbgVariableRecord *BDbgVal = + cast(&*B.getDbgRecordRange().begin()); + EXPECT_EQ(BDbgVal->getNumVariableLocationOps(), 1u); + EXPECT_EQ(ConstantInt::get(A.getType(), 0), + BDbgVal->getVariableLocationOp(0)); // Introduce a use-before-def. Check that the dbg.values for %f become undef. EXPECT_TRUE(replaceAllDbgUsesWith(F_, G, G, DT)); - auto *FDbgVal = cast(F_.getNextNode()); - EXPECT_EQ(FDbgVal->getNumVariableLocationOps(), 1u); - EXPECT_TRUE(FDbgVal->isKillLocation()); + DbgVariableRecord *BarrierDbgVal = + cast(&*Barrier.getDbgRecordRange().begin()); + EXPECT_EQ(BarrierDbgVal->getNumVariableLocationOps(), 1u); + EXPECT_TRUE(BarrierDbgVal->isKillLocation()); - SmallVector FDbgVals; - findDbgValues(FDbgVals, &F_); - EXPECT_EQ(0U, FDbgVals.size()); + SmallVector BarrierDbgVals; + SmallVector BarrierDbgRecs; + findDbgValues(BarrierDbgVals, &F_, &BarrierDbgRecs); + EXPECT_EQ(0U, BarrierDbgVals.size()); + EXPECT_EQ(0U, BarrierDbgRecs.size()); // Simulate i32 -> i64 conversion to test sign-extension. Here are some // interesting cases to handle: @@ -935,13 +961,15 @@ TEST(Local, ReplaceAllDbgUsesWith) { // 4-6) like (1-3), but with a fragment EXPECT_TRUE(replaceAllDbgUsesWith(B, A, A, DT)); - SmallVector ADbgVals; - findDbgValues(ADbgVals, &A); - EXPECT_EQ(6U, ADbgVals.size()); + SmallVector BDbgVals; + SmallVector BDbgRecs; + findDbgValues(BDbgVals, &A, &BDbgRecs); + EXPECT_EQ(0U, BDbgVals.size()); + EXPECT_EQ(6U, BDbgRecs.size()); // Check that %a has a dbg.value with a DIExpression matching \p Ops. 
auto hasADbgVal = [&](ArrayRef Ops) { - return any_of(ADbgVals, [&](DbgValueInst *DVI) { + return any_of(BDbgRecs, [&](DbgVariableRecord *DVI) { assert(DVI->getVariable()->getName() == "2"); return DVI->getExpression()->getElements() == Ops; }); @@ -1344,6 +1372,11 @@ TEST(Local, ExpressionForConstant) { TEST(Local, ReplaceDbgVariableRecord) { LLVMContext C; + // FIXME: PreserveInputDbgFormat is set to true because this test has + // been written to expect debug intrinsics rather than debug records; use the + // intrinsic format until we update the test checks. + auto SettingGuard = SaveDbgInfoFormat(); + PreserveInputDbgFormat = cl::boolOrDefault::BOU_TRUE; // Test that RAUW also replaces the operands of DbgVariableRecord objects, // i.e. non-instruction stored debugging information. From bd5fbab38f200c09eb5bc727b56812e53a3e9f00 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 14 Jun 2024 10:58:35 +0200 Subject: [PATCH 068/155] [SimplifyCFG] Add tests for sinking with multiple uses (NFC) --- .../SimplifyCFG/X86/sink-common-code.ll | 161 ++++++++++++++++++ 1 file changed, 161 insertions(+) diff --git a/llvm/test/Transforms/SimplifyCFG/X86/sink-common-code.ll b/llvm/test/Transforms/SimplifyCFG/X86/sink-common-code.ll index b67ee630368480..7b2161351a7941 100644 --- a/llvm/test/Transforms/SimplifyCFG/X86/sink-common-code.ll +++ b/llvm/test/Transforms/SimplifyCFG/X86/sink-common-code.ll @@ -1702,4 +1702,165 @@ return: ret void } +define ptr @multi_use_in_phi(i1 %cond, ptr %p, i64 %a, i64 %b) { +; CHECK-LABEL: @multi_use_in_phi( +; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF:%.*]], label [[ELSE:%.*]] +; CHECK: if: +; CHECK-NEXT: call void @dummy() +; CHECK-NEXT: [[GEP1_A:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[A:%.*]] +; CHECK-NEXT: br label [[JOIN:%.*]] +; CHECK: else: +; CHECK-NEXT: [[GEP1_B:%.*]] = getelementptr i8, ptr [[P]], i64 [[A]] +; CHECK-NEXT: br label [[JOIN]] +; CHECK: join: +; CHECK-NEXT: [[GEP1_B_SINK:%.*]] = phi ptr [ [[GEP1_B]], [[ELSE]] ], 
[ [[GEP1_A]], [[IF]] ] +; CHECK-NEXT: [[PHI1:%.*]] = phi ptr [ [[GEP1_A]], [[IF]] ], [ [[GEP1_B]], [[ELSE]] ] +; CHECK-NEXT: [[GEP2_B:%.*]] = getelementptr i8, ptr [[GEP1_B_SINK]], i64 [[B:%.*]] +; CHECK-NEXT: call void @use.ptr(ptr [[PHI1]]) +; CHECK-NEXT: ret ptr [[GEP2_B]] +; + br i1 %cond, label %if, label %else + +if: + call void @dummy() + %gep1.a = getelementptr i8, ptr %p, i64 %a + %gep2.a = getelementptr i8, ptr %gep1.a, i64 %b + br label %join + +else: + %gep1.b = getelementptr i8, ptr %p, i64 %a + %gep2.b = getelementptr i8, ptr %gep1.b, i64 %b + br label %join + +join: + %phi1 = phi ptr [ %gep1.a, %if ], [ %gep1.b, %else ] + %phi2 = phi ptr [ %gep2.a, %if ], [ %gep2.b, %else ] + call void @use.ptr(ptr %phi1) + ret ptr %phi2 +} + +define ptr @multi_use_in_phi_inconsistent(i1 %cond, ptr %p, i64 %a, i64 %b) { +; CHECK-LABEL: @multi_use_in_phi_inconsistent( +; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF:%.*]], label [[ELSE:%.*]] +; CHECK: if: +; CHECK-NEXT: call void @dummy() +; CHECK-NEXT: [[GEP1_A:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[A:%.*]] +; CHECK-NEXT: br label [[JOIN:%.*]] +; CHECK: else: +; CHECK-NEXT: [[GEP1_B:%.*]] = getelementptr i8, ptr [[P]], i64 [[A]] +; CHECK-NEXT: br label [[JOIN]] +; CHECK: join: +; CHECK-NEXT: [[GEP1_B_SINK:%.*]] = phi ptr [ [[GEP1_B]], [[ELSE]] ], [ [[GEP1_A]], [[IF]] ] +; CHECK-NEXT: [[PHI1:%.*]] = phi ptr [ [[GEP1_A]], [[IF]] ], [ [[P]], [[ELSE]] ] +; CHECK-NEXT: [[GEP2_B:%.*]] = getelementptr i8, ptr [[GEP1_B_SINK]], i64 [[B:%.*]] +; CHECK-NEXT: call void @use.ptr(ptr [[PHI1]]) +; CHECK-NEXT: ret ptr [[GEP2_B]] +; + br i1 %cond, label %if, label %else + +if: + call void @dummy() + %gep1.a = getelementptr i8, ptr %p, i64 %a + %gep2.a = getelementptr i8, ptr %gep1.a, i64 %b + br label %join + +else: + %gep1.b = getelementptr i8, ptr %p, i64 %a + %gep2.b = getelementptr i8, ptr %gep1.b, i64 %b + br label %join + +join: + %phi1 = phi ptr [ %gep1.a, %if ], [ %p, %else ] + %phi2 = phi ptr [ %gep2.a, %if ], [ 
%gep2.b, %else ] + call void @use.ptr(ptr %phi1) + ret ptr %phi2 +} + +define i64 @multi_use_in_block(i1 %cond, ptr %p, i64 %a, i64 %b) { +; CHECK-LABEL: @multi_use_in_block( +; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF:%.*]], label [[ELSE:%.*]] +; CHECK: if: +; CHECK-NEXT: call void @dummy() +; CHECK-NEXT: [[GEP1_A:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[A:%.*]] +; CHECK-NEXT: [[V_A:%.*]] = load i64, ptr [[GEP1_A]], align 8 +; CHECK-NEXT: [[GEP2_A:%.*]] = getelementptr i8, ptr [[GEP1_A]], i64 [[V_A]] +; CHECK-NEXT: br label [[JOIN:%.*]] +; CHECK: else: +; CHECK-NEXT: [[GEP1_B:%.*]] = getelementptr i8, ptr [[P]], i64 [[A]] +; CHECK-NEXT: [[V_B:%.*]] = load i64, ptr [[GEP1_B]], align 8 +; CHECK-NEXT: [[GEP2_B:%.*]] = getelementptr i8, ptr [[GEP1_B]], i64 [[V_B]] +; CHECK-NEXT: br label [[JOIN]] +; CHECK: join: +; CHECK-NEXT: [[PHI1:%.*]] = phi i64 [ [[V_A]], [[IF]] ], [ [[V_B]], [[ELSE]] ] +; CHECK-NEXT: [[PHI2:%.*]] = phi ptr [ [[GEP2_A]], [[IF]] ], [ [[GEP2_B]], [[ELSE]] ] +; CHECK-NEXT: call void @use.ptr(ptr [[PHI2]]) +; CHECK-NEXT: ret i64 [[PHI1]] +; + br i1 %cond, label %if, label %else + +if: + call void @dummy() + %gep1.a = getelementptr i8, ptr %p, i64 %a + %v.a = load i64, ptr %gep1.a + %gep2.a = getelementptr i8, ptr %gep1.a, i64 %v.a + br label %join + +else: + %gep1.b = getelementptr i8, ptr %p, i64 %a + %v.b = load i64, ptr %gep1.b + %gep2.b = getelementptr i8, ptr %gep1.b, i64 %v.b + br label %join + +join: + %phi1 = phi i64 [ %v.a, %if ], [ %v.b, %else ] + %phi2 = phi ptr [ %gep2.a, %if ], [ %gep2.b, %else ] + call void @use.ptr(ptr %phi2) + ret i64 %phi1 +} + +define i64 @multi_use_in_block_inconsistent(i1 %cond, ptr %p, i64 %a, i64 %b) { +; CHECK-LABEL: @multi_use_in_block_inconsistent( +; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF:%.*]], label [[ELSE:%.*]] +; CHECK: if: +; CHECK-NEXT: call void @dummy() +; CHECK-NEXT: [[GEP1_A:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[A:%.*]] +; CHECK-NEXT: [[V_A:%.*]] = load i64, ptr [[GEP1_A]], 
align 8 +; CHECK-NEXT: [[GEP2_A:%.*]] = getelementptr i8, ptr [[GEP1_A]], i64 [[V_A]] +; CHECK-NEXT: br label [[JOIN:%.*]] +; CHECK: else: +; CHECK-NEXT: [[GEP1_B:%.*]] = getelementptr i8, ptr [[P]], i64 [[A]] +; CHECK-NEXT: [[V_B:%.*]] = load i64, ptr [[P]], align 8 +; CHECK-NEXT: [[GEP2_B:%.*]] = getelementptr i8, ptr [[GEP1_B]], i64 [[V_B]] +; CHECK-NEXT: br label [[JOIN]] +; CHECK: join: +; CHECK-NEXT: [[PHI1:%.*]] = phi i64 [ [[V_A]], [[IF]] ], [ [[V_B]], [[ELSE]] ] +; CHECK-NEXT: [[PHI2:%.*]] = phi ptr [ [[GEP2_A]], [[IF]] ], [ [[GEP2_B]], [[ELSE]] ] +; CHECK-NEXT: call void @use.ptr(ptr [[PHI2]]) +; CHECK-NEXT: ret i64 [[PHI1]] +; + br i1 %cond, label %if, label %else + +if: + call void @dummy() + %gep1.a = getelementptr i8, ptr %p, i64 %a + %v.a = load i64, ptr %gep1.a + %gep2.a = getelementptr i8, ptr %gep1.a, i64 %v.a + br label %join + +else: + %gep1.b = getelementptr i8, ptr %p, i64 %a + %v.b = load i64, ptr %p + %gep2.b = getelementptr i8, ptr %gep1.b, i64 %v.b + br label %join + +join: + %phi1 = phi i64 [ %v.a, %if ], [ %v.b, %else ] + %phi2 = phi ptr [ %gep2.a, %if ], [ %gep2.b, %else ] + call void @use.ptr(ptr %phi2) + ret i64 %phi1 +} + +declare void @dummy() +declare void @use.ptr(ptr) + !12 = !{i32 1} From d62ff7195ef880bba6d2522bf5e882e7ef28cb7f Mon Sep 17 00:00:00 2001 From: Sergio Afonso Date: Fri, 14 Jun 2024 10:08:07 +0100 Subject: [PATCH 069/155] [Flang][OpenMP] NFC: Check omp.loop_nest in compound construct lowering (#95404) This patch updates tests containing compound loop constructs to also check for the `omp.loop_nest` operation. The "loop-combined.f90" test file is renamed to "loop-compound.f90" as well, to better indicate the types of constructs that are checked in it. 
--- flang/test/Lower/OpenMP/if-clause.f90 | 61 +++++++++++++++++++ .../{loop-combined.f90 => loop-compound.f90} | 11 +++- 2 files changed, 71 insertions(+), 1 deletion(-) rename flang/test/Lower/OpenMP/{loop-combined.f90 => loop-compound.f90} (90%) diff --git a/flang/test/Lower/OpenMP/if-clause.f90 b/flang/test/Lower/OpenMP/if-clause.f90 index 1413dd961c7f8b..2c9a66e7bc11ea 100644 --- a/flang/test/Lower/OpenMP/if-clause.f90 +++ b/flang/test/Lower/OpenMP/if-clause.f90 @@ -30,6 +30,7 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.loop_nest !$omp do simd do i = 1, 10 end do @@ -38,6 +39,7 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.loop_nest !$omp do simd if(.true.) do i = 1, 10 end do @@ -46,6 +48,7 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.loop_nest !$omp do simd if(simd: .true.) do i = 1, 10 end do @@ -79,6 +82,10 @@ program main ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK-NEXT: omp.loop_nest !$omp parallel do do i = 1, 10 end do @@ -86,6 +93,10 @@ program main ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK-NEXT: omp.loop_nest !$omp parallel do if(.true.) do i = 1, 10 end do @@ -93,6 +104,10 @@ program main ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK-NEXT: omp.loop_nest !$omp parallel do if(parallel: .true.) do i = 1, 10 end do @@ -107,6 +122,7 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.loop_nest !$omp parallel do simd do i = 1, 10 end do @@ -117,6 +133,7 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.loop_nest !$omp parallel do simd if(.true.) 
do i = 1, 10 end do @@ -127,6 +144,7 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.loop_nest !$omp parallel do simd if(parallel: .true.) if(simd: .false.) do i = 1, 10 end do @@ -137,6 +155,7 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.loop_nest !$omp parallel do simd if(parallel: .true.) do i = 1, 10 end do @@ -148,6 +167,7 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.loop_nest !$omp parallel do simd if(simd: .true.) do i = 1, 10 end do @@ -159,6 +179,7 @@ program main ! CHECK: omp.simd ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.loop_nest !$omp simd do i = 1, 10 end do @@ -166,6 +187,7 @@ program main ! CHECK: omp.simd ! CHECK-SAME: if({{.*}}) + ! CHECK-NEXT: omp.loop_nest !$omp simd if(.true.) do i = 1, 10 end do @@ -173,6 +195,7 @@ program main ! CHECK: omp.simd ! CHECK-SAME: if({{.*}}) + ! CHECK-NEXT: omp.loop_nest !$omp simd if(simd: .true.) do i = 1, 10 end do @@ -257,6 +280,10 @@ program main ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK-NEXT: omp.loop_nest !$omp target parallel do do i = 1, 10 end do @@ -266,6 +293,10 @@ program main ! CHECK-SAME: if({{.*}}) ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK-NEXT: omp.loop_nest !$omp target parallel do if(.true.) do i = 1, 10 end do @@ -275,6 +306,10 @@ program main ! CHECK-SAME: if({{.*}}) ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK-NEXT: omp.loop_nest !$omp target parallel do if(target: .true.) if(parallel: .false.) do i = 1, 10 end do @@ -285,6 +320,10 @@ program main ! CHECK: omp.parallel ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! 
CHECK-NEXT: omp.loop_nest !$omp target parallel do if(target: .true.) do i = 1, 10 end do @@ -295,6 +334,10 @@ program main ! CHECK-SAME: { ! CHECK: omp.parallel ! CHECK-SAME: if({{.*}}) + ! CHECK: omp.wsloop + ! CHECK-NOT: if({{.*}}) + ! CHECK-SAME: { + ! CHECK-NEXT: omp.loop_nest !$omp target parallel do if(parallel: .true.) do i = 1, 10 end do @@ -312,6 +355,7 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.loop_nest !$omp target parallel do simd do i = 1, 10 end do @@ -324,6 +368,7 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.loop_nest !$omp target parallel do simd if(.true.) do i = 1, 10 end do @@ -336,6 +381,7 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.loop_nest !$omp target parallel do simd if(target: .true.) if(parallel: .false.) & !$omp& if(simd: .true.) do i = 1, 10 @@ -350,6 +396,7 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.loop_nest !$omp target parallel do simd if(target: .true.) do i = 1, 10 end do @@ -363,6 +410,7 @@ program main ! CHECK: omp.wsloop ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.loop_nest !$omp target parallel do simd if(parallel: .true.) if(simd: .false.) do i = 1, 10 end do @@ -424,6 +472,7 @@ program main ! CHECK: omp.simd ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.loop_nest !$omp target simd do i = 1, 10 end do @@ -433,6 +482,7 @@ program main ! CHECK-SAME: if({{.*}}) ! CHECK: omp.simd ! CHECK-SAME: if({{.*}}) + ! CHECK-NEXT: omp.loop_nest !$omp target simd if(.true.) do i = 1, 10 end do @@ -442,6 +492,7 @@ program main ! CHECK-SAME: if({{.*}}) ! CHECK: omp.simd ! CHECK-SAME: if({{.*}}) + ! CHECK-NEXT: omp.loop_nest !$omp target simd if(target: .true.) if(simd: .false.) do i = 1, 10 end do @@ -452,6 +503,7 @@ program main ! CHECK: omp.simd ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! 
CHECK-NEXT: omp.loop_nest !$omp target simd if(target: .true.) do i = 1, 10 end do @@ -462,6 +514,7 @@ program main ! CHECK-SAME: { ! CHECK: omp.simd ! CHECK-SAME: if({{.*}}) + ! CHECK-NEXT: omp.loop_nest !$omp target simd if(simd: .true.) do i = 1, 10 end do @@ -479,6 +532,7 @@ program main ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.loop_nest !$omp target teams distribute do i = 1, 10 end do @@ -491,6 +545,7 @@ program main ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.loop_nest !$omp target teams distribute if(.true.) do i = 1, 10 end do @@ -503,6 +558,7 @@ program main ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.loop_nest !$omp target teams distribute if(target: .true.) if(teams: .false.) do i = 1, 10 end do @@ -516,6 +572,7 @@ program main ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.loop_nest !$omp target teams distribute if(target: .true.) do i = 1, 10 end do @@ -529,6 +586,7 @@ program main ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.loop_nest !$omp target teams distribute if(teams: .true.) do i = 1, 10 end do @@ -609,6 +667,7 @@ program main ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.loop_nest !$omp teams distribute do i = 1, 10 end do @@ -619,6 +678,7 @@ program main ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.loop_nest !$omp teams distribute if(.true.) do i = 1, 10 end do @@ -629,6 +689,7 @@ program main ! CHECK: omp.distribute ! CHECK-NOT: if({{.*}}) ! CHECK-SAME: { + ! CHECK-NEXT: omp.loop_nest !$omp teams distribute if(teams: .true.) 
do i = 1, 10 end do diff --git a/flang/test/Lower/OpenMP/loop-combined.f90 b/flang/test/Lower/OpenMP/loop-compound.f90 similarity index 90% rename from flang/test/Lower/OpenMP/loop-combined.f90 rename to flang/test/Lower/OpenMP/loop-compound.f90 index 65995fe0805629..5012008b076714 100644 --- a/flang/test/Lower/OpenMP/loop-combined.f90 +++ b/flang/test/Lower/OpenMP/loop-compound.f90 @@ -1,4 +1,5 @@ -! This test checks lowering of OpenMP combined loop constructs. +! This test checks lowering of OpenMP compound (combined and composite) loop +! constructs. ! RUN: bbc -fopenmp -emit-hlfir %s -o - | FileCheck %s ! RUN: %flang_fc1 -fopenmp -emit-hlfir %s -o - | FileCheck %s @@ -22,6 +23,7 @@ program main ! DO SIMD ! ---------------------------------------------------------------------------- ! CHECK: omp.wsloop + ! CHECK-NEXT: omp.loop_nest !$omp do simd do i = 1, 10 end do @@ -32,6 +34,7 @@ program main ! ---------------------------------------------------------------------------- ! CHECK: omp.parallel ! CHECK: omp.wsloop + ! CHECK-NEXT: omp.loop_nest !$omp parallel do simd do i = 1, 10 end do @@ -42,6 +45,7 @@ program main ! ---------------------------------------------------------------------------- ! CHECK: omp.parallel ! CHECK: omp.wsloop + ! CHECK-NEXT: omp.loop_nest !$omp parallel do do i = 1, 10 end do @@ -53,6 +57,7 @@ program main ! CHECK: omp.target ! CHECK: omp.parallel ! CHECK: omp.wsloop + ! CHECK-NEXT: omp.loop_nest !$omp target parallel do simd do i = 1, 10 end do @@ -64,6 +69,7 @@ program main ! CHECK: omp.target ! CHECK: omp.parallel ! CHECK: omp.wsloop + ! CHECK-NEXT: omp.loop_nest !$omp target parallel do do i = 1, 10 end do @@ -74,6 +80,7 @@ program main ! ---------------------------------------------------------------------------- ! CHECK: omp.target ! CHECK: omp.simd + ! CHECK-NEXT: omp.loop_nest !$omp target simd do i = 1, 10 end do @@ -86,6 +93,7 @@ program main ! CHECK: omp.target ! CHECK: omp.teams ! CHECK: omp.distribute + ! 
CHECK-NEXT: omp.loop_nest !$omp target teams distribute do i = 1, 10 end do @@ -97,6 +105,7 @@ program main ! CHECK: omp.teams ! CHECK: omp.distribute + ! CHECK-NEXT: omp.loop_nest !$omp teams distribute do i = 1, 10 end do From b6b0f975a6005de530262ed1c5643d1060c86d63 Mon Sep 17 00:00:00 2001 From: Tom Eccles Date: Fri, 14 Jun 2024 10:11:12 +0100 Subject: [PATCH 070/155] [flang][OpenMP] Support reduction of POINTER variables (#95148) Just treat them the same as ALLOCATABLE. gfortran doesn't allow POINTER objects in a REDUCTION clause, but so far as I can tell the standard explicitly allows it (openmp5.2 section 5.5.5). --- flang/lib/Lower/OpenMP/ReductionProcessor.cpp | 22 +-- .../parallel-reduction-pointer-array.f90 | 131 ++++++++++++++++++ .../Lower/OpenMP/wsloop-reduction-pointer.f90 | 110 +++++++++++++++ 3 files changed, 255 insertions(+), 8 deletions(-) create mode 100644 flang/test/Lower/OpenMP/parallel-reduction-pointer-array.f90 create mode 100644 flang/test/Lower/OpenMP/wsloop-reduction-pointer.f90 diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp index 60e933f5bc1f71..237f9cdf22a1b4 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp @@ -332,7 +332,9 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, fir::unwrapRefType(boxTy.getEleTy())); fir::HeapType heapTy = mlir::dyn_cast_or_null(boxTy.getEleTy()); - if ((!seqTy || seqTy.hasUnknownShape()) && !heapTy) + fir::PointerType ptrTy = + mlir::dyn_cast_or_null(boxTy.getEleTy()); + if ((!seqTy || seqTy.hasUnknownShape()) && !heapTy && !ptrTy) TODO(loc, "Unsupported boxed type in OpenMP reduction"); // load fir.ref> @@ -340,7 +342,7 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, lhs = builder.create(loc, lhs); rhs = builder.create(loc, rhs); - if (heapTy && !seqTy) { + if ((heapTy || ptrTy) && !seqTy) { // get box contents (heap pointers) 
lhs = builder.create(loc, lhs); rhs = builder.create(loc, rhs); @@ -350,8 +352,10 @@ static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, lhs = builder.create(loc, lhs); rhs = builder.create(loc, rhs); + mlir::Type eleTy = heapTy ? heapTy.getEleTy() : ptrTy.getEleTy(); + mlir::Value result = ReductionProcessor::createScalarCombiner( - builder, loc, redId, heapTy.getEleTy(), lhs, rhs); + builder, loc, redId, eleTy, lhs, rhs); builder.create(loc, result, lhsValAddr); builder.create(loc, lhsAddr); return; @@ -439,7 +443,7 @@ createReductionCleanupRegion(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Type valTy = fir::unwrapRefType(redTy); if (auto boxTy = mlir::dyn_cast_or_null(valTy)) { - if (!mlir::isa(boxTy.getEleTy())) { + if (!mlir::isa(boxTy.getEleTy())) { mlir::Type innerTy = fir::extractSequenceType(boxTy); if (!mlir::isa(innerTy)) typeError(); @@ -533,12 +537,13 @@ createReductionInitRegion(fir::FirOpBuilder &builder, mlir::Location loc, // all arrays are boxed if (auto boxTy = mlir::dyn_cast_or_null(ty)) { assert(isByRef && "passing boxes by value is unsupported"); - bool isAllocatable = mlir::isa(boxTy.getEleTy()); + bool isAllocatableOrPointer = + mlir::isa(boxTy.getEleTy()); mlir::Value boxAlloca = builder.create(loc, ty); mlir::Type innerTy = fir::unwrapRefType(boxTy.getEleTy()); if (fir::isa_trivial(innerTy)) { // boxed non-sequence value e.g. 
!fir.box> - if (!isAllocatable) + if (!isAllocatableOrPointer) TODO(loc, "Reduction of non-allocatable trivial typed box"); fir::IfOp ifUnallocated = handleNullAllocatable(boxAlloca); @@ -560,7 +565,7 @@ createReductionInitRegion(fir::FirOpBuilder &builder, mlir::Location loc, TODO(loc, "Unsupported boxed type for reduction"); fir::IfOp ifUnallocated{nullptr}; - if (isAllocatable) { + if (isAllocatableOrPointer) { ifUnallocated = handleNullAllocatable(boxAlloca); builder.setInsertionPointToStart(&ifUnallocated.getElseRegion().front()); } @@ -587,7 +592,8 @@ createReductionInitRegion(fir::FirOpBuilder &builder, mlir::Location loc, mlir::OpBuilder::InsertionGuard guard(builder); createReductionCleanupRegion(builder, loc, reductionDecl); } else { - assert(!isAllocatable && "Allocatable arrays must be heap allocated"); + assert(!isAllocatableOrPointer && + "Pointer-like arrays must be heap allocated"); } // Put the temporary inside of a box: diff --git a/flang/test/Lower/OpenMP/parallel-reduction-pointer-array.f90 b/flang/test/Lower/OpenMP/parallel-reduction-pointer-array.f90 new file mode 100644 index 00000000000000..2c2f60cb72c9a1 --- /dev/null +++ b/flang/test/Lower/OpenMP/parallel-reduction-pointer-array.f90 @@ -0,0 +1,131 @@ +! RUN: bbc -emit-hlfir -fopenmp -o - %s | FileCheck %s +! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s | FileCheck %s + +program reduce +integer :: i = 0 +integer, dimension(:), pointer :: r + +allocate(r(2)) + +!$omp parallel do reduction(+:r) +do i=0,10 + r(1) = i + r(2) = -i +enddo +!$omp end parallel do + +print *,r +deallocate(r) + +end program + +! CHECK-LABEL: omp.declare_reduction @add_reduction_byref_box_ptr_Uxi32 : !fir.ref>>> init { +! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref>>>): +! CHECK: %[[VAL_1:.*]] = arith.constant 0 : i32 +! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref>>> +! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.box>> +! CHECK: %[[VAL_4:.*]] = fir.box_addr %[[VAL_2]] : (!fir.box>>) -> !fir.ptr> +! 
CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (!fir.ptr>) -> i64 +! CHECK: %[[VAL_6:.*]] = arith.constant 0 : i64 +! CHECK: %[[VAL_7:.*]] = arith.cmpi eq, %[[VAL_5]], %[[VAL_6]] : i64 +! CHECK: fir.if %[[VAL_7]] { +! CHECK: %[[VAL_8:.*]] = fir.embox %[[VAL_4]] : (!fir.ptr>) -> !fir.box>> +! CHECK: fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref>>> +! CHECK: } else { +! CHECK: %[[VAL_9:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_10:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_9]] : (!fir.box>>, index) -> (index, index, index) +! CHECK: %[[VAL_11:.*]] = fir.shape %[[VAL_10]]#1 : (index) -> !fir.shape<1> +! CHECK: %[[VAL_12:.*]] = fir.allocmem !fir.array, %[[VAL_10]]#1 {bindc_name = ".tmp", uniq_name = ""} +! CHECK: %[[VAL_13:.*]] = arith.constant true +! CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_12]](%[[VAL_11]]) {uniq_name = ".tmp"} : (!fir.heap>, !fir.shape<1>) -> (!fir.box>, !fir.heap>) +! CHECK: %[[VAL_15:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_16:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_15]] : (!fir.box>>, index) -> (index, index, index) +! CHECK: %[[VAL_17:.*]] = fir.shape_shift %[[VAL_16]]#0, %[[VAL_16]]#1 : (index, index) -> !fir.shapeshift<1> +! CHECK: %[[VAL_18:.*]] = fir.rebox %[[VAL_14]]#0(%[[VAL_17]]) : (!fir.box>, !fir.shapeshift<1>) -> !fir.box>> +! CHECK: hlfir.assign %[[VAL_1]] to %[[VAL_18]] : i32, !fir.box>> +! CHECK: fir.store %[[VAL_18]] to %[[VAL_3]] : !fir.ref>>> +! CHECK: } +! CHECK: omp.yield(%[[VAL_3]] : !fir.ref>>>) +! CHECK-LABEL: } combiner { +! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref>>>, %[[VAL_1:.*]]: !fir.ref>>>): +! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref>>> +! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_1]] : !fir.ref>>> +! CHECK: %[[VAL_4:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_4]] : (!fir.box>>, index) -> (index, index, index) +! CHECK: %[[VAL_6:.*]] = fir.shape_shift %[[VAL_5]]#0, %[[VAL_5]]#1 : (index, index) -> !fir.shapeshift<1> +! 
CHECK: %[[VAL_7:.*]] = arith.constant 1 : index +! CHECK: fir.do_loop %[[VAL_8:.*]] = %[[VAL_7]] to %[[VAL_5]]#1 step %[[VAL_7]] unordered { +! CHECK: %[[VAL_9:.*]] = fir.array_coor %[[VAL_2]](%[[VAL_6]]) %[[VAL_8]] : (!fir.box>>, !fir.shapeshift<1>, index) -> !fir.ref +! CHECK: %[[VAL_10:.*]] = fir.array_coor %[[VAL_3]](%[[VAL_6]]) %[[VAL_8]] : (!fir.box>>, !fir.shapeshift<1>, index) -> !fir.ref +! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref +! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_10]] : !fir.ref +! CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_11]], %[[VAL_12]] : i32 +! CHECK: fir.store %[[VAL_13]] to %[[VAL_9]] : !fir.ref +! CHECK: } +! CHECK: omp.yield(%[[VAL_0]] : !fir.ref>>>) +! CHECK-LABEL: } cleanup { +! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref>>>): +! CHECK: %[[VAL_1:.*]] = fir.load %[[VAL_0]] : !fir.ref>>> +! CHECK: %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box>>) -> !fir.ptr> +! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (!fir.ptr>) -> i64 +! CHECK: %[[VAL_4:.*]] = arith.constant 0 : i64 +! CHECK: %[[VAL_5:.*]] = arith.cmpi ne, %[[VAL_3]], %[[VAL_4]] : i64 +! CHECK: fir.if %[[VAL_5]] { +! CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_2]] : (!fir.ptr>) -> !fir.heap> +! CHECK: fir.freemem %[[VAL_6]] : !fir.heap> +! CHECK: } +! CHECK: omp.yield +! CHECK: } + +! CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "reduce"} { +! CHECK: %[[VAL_0:.*]] = fir.address_of(@_QFEi) : !fir.ref +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_2:.*]] = fir.address_of(@_QFEr) : !fir.ref>>> +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {fortran_attrs = {{.*}}, uniq_name = "_QFEr"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) +! CHECK: %[[VAL_4:.*]] = arith.constant false +! CHECK: %[[VAL_5:.*]] = fir.absent !fir.box +! CHECK: %[[VAL_6:.*]] = fir.address_of( +! CHECK: %[[VAL_7:.*]] = arith.constant 8 : i32 +! CHECK: %[[VAL_8:.*]] = fir.zero_bits !fir.ptr> +! 
CHECK: %[[VAL_9:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_10:.*]] = fir.shape %[[VAL_9]] : (index) -> !fir.shape<1> +! CHECK: %[[VAL_11:.*]] = fir.embox %[[VAL_8]](%[[VAL_10]]) : (!fir.ptr>, !fir.shape<1>) -> !fir.box>> +! CHECK: fir.store %[[VAL_11]] to %[[VAL_3]]#1 : !fir.ref>>> +! CHECK: %[[VAL_12:.*]] = arith.constant 1 : index +! CHECK: %[[VAL_13:.*]] = arith.constant 2 : i32 +! CHECK: %[[VAL_14:.*]] = arith.constant 0 : i32 +! CHECK: %[[VAL_15:.*]] = fir.convert %[[VAL_3]]#1 : (!fir.ref>>>) -> !fir.ref> +! CHECK: %[[VAL_16:.*]] = fir.convert %[[VAL_12]] : (index) -> i64 +! CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_13]] : (i32) -> i64 +! CHECK: %[[VAL_18:.*]] = fir.call @_FortranAPointerSetBounds(%[[VAL_15]], %[[VAL_14]], %[[VAL_16]], %[[VAL_17]]) fastmath : (!fir.ref>, i32, i64, i64) -> none +! CHECK: %[[VAL_19:.*]] = fir.convert %[[VAL_3]]#1 : (!fir.ref>>>) -> !fir.ref> +! CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_6]] : (!fir.ref>) -> !fir.ref +! CHECK: %[[VAL_21:.*]] = fir.call @_FortranAPointerAllocate(%[[VAL_19]], %[[VAL_4]], %[[VAL_5]], %[[VAL_20]], %[[VAL_7]]) fastmath : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 +! CHECK: omp.parallel { +! CHECK: %[[VAL_22:.*]] = fir.alloca i32 {bindc_name = "i", pinned, uniq_name = "_QFEi"} +! CHECK: %[[VAL_23:.*]]:2 = hlfir.declare %[[VAL_22]] {uniq_name = "_QFEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_24:.*]] = arith.constant 0 : i32 +! CHECK: %[[VAL_25:.*]] = arith.constant 10 : i32 +! CHECK: %[[VAL_26:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_ptr_Uxi32 %[[VAL_3]]#0 -> %[[VAL_27:.*]] : !fir.ref>>>) { +! CHECK: omp.loop_nest (%[[VAL_28:.*]]) : i32 = (%[[VAL_24]]) to (%[[VAL_25]]) inclusive step (%[[VAL_26]]) { +! CHECK: %[[VAL_29:.*]]:2 = hlfir.declare %[[VAL_27]] {fortran_attrs = {{.*}}, uniq_name = "_QFEr"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) +! CHECK: fir.store %[[VAL_28]] to %[[VAL_23]]#1 : !fir.ref +! 
CHECK: %[[VAL_30:.*]] = fir.load %[[VAL_23]]#0 : !fir.ref +! CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_29]]#0 : !fir.ref>>> +! CHECK: %[[VAL_32:.*]] = arith.constant 1 : index +! CHECK: %[[VAL_33:.*]] = hlfir.designate %[[VAL_31]] (%[[VAL_32]]) : (!fir.box>>, index) -> !fir.ref +! CHECK: hlfir.assign %[[VAL_30]] to %[[VAL_33]] : i32, !fir.ref +! CHECK: %[[VAL_34:.*]] = fir.load %[[VAL_23]]#0 : !fir.ref +! CHECK: %[[VAL_35:.*]] = arith.constant 0 : i32 +! CHECK: %[[VAL_36:.*]] = arith.subi %[[VAL_35]], %[[VAL_34]] : i32 +! CHECK: %[[VAL_37:.*]] = fir.load %[[VAL_29]]#0 : !fir.ref>>> +! CHECK: %[[VAL_38:.*]] = arith.constant 2 : index +! CHECK: %[[VAL_39:.*]] = hlfir.designate %[[VAL_37]] (%[[VAL_38]]) : (!fir.box>>, index) -> !fir.ref +! CHECK: hlfir.assign %[[VAL_36]] to %[[VAL_39]] : i32, !fir.ref +! CHECK: omp.yield +! CHECK: } +! CHECK: omp.terminator +! CHECK: } +! CHECK: omp.terminator +! CHECK: } diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-pointer.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-pointer.f90 new file mode 100644 index 00000000000000..aab6efbcbc5fe7 --- /dev/null +++ b/flang/test/Lower/OpenMP/wsloop-reduction-pointer.f90 @@ -0,0 +1,110 @@ +! RUN: bbc -emit-hlfir -fopenmp -o - %s | FileCheck %s +! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s | FileCheck %s + +program reduce_pointer + integer, pointer :: v + integer i + + allocate(v) + v = 0 + + !$omp parallel do private(i) reduction(+:v) + do i = 1, 5 + v = v + 42 + end do + !$omp end parallel do + + print *,v + deallocate(v) +end program + +! CHECK-LABEL: omp.declare_reduction @add_reduction_byref_box_ptr_i32 : !fir.ref>> init { +! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref>>): +! CHECK: %[[VAL_1:.*]] = arith.constant 0 : i32 +! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref>> +! CHECK: %[[VAL_3:.*]] = fir.alloca !fir.box> +! CHECK: %[[VAL_4:.*]] = fir.box_addr %[[VAL_2]] : (!fir.box>) -> !fir.ptr +! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (!fir.ptr) -> i64 +! 
CHECK: %[[VAL_6:.*]] = arith.constant 0 : i64 +! CHECK: %[[VAL_7:.*]] = arith.cmpi eq, %[[VAL_5]], %[[VAL_6]] : i64 +! CHECK: fir.if %[[VAL_7]] { +! CHECK: %[[VAL_8:.*]] = fir.embox %[[VAL_4]] : (!fir.ptr) -> !fir.box> +! CHECK: fir.store %[[VAL_8]] to %[[VAL_3]] : !fir.ref>> +! CHECK: } else { +! CHECK: %[[VAL_9:.*]] = fir.allocmem i32 +! CHECK: fir.store %[[VAL_1]] to %[[VAL_9]] : !fir.heap +! CHECK: %[[VAL_10:.*]] = fir.embox %[[VAL_9]] : (!fir.heap) -> !fir.box> +! CHECK: fir.store %[[VAL_10]] to %[[VAL_3]] : !fir.ref>> +! CHECK: } +! CHECK: omp.yield(%[[VAL_3]] : !fir.ref>>) +! CHECK-LABEL: } combiner { +! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref>>, %[[VAL_1:.*]]: !fir.ref>>): +! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref>> +! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_1]] : !fir.ref>> +! CHECK: %[[VAL_4:.*]] = fir.box_addr %[[VAL_2]] : (!fir.box>) -> !fir.ptr +! CHECK: %[[VAL_5:.*]] = fir.box_addr %[[VAL_3]] : (!fir.box>) -> !fir.ptr +! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_4]] : !fir.ptr +! CHECK: %[[VAL_7:.*]] = fir.load %[[VAL_5]] : !fir.ptr +! CHECK: %[[VAL_8:.*]] = arith.addi %[[VAL_6]], %[[VAL_7]] : i32 +! CHECK: fir.store %[[VAL_8]] to %[[VAL_4]] : !fir.ptr +! CHECK: omp.yield(%[[VAL_0]] : !fir.ref>>) +! CHECK-LABEL: } cleanup { +! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref>>): +! CHECK: %[[VAL_1:.*]] = fir.load %[[VAL_0]] : !fir.ref>> +! CHECK: %[[VAL_2:.*]] = fir.box_addr %[[VAL_1]] : (!fir.box>) -> !fir.ptr +! CHECK: %[[VAL_3:.*]] = fir.convert %[[VAL_2]] : (!fir.ptr) -> i64 +! CHECK: %[[VAL_4:.*]] = arith.constant 0 : i64 +! CHECK: %[[VAL_5:.*]] = arith.cmpi ne, %[[VAL_3]], %[[VAL_4]] : i64 +! CHECK: fir.if %[[VAL_5]] { +! CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_2]] : (!fir.ptr) -> !fir.heap +! CHECK: fir.freemem %[[VAL_6]] : !fir.heap +! CHECK: } +! CHECK: omp.yield +! CHECK: } + +! CHECK-LABEL: func.func @_QQmain() attributes {fir.bindc_name = "reduce_pointer"} { +! CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFEi"} +! 
CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_2:.*]] = fir.alloca !fir.box> {bindc_name = "v", uniq_name = "_QFEv"} +! CHECK: %[[VAL_3:.*]] = fir.zero_bits !fir.ptr +! CHECK: %[[VAL_4:.*]] = fir.embox %[[VAL_3]] : (!fir.ptr) -> !fir.box> +! CHECK: fir.store %[[VAL_4]] to %[[VAL_2]] : !fir.ref>> +! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_2]] {fortran_attrs = {{.*}}, uniq_name = "_QFEv"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) +! CHECK: %[[VAL_6:.*]] = arith.constant false +! CHECK: %[[VAL_7:.*]] = fir.absent !fir.box +! CHECK: %[[VAL_8:.*]] = fir.address_of( +! CHECK: %[[VAL_9:.*]] = arith.constant 8 : i32 +! CHECK: %[[VAL_10:.*]] = fir.zero_bits !fir.ptr +! CHECK: %[[VAL_11:.*]] = fir.embox %[[VAL_10]] : (!fir.ptr) -> !fir.box> +! CHECK: fir.store %[[VAL_11]] to %[[VAL_5]]#1 : !fir.ref>> +! CHECK: %[[VAL_12:.*]] = fir.convert %[[VAL_5]]#1 : (!fir.ref>>) -> !fir.ref> +! CHECK: %[[VAL_13:.*]] = fir.convert %[[VAL_8]] : (!fir.ref>) -> !fir.ref +! CHECK: %[[VAL_14:.*]] = fir.call @_FortranAPointerAllocate(%[[VAL_12]], %[[VAL_6]], %[[VAL_7]], %[[VAL_13]], %[[VAL_9]]) fastmath : (!fir.ref>, i1, !fir.box, !fir.ref, i32) -> i32 +! CHECK: %[[VAL_15:.*]] = arith.constant 0 : i32 +! CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref>> +! CHECK: %[[VAL_17:.*]] = fir.box_addr %[[VAL_16]] : (!fir.box>) -> !fir.ptr +! CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_17]] : i32, !fir.ptr +! CHECK: omp.parallel { +! CHECK: %[[VAL_18:.*]] = fir.alloca i32 {bindc_name = "i", pinned, uniq_name = "_QFEi"} +! CHECK: %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_18]] {uniq_name = "_QFEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_20:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_21:.*]] = arith.constant 5 : i32 +! CHECK: %[[VAL_22:.*]] = arith.constant 1 : i32 +! CHECK: omp.wsloop reduction(byref @add_reduction_byref_box_ptr_i32 %[[VAL_5]]#0 -> %[[VAL_23:.*]] : !fir.ref>>) { +! 
CHECK: omp.loop_nest (%[[VAL_24:.*]]) : i32 = (%[[VAL_20]]) to (%[[VAL_21]]) inclusive step (%[[VAL_22]]) { +! CHECK: %[[VAL_25:.*]]:2 = hlfir.declare %[[VAL_23]] {fortran_attrs = {{.*}}, uniq_name = "_QFEv"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) +! CHECK: fir.store %[[VAL_24]] to %[[VAL_19]]#1 : !fir.ref +! CHECK: %[[VAL_26:.*]] = fir.load %[[VAL_25]]#0 : !fir.ref>> +! CHECK: %[[VAL_27:.*]] = fir.box_addr %[[VAL_26]] : (!fir.box>) -> !fir.ptr +! CHECK: %[[VAL_28:.*]] = fir.load %[[VAL_27]] : !fir.ptr +! CHECK: %[[VAL_29:.*]] = arith.constant 42 : i32 +! CHECK: %[[VAL_30:.*]] = arith.addi %[[VAL_28]], %[[VAL_29]] : i32 +! CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_25]]#0 : !fir.ref>> +! CHECK: %[[VAL_32:.*]] = fir.box_addr %[[VAL_31]] : (!fir.box>) -> !fir.ptr +! CHECK: hlfir.assign %[[VAL_30]] to %[[VAL_32]] : i32, !fir.ptr +! CHECK: omp.yield +! CHECK: } +! CHECK: omp.terminator +! CHECK: } +! CHECK: omp.terminator +! CHECK: } From 32cd703da578e769787a921d76b768164a4256b6 Mon Sep 17 00:00:00 2001 From: Hans Wennborg Date: Fri, 14 Jun 2024 11:09:01 +0200 Subject: [PATCH 071/155] [clang-cl] Support the /Ob3 flag According to the docs, this was added in VS2019 and specifies more aggressive inlining than /Ob2. Let's treat it the same as /Ob2 for now. 
--- clang/docs/UsersManual.rst | 1 + clang/include/clang/Driver/Options.td | 2 ++ clang/lib/Driver/ToolChains/MSVC.cpp | 1 + clang/test/Driver/cl-options.c | 2 ++ 4 files changed, 6 insertions(+) diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index ee30e4eff9ea08..15bf5e30cf8e29 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -4632,6 +4632,7 @@ Execute ``clang-cl /?`` to see a list of supported options: /Ob0 Disable function inlining /Ob1 Only inline functions which are (explicitly or implicitly) marked inline /Ob2 Inline functions as deemed beneficial by the compiler + /Ob3 Same as /Ob2 /Od Disable optimization /Og No effect /Oi- Disable use of builtin functions diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 96e522720cec87..1cb03ac1bffb6f 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -8262,6 +8262,8 @@ def : CLFlag<"Ob1">, Alias<_SLASH_O>, AliasArgs<["b1"]>, HelpText<"Only inline functions explicitly or implicitly marked inline">; def : CLFlag<"Ob2">, Alias<_SLASH_O>, AliasArgs<["b2"]>, HelpText<"Inline functions as deemed beneficial by the compiler">; +def : CLFlag<"Ob3">, Alias<_SLASH_O>, AliasArgs<["b3"]>, + HelpText<"Same as /Ob2">; def : CLFlag<"Od", [CLOption, DXCOption]>, Alias<_SLASH_O>, AliasArgs<["d"]>, HelpText<"Disable optimization">; def : CLFlag<"Og">, Alias<_SLASH_O>, AliasArgs<["g"]>, diff --git a/clang/lib/Driver/ToolChains/MSVC.cpp b/clang/lib/Driver/ToolChains/MSVC.cpp index d03687208c5c6a..ca266e3e1d1d3c 100644 --- a/clang/lib/Driver/ToolChains/MSVC.cpp +++ b/clang/lib/Driver/ToolChains/MSVC.cpp @@ -880,6 +880,7 @@ static void TranslateOptArg(Arg *A, llvm::opt::DerivedArgList &DAL, DAL.AddFlagArg(A, Opts.getOption(options::OPT_finline_hint_functions)); break; case '2': + case '3': DAL.AddFlagArg(A, Opts.getOption(options::OPT_finline_functions)); break; } diff --git 
a/clang/test/Driver/cl-options.c b/clang/test/Driver/cl-options.c index e77ec364170d1c..95d28e46bc582b 100644 --- a/clang/test/Driver/cl-options.c +++ b/clang/test/Driver/cl-options.c @@ -163,7 +163,9 @@ // Ob0: -fno-inline // RUN: %clang_cl /Ob2 -### -- %s 2>&1 | FileCheck -check-prefix=Ob2 %s +// RUN: %clang_cl /Ob3 -### -- %s 2>&1 | FileCheck -check-prefix=Ob2 %s // RUN: %clang_cl /Odb2 -### -- %s 2>&1 | FileCheck -check-prefix=Ob2 %s +// RUN: %clang_cl /Odb3 -### -- %s 2>&1 | FileCheck -check-prefix=Ob2 %s // RUN: %clang_cl /O2 /Ob2 -### -- %s 2>&1 | FileCheck -check-prefix=Ob2 %s // Ob2-NOT: warning: argument unused during compilation: '/O2' // Ob2: -finline-functions From 88e42c6779067c4b65624939be74db2d56ee017b Mon Sep 17 00:00:00 2001 From: Jannick Kremer <51118500+DeinAlptraum@users.noreply.github.com> Date: Fri, 14 Jun 2024 11:19:28 +0200 Subject: [PATCH 072/155] [libclang/python] Fix bugs in custom enum implementation and add tests (#95381) Do not allow initialization of enum from negative IDs (e.g. 
from_id(-1) currently produces the last known variant) Rename duplicate enums: CursorKind.OMP_TEAMS_DISTRIBUTE_DIRECTIVE and TypeKind.OBJCCLASS Add tests to cover these cases --- clang/bindings/python/clang/cindex.py | 8 ++-- .../python/tests/cindex/test_enums.py | 47 +++++++++++++++++++ clang/docs/ReleaseNotes.rst | 8 ++++ 3 files changed, 59 insertions(+), 4 deletions(-) create mode 100644 clang/bindings/python/tests/cindex/test_enums.py diff --git a/clang/bindings/python/clang/cindex.py b/clang/bindings/python/clang/cindex.py index 302d99dccd77b5..b3d51e4d2a668a 100644 --- a/clang/bindings/python/clang/cindex.py +++ b/clang/bindings/python/clang/cindex.py @@ -649,7 +649,7 @@ def name(self): @classmethod def from_id(cls, id): - if id >= len(cls._kinds) or cls._kinds[id] is None: + if id < 0 or id >= len(cls._kinds) or cls._kinds[id] is None: raise ValueError("Unknown template argument kind %d" % id) return cls._kinds[id] @@ -1336,7 +1336,7 @@ def __repr__(self): CursorKind.OMP_TEAMS_DISTRIBUTE_DIRECTIVE = CursorKind(271) # OpenMP teams distribute simd directive. -CursorKind.OMP_TEAMS_DISTRIBUTE_DIRECTIVE = CursorKind(272) +CursorKind.OMP_TEAMS_DISTRIBUTE_SIMD_DIRECTIVE = CursorKind(272) # OpenMP teams distribute parallel for simd directive. 
CursorKind.OMP_TEAMS_DISTRIBUTE_PARALLEL_FOR_SIMD_DIRECTIVE = CursorKind(273) @@ -2215,7 +2215,7 @@ def name(self): @staticmethod def from_id(id): - if id >= len(StorageClass._kinds) or not StorageClass._kinds[id]: + if id < 0 or id >= len(StorageClass._kinds) or not StorageClass._kinds[id]: raise ValueError("Unknown storage class %d" % id) return StorageClass._kinds[id] @@ -2395,7 +2395,7 @@ def __repr__(self): TypeKind.OCLRESERVEID = TypeKind(160) TypeKind.OBJCOBJECT = TypeKind(161) -TypeKind.OBJCCLASS = TypeKind(162) +TypeKind.OBJCTYPEPARAM = TypeKind(162) TypeKind.ATTRIBUTED = TypeKind(163) TypeKind.OCLINTELSUBGROUPAVCMCEPAYLOAD = TypeKind(164) diff --git a/clang/bindings/python/tests/cindex/test_enums.py b/clang/bindings/python/tests/cindex/test_enums.py new file mode 100644 index 00000000000000..6fc0e5ed77e3e7 --- /dev/null +++ b/clang/bindings/python/tests/cindex/test_enums.py @@ -0,0 +1,47 @@ +import unittest + +from clang.cindex import ( + CursorKind, + TemplateArgumentKind, + ExceptionSpecificationKind, + AvailabilityKind, + AccessSpecifier, + TypeKind, + RefQualifierKind, + LinkageKind, + TLSKind, + StorageClass, +) + + +class TestCursorKind(unittest.TestCase): + enums = [ + CursorKind, + TemplateArgumentKind, + ExceptionSpecificationKind, + AvailabilityKind, + AccessSpecifier, + TypeKind, + RefQualifierKind, + LinkageKind, + TLSKind, + StorageClass, + ] + + def test_from_id(self): + """Check that kinds can be constructed from valid IDs""" + for enum in self.enums: + self.assertEqual(enum.from_id(2), enum._kinds[2]) + with self.assertRaises(ValueError): + enum.from_id(len(enum._kinds)) + with self.assertRaises(ValueError): + enum.from_id(-1) + + def test_unique_kinds(self): + """Check that no kind name has been used multiple times""" + for enum in self.enums: + for id in range(len(enum._kinds)): + try: + enum.from_id(id).name + except ValueError: + pass diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 
68355dbb5861b7..bae9f5e1bd02a1 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -135,6 +135,14 @@ Clang Frontend Potentially Breaking Changes - The ``hasTypeLoc`` AST matcher will no longer match a ``classTemplateSpecializationDecl``; existing uses should switch to ``templateArgumentLoc`` or ``hasAnyTemplateArgumentLoc`` instead. +Clang Python Bindings Potentially Breaking Changes +-------------------------------------------------- +- Renamed ``CursorKind`` variant 272 from ``OMP_TEAMS_DISTRIBUTE_DIRECTIVE`` + to ``OMP_TEAMS_DISTRIBUTE_SIMD_DIRECTIVE``. The previous name was incorrect, it was a duplicate + of variant 271. +- Renamed ``TypeKind`` variant 162 from ``OBJCCLASS`` to ``OBJCTYPEPARAM``. + The previous name was incorrect, it was a duplicate of variant 28. + What's New in Clang |release|? ============================== Some of the major new features and improvements to Clang are listed From ab0d01a5f0f17f20b106b0f6cc6d1b7d13cf4d65 Mon Sep 17 00:00:00 2001 From: Pierre van Houtryve Date: Fri, 14 Jun 2024 11:20:45 +0200 Subject: [PATCH 073/155] [MC] Cache MCRegAliasIterator (#93510) AMDGPU has a lot of registers, almost 9000. Many of those registers have aliases. For instance, SGPR0 has a ton of aliases due to the presence of register tuples. It's even worse if you query the aliases of a register tuple itself. A large register tuple can have hundreds of aliases because it may include 16 registers, and each of those registers have their own tuples as well. The current implementation of MCRegAliasIterator is not good at this. In some extreme cases it can iterate, 7000 more times than necessary, just giving duplicates over and over again and using a lot of expensive iterators. This patch implements a cache system for MCRegAliasIterator. It does the expensive part only once and then saves it for us so the next iterations on that register's aliases are just a map lookup. Furthermore, the cached data is uniqued (and sorted). 
Thus, this speeds up code by both speeding up the iterator itself, but also by minimizing the number of loop iterations users of the iterator do. --- llvm/include/llvm/MC/MCRegisterInfo.h | 65 +++++--------- llvm/lib/MC/MCRegisterInfo.cpp | 84 +++++++++++++++++++ .../CodeGen/ARM/constant-island-movwt.mir | 15 ++-- 3 files changed, 111 insertions(+), 53 deletions(-) diff --git a/llvm/include/llvm/MC/MCRegisterInfo.h b/llvm/include/llvm/MC/MCRegisterInfo.h index af5be9186108af..11205a5a44c86c 100644 --- a/llvm/include/llvm/MC/MCRegisterInfo.h +++ b/llvm/include/llvm/MC/MCRegisterInfo.h @@ -187,6 +187,9 @@ class MCRegisterInfo { DenseMap L2SEHRegs; // LLVM to SEH regs mapping DenseMap L2CVRegs; // LLVM to CV regs mapping + mutable std::vector> RegAliasesCache; + ArrayRef getCachedAliasesOf(MCPhysReg R) const; + /// Iterator class that can traverse the differentially encoded values in /// DiffLists. Don't use this class directly, use one of the adaptors below. class DiffListIterator @@ -263,6 +266,7 @@ class MCRegisterInfo { friend class MCRegUnitIterator; friend class MCRegUnitMaskIterator; friend class MCRegUnitRootIterator; + friend class MCRegAliasIterator; /// Initialize MCRegisterInfo, called by TableGen /// auto-generated routines. *DO NOT USE*. @@ -298,6 +302,8 @@ class MCRegisterInfo { EHDwarf2LRegsSize = 0; Dwarf2LRegs = nullptr; Dwarf2LRegsSize = 0; + + RegAliasesCache.resize(NumRegs); } /// Used to initialize LLVM register to Dwarf @@ -723,63 +729,30 @@ class MCRegUnitRootIterator { } }; -/// MCRegAliasIterator enumerates all registers aliasing Reg. If IncludeSelf is -/// set, Reg itself is included in the list. This iterator does not guarantee -/// any ordering or that entries are unique. +/// MCRegAliasIterator enumerates all registers aliasing Reg. 
class MCRegAliasIterator { private: - MCRegister Reg; - const MCRegisterInfo *MCRI; - bool IncludeSelf; - - MCRegUnitIterator RI; - MCRegUnitRootIterator RRI; - MCSuperRegIterator SI; + const MCPhysReg *It = nullptr; + const MCPhysReg *End = nullptr; public: MCRegAliasIterator(MCRegister Reg, const MCRegisterInfo *MCRI, - bool IncludeSelf) - : Reg(Reg), MCRI(MCRI), IncludeSelf(IncludeSelf) { - // Initialize the iterators. - for (RI = MCRegUnitIterator(Reg, MCRI); RI.isValid(); ++RI) { - for (RRI = MCRegUnitRootIterator(*RI, MCRI); RRI.isValid(); ++RRI) { - for (SI = MCSuperRegIterator(*RRI, MCRI, true); SI.isValid(); ++SI) { - if (!(!IncludeSelf && Reg == *SI)) - return; - } - } - } - } - - bool isValid() const { return RI.isValid(); } - - MCRegister operator*() const { - assert(SI.isValid() && "Cannot dereference an invalid iterator."); - return *SI; + bool IncludeSelf) { + ArrayRef Cache = MCRI->getCachedAliasesOf(Reg); + assert(Cache.back() == Reg); + It = Cache.begin(); + End = Cache.end(); + if (!IncludeSelf) + --End; } - void advance() { - // Assuming SI is valid. - ++SI; - if (SI.isValid()) return; - - ++RRI; - if (RRI.isValid()) { - SI = MCSuperRegIterator(*RRI, MCRI, true); - return; - } + bool isValid() const { return It != End; } - ++RI; - if (RI.isValid()) { - RRI = MCRegUnitRootIterator(*RI, MCRI); - SI = MCSuperRegIterator(*RRI, MCRI, true); - } - } + MCRegister operator*() const { return *It; } MCRegAliasIterator &operator++() { assert(isValid() && "Cannot move off the end of the list."); - do advance(); - while (!IncludeSelf && isValid() && *SI == Reg); + ++It; return *this; } }; diff --git a/llvm/lib/MC/MCRegisterInfo.cpp b/llvm/lib/MC/MCRegisterInfo.cpp index 334655616d8dbb..fde770a9c376c3 100644 --- a/llvm/lib/MC/MCRegisterInfo.cpp +++ b/llvm/lib/MC/MCRegisterInfo.cpp @@ -20,6 +20,90 @@ using namespace llvm; +namespace { +/// MCRegAliasIterator enumerates all registers aliasing Reg. 
This iterator +/// does not guarantee any ordering or that entries are unique. +class MCRegAliasIteratorImpl { +private: + MCRegister Reg; + const MCRegisterInfo *MCRI; + + MCRegUnitIterator RI; + MCRegUnitRootIterator RRI; + MCSuperRegIterator SI; + +public: + MCRegAliasIteratorImpl(MCRegister Reg, const MCRegisterInfo *MCRI) + : Reg(Reg), MCRI(MCRI) { + + // Initialize the iterators. + for (RI = MCRegUnitIterator(Reg, MCRI); RI.isValid(); ++RI) { + for (RRI = MCRegUnitRootIterator(*RI, MCRI); RRI.isValid(); ++RRI) { + for (SI = MCSuperRegIterator(*RRI, MCRI, true); SI.isValid(); ++SI) { + if (Reg != *SI) + return; + } + } + } + } + + bool isValid() const { return RI.isValid(); } + + MCRegister operator*() const { + assert(SI.isValid() && "Cannot dereference an invalid iterator."); + return *SI; + } + + void advance() { + // Assuming SI is valid. + ++SI; + if (SI.isValid()) + return; + + ++RRI; + if (RRI.isValid()) { + SI = MCSuperRegIterator(*RRI, MCRI, true); + return; + } + + ++RI; + if (RI.isValid()) { + RRI = MCRegUnitRootIterator(*RI, MCRI); + SI = MCSuperRegIterator(*RRI, MCRI, true); + } + } + + MCRegAliasIteratorImpl &operator++() { + assert(isValid() && "Cannot move off the end of the list."); + do + advance(); + while (isValid() && *SI == Reg); + return *this; + } +}; +} // namespace + +ArrayRef MCRegisterInfo::getCachedAliasesOf(MCPhysReg R) const { + auto &Aliases = RegAliasesCache[R]; + if (!Aliases.empty()) + return Aliases; + + for (MCRegAliasIteratorImpl It(R, this); It.isValid(); ++It) + Aliases.push_back(*It); + + sort(Aliases); + Aliases.erase(unique(Aliases), Aliases.end()); + assert(none_of(Aliases, [&](auto &Cur) { return R == Cur; }) && + "MCRegAliasIteratorImpl includes Self!"); + + // Always put "self" at the end, so the iterator can choose to ignore it. + // For registers without aliases, it also serves as a sentinel value that + // tells us to not recompute the alias set. 
+ Aliases.push_back(R); + Aliases.shrink_to_fit(); + return Aliases; +} + MCRegister MCRegisterInfo::getMatchingSuperReg(MCRegister Reg, unsigned SubIdx, const MCRegisterClass *RC) const { diff --git a/llvm/test/CodeGen/ARM/constant-island-movwt.mir b/llvm/test/CodeGen/ARM/constant-island-movwt.mir index 7d21a4e4875c31..7b3e59eca84725 100644 --- a/llvm/test/CodeGen/ARM/constant-island-movwt.mir +++ b/llvm/test/CodeGen/ARM/constant-island-movwt.mir @@ -898,13 +898,14 @@ body: | # CHECK-NEXT: CONSTPOOL_ENTRY 1, %const.0, 4 # CHECK-NEXT: {{^ $}} # CHECK-NEXT: bb.2.entry (align 2): -# CHECK-NEXT: liveins: $d13, $s27, $r10, $r9, $r8, $s26, $d12, $s25, $s24, -# CHECK-SAME: $d15, $s30, $s31, $d14, $s28, $s29, $lr, $r0, $d21, -# CHECK-SAME: $r3, $q10, $d20, $d17, $r2, $d25, $q11, $d22, $d23, -# CHECK-SAME: $r1, $q8, $d16, $s3, $q14, $d28, $d29, $d19, $s17, -# CHECK-SAME: $d8, $s16, $r6, $r7, $r4, $q12, $q9, $d18, $s0, $q15, -# CHECK-SAME: $d30, $d31, $r12, $s1, $d0, $d24, $s2, $d1, $q0, $s6, -# CHECK-SAME: $d3, $d2, $s4, $q1, $s7, $s5, $d9, $s18, $s19, $q4 +# CHECK-NEXT: liveins: $s26, $s27, $r10, $r9, $r8, $d13, $s24, $s25, +# CHECK-SAME: $d12, $d15, $s30, $s31, $d14, $s28, $s29, $lr, +# CHECK-SAME: $d21, $q10, $r7, $r0, $d20, $d17, $r2, $q12, +# CHECK-SAME: $q11, $d22, $d23, $r1, $q8, $d16, $d30, $q14, +# CHECK-SAME: $d28, $d29, $d19, $s17, $r4, $d8, $r6, $r3, +# CHECK-SAME: $s16, $d25, $q9, $d18, $s0, $d31, $s3, $q15, +# CHECK-SAME: $r12, $d0, $s1, $d24, $d1, $s2, $q0, $s5, $d2, +# CHECK-SAME: $q1, $s4, $s7, $d3, $s6, $d9, $s18, $s19, $q4 # CHECK-NEXT: {{^ $}} # CHECK-NEXT: $r5 = t2MOVi16 target-flags(arm-lo16) @.str.84, 14 /* CC::al */, $noreg # CHECK-NEXT: $r5 = t2MOVTi16 $r5, target-flags(arm-hi16) @.str.84, 14 /* CC::al */, $noreg From 4bccd25467ce591869dad41c8b7c550093c20f1b Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 14 Jun 2024 10:22:13 +0100 Subject: [PATCH 074/155] [AArch64] LowerAVG - fallback to default expansion (#95416) The 
TargetLowering::expandAVG implementations now match or are better than the AArch64 override. --- .../Target/AArch64/AArch64ISelLowering.cpp | 46 +---- llvm/test/CodeGen/AArch64/sve-hadd.ll | 192 +++++++----------- 2 files changed, 74 insertions(+), 164 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index af8b9d9576ff7a..394b741f1c1d0a 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -15007,55 +15007,13 @@ AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, return SDValue(); } -// When x and y are extended, lower: -// avgfloor(x, y) -> (x + y) >> 1 -// avgceil(x, y) -> (x + y + 1) >> 1 - -// Otherwise, lower to: -// avgfloor(x, y) -> (x >> 1) + (y >> 1) + (x & y & 1) -// avgceil(x, y) -> (x >> 1) + (y >> 1) + ((x || y) & 1) SDValue AArch64TargetLowering::LowerAVG(SDValue Op, SelectionDAG &DAG, unsigned NewOp) const { if (Subtarget->hasSVE2()) return LowerToPredicatedOp(Op, DAG, NewOp); - SDLoc dl(Op); - SDValue OpA = Op->getOperand(0); - SDValue OpB = Op->getOperand(1); - EVT VT = Op.getValueType(); - bool IsCeil = - (Op->getOpcode() == ISD::AVGCEILS || Op->getOpcode() == ISD::AVGCEILU); - bool IsSigned = - (Op->getOpcode() == ISD::AVGFLOORS || Op->getOpcode() == ISD::AVGCEILS); - unsigned ShiftOpc = IsSigned ? 
ISD::SRA : ISD::SRL; - - assert(VT.isScalableVector() && "Only expect to lower scalable vector op!"); - - auto IsZeroExtended = [&DAG](SDValue &Node) { - KnownBits Known = DAG.computeKnownBits(Node, 0); - return Known.Zero.isSignBitSet(); - }; - - auto IsSignExtended = [&DAG](SDValue &Node) { - return (DAG.ComputeNumSignBits(Node, 0) > 1); - }; - - SDValue ConstantOne = DAG.getConstant(1, dl, VT); - if ((!IsSigned && IsZeroExtended(OpA) && IsZeroExtended(OpB)) || - (IsSigned && IsSignExtended(OpA) && IsSignExtended(OpB))) { - SDValue Add = DAG.getNode(ISD::ADD, dl, VT, OpA, OpB); - if (IsCeil) - Add = DAG.getNode(ISD::ADD, dl, VT, Add, ConstantOne); - return DAG.getNode(ShiftOpc, dl, VT, Add, ConstantOne); - } - - SDValue ShiftOpA = DAG.getNode(ShiftOpc, dl, VT, OpA, ConstantOne); - SDValue ShiftOpB = DAG.getNode(ShiftOpc, dl, VT, OpB, ConstantOne); - - SDValue tmp = DAG.getNode(IsCeil ? ISD::OR : ISD::AND, dl, VT, OpA, OpB); - tmp = DAG.getNode(ISD::AND, dl, VT, tmp, ConstantOne); - SDValue Add = DAG.getNode(ISD::ADD, dl, VT, ShiftOpA, ShiftOpB); - return DAG.getNode(ISD::ADD, dl, VT, Add, tmp); + // Default to expand. 
+ return SDValue(); } SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op, diff --git a/llvm/test/CodeGen/AArch64/sve-hadd.ll b/llvm/test/CodeGen/AArch64/sve-hadd.ll index 3fead88780e7d3..6017e13ce00352 100644 --- a/llvm/test/CodeGen/AArch64/sve-hadd.ll +++ b/llvm/test/CodeGen/AArch64/sve-hadd.ll @@ -5,12 +5,10 @@ define @hadds_v2i64( %s0, %s1) { ; SVE-LABEL: hadds_v2i64: ; SVE: // %bb.0: // %entry -; SVE-NEXT: asr z2.d, z1.d, #1 -; SVE-NEXT: asr z3.d, z0.d, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: and z0.d, z0.d, z1.d -; SVE-NEXT: add z1.d, z3.d, z2.d -; SVE-NEXT: and z0.d, z0.d, #0x1 -; SVE-NEXT: add z0.d, z1.d, z0.d +; SVE-NEXT: asr z1.d, z2.d, #1 +; SVE-NEXT: add z0.d, z0.d, z1.d ; SVE-NEXT: ret ; ; SVE2-LABEL: hadds_v2i64: @@ -30,12 +28,10 @@ entry: define @hadds_v2i64_lsh( %s0, %s1) { ; SVE-LABEL: hadds_v2i64_lsh: ; SVE: // %bb.0: // %entry -; SVE-NEXT: asr z2.d, z1.d, #1 -; SVE-NEXT: asr z3.d, z0.d, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: and z0.d, z0.d, z1.d -; SVE-NEXT: add z1.d, z3.d, z2.d -; SVE-NEXT: and z0.d, z0.d, #0x1 -; SVE-NEXT: add z0.d, z1.d, z0.d +; SVE-NEXT: asr z1.d, z2.d, #1 +; SVE-NEXT: add z0.d, z0.d, z1.d ; SVE-NEXT: ret ; ; SVE2-LABEL: hadds_v2i64_lsh: @@ -55,12 +51,10 @@ entry: define @haddu_v2i64( %s0, %s1) { ; SVE-LABEL: haddu_v2i64: ; SVE: // %bb.0: // %entry -; SVE-NEXT: lsr z2.d, z1.d, #1 -; SVE-NEXT: lsr z3.d, z0.d, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: and z0.d, z0.d, z1.d -; SVE-NEXT: add z1.d, z3.d, z2.d -; SVE-NEXT: and z0.d, z0.d, #0x1 -; SVE-NEXT: add z0.d, z1.d, z0.d +; SVE-NEXT: lsr z1.d, z2.d, #1 +; SVE-NEXT: add z0.d, z0.d, z1.d ; SVE-NEXT: ret ; ; SVE2-LABEL: haddu_v2i64: @@ -146,12 +140,10 @@ entry: define @hadds_v4i32( %s0, %s1) { ; SVE-LABEL: hadds_v4i32: ; SVE: // %bb.0: // %entry -; SVE-NEXT: asr z2.s, z1.s, #1 -; SVE-NEXT: asr z3.s, z0.s, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: and z0.d, z0.d, z1.d -; SVE-NEXT: add z1.s, z3.s, z2.s -; SVE-NEXT: and z0.s, z0.s, #0x1 -; 
SVE-NEXT: add z0.s, z1.s, z0.s +; SVE-NEXT: asr z1.s, z2.s, #1 +; SVE-NEXT: add z0.s, z0.s, z1.s ; SVE-NEXT: ret ; ; SVE2-LABEL: hadds_v4i32: @@ -171,12 +163,10 @@ entry: define @hadds_v4i32_lsh( %s0, %s1) { ; SVE-LABEL: hadds_v4i32_lsh: ; SVE: // %bb.0: // %entry -; SVE-NEXT: asr z2.s, z1.s, #1 -; SVE-NEXT: asr z3.s, z0.s, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: and z0.d, z0.d, z1.d -; SVE-NEXT: add z1.s, z3.s, z2.s -; SVE-NEXT: and z0.s, z0.s, #0x1 -; SVE-NEXT: add z0.s, z1.s, z0.s +; SVE-NEXT: asr z1.s, z2.s, #1 +; SVE-NEXT: add z0.s, z0.s, z1.s ; SVE-NEXT: ret ; ; SVE2-LABEL: hadds_v4i32_lsh: @@ -196,12 +186,10 @@ entry: define @haddu_v4i32( %s0, %s1) { ; SVE-LABEL: haddu_v4i32: ; SVE: // %bb.0: // %entry -; SVE-NEXT: lsr z2.s, z1.s, #1 -; SVE-NEXT: lsr z3.s, z0.s, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: and z0.d, z0.d, z1.d -; SVE-NEXT: add z1.s, z3.s, z2.s -; SVE-NEXT: and z0.s, z0.s, #0x1 -; SVE-NEXT: add z0.s, z1.s, z0.s +; SVE-NEXT: lsr z1.s, z2.s, #1 +; SVE-NEXT: add z0.s, z0.s, z1.s ; SVE-NEXT: ret ; ; SVE2-LABEL: haddu_v4i32: @@ -360,12 +348,10 @@ entry: define @hadds_v8i16( %s0, %s1) { ; SVE-LABEL: hadds_v8i16: ; SVE: // %bb.0: // %entry -; SVE-NEXT: asr z2.h, z1.h, #1 -; SVE-NEXT: asr z3.h, z0.h, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: and z0.d, z0.d, z1.d -; SVE-NEXT: add z1.h, z3.h, z2.h -; SVE-NEXT: and z0.h, z0.h, #0x1 -; SVE-NEXT: add z0.h, z1.h, z0.h +; SVE-NEXT: asr z1.h, z2.h, #1 +; SVE-NEXT: add z0.h, z0.h, z1.h ; SVE-NEXT: ret ; ; SVE2-LABEL: hadds_v8i16: @@ -385,12 +371,10 @@ entry: define @hadds_v8i16_lsh( %s0, %s1) { ; SVE-LABEL: hadds_v8i16_lsh: ; SVE: // %bb.0: // %entry -; SVE-NEXT: asr z2.h, z1.h, #1 -; SVE-NEXT: asr z3.h, z0.h, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: and z0.d, z0.d, z1.d -; SVE-NEXT: add z1.h, z3.h, z2.h -; SVE-NEXT: and z0.h, z0.h, #0x1 -; SVE-NEXT: add z0.h, z1.h, z0.h +; SVE-NEXT: asr z1.h, z2.h, #1 +; SVE-NEXT: add z0.h, z0.h, z1.h ; SVE-NEXT: ret ; ; SVE2-LABEL: 
hadds_v8i16_lsh: @@ -410,12 +394,10 @@ entry: define @haddu_v8i16( %s0, %s1) { ; SVE-LABEL: haddu_v8i16: ; SVE: // %bb.0: // %entry -; SVE-NEXT: lsr z2.h, z1.h, #1 -; SVE-NEXT: lsr z3.h, z0.h, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: and z0.d, z0.d, z1.d -; SVE-NEXT: add z1.h, z3.h, z2.h -; SVE-NEXT: and z0.h, z0.h, #0x1 -; SVE-NEXT: add z0.h, z1.h, z0.h +; SVE-NEXT: lsr z1.h, z2.h, #1 +; SVE-NEXT: add z0.h, z0.h, z1.h ; SVE-NEXT: ret ; ; SVE2-LABEL: haddu_v8i16: @@ -574,12 +556,10 @@ entry: define @hadds_v16i8( %s0, %s1) { ; SVE-LABEL: hadds_v16i8: ; SVE: // %bb.0: // %entry -; SVE-NEXT: asr z2.b, z1.b, #1 -; SVE-NEXT: asr z3.b, z0.b, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: and z0.d, z0.d, z1.d -; SVE-NEXT: add z1.b, z3.b, z2.b -; SVE-NEXT: and z0.b, z0.b, #0x1 -; SVE-NEXT: add z0.b, z1.b, z0.b +; SVE-NEXT: asr z1.b, z2.b, #1 +; SVE-NEXT: add z0.b, z0.b, z1.b ; SVE-NEXT: ret ; ; SVE2-LABEL: hadds_v16i8: @@ -599,12 +579,10 @@ entry: define @hadds_v16i8_lsh( %s0, %s1) { ; SVE-LABEL: hadds_v16i8_lsh: ; SVE: // %bb.0: // %entry -; SVE-NEXT: asr z2.b, z1.b, #1 -; SVE-NEXT: asr z3.b, z0.b, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: and z0.d, z0.d, z1.d -; SVE-NEXT: add z1.b, z3.b, z2.b -; SVE-NEXT: and z0.b, z0.b, #0x1 -; SVE-NEXT: add z0.b, z1.b, z0.b +; SVE-NEXT: asr z1.b, z2.b, #1 +; SVE-NEXT: add z0.b, z0.b, z1.b ; SVE-NEXT: ret ; ; SVE2-LABEL: hadds_v16i8_lsh: @@ -624,12 +602,10 @@ entry: define @haddu_v16i8( %s0, %s1) { ; SVE-LABEL: haddu_v16i8: ; SVE: // %bb.0: // %entry -; SVE-NEXT: lsr z2.b, z1.b, #1 -; SVE-NEXT: lsr z3.b, z0.b, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: and z0.d, z0.d, z1.d -; SVE-NEXT: add z1.b, z3.b, z2.b -; SVE-NEXT: and z0.b, z0.b, #0x1 -; SVE-NEXT: add z0.b, z1.b, z0.b +; SVE-NEXT: lsr z1.b, z2.b, #1 +; SVE-NEXT: add z0.b, z0.b, z1.b ; SVE-NEXT: ret ; ; SVE2-LABEL: haddu_v16i8: @@ -649,12 +625,10 @@ entry: define @rhadds_v2i64( %s0, %s1) { ; SVE-LABEL: rhadds_v2i64: ; SVE: // %bb.0: // %entry -; SVE-NEXT: asr 
z2.d, z1.d, #1 -; SVE-NEXT: asr z3.d, z0.d, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: orr z0.d, z0.d, z1.d -; SVE-NEXT: add z1.d, z3.d, z2.d -; SVE-NEXT: and z0.d, z0.d, #0x1 -; SVE-NEXT: add z0.d, z1.d, z0.d +; SVE-NEXT: asr z1.d, z2.d, #1 +; SVE-NEXT: sub z0.d, z0.d, z1.d ; SVE-NEXT: ret ; ; SVE2-LABEL: rhadds_v2i64: @@ -675,12 +649,10 @@ entry: define @rhadds_v2i64_lsh( %s0, %s1) { ; SVE-LABEL: rhadds_v2i64_lsh: ; SVE: // %bb.0: // %entry -; SVE-NEXT: asr z2.d, z1.d, #1 -; SVE-NEXT: asr z3.d, z0.d, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: orr z0.d, z0.d, z1.d -; SVE-NEXT: add z1.d, z3.d, z2.d -; SVE-NEXT: and z0.d, z0.d, #0x1 -; SVE-NEXT: add z0.d, z1.d, z0.d +; SVE-NEXT: asr z1.d, z2.d, #1 +; SVE-NEXT: sub z0.d, z0.d, z1.d ; SVE-NEXT: ret ; ; SVE2-LABEL: rhadds_v2i64_lsh: @@ -701,12 +673,10 @@ entry: define @rhaddu_v2i64( %s0, %s1) { ; SVE-LABEL: rhaddu_v2i64: ; SVE: // %bb.0: // %entry -; SVE-NEXT: lsr z2.d, z1.d, #1 -; SVE-NEXT: lsr z3.d, z0.d, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: orr z0.d, z0.d, z1.d -; SVE-NEXT: add z1.d, z3.d, z2.d -; SVE-NEXT: and z0.d, z0.d, #0x1 -; SVE-NEXT: add z0.d, z1.d, z0.d +; SVE-NEXT: lsr z1.d, z2.d, #1 +; SVE-NEXT: sub z0.d, z0.d, z1.d ; SVE-NEXT: ret ; ; SVE2-LABEL: rhaddu_v2i64: @@ -805,12 +775,10 @@ entry: define @rhadds_v4i32( %s0, %s1) { ; SVE-LABEL: rhadds_v4i32: ; SVE: // %bb.0: // %entry -; SVE-NEXT: asr z2.s, z1.s, #1 -; SVE-NEXT: asr z3.s, z0.s, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: orr z0.d, z0.d, z1.d -; SVE-NEXT: add z1.s, z3.s, z2.s -; SVE-NEXT: and z0.s, z0.s, #0x1 -; SVE-NEXT: add z0.s, z1.s, z0.s +; SVE-NEXT: asr z1.s, z2.s, #1 +; SVE-NEXT: sub z0.s, z0.s, z1.s ; SVE-NEXT: ret ; ; SVE2-LABEL: rhadds_v4i32: @@ -831,12 +799,10 @@ entry: define @rhadds_v4i32_lsh( %s0, %s1) { ; SVE-LABEL: rhadds_v4i32_lsh: ; SVE: // %bb.0: // %entry -; SVE-NEXT: asr z2.s, z1.s, #1 -; SVE-NEXT: asr z3.s, z0.s, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: orr z0.d, z0.d, z1.d -; SVE-NEXT: add 
z1.s, z3.s, z2.s -; SVE-NEXT: and z0.s, z0.s, #0x1 -; SVE-NEXT: add z0.s, z1.s, z0.s +; SVE-NEXT: asr z1.s, z2.s, #1 +; SVE-NEXT: sub z0.s, z0.s, z1.s ; SVE-NEXT: ret ; ; SVE2-LABEL: rhadds_v4i32_lsh: @@ -857,12 +823,10 @@ entry: define @rhaddu_v4i32( %s0, %s1) { ; SVE-LABEL: rhaddu_v4i32: ; SVE: // %bb.0: // %entry -; SVE-NEXT: lsr z2.s, z1.s, #1 -; SVE-NEXT: lsr z3.s, z0.s, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: orr z0.d, z0.d, z1.d -; SVE-NEXT: add z1.s, z3.s, z2.s -; SVE-NEXT: and z0.s, z0.s, #0x1 -; SVE-NEXT: add z0.s, z1.s, z0.s +; SVE-NEXT: lsr z1.s, z2.s, #1 +; SVE-NEXT: sub z0.s, z0.s, z1.s ; SVE-NEXT: ret ; ; SVE2-LABEL: rhaddu_v4i32: @@ -1040,12 +1004,10 @@ entry: define @rhadds_v8i16( %s0, %s1) { ; SVE-LABEL: rhadds_v8i16: ; SVE: // %bb.0: // %entry -; SVE-NEXT: asr z2.h, z1.h, #1 -; SVE-NEXT: asr z3.h, z0.h, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: orr z0.d, z0.d, z1.d -; SVE-NEXT: add z1.h, z3.h, z2.h -; SVE-NEXT: and z0.h, z0.h, #0x1 -; SVE-NEXT: add z0.h, z1.h, z0.h +; SVE-NEXT: asr z1.h, z2.h, #1 +; SVE-NEXT: sub z0.h, z0.h, z1.h ; SVE-NEXT: ret ; ; SVE2-LABEL: rhadds_v8i16: @@ -1066,12 +1028,10 @@ entry: define @rhadds_v8i16_lsh( %s0, %s1) { ; SVE-LABEL: rhadds_v8i16_lsh: ; SVE: // %bb.0: // %entry -; SVE-NEXT: asr z2.h, z1.h, #1 -; SVE-NEXT: asr z3.h, z0.h, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: orr z0.d, z0.d, z1.d -; SVE-NEXT: add z1.h, z3.h, z2.h -; SVE-NEXT: and z0.h, z0.h, #0x1 -; SVE-NEXT: add z0.h, z1.h, z0.h +; SVE-NEXT: asr z1.h, z2.h, #1 +; SVE-NEXT: sub z0.h, z0.h, z1.h ; SVE-NEXT: ret ; ; SVE2-LABEL: rhadds_v8i16_lsh: @@ -1092,12 +1052,10 @@ entry: define @rhaddu_v8i16( %s0, %s1) { ; SVE-LABEL: rhaddu_v8i16: ; SVE: // %bb.0: // %entry -; SVE-NEXT: lsr z2.h, z1.h, #1 -; SVE-NEXT: lsr z3.h, z0.h, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: orr z0.d, z0.d, z1.d -; SVE-NEXT: add z1.h, z3.h, z2.h -; SVE-NEXT: and z0.h, z0.h, #0x1 -; SVE-NEXT: add z0.h, z1.h, z0.h +; SVE-NEXT: lsr z1.h, z2.h, #1 +; SVE-NEXT: 
sub z0.h, z0.h, z1.h ; SVE-NEXT: ret ; ; SVE2-LABEL: rhaddu_v8i16: @@ -1275,12 +1233,10 @@ entry: define @rhadds_v16i8( %s0, %s1) { ; SVE-LABEL: rhadds_v16i8: ; SVE: // %bb.0: // %entry -; SVE-NEXT: asr z2.b, z1.b, #1 -; SVE-NEXT: asr z3.b, z0.b, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: orr z0.d, z0.d, z1.d -; SVE-NEXT: add z1.b, z3.b, z2.b -; SVE-NEXT: and z0.b, z0.b, #0x1 -; SVE-NEXT: add z0.b, z1.b, z0.b +; SVE-NEXT: asr z1.b, z2.b, #1 +; SVE-NEXT: sub z0.b, z0.b, z1.b ; SVE-NEXT: ret ; ; SVE2-LABEL: rhadds_v16i8: @@ -1301,12 +1257,10 @@ entry: define @rhadds_v16i8_lsh( %s0, %s1) { ; SVE-LABEL: rhadds_v16i8_lsh: ; SVE: // %bb.0: // %entry -; SVE-NEXT: asr z2.b, z1.b, #1 -; SVE-NEXT: asr z3.b, z0.b, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: orr z0.d, z0.d, z1.d -; SVE-NEXT: add z1.b, z3.b, z2.b -; SVE-NEXT: and z0.b, z0.b, #0x1 -; SVE-NEXT: add z0.b, z1.b, z0.b +; SVE-NEXT: asr z1.b, z2.b, #1 +; SVE-NEXT: sub z0.b, z0.b, z1.b ; SVE-NEXT: ret ; ; SVE2-LABEL: rhadds_v16i8_lsh: @@ -1327,12 +1281,10 @@ entry: define @rhaddu_v16i8( %s0, %s1) { ; SVE-LABEL: rhaddu_v16i8: ; SVE: // %bb.0: // %entry -; SVE-NEXT: lsr z2.b, z1.b, #1 -; SVE-NEXT: lsr z3.b, z0.b, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: orr z0.d, z0.d, z1.d -; SVE-NEXT: add z1.b, z3.b, z2.b -; SVE-NEXT: and z0.b, z0.b, #0x1 -; SVE-NEXT: add z0.b, z1.b, z0.b +; SVE-NEXT: lsr z1.b, z2.b, #1 +; SVE-NEXT: sub z0.b, z0.b, z1.b ; SVE-NEXT: ret ; ; SVE2-LABEL: rhaddu_v16i8: From 44df1167f88cabbb4cfde816f279337379ea30b3 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 14 Jun 2024 11:35:27 +0200 Subject: [PATCH 075/155] [Error] Add non-consuming toString (#95375) There are some places that want to convert an Error to string, but still retain the original Error object, for example to emit a non-fatal warning. This currently isn't possible, because the entire Error infra is move-based. And what people end up doing in this case is to move the Error... twice. 
This patch introduces a toStringWithoutConsuming() function to accommodate this use case. This also requires some infrastructure that allows visiting Errors without consuming them. --- llvm/include/llvm/Support/Error.h | 28 ++++++++++++++++++++ llvm/lib/Support/Error.cpp | 8 ++++++ llvm/tools/dsymutil/DwarfLinkerForBinary.cpp | 4 +-- llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp | 4 +-- llvm/unittests/Support/ErrorTest.cpp | 10 +++++++ 5 files changed, 50 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/Support/Error.h b/llvm/include/llvm/Support/Error.h index 1fa0d8cb709cc7..5120f6ab57c030 100644 --- a/llvm/include/llvm/Support/Error.h +++ b/llvm/include/llvm/Support/Error.h @@ -166,6 +166,9 @@ class [[nodiscard]] Error { // handleErrors needs to be able to set the Checked flag. template friend Error handleErrors(Error E, HandlerTs &&... Handlers); + // visitErrors needs direct access to the payload. + template + friend void visitErrors(const Error &E, HandlerT H); // Expected needs to be able to steal the payload when constructed from an // error. @@ -369,6 +372,10 @@ class ErrorList final : public ErrorInfo { // ErrorList. template friend Error handleErrors(Error E, HandlerTs &&... Handlers); + // visitErrors needs to be able to iterate the payload list of an + // ErrorList. + template + friend void visitErrors(const Error &E, HandlerT H); // joinErrors is implemented in terms of join. friend Error joinErrors(Error, Error); @@ -977,6 +984,23 @@ inline void handleAllErrors(Error E) { cantFail(std::move(E)); } +/// Visit all the ErrorInfo(s) contained in E by passing them to the respective +/// handler, without consuming the error. 
+template void visitErrors(const Error &E, HandlerT H) { + const ErrorInfoBase *Payload = E.getPtr(); + if (!Payload) + return; + + if (Payload->isA()) { + const ErrorList &List = static_cast(*Payload); + for (const auto &P : List.Payloads) + H(*P); + return; + } + + return H(*Payload); +} + /// Handle any errors (if present) in an Expected, then try a recovery path. /// /// If the incoming value is a success value it is returned unmodified. If it @@ -1031,6 +1055,10 @@ void logAllUnhandledErrors(Error E, raw_ostream &OS, Twine ErrorBanner = {}); /// is used to separate error messages. std::string toString(Error E); +/// Like toString(), but does not consume the error. This can be used to print +/// a warning while retaining the original error object. +std::string toStringWithoutConsuming(const Error &E); + /// Consume a Error without doing anything. This method should be used /// only where an error can be considered a reasonable and expected return /// value. diff --git a/llvm/lib/Support/Error.cpp b/llvm/lib/Support/Error.cpp index 34ec31e3b833fe..93481ca916d2a3 100644 --- a/llvm/lib/Support/Error.cpp +++ b/llvm/lib/Support/Error.cpp @@ -82,6 +82,14 @@ std::string toString(Error E) { return join(Errors.begin(), Errors.end(), "\n"); } +std::string toStringWithoutConsuming(const Error &E) { + SmallVector Errors; + visitErrors(E, [&Errors](const ErrorInfoBase &EI) { + Errors.push_back(EI.message()); + }); + return join(Errors.begin(), Errors.end(), "\n"); +} + std::error_code ErrorList::convertToErrorCode() const { return std::error_code(static_cast(ErrorErrorCode::MultipleErrors), getErrorErrorCat()); diff --git a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp index 83473704398dff..f6a35708dc0765 100644 --- a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp +++ b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp @@ -147,7 +147,7 @@ DwarfLinkerForBinary::loadObject(const DebugMapObject &Obj, if (!ObjectEntry) { auto Err = 
ObjectEntry.takeError(); reportWarning(Twine(Obj.getObjectFilename()) + ": " + - toString(std::move(Err)), + toStringWithoutConsuming(Err), Obj.getObjectFilename()); return errorToErrorCode(std::move(Err)); } @@ -156,7 +156,7 @@ DwarfLinkerForBinary::loadObject(const DebugMapObject &Obj, if (!Object) { auto Err = Object.takeError(); reportWarning(Twine(Obj.getObjectFilename()) + ": " + - toString(std::move(Err)), + toStringWithoutConsuming(Err), Obj.getObjectFilename()); return errorToErrorCode(std::move(Err)); } diff --git a/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp b/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp index 1cecbfc463fec5..b2362ecb75703b 100644 --- a/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp +++ b/llvm/tools/llvm-pdbutil/DumpOutputStyle.cpp @@ -1494,13 +1494,13 @@ Error DumpOutputStyle::dumpModuleSymsForPdb() { if (auto EC = Visitor.visitSymbolStreamFiltered(ModS.getSymbolArray(), Filter)) { P.formatLine("Error while processing symbol records. {0}", - toString(std::move(EC))); + toStringWithoutConsuming(EC)); return EC; } } else if (auto EC = Visitor.visitSymbolStream(ModS.getSymbolArray(), SS.Offset)) { P.formatLine("Error while processing symbol records. 
{0}", - toString(std::move(EC))); + toStringWithoutConsuming(EC)); return EC; } return Error::success(); diff --git a/llvm/unittests/Support/ErrorTest.cpp b/llvm/unittests/Support/ErrorTest.cpp index 5d866a67c0eaae..bd098a4988dc52 100644 --- a/llvm/unittests/Support/ErrorTest.cpp +++ b/llvm/unittests/Support/ErrorTest.cpp @@ -740,15 +740,25 @@ TEST(Error, ErrorCodeConversions) { TEST(Error, ErrorMessage) { EXPECT_EQ(toString(Error::success()), ""); + Error E0 = Error::success(); + EXPECT_EQ(toStringWithoutConsuming(E0), ""); + EXPECT_EQ(toString(std::move(E0)), ""); + Error E1 = make_error(0); + EXPECT_EQ(toStringWithoutConsuming(E1), "CustomError {0}"); EXPECT_EQ(toString(std::move(E1)), "CustomError {0}"); Error E2 = make_error(0); + visitErrors(E2, [](const ErrorInfoBase &EI) { + EXPECT_EQ(EI.message(), "CustomError {0}"); + }); handleAllErrors(std::move(E2), [](const CustomError &CE) { EXPECT_EQ(CE.message(), "CustomError {0}"); }); Error E3 = joinErrors(make_error(0), make_error(1)); + EXPECT_EQ(toStringWithoutConsuming(E3), "CustomError {0}\n" + "CustomError {1}"); EXPECT_EQ(toString(std::move(E3)), "CustomError {0}\n" "CustomError {1}"); } From 4f54b91842ea2ab9546459869df442f7e7fe59d6 Mon Sep 17 00:00:00 2001 From: c8ef Date: Fri, 14 Jun 2024 17:44:29 +0800 Subject: [PATCH 076/155] [SDPatternMatch] Only match ISD::SIGN_EXTEND in m_SExt (#95415) Context: https://github.com/llvm/llvm-project/pull/95365#discussion_r1638236603 The current implementation of `m_SExt` matches both `ISD::SIGN_EXTEND` and `ISD::SIGN_EXTEND_INREG`. However, in cases where we specifically need to match _only_ `ISD::SIGN_EXTEND`, such as in the SelectionDAG graph below, this can lead to issues and unintended combinations. 
``` SelectionDAG has 13 nodes: t0: ch,glue = EntryToken t2: v2i32,ch = CopyFromReg t0, Register:v2i32 %0 t21: v2i32 = sign_extend_inreg t2, ValueType:ch:v2i8 t4: v2i32,ch = CopyFromReg t0, Register:v2i32 %1 t22: v2i32 = sign_extend_inreg t4, ValueType:ch:v2i8 t23: v2i32 = avgfloors t21, t22 t24: v2i32 = sign_extend_inreg t23, ValueType:ch:v2i8 t15: ch,glue = CopyToReg t0, Register:v2i32 $d0, t24 t16: ch = AArch64ISD::RET_GLUE t15, Register:v2i32 $d0, t15:1 ``` --- llvm/include/llvm/CodeGen/SDPatternMatch.h | 3 +-- llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp | 4 ---- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h index 071a27a7950642..f39fbd95b3beb7 100644 --- a/llvm/include/llvm/CodeGen/SDPatternMatch.h +++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h @@ -642,8 +642,7 @@ template inline UnaryOpc_match m_ZExt(const Opnd &Op) { } template inline auto m_SExt(const Opnd &Op) { - return m_AnyOf(UnaryOpc_match(ISD::SIGN_EXTEND, Op), - m_Node(ISD::SIGN_EXTEND_INREG, Op, m_Value())); + return UnaryOpc_match(ISD::SIGN_EXTEND, Op); } template inline UnaryOpc_match m_AnyExt(const Opnd &Op) { diff --git a/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp b/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp index 24930b965f1def..18d6ebeb9076a2 100644 --- a/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp +++ b/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp @@ -269,8 +269,6 @@ TEST_F(SelectionDAGPatternMatchTest, optionalResizing) { SDValue Op64 = DAG->getCopyFromReg(DAG->getEntryNode(), DL, 1, Int64VT); SDValue ZExt = DAG->getNode(ISD::ZERO_EXTEND, DL, Int64VT, Op32); SDValue SExt = DAG->getNode(ISD::SIGN_EXTEND, DL, Int64VT, Op32); - SDValue SExtInReg = DAG->getNode(ISD::SIGN_EXTEND_INREG, DL, Int64VT, Op64, - DAG->getValueType(Int32VT)); SDValue AExt = DAG->getNode(ISD::ANY_EXTEND, DL, Int64VT, Op32); SDValue Trunc = 
DAG->getNode(ISD::TRUNCATE, DL, Int32VT, Op64); @@ -284,8 +282,6 @@ TEST_F(SelectionDAGPatternMatchTest, optionalResizing) { EXPECT_TRUE(A == Op64); EXPECT_TRUE(sd_match(SExt, m_SExtOrSelf(m_Value(A)))); EXPECT_TRUE(A == Op32); - EXPECT_TRUE(sd_match(SExtInReg, m_SExtOrSelf(m_Value(A)))); - EXPECT_TRUE(A == Op64); EXPECT_TRUE(sd_match(Op32, m_AExtOrSelf(m_Value(A)))); EXPECT_TRUE(A == Op32); EXPECT_TRUE(sd_match(AExt, m_AExtOrSelf(m_Value(A)))); From 738fcbee687a50bfa83ba30daf65bab41307211a Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 14 Jun 2024 11:47:56 +0200 Subject: [PATCH 077/155] [SROA] Preserve all GEP flags during speculation Unlikely to matter in practice, as these GEPs are typically promoted away. --- llvm/lib/Transforms/Scalar/SROA.cpp | 9 ++++----- llvm/test/Transforms/SROA/phi-gep.ll | 29 ++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index a3df89b3556402..c2997913a0a255 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -3981,15 +3981,15 @@ class AggLoadStoreRewriter : public InstVisitor { SmallVector FalseOps = GetNewOps(False); IRB.SetInsertPoint(&GEPI); - bool IsInBounds = GEPI.isInBounds(); + GEPNoWrapFlags NW = GEPI.getNoWrapFlags(); Type *Ty = GEPI.getSourceElementType(); Value *NTrue = IRB.CreateGEP(Ty, TrueOps[0], ArrayRef(TrueOps).drop_front(), - True->getName() + ".sroa.gep", IsInBounds); + True->getName() + ".sroa.gep", NW); Value *NFalse = IRB.CreateGEP(Ty, FalseOps[0], ArrayRef(FalseOps).drop_front(), - False->getName() + ".sroa.gep", IsInBounds); + False->getName() + ".sroa.gep", NW); Value *NSel = IRB.CreateSelect(Sel->getCondition(), NTrue, NFalse, Sel->getName() + ".sroa.sel"); @@ -4069,7 +4069,6 @@ class AggLoadStoreRewriter : public InstVisitor { PHINode *NewPhi = IRB.CreatePHI(GEPI.getType(), Phi->getNumIncomingValues(), Phi->getName() + ".sroa.phi"); - bool 
IsInBounds = GEPI.isInBounds(); Type *SourceTy = GEPI.getSourceElementType(); // We only handle arguments, constants, and static allocas here, so we can // insert GEPs at the end of the entry block. @@ -4084,7 +4083,7 @@ class AggLoadStoreRewriter : public InstVisitor { SmallVector NewOps = GetNewOps(Op); NewGEP = IRB.CreateGEP(SourceTy, NewOps[0], ArrayRef(NewOps).drop_front(), - Phi->getName() + ".sroa.gep", IsInBounds); + Phi->getName() + ".sroa.gep", GEPI.getNoWrapFlags()); } NewPhi->addIncoming(NewGEP, BB); } diff --git a/llvm/test/Transforms/SROA/phi-gep.ll b/llvm/test/Transforms/SROA/phi-gep.ll index c2dfa7578eb4e1..776624c0798cfb 100644 --- a/llvm/test/Transforms/SROA/phi-gep.ll +++ b/llvm/test/Transforms/SROA/phi-gep.ll @@ -363,6 +363,35 @@ exit: ret void } +define void @test_sroa_gep_phi_select_same_block_nuw(i1 %c1, i1 %c2, ptr %ptr) { +; CHECK-LABEL: @test_sroa_gep_phi_select_same_block_nuw( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [[PAIR:%.*]], align 8 +; CHECK-NEXT: br label [[WHILE_BODY:%.*]] +; CHECK: while.body: +; CHECK-NEXT: [[PHI:%.*]] = phi ptr [ [[ALLOCA]], [[ENTRY:%.*]] ], [ [[SELECT:%.*]], [[WHILE_BODY]] ] +; CHECK-NEXT: [[SELECT]] = select i1 [[C1:%.*]], ptr [[PHI]], ptr [[PTR:%.*]] +; CHECK-NEXT: [[PHI_SROA_GEP:%.*]] = getelementptr nuw [[PAIR]], ptr [[PHI]], i64 1 +; CHECK-NEXT: [[PTR_SROA_GEP:%.*]] = getelementptr nuw [[PAIR]], ptr [[PTR]], i64 1 +; CHECK-NEXT: [[SELECT_SROA_SEL:%.*]] = select i1 [[C1]], ptr [[PHI_SROA_GEP]], ptr [[PTR_SROA_GEP]] +; CHECK-NEXT: br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[WHILE_BODY]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %alloca = alloca %pair, align 8 + br label %while.body + +while.body: + %phi = phi ptr [ %alloca, %entry ], [ %select, %while.body ] + %select = select i1 %c1, ptr %phi, ptr %ptr + %gep = getelementptr nuw %pair, ptr %select, i64 1 + br i1 %c2, label %exit, label %while.body + +exit: + ret void +} + define i32 @test_sroa_gep_cast_phi_gep(i1 %cond) { ; 
CHECK-LABEL: @test_sroa_gep_cast_phi_gep( ; CHECK-NEXT: entry: From f1a29ec082ead82c6a4d61e515222d6bcf046a5b Mon Sep 17 00:00:00 2001 From: David Green Date: Fri, 14 Jun 2024 10:52:01 +0100 Subject: [PATCH 078/155] [AArch64] Add i128 and fp128 tests to fptoi. NFC --- llvm/test/CodeGen/AArch64/fptoi.ll | 1373 +++++++++++++++++++++++++++- 1 file changed, 1359 insertions(+), 14 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll index 01585d02adcb13..3b8054a635bcda 100644 --- a/llvm/test/CodeGen/AArch64/fptoi.ll +++ b/llvm/test/CodeGen/AArch64/fptoi.ll @@ -1,8 +1,55 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16 ; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16 -; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16 -; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16 +; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16 +; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -verify-machineinstrs -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16 + +; CHECK-GI: warning: Instruction selection used fallback path for fptos_f64_i128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_f64_i128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_f32_i128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_f32_i128 +; CHECK-GI-NEXT: warning: Instruction selection used 
fallback path for fptos_f128_i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_f128_i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_f128_i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_f128_i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_f128_i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_f128_i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_f128_i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_f128_i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_f128_i128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_f128_i128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v2f64_v2i128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v2f64_v2i128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v3f64_v3i128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v3f64_v3i128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v2f32_v2i128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v2f32_v2i128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v3f32_v3i128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v3f32_v3i128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v2f16_v2i128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v2f16_v2i128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v3f16_v3i128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v3f16_v3i128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v2f128_v2i64 
+; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v2f128_v2i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v3f128_v3i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v3f128_v3i64 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v2f128_v2i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v2f128_v2i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v3f128_v3i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v3f128_v3i32 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v2f128_v2i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v2f128_v2i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v3f128_v3i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v3f128_v3i16 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v2f128_v2i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v2f128_v2i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v3f128_v3i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v3f128_v3i8 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v2f128_v2i128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v2f128_v2i128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptos_v3f128_v3i128 +; CHECK-GI-NEXT: warning: Instruction selection used fallback path for fptou_v3f128_v3i128 define i64 @fptos_f64_i64(double %a) { ; CHECK-LABEL: fptos_f64_i64: @@ -94,6 +141,34 @@ entry: ret i8 %c } +define i128 @fptos_f64_i128(double %a) { +; CHECK-LABEL: fptos_f64_i128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __fixdfti +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %c = fptosi double %a to i128 + ret i128 %c +} + +define i128 @fptou_f64_i128(double %a) { +; CHECK-LABEL: fptou_f64_i128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __fixunsdfti +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %c = fptoui double %a to i128 + ret i128 %c +} + define i64 @fptos_f32_i64(float %a) { ; CHECK-LABEL: fptos_f32_i64: ; CHECK: // %bb.0: // %entry @@ -184,6 +259,34 @@ entry: ret i8 %c } +define i128 @fptos_f32_i128(float %a) { +; CHECK-LABEL: fptos_f32_i128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %c = fptosi float %a to i128 + ret i128 %c +} + +define i128 @fptou_f32_i128(float %a) { +; CHECK-LABEL: fptou_f32_i128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __fixunssfti +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %c = fptoui float %a to i128 + ret i128 %c +} + define i64 @fptos_f16_i64(half %a) { ; CHECK-SD-NOFP16-LABEL: fptos_f16_i64: ; CHECK-SD-NOFP16: // %bb.0: // %entry @@ -400,6 +503,200 @@ entry: ret i8 %c } +define i128 @fptos_f16_i128(half %a) { +; CHECK-SD-LABEL: fptos_f16_i128: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: .cfi_offset w30, -16 +; CHECK-SD-NEXT: bl __fixhfti +; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-SD-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptos_f16_i128: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 +; CHECK-GI-NOFP16-NEXT: fcvtzs x0, s0 +; CHECK-GI-NOFP16-NEXT: asr x1, x0, #63 +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptos_f16_i128: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fcvtzs x0, h0 +; CHECK-GI-FP16-NEXT: asr x1, x0, #63 +; CHECK-GI-FP16-NEXT: ret +entry: + %c = fptosi half %a to i128 + ret i128 %c +} + +define i128 @fptou_f16_i128(half %a) { +; CHECK-SD-LABEL: fptou_f16_i128: +; CHECK-SD: // %bb.0: // %entry +; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: .cfi_offset w30, -16 +; CHECK-SD-NEXT: bl __fixunshfti +; CHECK-SD-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-SD-NEXT: ret +; +; CHECK-GI-NOFP16-LABEL: fptou_f16_i128: +; CHECK-GI-NOFP16: // %bb.0: // %entry +; CHECK-GI-NOFP16-NEXT: fcvt s0, h0 +; CHECK-GI-NOFP16-NEXT: mov x1, xzr +; CHECK-GI-NOFP16-NEXT: fcvtzu x0, s0 +; CHECK-GI-NOFP16-NEXT: ret +; +; CHECK-GI-FP16-LABEL: fptou_f16_i128: +; CHECK-GI-FP16: // %bb.0: // %entry +; CHECK-GI-FP16-NEXT: fcvtzu x0, h0 +; CHECK-GI-FP16-NEXT: mov x1, xzr +; CHECK-GI-FP16-NEXT: ret +entry: + %c = fptoui half %a to i128 + ret i128 %c +} + +define i64 @fptos_f128_i64(fp128 %a) { +; CHECK-LABEL: fptos_f128_i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __fixtfdi +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %c = fptosi fp128 %a to i64 + ret i64 %c +} + +define i64 @fptou_f128_i64(fp128 %a) { +; CHECK-LABEL: fptou_f128_i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __fixunstfdi +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %c = fptoui fp128 %a to i64 + ret i64 %c +} + +define i32 @fptos_f128_i32(fp128 %a) { +; CHECK-LABEL: fptos_f128_i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __fixtfsi +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %c = fptosi fp128 %a to i32 + ret i32 %c +} + +define i32 @fptou_f128_i32(fp128 %a) { +; CHECK-LABEL: fptou_f128_i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __fixunstfsi +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %c = fptoui fp128 %a to i32 + ret i32 %c +} + +define i16 @fptos_f128_i16(fp128 %a) { +; CHECK-LABEL: fptos_f128_i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __fixtfsi +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %c = fptosi fp128 %a to i16 + ret i16 %c +} + +define i16 @fptou_f128_i16(fp128 %a) { +; CHECK-LABEL: fptou_f128_i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __fixtfsi +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %c = fptoui fp128 %a to i16 + ret i16 %c +} + +define i8 @fptos_f128_i8(fp128 %a) { +; CHECK-LABEL: fptos_f128_i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __fixtfsi +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %c = fptosi fp128 %a to i8 + ret i8 %c +} + +define i8 @fptou_f128_i8(fp128 %a) { +; CHECK-LABEL: fptou_f128_i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __fixtfsi +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %c = fptoui fp128 %a to i8 + ret i8 %c +} + +define i128 @fptos_f128_i128(fp128 %a) { +; CHECK-LABEL: fptos_f128_i128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __fixtfti +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %c = fptosi fp128 %a to i128 + ret i128 %c +} + +define i128 @fptou_f128_i128(fp128 %a) { +; CHECK-LABEL: fptou_f128_i128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl __fixunstfti +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %c = fptoui fp128 %a to i128 + ret i128 %c +} + define <2 x i64> @fptos_v2f64_v2i64(<2 x double> %a) { ; CHECK-LABEL: fptos_v2f64_v2i64: ; CHECK: // %bb.0: // %entry @@ -1183,14 +1480,14 @@ define <8 x i16> @fptos_v8f64_v8i16(<8 x double> %a) { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fcvtzs v3.2d, v3.2d ; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d -; CHECK-SD-NEXT: adrp x8, .LCPI54_0 +; CHECK-SD-NEXT: adrp x8, .LCPI70_0 ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d ; CHECK-SD-NEXT: xtn v6.2s, v3.2d ; CHECK-SD-NEXT: xtn v5.2s, v2.2d ; CHECK-SD-NEXT: xtn v4.2s, v1.2d ; CHECK-SD-NEXT: xtn v3.2s, v0.2d -; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI54_0] +; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI70_0] ; CHECK-SD-NEXT: tbl v0.16b, { v3.16b, v4.16b, v5.16b, v6.16b }, v0.16b ; CHECK-SD-NEXT: ret ; @@ -1214,14 +1511,14 @@ define <8 x i16> @fptou_v8f64_v8i16(<8 x double> %a) { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fcvtzs v3.2d, v3.2d ; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d -; CHECK-SD-NEXT: adrp x8, .LCPI55_0 +; CHECK-SD-NEXT: adrp x8, .LCPI71_0 ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d ; CHECK-SD-NEXT: xtn v6.2s, v3.2d ; CHECK-SD-NEXT: xtn v5.2s, v2.2d ; CHECK-SD-NEXT: xtn v4.2s, v1.2d ; CHECK-SD-NEXT: xtn v3.2s, v0.2d -; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI55_0] +; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI71_0] ; CHECK-SD-NEXT: tbl v0.16b, { v3.16b, v4.16b, v5.16b, v6.16b }, v0.16b ; CHECK-SD-NEXT: ret ; @@ -1245,7 +1542,7 @@ define <16 x i16> @fptos_v16f64_v16i16(<16 x double> %a) { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fcvtzs v3.2d, v3.2d ; CHECK-SD-NEXT: fcvtzs v7.2d, v7.2d -; CHECK-SD-NEXT: adrp x8, .LCPI56_0 +; CHECK-SD-NEXT: adrp x8, .LCPI72_0 ; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d 
; CHECK-SD-NEXT: fcvtzs v6.2d, v6.2d ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d @@ -1258,7 +1555,7 @@ define <16 x i16> @fptos_v16f64_v16i16(<16 x double> %a) { ; CHECK-SD-NEXT: xtn v22.2s, v6.2d ; CHECK-SD-NEXT: xtn v17.2s, v1.2d ; CHECK-SD-NEXT: xtn v21.2s, v5.2d -; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI56_0] +; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI72_0] ; CHECK-SD-NEXT: xtn v16.2s, v0.2d ; CHECK-SD-NEXT: xtn v20.2s, v4.2d ; CHECK-SD-NEXT: tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b @@ -1292,7 +1589,7 @@ define <16 x i16> @fptou_v16f64_v16i16(<16 x double> %a) { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: fcvtzs v3.2d, v3.2d ; CHECK-SD-NEXT: fcvtzs v7.2d, v7.2d -; CHECK-SD-NEXT: adrp x8, .LCPI57_0 +; CHECK-SD-NEXT: adrp x8, .LCPI73_0 ; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d ; CHECK-SD-NEXT: fcvtzs v6.2d, v6.2d ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d @@ -1305,7 +1602,7 @@ define <16 x i16> @fptou_v16f64_v16i16(<16 x double> %a) { ; CHECK-SD-NEXT: xtn v22.2s, v6.2d ; CHECK-SD-NEXT: xtn v17.2s, v1.2d ; CHECK-SD-NEXT: xtn v21.2s, v5.2d -; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI57_0] +; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI73_0] ; CHECK-SD-NEXT: xtn v16.2s, v0.2d ; CHECK-SD-NEXT: xtn v20.2s, v4.2d ; CHECK-SD-NEXT: tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b @@ -1352,7 +1649,7 @@ define <32 x i16> @fptos_v32f64_v32i16(<32 x double> %a) { ; CHECK-SD-NEXT: .cfi_offset b15, -64 ; CHECK-SD-NEXT: fcvtzs v3.2d, v3.2d ; CHECK-SD-NEXT: fcvtzs v18.2d, v2.2d -; CHECK-SD-NEXT: adrp x8, .LCPI58_0 +; CHECK-SD-NEXT: adrp x8, .LCPI74_0 ; CHECK-SD-NEXT: fcvtzs v19.2d, v1.2d ; CHECK-SD-NEXT: ldp q20, q21, [sp, #160] ; CHECK-SD-NEXT: fcvtzs v22.2d, v0.2d @@ -1387,7 +1684,7 @@ define <32 x i16> @fptos_v32f64_v32i16(<32 x double> %a) { ; CHECK-SD-NEXT: xtn v12.2s, v16.2d ; CHECK-SD-NEXT: xtn v27.2s, v5.2d ; CHECK-SD-NEXT: xtn v26.2s, v4.2d -; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI58_0] +; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI74_0] ; CHECK-SD-NEXT: tbl v0.16b, 
{ v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b ; CHECK-SD-NEXT: tbl v2.16b, { v8.16b, v9.16b, v10.16b, v11.16b }, v4.16b ; CHECK-SD-NEXT: tbl v3.16b, { v12.16b, v13.16b, v14.16b, v15.16b }, v4.16b @@ -1456,7 +1753,7 @@ define <32 x i16> @fptou_v32f64_v32i16(<32 x double> %a) { ; CHECK-SD-NEXT: .cfi_offset b15, -64 ; CHECK-SD-NEXT: fcvtzs v3.2d, v3.2d ; CHECK-SD-NEXT: fcvtzs v18.2d, v2.2d -; CHECK-SD-NEXT: adrp x8, .LCPI59_0 +; CHECK-SD-NEXT: adrp x8, .LCPI75_0 ; CHECK-SD-NEXT: fcvtzs v19.2d, v1.2d ; CHECK-SD-NEXT: ldp q20, q21, [sp, #160] ; CHECK-SD-NEXT: fcvtzs v22.2d, v0.2d @@ -1491,7 +1788,7 @@ define <32 x i16> @fptou_v32f64_v32i16(<32 x double> %a) { ; CHECK-SD-NEXT: xtn v12.2s, v16.2d ; CHECK-SD-NEXT: xtn v27.2s, v5.2d ; CHECK-SD-NEXT: xtn v26.2s, v4.2d -; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI59_0] +; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI75_0] ; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b ; CHECK-SD-NEXT: tbl v2.16b, { v8.16b, v9.16b, v10.16b, v11.16b }, v4.16b ; CHECK-SD-NEXT: tbl v3.16b, { v12.16b, v13.16b, v14.16b, v15.16b }, v4.16b @@ -1992,6 +2289,158 @@ entry: ret <32 x i8> %c } +define <2 x i128> @fptos_v2f64_v2i128(<2 x double> %a) { +; CHECK-LABEL: fptos_v2f64_v2i128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov d0, v0.d[1] +; CHECK-NEXT: bl __fixdfti +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: mov x20, x1 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: bl __fixdfti +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: mov x2, x19 +; CHECK-NEXT: mov x3, x20 +; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload 
+; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x1 +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret +entry: + %c = fptosi <2 x double> %a to <2 x i128> + ret <2 x i128> %c +} + +define <2 x i128> @fptou_v2f64_v2i128(<2 x double> %a) { +; CHECK-LABEL: fptou_v2f64_v2i128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov d0, v0.d[1] +; CHECK-NEXT: bl __fixunsdfti +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: mov x20, x1 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: bl __fixunsdfti +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: mov x2, x19 +; CHECK-NEXT: mov x3, x20 +; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x1 +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret +entry: + %c = fptoui <2 x double> %a to <2 x i128> + ret <2 x i128> %c +} + +define <3 x i128> @fptos_v3f64_v3i128(<3 x double> %a) { +; CHECK-LABEL: fptos_v3f64_v3i128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: stp d9, d8, [sp, #-64]! 
// 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w21, -24 +; CHECK-NEXT: .cfi_offset w22, -32 +; CHECK-NEXT: .cfi_offset w30, -48 +; CHECK-NEXT: .cfi_offset b8, -56 +; CHECK-NEXT: .cfi_offset b9, -64 +; CHECK-NEXT: fmov d9, d0 +; CHECK-NEXT: fmov d0, d1 +; CHECK-NEXT: fmov d8, d2 +; CHECK-NEXT: bl __fixdfti +; CHECK-NEXT: fmov d0, d8 +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: mov x20, x1 +; CHECK-NEXT: bl __fixdfti +; CHECK-NEXT: fmov d0, d9 +; CHECK-NEXT: mov x21, x0 +; CHECK-NEXT: mov x22, x1 +; CHECK-NEXT: bl __fixdfti +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: mov x2, x19 +; CHECK-NEXT: mov x3, x20 +; CHECK-NEXT: mov x4, x21 +; CHECK-NEXT: mov x5, x22 +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x1 +; CHECK-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ldp d9, d8, [sp], #64 // 16-byte Folded Reload +; CHECK-NEXT: ret +entry: + %c = fptosi <3 x double> %a to <3 x i128> + ret <3 x i128> %c +} + +define <3 x i128> @fptou_v3f64_v3i128(<3 x double> %a) { +; CHECK-LABEL: fptou_v3f64_v3i128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: stp d9, d8, [sp, #-64]! 
// 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w21, -24 +; CHECK-NEXT: .cfi_offset w22, -32 +; CHECK-NEXT: .cfi_offset w30, -48 +; CHECK-NEXT: .cfi_offset b8, -56 +; CHECK-NEXT: .cfi_offset b9, -64 +; CHECK-NEXT: fmov d9, d0 +; CHECK-NEXT: fmov d0, d1 +; CHECK-NEXT: fmov d8, d2 +; CHECK-NEXT: bl __fixunsdfti +; CHECK-NEXT: fmov d0, d8 +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: mov x20, x1 +; CHECK-NEXT: bl __fixunsdfti +; CHECK-NEXT: fmov d0, d9 +; CHECK-NEXT: mov x21, x0 +; CHECK-NEXT: mov x22, x1 +; CHECK-NEXT: bl __fixunsdfti +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: mov x2, x19 +; CHECK-NEXT: mov x3, x20 +; CHECK-NEXT: mov x4, x21 +; CHECK-NEXT: mov x5, x22 +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x1 +; CHECK-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: ldp d9, d8, [sp], #64 // 16-byte Folded Reload +; CHECK-NEXT: ret +entry: + %c = fptoui <3 x double> %a to <3 x i128> + ret <3 x i128> %c +} + define <2 x i64> @fptos_v2f32_v2i64(<2 x float> %a) { ; CHECK-LABEL: fptos_v2f32_v2i64: ; CHECK: // %bb.0: // %entry @@ -3120,6 +3569,160 @@ entry: ret <32 x i8> %c } +define <2 x i128> @fptos_v2f32_v2i128(<2 x float> %a) { +; CHECK-LABEL: fptos_v2f32_v2i128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: // kill: def $d0 killed $d0 def 
$q0 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov s0, v0.s[1] +; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: mov x20, x1 +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: mov x2, x19 +; CHECK-NEXT: mov x3, x20 +; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x1 +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret +entry: + %c = fptosi <2 x float> %a to <2 x i128> + ret <2 x i128> %c +} + +define <2 x i128> @fptou_v2f32_v2i128(<2 x float> %a) { +; CHECK-LABEL: fptou_v2f32_v2i128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov s0, v0.s[1] +; CHECK-NEXT: bl __fixunssfti +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: mov x20, x1 +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: bl __fixunssfti +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: mov x2, x19 +; CHECK-NEXT: mov x3, x20 +; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x1 +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret +entry: + %c = fptoui <2 x float> %a to <2 x i128> + ret <2 x i128> %c +} + +define <3 x i128> @fptos_v3f32_v3i128(<3 x float> %a) { +; CHECK-LABEL: fptos_v3f32_v3i128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, 
sp, #64 +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w21, -24 +; CHECK-NEXT: .cfi_offset w22, -32 +; CHECK-NEXT: .cfi_offset w30, -48 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: mov x20, x1 +; CHECK-NEXT: mov s0, v0.s[1] +; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov x21, x0 +; CHECK-NEXT: mov x22, x1 +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: mov x2, x21 +; CHECK-NEXT: mov x3, x22 +; CHECK-NEXT: mov x4, x19 +; CHECK-NEXT: mov x5, x20 +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x1 +; CHECK-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: ret +entry: + %c = fptosi <3 x float> %a to <3 x i128> + ret <3 x i128> %c +} + +define <3 x i128> @fptou_v3f32_v3i128(<3 x float> %a) { +; CHECK-LABEL: fptou_v3f32_v3i128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #64 +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w21, -24 +; CHECK-NEXT: .cfi_offset w22, -32 +; CHECK-NEXT: .cfi_offset w30, 
-48 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: bl __fixunssfti +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: mov x20, x1 +; CHECK-NEXT: mov s0, v0.s[1] +; CHECK-NEXT: bl __fixunssfti +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov x21, x0 +; CHECK-NEXT: mov x22, x1 +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: bl __fixunssfti +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: mov x2, x21 +; CHECK-NEXT: mov x3, x22 +; CHECK-NEXT: mov x4, x19 +; CHECK-NEXT: mov x5, x20 +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x1 +; CHECK-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: ret +entry: + %c = fptoui <3 x float> %a to <3 x i128> + ret <3 x i128> %c +} + define <2 x i64> @fptos_v2f16_v2i64(<2 x half> %a) { ; CHECK-SD-NOFP16-LABEL: fptos_v2f16_v2i64: ; CHECK-SD-NOFP16: // %bb.0: // %entry @@ -6177,3 +6780,745 @@ entry: %c = fptoui <32 x half> %a to <32 x i8> ret <32 x i8> %c } + +define <2 x i128> @fptos_v2f16_v2i128(<2 x half> %a) { +; CHECK-LABEL: fptos_v2f16_v2i128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov h0, v0.h[1] +; CHECK-NEXT: bl __fixhfti +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: mov x20, x1 +; CHECK-NEXT: // kill: def $h0 
killed $h0 killed $q0 +; CHECK-NEXT: bl __fixhfti +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: mov x2, x19 +; CHECK-NEXT: mov x3, x20 +; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x1 +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret +entry: + %c = fptosi <2 x half> %a to <2 x i128> + ret <2 x i128> %c +} + +define <2 x i128> @fptou_v2f16_v2i128(<2 x half> %a) { +; CHECK-LABEL: fptou_v2f16_v2i128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov h0, v0.h[1] +; CHECK-NEXT: bl __fixunshfti +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: mov x20, x1 +; CHECK-NEXT: // kill: def $h0 killed $h0 killed $q0 +; CHECK-NEXT: bl __fixunshfti +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: mov x2, x19 +; CHECK-NEXT: mov x3, x20 +; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x1 +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret +entry: + %c = fptoui <2 x half> %a to <2 x i128> + ret <2 x i128> %c +} + +define <3 x i128> @fptos_v3f16_v3i128(<3 x half> %a) { +; CHECK-LABEL: fptos_v3f16_v3i128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #64 +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset 
w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w21, -24 +; CHECK-NEXT: .cfi_offset w22, -32 +; CHECK-NEXT: .cfi_offset w30, -48 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov h0, v0.h[1] +; CHECK-NEXT: bl __fixhfti +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: mov x20, x1 +; CHECK-NEXT: mov h0, v0.h[2] +; CHECK-NEXT: bl __fixhfti +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov x21, x0 +; CHECK-NEXT: mov x22, x1 +; CHECK-NEXT: // kill: def $h0 killed $h0 killed $q0 +; CHECK-NEXT: bl __fixhfti +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: mov x2, x19 +; CHECK-NEXT: mov x3, x20 +; CHECK-NEXT: mov x4, x21 +; CHECK-NEXT: mov x5, x22 +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x1 +; CHECK-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: ret +entry: + %c = fptosi <3 x half> %a to <3 x i128> + ret <3 x i128> %c +} + +define <3 x i128> @fptou_v3f16_v3i128(<3 x half> %a) { +; CHECK-LABEL: fptou_v3f16_v3i128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #64 +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w21, -24 +; CHECK-NEXT: .cfi_offset w22, -32 +; CHECK-NEXT: .cfi_offset w30, -48 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov h0, v0.h[1] +; CHECK-NEXT: bl __fixunshfti +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: mov x20, x1 
+; CHECK-NEXT: mov h0, v0.h[2] +; CHECK-NEXT: bl __fixunshfti +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov x21, x0 +; CHECK-NEXT: mov x22, x1 +; CHECK-NEXT: // kill: def $h0 killed $h0 killed $q0 +; CHECK-NEXT: bl __fixunshfti +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: mov x2, x19 +; CHECK-NEXT: mov x3, x20 +; CHECK-NEXT: mov x4, x21 +; CHECK-NEXT: mov x5, x22 +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x1 +; CHECK-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: ret +entry: + %c = fptoui <3 x half> %a to <3 x i128> + ret <3 x i128> %c +} + +define <2 x i64> @fptos_v2f128_v2i64(<2 x fp128> %a) { +; CHECK-LABEL: fptos_v2f128_v2i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: bl __fixtfdi +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: bl __fixtfdi +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret +entry: + %c = fptosi <2 x fp128> %a to <2 x i64> + ret <2 x i64> %c +} + +define <2 x i64> @fptou_v2f128_v2i64(<2 x fp128> %a) { +; CHECK-LABEL: fptou_v2f128_v2i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov v0.16b, v1.16b +; 
CHECK-NEXT: bl __fixunstfdi +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: bl __fixunstfdi +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret +entry: + %c = fptoui <2 x fp128> %a to <2 x i64> + ret <2 x i64> %c +} + +define <3 x i64> @fptos_v3f128_v3i64(<3 x fp128> %a) { +; CHECK-LABEL: fptos_v3f128_v3i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #64 +; CHECK-NEXT: str d8, [sp, #48] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #56] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset b8, -16 +; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: bl __fixtfdi +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: bl __fixtfdi +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: fmov d8, x0 +; CHECK-NEXT: bl __fixtfdi +; CHECK-NEXT: fmov d0, d8 +; CHECK-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #56] // 8-byte Folded Reload +; CHECK-NEXT: ldr d8, [sp, #48] // 8-byte Folded Reload +; CHECK-NEXT: fmov d1, x0 +; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: ret +entry: + %c = fptosi <3 x fp128> %a to <3 x i64> + ret <3 x i64> %c +} + +define <3 x i64> @fptou_v3f128_v3i64(<3 x fp128> %a) { +; CHECK-LABEL: fptou_v3f128_v3i64: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #64 +; CHECK-NEXT: str d8, [sp, #48] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #56] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: 
.cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset b8, -16 +; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: bl __fixunstfdi +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: bl __fixunstfdi +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: fmov d8, x0 +; CHECK-NEXT: bl __fixunstfdi +; CHECK-NEXT: fmov d0, d8 +; CHECK-NEXT: ldr q2, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #56] // 8-byte Folded Reload +; CHECK-NEXT: ldr d8, [sp, #48] // 8-byte Folded Reload +; CHECK-NEXT: fmov d1, x0 +; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q2 +; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: ret +entry: + %c = fptoui <3 x fp128> %a to <3 x i64> + ret <3 x i64> %c +} + +define <2 x i32> @fptos_v2f128_v2i32(<2 x fp128> %a) { +; CHECK-LABEL: fptos_v2f128_v2i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: bl __fixtfsi +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: bl __fixtfsi +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: mov v0.s[1], w0 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret +entry: + %c = fptosi <2 x fp128> %a to <2 x i32> + ret <2 x i32> %c +} + +define <2 x i32> @fptou_v2f128_v2i32(<2 x fp128> %a) { +; CHECK-LABEL: fptou_v2f128_v2i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w30, 
-16 +; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: bl __fixunstfsi +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: bl __fixunstfsi +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: mov v0.s[1], w0 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret +entry: + %c = fptoui <2 x fp128> %a to <2 x i32> + ret <2 x i32> %c +} + +define <3 x i32> @fptos_v3f128_v3i32(<3 x fp128> %a) { +; CHECK-LABEL: fptos_v3f128_v3i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #64 +; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill +; CHECK-NEXT: bl __fixtfsi +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: bl __fixtfsi +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.s[1], w0 +; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: bl __fixtfsi +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-NEXT: mov v0.s[2], w0 +; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: ret +entry: + %c = fptosi <3 x fp128> %a to <3 x i32> + ret <3 x i32> %c +} + +define <3 x i32> @fptou_v3f128_v3i32(<3 x fp128> %a) { +; CHECK-LABEL: fptou_v3f128_v3i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #64 +; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: stp q1, q2, [sp] // 32-byte Folded Spill +; CHECK-NEXT: bl __fixunstfsi +; 
CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: bl __fixunstfsi +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.s[1], w0 +; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: bl __fixunstfsi +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-NEXT: mov v0.s[2], w0 +; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: ret +entry: + %c = fptoui <3 x fp128> %a to <3 x i32> + ret <3 x i32> %c +} + +define <2 x i16> @fptos_v2f128_v2i16(<2 x fp128> %a) { +; CHECK-LABEL: fptos_v2f128_v2i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: bl __fixtfsi +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: bl __fixtfsi +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: mov v0.s[1], w0 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret +entry: + %c = fptosi <2 x fp128> %a to <2 x i16> + ret <2 x i16> %c +} + +define <2 x i16> @fptou_v2f128_v2i16(<2 x fp128> %a) { +; CHECK-LABEL: fptou_v2f128_v2i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: bl __fixtfsi +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, 
[sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: bl __fixtfsi +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: mov v0.s[1], w0 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret +entry: + %c = fptoui <2 x fp128> %a to <2 x i16> + ret <2 x i16> %c +} + +define <3 x i16> @fptos_v3f128_v3i16(<3 x fp128> %a) { +; CHECK-LABEL: fptos_v3f128_v3i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str d8, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #40] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset b8, -16 +; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: bl __fixtfsi +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: fmov s8, w0 +; CHECK-NEXT: bl __fixtfsi +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: bl __fixtfsi +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-NEXT: mov v0.s[1], w0 +; CHECK-NEXT: uzp1 v0.4h, v0.4h, v8.4h +; CHECK-NEXT: ldr d8, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret +entry: + %c = fptosi <3 x fp128> %a to <3 x i16> + ret <3 x i16> %c +} + +define <3 x i16> @fptou_v3f128_v3i16(<3 x fp128> %a) { +; CHECK-LABEL: fptou_v3f128_v3i16: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str d8, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #40] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset b8, -16 +; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: bl 
__fixtfsi +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: fmov s8, w0 +; CHECK-NEXT: bl __fixtfsi +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: bl __fixtfsi +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-NEXT: mov v0.s[1], w0 +; CHECK-NEXT: uzp1 v0.4h, v0.4h, v8.4h +; CHECK-NEXT: ldr d8, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret +entry: + %c = fptoui <3 x fp128> %a to <3 x i16> + ret <3 x i16> %c +} + +define <2 x i8> @fptos_v2f128_v2i8(<2 x fp128> %a) { +; CHECK-LABEL: fptos_v2f128_v2i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: bl __fixtfsi +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: bl __fixtfsi +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: mov v0.s[1], w0 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret +entry: + %c = fptosi <2 x fp128> %a to <2 x i8> + ret <2 x i8> %c +} + +define <2 x i8> @fptou_v2f128_v2i8(<2 x fp128> %a) { +; CHECK-LABEL: fptou_v2f128_v2i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: bl __fixtfsi +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload 
+; CHECK-NEXT: bl __fixtfsi +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: mov v0.s[1], w0 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret +entry: + %c = fptoui <2 x fp128> %a to <2 x i8> + ret <2 x i8> %c +} + +define <3 x i8> @fptos_v3f128_v3i8(<3 x fp128> %a) { +; CHECK-LABEL: fptos_v3f128_v3i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str d8, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #40] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset b8, -16 +; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill +; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: bl __fixtfsi +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: fmov s8, w0 +; CHECK-NEXT: bl __fixtfsi +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: bl __fixtfsi +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-NEXT: mov v0.s[1], w0 +; CHECK-NEXT: uzp1 v0.4h, v0.4h, v8.4h +; CHECK-NEXT: ldr d8, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: umov w0, v0.h[0] +; CHECK-NEXT: umov w1, v0.h[1] +; CHECK-NEXT: umov w2, v0.h[2] +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret +entry: + %c = fptosi <3 x fp128> %a to <3 x i8> + ret <3 x i8> %c +} + +define <3 x i8> @fptou_v3f128_v3i8(<3 x fp128> %a) { +; CHECK-LABEL: fptou_v3f128_v3i8: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str d8, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #40] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset b8, -16 +; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill +; 
CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: bl __fixtfsi +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: fmov s8, w0 +; CHECK-NEXT: bl __fixtfsi +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: bl __fixtfsi +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-NEXT: mov v0.s[1], w0 +; CHECK-NEXT: uzp1 v0.4h, v0.4h, v8.4h +; CHECK-NEXT: ldr d8, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: umov w0, v0.h[0] +; CHECK-NEXT: umov w1, v0.h[1] +; CHECK-NEXT: umov w2, v0.h[2] +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret +entry: + %c = fptoui <3 x fp128> %a to <3 x i8> + ret <3 x i8> %c +} + +define <2 x i128> @fptos_v2f128_v2i128(<2 x fp128> %a) { +; CHECK-LABEL: fptos_v2f128_v2i128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: bl __fixtfti +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: mov x20, x1 +; CHECK-NEXT: bl __fixtfti +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: mov x2, x19 +; CHECK-NEXT: mov x3, x20 +; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x1 +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret +entry: + %c = fptosi <2 x fp128> %a to <2 x i128> + ret <2 x i128> %c +} + +define <2 x i128> @fptou_v2f128_v2i128(<2 x fp128> %a) { +; CHECK-LABEL: fptou_v2f128_v2i128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, 
#48 +; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: bl __fixunstfti +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: mov x20, x1 +; CHECK-NEXT: bl __fixunstfti +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: mov x2, x19 +; CHECK-NEXT: mov x3, x20 +; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x1 +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: ret +entry: + %c = fptoui <2 x fp128> %a to <2 x i128> + ret <2 x i128> %c +} + +define <3 x i128> @fptos_v3f128_v3i128(<3 x fp128> %a) { +; CHECK-LABEL: fptos_v3f128_v3i128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #80 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w21, -24 +; CHECK-NEXT: .cfi_offset w22, -32 +; CHECK-NEXT: .cfi_offset w30, -48 +; CHECK-NEXT: stp q2, q0, [sp] // 32-byte Folded Spill +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: bl __fixtfti +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: mov x20, x1 +; CHECK-NEXT: bl __fixtfti +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: mov x21, x0 +; CHECK-NEXT: mov x22, x1 +; CHECK-NEXT: bl __fixtfti +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: mov x2, x19 +; CHECK-NEXT: mov x3, x20 +; CHECK-NEXT: mov x4, x21 +; CHECK-NEXT: mov x5, x22 +; 
CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x1 +; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: add sp, sp, #80 +; CHECK-NEXT: ret +entry: + %c = fptosi <3 x fp128> %a to <3 x i128> + ret <3 x i128> %c +} + +define <3 x i128> @fptou_v3f128_v3i128(<3 x fp128> %a) { +; CHECK-LABEL: fptou_v3f128_v3i128: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #80 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w21, -24 +; CHECK-NEXT: .cfi_offset w22, -32 +; CHECK-NEXT: .cfi_offset w30, -48 +; CHECK-NEXT: stp q2, q0, [sp] // 32-byte Folded Spill +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: bl __fixunstfti +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: mov x20, x1 +; CHECK-NEXT: bl __fixunstfti +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: mov x21, x0 +; CHECK-NEXT: mov x22, x1 +; CHECK-NEXT: bl __fixunstfti +; CHECK-NEXT: fmov d0, x0 +; CHECK-NEXT: mov x2, x19 +; CHECK-NEXT: mov x3, x20 +; CHECK-NEXT: mov x4, x21 +; CHECK-NEXT: mov x5, x22 +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x1 +; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: add sp, sp, #80 +; CHECK-NEXT: ret +entry: + %c = fptoui <3 x fp128> %a to <3 x i128> + ret <3 x i128> %c +} From 1ceede3318c29af83b219cca137f5e2c563fc871 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 14 Jun 2024 12:25:25 +0200 Subject: [PATCH 079/155] 
[AMDGPULowerBufferFatPointers] Don't try to preserve flags for constant expressions We expect all of these ConstantExpr ctors to fold away, don't try to preserve flags, especially as the flags are not correct. --- .../AMDGPU/AMDGPULowerBufferFatPointers.cpp | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp index ea654dbd487bca..0b261d8e33907c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp @@ -680,35 +680,28 @@ Constant *FatPtrConstMaterializer::materializeBufferFatPtrConst(Constant *C) { report_fatal_error( "Scalable vector or unsized struct in fat pointer GEP"); Constant *OffAccum = nullptr; - // Accumulate offsets together before adding to the base in order to - // preserve as many of the inbounds properties as possible. for (auto [Arg, Multiple] : VariableOffs) { Constant *NewArg = InternalMapper.mapConstant(*cast(Arg)); NewArg = ConstantFoldIntegerCast(NewArg, OffTy, /*IsSigned=*/true, DL); if (!Multiple.isOne()) { if (Multiple.isPowerOf2()) { NewArg = ConstantExpr::getShl( - NewArg, - CE->getIntegerValue( - OffTy, APInt(BufferOffsetWidth, Multiple.logBase2())), - /*hasNUW=*/InBounds, /*HasNSW=*/InBounds); + NewArg, CE->getIntegerValue(OffTy, APInt(BufferOffsetWidth, + Multiple.logBase2()))); } else { - NewArg = - ConstantExpr::getMul(NewArg, CE->getIntegerValue(OffTy, Multiple), - /*hasNUW=*/InBounds, /*hasNSW=*/InBounds); + NewArg = ConstantExpr::getMul(NewArg, + CE->getIntegerValue(OffTy, Multiple)); } } if (OffAccum) { - OffAccum = ConstantExpr::getAdd(OffAccum, NewArg, /*hasNUW=*/InBounds, - /*hasNSW=*/InBounds); + OffAccum = ConstantExpr::getAdd(OffAccum, NewArg); } else { OffAccum = NewArg; } } Constant *NewConstOff = CE->getIntegerValue(OffTy, NewConstOffVal); if (OffAccum) - OffAccum = ConstantExpr::getAdd(OffAccum, 
NewConstOff, - /*hasNUW=*/InBounds, /*hasNSW=*/InBounds); + OffAccum = ConstantExpr::getAdd(OffAccum, NewConstOff); else OffAccum = NewConstOff; bool HasNonNegativeOff = false; From 71f8b441ed6a944ceb4530b49e8588dcbb1e0066 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Thu, 13 Jun 2024 20:09:02 +0900 Subject: [PATCH 080/155] Reapply: [MC/DC][Coverage] Loosen the limit of NumConds from 6 (#82448) By storing possible test vectors instead of combinations of conditions, the restriction is dramatically relaxed. This introduces two options to `cc1`: * `-fmcdc-max-conditions=32767` * `-fmcdc-max-test-vectors=2147483646` This change makes coverage mapping, profraw, and profdata incompatible with Clang-18. - Bitmap semantics changed. It is incompatible with previous format. - `BitmapIdx` in `Decision` points to the end of the bitmap. - Bitmap is packed per function. - `llvm-cov` can understand `profdata` generated by `llvm-profdata-18`. RFC: https://discourse.llvm.org/t/rfc-coverage-new-algorithm-and-file-format-for-mc-dc/76798 -- Change(s) since llvmorg-19-init-14288-g7ead2d8c7e91 - Update compiler-rt/test/profile/ContinuousSyncMode/image-with-mcdc.c --- clang/docs/SourceBasedCodeCoverage.rst | 29 ++++- clang/include/clang/Basic/CodeGenOptions.def | 2 + clang/include/clang/Driver/Options.td | 8 ++ clang/lib/CodeGen/CodeGenPGO.cpp | 50 +++++---- clang/lib/CodeGen/CoverageMappingGen.cpp | 77 ++++++++++++- clang/lib/CodeGen/MCDCState.h | 4 +- .../CoverageMapping/branch-constfolded.cpp | 34 +++--- clang/test/CoverageMapping/logical.cpp | 8 +- clang/test/CoverageMapping/mcdc-class.cpp | 4 +- .../CoverageMapping/mcdc-error-conditions.cpp | 105 +++++++++++++++++- .../mcdc-logical-scalar-ids.cpp | 30 ++--- .../mcdc-logical-stmt-ids-all.cpp | 32 +++--- .../CoverageMapping/mcdc-logical-stmt-ids.cpp | 30 ++--- .../test/CoverageMapping/mcdc-scratch-space.c | 12 +- .../CoverageMapping/mcdc-system-headers.cpp | 8 +- clang/test/Profile/c-mcdc-class.cpp | 38 ++++--- 
clang/test/Profile/c-mcdc-logicalop-ternary.c | 18 +-- clang/test/Profile/c-mcdc-nested-ternary.c | 35 +++--- clang/test/Profile/c-mcdc-not.c | 53 ++++----- clang/test/Profile/c-mcdc.c | 63 +++++------ .../ContinuousSyncMode/image-with-mcdc.c | 2 +- llvm/docs/CoverageMappingFormat.rst | 2 +- llvm/docs/LangRef.rst | 18 ++- llvm/include/llvm/IR/IntrinsicInst.h | 11 +- .../ProfileData/Coverage/CoverageMapping.h | 3 +- llvm/include/llvm/ProfileData/InstrProf.h | 2 +- .../ProfileData/Coverage/CoverageMapping.cpp | 34 ++++-- .../Instrumentation/InstrProfiling.cpp | 15 ++- .../InstrProfiling/inline-data-var-create.ll | 6 +- .../Instrumentation/InstrProfiling/mcdc.ll | 3 +- .../llvm-cov/Inputs/mcdc-const-folding.o | Bin 34504 -> 34528 bytes .../Inputs/mcdc-const-folding.proftext | 36 +++--- llvm/test/tools/llvm-cov/Inputs/mcdc-const.o | Bin 5208 -> 5208 bytes .../tools/llvm-cov/Inputs/mcdc-const.proftext | 6 +- .../tools/llvm-cov/Inputs/mcdc-general-18.o | Bin 0 -> 6456 bytes .../llvm-cov/Inputs/mcdc-general-18.profdata | Bin 0 -> 888 bytes .../test/tools/llvm-cov/Inputs/mcdc-general.o | Bin 6456 -> 6544 bytes .../llvm-cov/Inputs/mcdc-general.proftext | 11 +- llvm/test/tools/llvm-cov/Inputs/mcdc-macro.o | Bin 6480 -> 6408 bytes .../tools/llvm-cov/Inputs/mcdc-macro.proftext | 15 +-- llvm/test/tools/llvm-cov/Inputs/mcdc-maxbs.o | Bin 4112 -> 4112 bytes llvm/test/tools/llvm-cov/mcdc-general-18.test | 20 ++++ .../ProfileData/CoverageMappingTest.cpp | 4 +- 43 files changed, 530 insertions(+), 298 deletions(-) create mode 100644 llvm/test/tools/llvm-cov/Inputs/mcdc-general-18.o create mode 100644 llvm/test/tools/llvm-cov/Inputs/mcdc-general-18.profdata create mode 100644 llvm/test/tools/llvm-cov/mcdc-general-18.test diff --git a/clang/docs/SourceBasedCodeCoverage.rst b/clang/docs/SourceBasedCodeCoverage.rst index cee706289284db..73910e134a5891 100644 --- a/clang/docs/SourceBasedCodeCoverage.rst +++ b/clang/docs/SourceBasedCodeCoverage.rst @@ -484,10 +484,31 @@ MC/DC 
Instrumentation --------------------- When instrumenting for Modified Condition/Decision Coverage (MC/DC) using the -clang option ``-fcoverage-mcdc``, users are limited to at most **six** leaf-level -conditions in a boolean expression. A warning will be generated for boolean -expressions that contain more than six, and they will not be instrumented for -MC/DC. +clang option ``-fcoverage-mcdc``, there are two hard limits. + +The maximum number of terms is limited to 32767, which is practical for +handwritten expressions. To be more restrictive in order to enforce coding rules, +use ``-Xclang -fmcdc-max-conditions=n``. Expressions with exceeded condition +counts ``n`` will generate warnings and will be excluded in the MC/DC coverage. + +The number of test vectors (the maximum number of possible combinations of +expressions) is limited to 2,147,483,646. In this case, approximately +256MiB (==2GiB/8) is used to record test vectors. + +To reduce memory usage, users can limit the maximum number of test vectors per +expression with ``-Xclang -fmcdc-max-test-vectors=m``. +If the number of test vectors resulting from the analysis of an expression +exceeds ``m``, a warning will be issued and the expression will be excluded +from the MC/DC coverage. + +The number of test vectors ``m``, for ``n`` terms in an expression, can be +``m <= 2^n`` in the theoretical worst case, but is usually much smaller. +In simple cases, such as expressions consisting of a sequence of single +operators, ``m == n+1``. For example, ``(a && b && c && d && e && f && g)`` +requires 8 test vectors. + +Expressions such as ``((a0 && b0) || (a1 && b1) || ...)`` can cause the +number of test vectors to increase exponentially. Also, if a boolean expression is embedded in the nest of another boolean expression but separated by a non-logical operator, this is also not supported. 
diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 7ffc40a00504fb..e3f6da4a84f694 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -223,6 +223,8 @@ CODEGENOPT(CoverageMapping , 1, 0) ///< Generate coverage mapping regions to CODEGENOPT(DumpCoverageMapping , 1, 0) ///< Dump the generated coverage mapping ///< regions. CODEGENOPT(MCDCCoverage , 1, 0) ///< Enable MC/DC code coverage criteria. +VALUE_CODEGENOPT(MCDCMaxConds, 16, 32767) ///< MC/DC Maximum conditions. +VALUE_CODEGENOPT(MCDCMaxTVs, 32, 0x7FFFFFFE) ///< MC/DC Maximum test vectors. /// If -fpcc-struct-return or -freg-struct-return is specified. ENUM_CODEGENOPT(StructReturnConvention, StructReturnConventionKind, 2, SRCK_Default) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 1cb03ac1bffb6f..4ab8638175dd3f 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1790,6 +1790,14 @@ defm mcdc_coverage : BoolFOption<"coverage-mcdc", "Enable MC/DC criteria when generating code coverage">, NegFlag, BothFlags<[], [ClangOption, CLOption]>>; +def fmcdc_max_conditions_EQ : Joined<["-"], "fmcdc-max-conditions=">, + Group, Visibility<[CC1Option]>, + HelpText<"Maximum number of conditions in MC/DC coverage">, + MarshallingInfoInt, "32767">; +def fmcdc_max_test_vectors_EQ : Joined<["-"], "fmcdc-max-test-vectors=">, + Group, Visibility<[CC1Option]>, + HelpText<"Maximum number of test vectors in MC/DC coverage">, + MarshallingInfoInt, "0x7FFFFFFE">; def fprofile_generate : Flag<["-"], "fprofile-generate">, Group, Visibility<[ClangOption, CLOption]>, HelpText<"Generate instrumented code to collect execution counts into default.profraw (overridden by LLVM_PROFILE_FILE env var)">; diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp index db8e6f55302adc..59139e342de886 100644 --- 
a/clang/lib/CodeGen/CodeGenPGO.cpp +++ b/clang/lib/CodeGen/CodeGenPGO.cpp @@ -167,8 +167,6 @@ struct MapRegionCounters : public RecursiveASTVisitor { PGOHash Hash; /// The map of statements to counters. llvm::DenseMap &CounterMap; - /// The next bitmap byte index to assign. - unsigned NextMCDCBitmapIdx; /// The state of MC/DC Coverage in this function. MCDC::State &MCDCState; /// Maximum number of supported MC/DC conditions in a boolean expression. @@ -183,7 +181,7 @@ struct MapRegionCounters : public RecursiveASTVisitor { MCDC::State &MCDCState, unsigned MCDCMaxCond, DiagnosticsEngine &Diag) : NextCounter(0), Hash(HashVersion), CounterMap(CounterMap), - NextMCDCBitmapIdx(0), MCDCState(MCDCState), MCDCMaxCond(MCDCMaxCond), + MCDCState(MCDCState), MCDCMaxCond(MCDCMaxCond), ProfileVersion(ProfileVersion), Diag(Diag) {} // Blocks and lambdas are handled as separate functions, so we need not @@ -314,11 +312,8 @@ struct MapRegionCounters : public RecursiveASTVisitor { return true; } - // Otherwise, allocate the number of bytes required for the bitmap - // based on the number of conditions. Must be at least 1-byte long. - MCDCState.DecisionByStmt[BinOp].BitmapIdx = NextMCDCBitmapIdx; - unsigned SizeInBits = std::max(1L << NumCond, CHAR_BIT); - NextMCDCBitmapIdx += SizeInBits / CHAR_BIT; + // Otherwise, allocate the Decision. + MCDCState.DecisionByStmt[BinOp].BitmapIdx = 0; } return true; } @@ -1083,7 +1078,9 @@ void CodeGenPGO::mapRegionCounters(const Decl *D) { // for most embedded applications. Setting a maximum value prevents the // bitmap footprint from growing too large without the user's knowledge. In // the future, this value could be adjusted with a command-line option. - unsigned MCDCMaxConditions = (CGM.getCodeGenOpts().MCDCCoverage) ? 6 : 0; + unsigned MCDCMaxConditions = + (CGM.getCodeGenOpts().MCDCCoverage ? 
CGM.getCodeGenOpts().MCDCMaxConds + : 0); RegionCounterMap.reset(new llvm::DenseMap); RegionMCDCState.reset(new MCDC::State); @@ -1099,7 +1096,6 @@ void CodeGenPGO::mapRegionCounters(const Decl *D) { Walker.TraverseDecl(const_cast(CD)); assert(Walker.NextCounter > 0 && "no entry counter mapped for decl"); NumRegionCounters = Walker.NextCounter; - RegionMCDCState->BitmapBytes = Walker.NextMCDCBitmapIdx; FunctionHash = Walker.Hash.finalize(); } @@ -1232,7 +1228,7 @@ void CodeGenPGO::emitMCDCParameters(CGBuilderTy &Builder) { // anything. llvm::Value *Args[3] = {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), Builder.getInt64(FunctionHash), - Builder.getInt32(RegionMCDCState->BitmapBytes)}; + Builder.getInt32(RegionMCDCState->BitmapBits)}; Builder.CreateCall( CGM.getIntrinsic(llvm::Intrinsic::instrprof_mcdc_parameters), Args); } @@ -1250,6 +1246,11 @@ void CodeGenPGO::emitMCDCTestVectorBitmapUpdate(CGBuilderTy &Builder, if (DecisionStateIter == RegionMCDCState->DecisionByStmt.end()) return; + // Don't create tvbitmap_update if the record is allocated but excluded. + // Or `bitmap |= (1 << 0)` would be wrongly executed to the next bitmap. + if (DecisionStateIter->second.Indices.size() == 0) + return; + // Extract the offset of the global bitmap associated with this expression. unsigned MCDCTestVectorBitmapOffset = DecisionStateIter->second.BitmapIdx; auto *I8PtrTy = llvm::PointerType::getUnqual(CGM.getLLVMContext()); @@ -1261,7 +1262,7 @@ void CodeGenPGO::emitMCDCTestVectorBitmapUpdate(CGBuilderTy &Builder, // index represents an executed test vector. 
llvm::Value *Args[5] = {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), Builder.getInt64(FunctionHash), - Builder.getInt32(RegionMCDCState->BitmapBytes), + Builder.getInt32(0), // Unused Builder.getInt32(MCDCTestVectorBitmapOffset), MCDCCondBitmapAddr.emitRawPointer(CGF)}; Builder.CreateCall( @@ -1305,19 +1306,22 @@ void CodeGenPGO::emitMCDCCondBitmapUpdate(CGBuilderTy &Builder, const Expr *S, // Extract the ID of the condition we are setting in the bitmap. const auto &Branch = BranchStateIter->second; assert(Branch.ID >= 0 && "Condition has no ID!"); + assert(Branch.DecisionStmt); - auto *I8PtrTy = llvm::PointerType::getUnqual(CGM.getLLVMContext()); + // Cancel the emission if the Decision is erased after the allocation. + const auto DecisionIter = + RegionMCDCState->DecisionByStmt.find(Branch.DecisionStmt); + if (DecisionIter == RegionMCDCState->DecisionByStmt.end()) + return; - // Emit intrinsic that updates a dedicated temporary value on the stack after - // a condition is evaluated. After the set of conditions has been updated, - // the resulting value is used to update the boolean expression's bitmap. - llvm::Value *Args[5] = {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy), - Builder.getInt64(FunctionHash), - Builder.getInt32(Branch.ID), - MCDCCondBitmapAddr.emitRawPointer(CGF), Val}; - Builder.CreateCall( - CGM.getIntrinsic(llvm::Intrinsic::instrprof_mcdc_condbitmap_update), - Args); + const auto &TVIdxs = DecisionIter->second.Indices[Branch.ID]; + + auto *CurTV = Builder.CreateLoad(MCDCCondBitmapAddr, + "mcdc." 
+ Twine(Branch.ID + 1) + ".cur"); + auto *NewTV = Builder.CreateAdd(CurTV, Builder.getInt32(TVIdxs[true])); + NewTV = Builder.CreateSelect( + Val, NewTV, Builder.CreateAdd(CurTV, Builder.getInt32(TVIdxs[false]))); + Builder.CreateStore(NewTV, MCDCCondBitmapAddr); } void CodeGenPGO::setValueProfilingFlag(llvm::Module &M) { diff --git a/clang/lib/CodeGen/CoverageMappingGen.cpp b/clang/lib/CodeGen/CoverageMappingGen.cpp index 6ce2d32dd292ed..ba483d857d5f46 100644 --- a/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -195,6 +195,10 @@ class SourceMappingRegion { return std::holds_alternative(MCDCParams); } + const auto &getMCDCBranchParams() const { + return mcdc::getParams(MCDCParams); + } + bool isMCDCDecision() const { return std::holds_alternative(MCDCParams); } @@ -204,6 +208,8 @@ class SourceMappingRegion { } const mcdc::Parameters &getMCDCParams() const { return MCDCParams; } + + void resetMCDCParams() { MCDCParams = mcdc::Parameters(); } }; /// Spelling locations for the start and end of a source region. @@ -748,6 +754,7 @@ struct MCDCCoverageBuilder { llvm::SmallVector DecisionStack; MCDC::State &MCDCState; + const Stmt *DecisionStmt = nullptr; mcdc::ConditionID NextID = 0; bool NotMapped = false; @@ -777,7 +784,8 @@ struct MCDCCoverageBuilder { /// Set the given condition's ID. void setCondID(const Expr *Cond, mcdc::ConditionID ID) { - MCDCState.BranchByStmt[CodeGenFunction::stripCond(Cond)].ID = ID; + MCDCState.BranchByStmt[CodeGenFunction::stripCond(Cond)] = {ID, + DecisionStmt}; } /// Return the ID of a given condition. 
@@ -808,6 +816,11 @@ struct MCDCCoverageBuilder { if (NotMapped) return; + if (NextID == 0) { + DecisionStmt = E; + assert(MCDCState.DecisionByStmt.contains(E)); + } + const mcdc::ConditionIDs &ParentDecision = DecisionStack.back(); // If the operator itself has an assigned ID, this means it represents a @@ -2122,13 +2135,41 @@ struct CounterCoverageMappingBuilder subtractCounters(ParentCount, TrueCount)); } - void createDecision(const BinaryOperator *E) { + void createOrCancelDecision(const BinaryOperator *E, unsigned Since) { unsigned NumConds = MCDCBuilder.getTotalConditionsAndReset(E); if (NumConds == 0) return; + // Extract [ID, Conds] to construct the graph. + llvm::SmallVector CondIDs(NumConds); + for (const auto &SR : ArrayRef(SourceRegions).slice(Since)) { + if (SR.isMCDCBranch()) { + auto [ID, Conds] = SR.getMCDCBranchParams(); + CondIDs[ID] = Conds; + } + } + + // Construct the graph and calculate `Indices`. + mcdc::TVIdxBuilder Builder(CondIDs); + unsigned NumTVs = Builder.NumTestVectors; + unsigned MaxTVs = CVM.getCodeGenModule().getCodeGenOpts().MCDCMaxTVs; + assert(MaxTVs < mcdc::TVIdxBuilder::HardMaxTVs); + + if (NumTVs > MaxTVs) { + // NumTVs exceeds MaxTVs -- warn and cancel the Decision. + cancelDecision(E, Since, NumTVs, MaxTVs); + return; + } + + // Update the state for CodeGenPGO + assert(MCDCState.DecisionByStmt.contains(E)); + MCDCState.DecisionByStmt[E] = { + MCDCState.BitmapBits, // Top + std::move(Builder.Indices), + }; + auto DecisionParams = mcdc::DecisionParameters{ - MCDCState.DecisionByStmt[E].BitmapIdx, + MCDCState.BitmapBits += NumTVs, // Tail NumConds, }; @@ -2136,6 +2177,28 @@ struct CounterCoverageMappingBuilder createDecisionRegion(E, DecisionParams); } + // Warn and cancel the Decision. 
+ void cancelDecision(const BinaryOperator *E, unsigned Since, int NumTVs, + int MaxTVs) { + auto &Diag = CVM.getCodeGenModule().getDiags(); + unsigned DiagID = + Diag.getCustomDiagID(DiagnosticsEngine::Warning, + "unsupported MC/DC boolean expression; " + "number of test vectors (%0) exceeds max (%1). " + "Expression will not be covered"); + Diag.Report(E->getBeginLoc(), DiagID) << NumTVs << MaxTVs; + + // Restore MCDCBranch to Branch. + for (auto &SR : MutableArrayRef(SourceRegions).slice(Since)) { + assert(!SR.isMCDCDecision() && "Decision shouldn't be seen here"); + if (SR.isMCDCBranch()) + SR.resetMCDCParams(); + } + + // Tell CodeGenPGO not to instrument. + MCDCState.DecisionByStmt.erase(E); + } + /// Check if E belongs to system headers. bool isExprInSystemHeader(const BinaryOperator *E) const { return (!SystemHeadersCoverage && @@ -2152,6 +2215,8 @@ struct CounterCoverageMappingBuilder bool IsRootNode = MCDCBuilder.isIdle(); + unsigned SourceRegionsSince = SourceRegions.size(); + // Keep track of Binary Operator and assign MCDC condition IDs. MCDCBuilder.pushAndAssignIDs(E); @@ -2190,7 +2255,7 @@ struct CounterCoverageMappingBuilder // Create MCDC Decision Region if at top-level (root). if (IsRootNode) - createDecision(E); + createOrCancelDecision(E, SourceRegionsSince); } // Determine whether the right side of OR operation need to be visited. @@ -2211,6 +2276,8 @@ struct CounterCoverageMappingBuilder bool IsRootNode = MCDCBuilder.isIdle(); + unsigned SourceRegionsSince = SourceRegions.size(); + // Keep track of Binary Operator and assign MCDC condition IDs. MCDCBuilder.pushAndAssignIDs(E); @@ -2253,7 +2320,7 @@ struct CounterCoverageMappingBuilder // Create MCDC Decision Region if at top-level (root). 
if (IsRootNode) - createDecision(E); + createOrCancelDecision(E, SourceRegionsSince); } void VisitLambdaExpr(const LambdaExpr *LE) { diff --git a/clang/lib/CodeGen/MCDCState.h b/clang/lib/CodeGen/MCDCState.h index 29b6f0fb681aa0..e0dd28ff90ed12 100644 --- a/clang/lib/CodeGen/MCDCState.h +++ b/clang/lib/CodeGen/MCDCState.h @@ -27,16 +27,18 @@ using namespace llvm::coverage::mcdc; /// Per-Function MC/DC state struct State { - unsigned BitmapBytes = 0; + unsigned BitmapBits = 0; struct Decision { unsigned BitmapIdx; + llvm::SmallVector> Indices; }; llvm::DenseMap DecisionByStmt; struct Branch { ConditionID ID; + const Stmt *DecisionStmt; }; llvm::DenseMap BranchByStmt; diff --git a/clang/test/CoverageMapping/branch-constfolded.cpp b/clang/test/CoverageMapping/branch-constfolded.cpp index c8755d5d752b63..1e7e32808e8382 100644 --- a/clang/test/CoverageMapping/branch-constfolded.cpp +++ b/clang/test/CoverageMapping/branch-constfolded.cpp @@ -4,93 +4,93 @@ // RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++11 -fcoverage-mcdc -fprofile-instrument=clang -fcoverage-mapping -dump-coverage-mapping -emit-llvm-only -main-file-name branch-constfolded.cpp %s | FileCheck %s -check-prefix=MCDC // CHECK-LABEL: _Z6fand_0b: -bool fand_0(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:20 = M:0, C:2 +bool fand_0(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:20 = M:3, C:2 return false && a; // CHECK: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:15 = 0, 0 } // CHECK: Branch,File 0, [[@LINE-1]]:19 -> [[@LINE-1]]:20 = #2, (#1 - #2) // CHECK-LABEL: _Z6fand_1b: -bool fand_1(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:19 = M:0, C:2 +bool fand_1(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:19 = M:3, C:2 return a && true; // CHECK: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:11 = #1, (#0 - #1) } // CHECK: Branch,File 0, [[@LINE-1]]:15 -> [[@LINE-1]]:19 = 0, 0 // CHECK-LABEL: _Z6fand_2bb: -bool fand_2(bool a, bool b) {// 
MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:25 = M:0, C:3 +bool fand_2(bool a, bool b) {// MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:25 = M:4, C:3 return false && a && b; // CHECK: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:15 = 0, 0 } // CHECK: Branch,File 0, [[@LINE-1]]:19 -> [[@LINE-1]]:20 = #4, (#3 - #4) // CHECK: Branch,File 0, [[@LINE-2]]:24 -> [[@LINE-2]]:25 = #2, (#1 - #2) // CHECK-LABEL: _Z6fand_3bb: -bool fand_3(bool a, bool b) {// MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:24 = M:0, C:3 +bool fand_3(bool a, bool b) {// MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:24 = M:4, C:3 return a && true && b; // CHECK: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:11 = #3, (#0 - #3) } // CHECK: Branch,File 0, [[@LINE-1]]:15 -> [[@LINE-1]]:19 = 0, 0 // CHECK: Branch,File 0, [[@LINE-2]]:23 -> [[@LINE-2]]:24 = #2, (#1 - #2) // CHECK-LABEL: _Z6fand_4bb: -bool fand_4(bool a, bool b) {// MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:25 = M:0, C:3 +bool fand_4(bool a, bool b) {// MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:25 = M:4, C:3 return a && b && false; // CHECK: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:11 = #3, (#0 - #3) } // CHECK: Branch,File 0, [[@LINE-1]]:15 -> [[@LINE-1]]:16 = #4, (#3 - #4) // CHECK: Branch,File 0, [[@LINE-2]]:20 -> [[@LINE-2]]:25 = 0, 0 // CHECK-LABEL: _Z6fand_5b: -bool fand_5(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:23 = M:0, C:2 +bool fand_5(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:23 = M:3, C:2 return false && true; // CHECK: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:15 = 0, 0 } // CHECK: Branch,File 0, [[@LINE-1]]:19 -> [[@LINE-1]]:23 = 0, 0 // CHECK-LABEL: _Z6fand_6b: -bool fand_6(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:19 = M:0, C:2 +bool fand_6(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:19 = M:3, C:2 return true && a; // CHECK: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:14 = 0, 0 } // CHECK: 
Branch,File 0, [[@LINE-1]]:18 -> [[@LINE-1]]:19 = #2, (#1 - #2) // CHECK-LABEL: _Z6fand_7b: -bool fand_7(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:20 = M:0, C:2 +bool fand_7(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:20 = M:3, C:2 return a && false; // CHECK: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:11 = #1, (#0 - #1) } // CHECK: Branch,File 0, [[@LINE-1]]:15 -> [[@LINE-1]]:20 = 0, 0 // CHECK-LABEL: _Z5for_0b: -bool for_0(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:19 = M:0, C:2 +bool for_0(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:19 = M:3, C:2 return true || a; // CHECK: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:14 = 0, 0 } // CHECK: Branch,File 0, [[@LINE-1]]:18 -> [[@LINE-1]]:19 = (#1 - #2), #2 // CHECK-LABEL: _Z5for_1b: -bool for_1(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:20 = M:0, C:2 +bool for_1(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:20 = M:3, C:2 return a || false; // CHECK: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:11 = (#0 - #1), #1 } // CHECK: Branch,File 0, [[@LINE-1]]:15 -> [[@LINE-1]]:20 = 0, 0 // CHECK-LABEL: _Z5for_2bb: -bool for_2(bool a, bool b) {// MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:24 = M:0, C:3 +bool for_2(bool a, bool b) {// MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:24 = M:4, C:3 return true || a || b; // CHECK: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:14 = 0, 0 } // CHECK: Branch,File 0, [[@LINE-1]]:18 -> [[@LINE-1]]:19 = (#3 - #4), #4 // CHECK: Branch,File 0, [[@LINE-2]]:23 -> [[@LINE-2]]:24 = (#1 - #2), #2 // CHECK-LABEL: _Z5for_3bb: -bool for_3(bool a, bool b) {// MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:25 = M:0, C:3 +bool for_3(bool a, bool b) {// MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:25 = M:4, C:3 return a || false || b; // CHECK: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:11 = (#0 - #3), #3 } // CHECK: Branch,File 0, [[@LINE-1]]:15 -> 
[[@LINE-1]]:20 = 0, 0 // CHECK: Branch,File 0, [[@LINE-2]]:24 -> [[@LINE-2]]:25 = (#1 - #2), #2 // CHECK-LABEL: _Z5for_4bb: -bool for_4(bool a, bool b) {// MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:24 = M:0, C:3 +bool for_4(bool a, bool b) {// MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:24 = M:4, C:3 return a || b || true; // CHECK: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:11 = (#0 - #3), #3 } // CHECK: Branch,File 0, [[@LINE-1]]:15 -> [[@LINE-1]]:16 = (#3 - #4), #4 // CHECK: Branch,File 0, [[@LINE-2]]:20 -> [[@LINE-2]]:24 = 0, 0 // CHECK-LABEL: _Z5for_5b: -bool for_5(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:23 = M:0, C:2 +bool for_5(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:23 = M:3, C:2 return true || false; // CHECK: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:14 = 0, 0 } // CHECK: Branch,File 0, [[@LINE-1]]:18 -> [[@LINE-1]]:23 = 0, 0 // CHECK-LABEL: _Z5for_6b: -bool for_6(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:20 = M:0, C:2 +bool for_6(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:20 = M:3, C:2 return false || a; // CHECK: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:15 = 0, 0 } // CHECK: Branch,File 0, [[@LINE-1]]:19 -> [[@LINE-1]]:20 = (#1 - #2), #2 // CHECK-LABEL: _Z5for_7b: -bool for_7(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:19 = M:0, C:2 +bool for_7(bool a) { // MCDC: Decision,File 0, [[@LINE+1]]:10 -> [[@LINE+1]]:19 = M:3, C:2 return a || true; // CHECK: Branch,File 0, [[@LINE]]:10 -> [[@LINE]]:11 = (#0 - #1), #1 } // CHECK: Branch,File 0, [[@LINE-1]]:15 -> [[@LINE-1]]:19 = 0, 0 // CHECK-LABEL: _Z5for_8b: -bool for_8(bool a) { // MCDC: Decision,File 0, [[@LINE+3]]:7 -> [[@LINE+3]]:20 = M:0, C:2 +bool for_8(bool a) { // MCDC: Decision,File 0, [[@LINE+3]]:7 -> [[@LINE+3]]:20 = M:3, C:2 // CHECK: Branch,File 0, [[@LINE+2]]:7 -> [[@LINE+2]]:11 = 0, 0 // CHECK: Branch,File 0, [[@LINE+1]]:15 -> [[@LINE+1]]:20 = 0, 0 if (true && 
false) diff --git a/clang/test/CoverageMapping/logical.cpp b/clang/test/CoverageMapping/logical.cpp index 7de59e1429808a..2a22d6cca45189 100644 --- a/clang/test/CoverageMapping/logical.cpp +++ b/clang/test/CoverageMapping/logical.cpp @@ -3,22 +3,22 @@ int main() { // CHECK: File 0, [[@LINE]]:12 -> [[@LINE+23]]:2 = #0 bool bt = true; - bool bf = false; // MCDC: Decision,File 0, [[@LINE+1]]:12 -> [[@LINE+1]]:20 = M:0, C:2 + bool bf = false; // MCDC: Decision,File 0, [[@LINE+1]]:12 -> [[@LINE+1]]:20 = M:3, C:2 bool a = bt && bf; // CHECK-NEXT: File 0, [[@LINE]]:12 -> [[@LINE]]:14 = #0 // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:12 -> [[@LINE-1]]:14 = #1, (#0 - #1) // CHECK-NEXT: File 0, [[@LINE-2]]:18 -> [[@LINE-2]]:20 = #1 // CHECK-NEXT: Branch,File 0, [[@LINE-3]]:18 -> [[@LINE-3]]:20 = #2, (#1 - #2) - // MCDC: Decision,File 0, [[@LINE+1]]:7 -> [[@LINE+2]]:9 = M:1, C:2 + // MCDC: Decision,File 0, [[@LINE+1]]:7 -> [[@LINE+2]]:9 = M:6, C:2 a = bt && // CHECK-NEXT: File 0, [[@LINE]]:7 -> [[@LINE]]:9 = #0 bf; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:7 -> [[@LINE-1]]:9 = #3, (#0 - #3) // CHECK-NEXT: File 0, [[@LINE-1]]:7 -> [[@LINE-1]]:9 = #3 // CHECK-NEXT: Branch,File 0, [[@LINE-2]]:7 -> [[@LINE-2]]:9 = #4, (#3 - #4) - // MCDC: Decision,File 0, [[@LINE+1]]:7 -> [[@LINE+1]]:15 = M:2, C:2 + // MCDC: Decision,File 0, [[@LINE+1]]:7 -> [[@LINE+1]]:15 = M:9, C:2 a = bf || bt; // CHECK-NEXT: File 0, [[@LINE]]:7 -> [[@LINE]]:9 = #0 // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:7 -> [[@LINE-1]]:9 = (#0 - #5), #5 // CHECK-NEXT: File 0, [[@LINE-2]]:13 -> [[@LINE-2]]:15 = #5 // CHECK-NEXT: Branch,File 0, [[@LINE-3]]:13 -> [[@LINE-3]]:15 = (#5 - #6), #6 - // MCDC: Decision,File 0, [[@LINE+1]]:7 -> [[@LINE+2]]:9 = M:3, C:2 + // MCDC: Decision,File 0, [[@LINE+1]]:7 -> [[@LINE+2]]:9 = M:12, C:2 a = bf || // CHECK-NEXT: File 0, [[@LINE]]:7 -> [[@LINE]]:9 = #0 bt; // CHECK-NEXT: Branch,File 0, [[@LINE-1]]:7 -> [[@LINE-1]]:9 = (#0 - #7), #7 // CHECK-NEXT: File 0, [[@LINE-1]]:7 -> [[@LINE-1]]:9 
= #7 diff --git a/clang/test/CoverageMapping/mcdc-class.cpp b/clang/test/CoverageMapping/mcdc-class.cpp index dcf6123ee0fc74..7b2937830be766 100644 --- a/clang/test/CoverageMapping/mcdc-class.cpp +++ b/clang/test/CoverageMapping/mcdc-class.cpp @@ -23,9 +23,9 @@ Value::~Value(void) { bar(); } -// CHECK-LABEL: Decision,File 0, 18:7 -> 18:31 = M:0, C:2 +// CHECK-LABEL: Decision,File 0, 18:7 -> 18:31 = M:3, C:2 // CHECK-NEXT: Branch,File 0, 18:7 -> 18:17 = (#0 - #2), #2 [1,0,2] // CHECK: Branch,File 0, 18:21 -> 18:31 = (#2 - #3), #3 [2,0,0] -// CHECK-LABEL: Decision,File 0, 22:7 -> 22:31 = M:0, C:2 +// CHECK-LABEL: Decision,File 0, 22:7 -> 22:31 = M:3, C:2 // CHECK-NEXT: Branch,File 0, 22:7 -> 22:17 = (#0 - #2), #2 [1,0,2] // CHECK: Branch,File 0, 22:21 -> 22:31 = (#2 - #3), #3 [2,0,0] diff --git a/clang/test/CoverageMapping/mcdc-error-conditions.cpp b/clang/test/CoverageMapping/mcdc-error-conditions.cpp index d34ed693434795..8f5d6bd66897c9 100644 --- a/clang/test/CoverageMapping/mcdc-error-conditions.cpp +++ b/clang/test/CoverageMapping/mcdc-error-conditions.cpp @@ -1,7 +1,108 @@ -// RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++11 -fcoverage-mcdc -fprofile-instrument=clang -fcoverage-mapping -dump-coverage-mapping -emit-llvm-only %s 2>&1| FileCheck %s +// RUN: %clang_cc1 -triple %itanium_abi_triple -std=c++11 -fcoverage-mcdc -fprofile-instrument=clang -fcoverage-mapping -dump-coverage-mapping -emit-llvm-only %s 2>&1| FileCheck %s --check-prefixes=CHECK,COND7,TV + +// RUN: %clang_cc1 -fmcdc-max-test-vectors=8 -triple %itanium_abi_triple -std=c++11 -fcoverage-mcdc -fprofile-instrument=clang -fcoverage-mapping -dump-coverage-mapping -emit-llvm-only %s 2>&1| FileCheck %s --check-prefixes=CHECK,COND7,TV +// RUN: %clang_cc1 -fmcdc-max-test-vectors=7 -triple %itanium_abi_triple -std=c++11 -fcoverage-mcdc -fprofile-instrument=clang -fcoverage-mapping -dump-coverage-mapping -emit-llvm-only %s 2>&1| FileCheck %s --check-prefixes=CHECK,TV7,TV + +// RUN: %clang_cc1 
-fmcdc-max-conditions=287 -triple %itanium_abi_triple -std=c++11 -fcoverage-mcdc -fprofile-instrument=clang -fcoverage-mapping -dump-coverage-mapping -emit-llvm-only %s 2>&1| FileCheck %s --check-prefixes=CHECK,COND7,TV +// RUN: %clang_cc1 -fmcdc-max-conditions=286 -triple %itanium_abi_triple -std=c++11 -fcoverage-mcdc -fprofile-instrument=clang -fcoverage-mapping -dump-coverage-mapping -emit-llvm-only %s 2>&1| FileCheck %s --check-prefixes=CHECK,COND7,COND +// RUN: %clang_cc1 -fmcdc-max-conditions=7 -triple %itanium_abi_triple -std=c++11 -fcoverage-mcdc -fprofile-instrument=clang -fcoverage-mapping -dump-coverage-mapping -emit-llvm-only %s 2>&1| FileCheck %s --check-prefixes=CHECK,COND7,COND +// RUN: %clang_cc1 -fmcdc-max-conditions=6 -triple %itanium_abi_triple -std=c++11 -fcoverage-mcdc -fprofile-instrument=clang -fcoverage-mapping -dump-coverage-mapping -emit-llvm-only %s 2>&1| FileCheck %s --check-prefixes=CHECK,COND6,COND bool func_conditions(bool a, bool b, bool c, bool d, bool e, bool f, bool g) { + // TV7: :[[@LINE+2]]:10: warning: unsupported MC/DC boolean expression; number of test vectors (8) exceeds max + // COND6: :[[@LINE+1]]:10: warning: unsupported MC/DC boolean expression; number of conditions (7) exceeds max return a && b && c && d && e && f && g; } -// CHECK: warning: unsupported MC/DC boolean expression; number of conditions{{.*}} exceeds max +// From clang-tidy/misc/MisleadingIdentifier.cpp +bool func_isR(unsigned CP) { + // TV: :[[@LINE+2]]:10: warning: unsupported MC/DC boolean expression; number of test vectors (2147483647) exceeds max + // COND: :[[@LINE+1]]:10: warning: unsupported MC/DC boolean expression; number of conditions (287) exceeds max + return (CP == 0x0590) || (CP == 0x05BE) || (CP == 0x05C0) || (CP == 0x05C3) || + (CP == 0x05C6) || (0x05C8 <= CP && CP <= 0x05CF) || + (0x05D0 <= CP && CP <= 0x05EA) || (0x05EB <= CP && CP <= 0x05EE) || + (0x05EF <= CP && CP <= 0x05F2) || (0x05F3 <= CP && CP <= 0x05F4) || + (0x05F5 <= CP && CP 
<= 0x05FF) || (0x07C0 <= CP && CP <= 0x07C9) || + (0x07CA <= CP && CP <= 0x07EA) || (0x07F4 <= CP && CP <= 0x07F5) || + (CP == 0x07FA) || (0x07FB <= CP && CP <= 0x07FC) || + (0x07FE <= CP && CP <= 0x07FF) || (0x0800 <= CP && CP <= 0x0815) || + (CP == 0x081A) || (CP == 0x0824) || (CP == 0x0828) || + (0x082E <= CP && CP <= 0x082F) || (0x0830 <= CP && CP <= 0x083E) || + (CP == 0x083F) || (0x0840 <= CP && CP <= 0x0858) || + (0x085C <= CP && CP <= 0x085D) || (CP == 0x085E) || (CP == 0x085F) || + (CP == 0x200F) || (CP == 0xFB1D) || (0xFB1F <= CP && CP <= 0xFB28) || + (0xFB2A <= CP && CP <= 0xFB36) || (CP == 0xFB37) || + (0xFB38 <= CP && CP <= 0xFB3C) || (CP == 0xFB3D) || (CP == 0xFB3E) || + (CP == 0xFB3F) || (0xFB40 <= CP && CP <= 0xFB41) || (CP == 0xFB42) || + (0xFB43 <= CP && CP <= 0xFB44) || (CP == 0xFB45) || + (0xFB46 <= CP && CP <= 0xFB4F) || (0x10800 <= CP && CP <= 0x10805) || + (0x10806 <= CP && CP <= 0x10807) || (CP == 0x10808) || + (CP == 0x10809) || (0x1080A <= CP && CP <= 0x10835) || + (CP == 0x10836) || (0x10837 <= CP && CP <= 0x10838) || + (0x10839 <= CP && CP <= 0x1083B) || (CP == 0x1083C) || + (0x1083D <= CP && CP <= 0x1083E) || (0x1083F <= CP && CP <= 0x10855) || + (CP == 0x10856) || (CP == 0x10857) || + (0x10858 <= CP && CP <= 0x1085F) || (0x10860 <= CP && CP <= 0x10876) || + (0x10877 <= CP && CP <= 0x10878) || (0x10879 <= CP && CP <= 0x1087F) || + (0x10880 <= CP && CP <= 0x1089E) || (0x1089F <= CP && CP <= 0x108A6) || + (0x108A7 <= CP && CP <= 0x108AF) || (0x108B0 <= CP && CP <= 0x108DF) || + (0x108E0 <= CP && CP <= 0x108F2) || (CP == 0x108F3) || + (0x108F4 <= CP && CP <= 0x108F5) || (0x108F6 <= CP && CP <= 0x108FA) || + (0x108FB <= CP && CP <= 0x108FF) || (0x10900 <= CP && CP <= 0x10915) || + (0x10916 <= CP && CP <= 0x1091B) || (0x1091C <= CP && CP <= 0x1091E) || + (0x10920 <= CP && CP <= 0x10939) || (0x1093A <= CP && CP <= 0x1093E) || + (CP == 0x1093F) || (0x10940 <= CP && CP <= 0x1097F) || + (0x10980 <= CP && CP <= 0x109B7) || (0x109B8 <= CP && CP <= 
0x109BB) || + (0x109BC <= CP && CP <= 0x109BD) || (0x109BE <= CP && CP <= 0x109BF) || + (0x109C0 <= CP && CP <= 0x109CF) || (0x109D0 <= CP && CP <= 0x109D1) || + (0x109D2 <= CP && CP <= 0x109FF) || (CP == 0x10A00) || + (CP == 0x10A04) || (0x10A07 <= CP && CP <= 0x10A0B) || + (0x10A10 <= CP && CP <= 0x10A13) || (CP == 0x10A14) || + (0x10A15 <= CP && CP <= 0x10A17) || (CP == 0x10A18) || + (0x10A19 <= CP && CP <= 0x10A35) || (0x10A36 <= CP && CP <= 0x10A37) || + (0x10A3B <= CP && CP <= 0x10A3E) || (0x10A40 <= CP && CP <= 0x10A48) || + (0x10A49 <= CP && CP <= 0x10A4F) || (0x10A50 <= CP && CP <= 0x10A58) || + (0x10A59 <= CP && CP <= 0x10A5F) || (0x10A60 <= CP && CP <= 0x10A7C) || + (0x10A7D <= CP && CP <= 0x10A7E) || (CP == 0x10A7F) || + (0x10A80 <= CP && CP <= 0x10A9C) || (0x10A9D <= CP && CP <= 0x10A9F) || + (0x10AA0 <= CP && CP <= 0x10ABF) || (0x10AC0 <= CP && CP <= 0x10AC7) || + (CP == 0x10AC8) || (0x10AC9 <= CP && CP <= 0x10AE4) || + (0x10AE7 <= CP && CP <= 0x10AEA) || (0x10AEB <= CP && CP <= 0x10AEF) || + (0x10AF0 <= CP && CP <= 0x10AF6) || (0x10AF7 <= CP && CP <= 0x10AFF) || + (0x10B00 <= CP && CP <= 0x10B35) || (0x10B36 <= CP && CP <= 0x10B38) || + (0x10B40 <= CP && CP <= 0x10B55) || (0x10B56 <= CP && CP <= 0x10B57) || + (0x10B58 <= CP && CP <= 0x10B5F) || (0x10B60 <= CP && CP <= 0x10B72) || + (0x10B73 <= CP && CP <= 0x10B77) || (0x10B78 <= CP && CP <= 0x10B7F) || + (0x10B80 <= CP && CP <= 0x10B91) || (0x10B92 <= CP && CP <= 0x10B98) || + (0x10B99 <= CP && CP <= 0x10B9C) || (0x10B9D <= CP && CP <= 0x10BA8) || + (0x10BA9 <= CP && CP <= 0x10BAF) || (0x10BB0 <= CP && CP <= 0x10BFF) || + (0x10C00 <= CP && CP <= 0x10C48) || (0x10C49 <= CP && CP <= 0x10C7F) || + (0x10C80 <= CP && CP <= 0x10CB2) || (0x10CB3 <= CP && CP <= 0x10CBF) || + (0x10CC0 <= CP && CP <= 0x10CF2) || (0x10CF3 <= CP && CP <= 0x10CF9) || + (0x10CFA <= CP && CP <= 0x10CFF) || (0x10D40 <= CP && CP <= 0x10E5F) || + (CP == 0x10E7F) || (0x10E80 <= CP && CP <= 0x10EA9) || + (CP == 0x10EAA) || (CP == 
0x10EAD) || + (0x10EAE <= CP && CP <= 0x10EAF) || (0x10EB0 <= CP && CP <= 0x10EB1) || + (0x10EB2 <= CP && CP <= 0x10EFF) || (0x10F00 <= CP && CP <= 0x10F1C) || + (0x10F1D <= CP && CP <= 0x10F26) || (CP == 0x10F27) || + (0x10F28 <= CP && CP <= 0x10F2F) || (0x10F70 <= CP && CP <= 0x10F81) || + (0x10F86 <= CP && CP <= 0x10F89) || (0x10F8A <= CP && CP <= 0x10FAF) || + (0x10FB0 <= CP && CP <= 0x10FC4) || (0x10FC5 <= CP && CP <= 0x10FCB) || + (0x10FCC <= CP && CP <= 0x10FDF) || (0x10FE0 <= CP && CP <= 0x10FF6) || + (0x10FF7 <= CP && CP <= 0x10FFF) || (0x1E800 <= CP && CP <= 0x1E8C4) || + (0x1E8C5 <= CP && CP <= 0x1E8C6) || (0x1E8C7 <= CP && CP <= 0x1E8CF) || + (0x1E8D7 <= CP && CP <= 0x1E8FF) || (0x1E900 <= CP && CP <= 0x1E943) || + (CP == 0x1E94B) || (0x1E94C <= CP && CP <= 0x1E94F) || + (0x1E950 <= CP && CP <= 0x1E959) || (0x1E95A <= CP && CP <= 0x1E95D) || + (0x1E95E <= CP && CP <= 0x1E95F) || (0x1E960 <= CP && CP <= 0x1EC6F) || + (0x1ECC0 <= CP && CP <= 0x1ECFF) || (0x1ED50 <= CP && CP <= 0x1EDFF); +} + +// CHECK: _Z15func_conditionsbbbbbbb: +// TV8-NOT: Decision, +// COND6-NOT: Decision, +// COND7: Decision,File 0, {{[0-9]+}}:10 -> {{[0-9]+}}:41 = M:8, C:7 +// CHECK: _Z8func_isRj: +// CHECK-NOT: Decision, +// CHECK-NOT: Branch,{{.*}}] +// CHECK: Branch,File 0, [[@LINE-10]]:64 -> [[@LINE-10]]:77 = diff --git a/clang/test/CoverageMapping/mcdc-logical-scalar-ids.cpp b/clang/test/CoverageMapping/mcdc-logical-scalar-ids.cpp index c820b5df5ad3a9..0694f7dbc294aa 100644 --- a/clang/test/CoverageMapping/mcdc-logical-scalar-ids.cpp +++ b/clang/test/CoverageMapping/mcdc-logical-scalar-ids.cpp @@ -10,25 +10,25 @@ bool func_scalar_and(bool a, bool b, bool c, bool d, bool e, bool f) { return bar(res1, res2, res3, res4, res5); } -// CHECK-LABEL: Decision,File 0, 5:17 -> 5:23 = M:0, C:2 +// CHECK-LABEL: Decision,File 0, 5:17 -> 5:23 = M:3, C:2 // CHECK-NEXT: Branch,File 0, 5:17 -> 5:18 = #1, (#0 - #1) [1,2,0] // CHECK: Branch,File 0, 5:22 -> 5:23 = #2, (#1 - #2) [2,0,0] -// 
CHECK-LABEL: Decision,File 0, 6:17 -> 6:28 = M:1, C:3 +// CHECK-LABEL: Decision,File 0, 6:17 -> 6:28 = M:7, C:3 // CHECK-NEXT: Branch,File 0, 6:17 -> 6:18 = #5, (#0 - #5) [1,3,0] // CHECK: Branch,File 0, 6:22 -> 6:23 = #6, (#5 - #6) [3,2,0] // CHECK: Branch,File 0, 6:27 -> 6:28 = #4, (#3 - #4) [2,0,0] -// CHECK-LABEL: Decision,File 0, 7:17 -> 7:33 = M:2, C:4 +// CHECK-LABEL: Decision,File 0, 7:17 -> 7:33 = M:12, C:4 // CHECK-NEXT: Branch,File 0, 7:17 -> 7:18 = #11, (#0 - #11) [1,4,0] // CHECK: Branch,File 0, 7:22 -> 7:23 = #12, (#11 - #12) [4,3,0] // CHECK: Branch,File 0, 7:27 -> 7:28 = #10, (#9 - #10) [3,2,0] // CHECK: Branch,File 0, 7:32 -> 7:33 = #8, (#7 - #8) [2,0,0] -// CHECK-LABEL: Decision,File 0, 8:17 -> 8:38 = M:4, C:5 +// CHECK-LABEL: Decision,File 0, 8:17 -> 8:38 = M:18, C:5 // CHECK-NEXT: Branch,File 0, 8:17 -> 8:18 = #19, (#0 - #19) [1,5,0] // CHECK: Branch,File 0, 8:22 -> 8:23 = #20, (#19 - #20) [5,4,0] // CHECK: Branch,File 0, 8:27 -> 8:28 = #18, (#17 - #18) [4,3,0] // CHECK: Branch,File 0, 8:32 -> 8:33 = #16, (#15 - #16) [3,2,0] // CHECK: Branch,File 0, 8:37 -> 8:38 = #14, (#13 - #14) [2,0,0] -// CHECK-LABEL: Decision,File 0, 9:17 -> 9:43 = M:8, C:6 +// CHECK-LABEL: Decision,File 0, 9:17 -> 9:43 = M:25, C:6 // CHECK-NEXT: Branch,File 0, 9:17 -> 9:18 = #29, (#0 - #29) [1,6,0] // CHECK: Branch,File 0, 9:22 -> 9:23 = #30, (#29 - #30) [6,5,0] // CHECK: Branch,File 0, 9:27 -> 9:28 = #28, (#27 - #28) [5,4,0] @@ -45,25 +45,25 @@ bool func_scalar_or(bool a, bool b, bool c, bool d, bool e, bool f) { return bar(res1, res2, res3, res4, res5); } -// CHECK-LABEL: Decision,File 0, 40:17 -> 40:23 = M:0, C:2 +// CHECK-LABEL: Decision,File 0, 40:17 -> 40:23 = M:3, C:2 // CHECK-NEXT: Branch,File 0, 40:17 -> 40:18 = (#0 - #1), #1 [1,0,2] // CHECK: Branch,File 0, 40:22 -> 40:23 = (#1 - #2), #2 [2,0,0] -// CHECK-LABEL: Decision,File 0, 41:17 -> 41:28 = M:1, C:3 +// CHECK-LABEL: Decision,File 0, 41:17 -> 41:28 = M:7, C:3 // CHECK-NEXT: Branch,File 0, 41:17 -> 41:18 = (#0 
- #5), #5 [1,0,3] // CHECK: Branch,File 0, 41:22 -> 41:23 = (#5 - #6), #6 [3,0,2] // CHECK: Branch,File 0, 41:27 -> 41:28 = (#3 - #4), #4 [2,0,0] -// CHECK-LABEL: Decision,File 0, 42:17 -> 42:33 = M:2, C:4 +// CHECK-LABEL: Decision,File 0, 42:17 -> 42:33 = M:12, C:4 // CHECK-NEXT: Branch,File 0, 42:17 -> 42:18 = (#0 - #11), #11 [1,0,4] // CHECK: Branch,File 0, 42:22 -> 42:23 = (#11 - #12), #12 [4,0,3] // CHECK: Branch,File 0, 42:27 -> 42:28 = (#9 - #10), #10 [3,0,2] // CHECK: Branch,File 0, 42:32 -> 42:33 = (#7 - #8), #8 [2,0,0] -// CHECK-LABEL: Decision,File 0, 43:17 -> 43:38 = M:4, C:5 +// CHECK-LABEL: Decision,File 0, 43:17 -> 43:38 = M:18, C:5 // CHECK-NEXT: Branch,File 0, 43:17 -> 43:18 = (#0 - #19), #19 [1,0,5] // CHECK: Branch,File 0, 43:22 -> 43:23 = (#19 - #20), #20 [5,0,4] // CHECK: Branch,File 0, 43:27 -> 43:28 = (#17 - #18), #18 [4,0,3] // CHECK: Branch,File 0, 43:32 -> 43:33 = (#15 - #16), #16 [3,0,2] // CHECK: Branch,File 0, 43:37 -> 43:38 = (#13 - #14), #14 [2,0,0] -// CHECK-LABEL: Decision,File 0, 44:17 -> 44:43 = M:8, C:6 +// CHECK-LABEL: Decision,File 0, 44:17 -> 44:43 = M:25, C:6 // CHECK-NEXT: Branch,File 0, 44:17 -> 44:18 = (#0 - #29), #29 [1,0,6] // CHECK: Branch,File 0, 44:22 -> 44:23 = (#29 - #30), #30 [6,0,5] // CHECK: Branch,File 0, 44:27 -> 44:28 = (#27 - #28), #28 [5,0,4] @@ -81,26 +81,26 @@ bool func_scalar_mix(bool a, bool b, bool c, bool d, bool e, bool f) { return bar(res1, res2, res3, res4, res5); } -// CHECK-LABEL: Decision,File 0, 76:17 -> 76:23 = M:0, C:2 +// CHECK-LABEL: Decision,File 0, 76:17 -> 76:23 = M:3, C:2 // CHECK-NEXT: Branch,File 0, 76:17 -> 76:18 = (#0 - #1), #1 [1,0,2] // CHECK: Branch,File 0, 76:22 -> 76:23 = (#1 - #2), #2 [2,0,0] -// CHECK-LABEL: Decision,File 0, 77:17 -> 77:30 = M:1, C:3 +// CHECK-LABEL: Decision,File 0, 77:17 -> 77:30 = M:7, C:3 // CHECK-NEXT: Branch,File 0, 77:17 -> 77:18 = #3, (#0 - #3) [1,2,0] // CHECK: Branch,File 0, 77:23 -> 77:24 = (#3 - #4), #4 [2,0,3] // CHECK: Branch,File 0, 77:28 -> 
77:29 = (#4 - #5), #5 [3,0,0] -// CHECK-LABEL: Decision,File 0, 78:17 -> 78:37 = M:2, C:4 +// CHECK-LABEL: Decision,File 0, 78:17 -> 78:37 = M:14, C:4 // CHECK-NEXT: File 0 // CHECK-NEXT: Branch,File 0, 78:18 -> 78:19 = (#0 - #7), #7 [1,2,3] // CHECK: Branch,File 0, 78:23 -> 78:24 = (#7 - #8), #8 [3,2,0] // CHECK: Branch,File 0, 78:30 -> 78:31 = (#6 - #9), #9 [2,0,4] // CHECK: Branch,File 0, 78:35 -> 78:36 = (#9 - #10), #10 [4,0,0] -// CHECK-LABEL: Decision,File 0, 79:17 -> 79:42 = M:4, C:5 +// CHECK-LABEL: Decision,File 0, 79:17 -> 79:42 = M:22, C:5 // CHECK-NEXT: Branch,File 0, 79:17 -> 79:18 = #12, (#0 - #12) [1,3,0] // CHECK: Branch,File 0, 79:23 -> 79:24 = (#12 - #13), #13 [3,2,4] // CHECK: Branch,File 0, 79:28 -> 79:29 = (#13 - #14), #14 [4,2,0] // CHECK: Branch,File 0, 79:35 -> 79:36 = (#11 - #15), #15 [2,0,5] // CHECK: Branch,File 0, 79:40 -> 79:41 = (#15 - #16), #16 [5,0,0] -// CHECK-LABEL: Decision,File 0, 80:17 -> 80:49 = M:8, C:6 +// CHECK-LABEL: Decision,File 0, 80:17 -> 80:49 = M:37, C:6 // CHECK-NEXT: File 0 // CHECK-NEXT: Branch,File 0, 80:18 -> 80:19 = (#0 - #19), #19 [1,3,4] // CHECK: Branch,File 0, 80:23 -> 80:24 = (#19 - #20), #20 [4,3,0] diff --git a/clang/test/CoverageMapping/mcdc-logical-stmt-ids-all.cpp b/clang/test/CoverageMapping/mcdc-logical-stmt-ids-all.cpp index 6f47a4b901a8a7..d7436079d18105 100644 --- a/clang/test/CoverageMapping/mcdc-logical-stmt-ids-all.cpp +++ b/clang/test/CoverageMapping/mcdc-logical-stmt-ids-all.cpp @@ -6,7 +6,7 @@ bool func_if_and(bool a, bool b, bool c, bool d, bool e, bool f) { return false; } -// CHECK-LABEL: Decision,File 0, 4:7 -> 4:33 = M:0, C:6 +// CHECK-LABEL: Decision,File 0, 4:7 -> 4:33 = M:7, C:6 // CHECK-NEXT: Branch,File 0, 4:7 -> 4:8 = #10, (#0 - #10) [1,6,0] // CHECK: Branch,File 0, 4:12 -> 4:13 = #11, (#10 - #11) [6,5,0] // CHECK: Branch,File 0, 4:17 -> 4:18 = #9, (#8 - #9) [5,4,0] @@ -20,7 +20,7 @@ bool func_if_or(bool a, bool b, bool c, bool d, bool e, bool f) { return false; } -// CHECK-LABEL: 
Decision,File 0, 18:7 -> 18:33 = M:0, C:6 +// CHECK-LABEL: Decision,File 0, 18:7 -> 18:33 = M:7, C:6 // CHECK-NEXT: Branch,File 0, 18:7 -> 18:8 = (#0 - #10), #10 [1,0,6] // CHECK: Branch,File 0, 18:12 -> 18:13 = (#10 - #11), #11 [6,0,5] // CHECK: Branch,File 0, 18:17 -> 18:18 = (#8 - #9), #9 [5,0,4] @@ -33,7 +33,7 @@ bool func_while_and(bool a, bool b, bool c, bool d, bool e, bool f) { return false; } -// CHECK-LABEL: Decision,File 0, 32:10 -> 32:36 = M:0, C:6 +// CHECK-LABEL: Decision,File 0, 32:10 -> 32:36 = M:7, C:6 // CHECK-NEXT: Branch,File 0, 32:10 -> 32:11 = #10, (#0 - #10) [1,6,0] // CHECK: Branch,File 0, 32:15 -> 32:16 = #11, (#10 - #11) [6,5,0] // CHECK: Branch,File 0, 32:20 -> 32:21 = #9, (#8 - #9) [5,4,0] @@ -46,7 +46,7 @@ bool func_while_or(bool a, bool b, bool c, bool d, bool e, bool f) { return false; } -// CHECK-LABEL: Decision,File 0, 45:10 -> 45:36 = M:0, C:6 +// CHECK-LABEL: Decision,File 0, 45:10 -> 45:36 = M:7, C:6 // CHECK-NEXT: Branch,File 0, 45:10 -> 45:11 = (#0 - #10), #10 [1,0,6] // CHECK: Branch,File 0, 45:15 -> 45:16 = (#10 - #11), #11 [6,0,5] // CHECK: Branch,File 0, 45:20 -> 45:21 = (#8 - #9), #9 [5,0,4] @@ -59,7 +59,7 @@ bool func_for_and(bool a, bool b, bool c, bool d, bool e, bool f) { return false; } -// CHECK-LABEL: Decision,File 0, 58:9 -> 58:35 = M:0, C:6 +// CHECK-LABEL: Decision,File 0, 58:9 -> 58:35 = M:7, C:6 // CHECK-NEXT: Branch,File 0, 58:9 -> 58:10 = #10, (#0 - #10) [1,6,0] // CHECK: Branch,File 0, 58:14 -> 58:15 = #11, (#10 - #11) [6,5,0] // CHECK: Branch,File 0, 58:19 -> 58:20 = #9, (#8 - #9) [5,4,0] @@ -72,7 +72,7 @@ bool func_for_or(bool a, bool b, bool c, bool d, bool e, bool f) { return false; } -// CHECK-LABEL: Decision,File 0, 71:9 -> 71:35 = M:0, C:6 +// CHECK-LABEL: Decision,File 0, 71:9 -> 71:35 = M:7, C:6 // CHECK-NEXT: Branch,File 0, 71:9 -> 71:10 = (#0 - #10), #10 [1,0,6] // CHECK: Branch,File 0, 71:14 -> 71:15 = (#10 - #11), #11 [6,0,5] // CHECK: Branch,File 0, 71:19 -> 71:20 = (#8 - #9), #9 [5,0,4] @@ 
-85,7 +85,7 @@ bool func_do_and(bool a, bool b, bool c, bool d, bool e, bool f) { return false; } -// CHECK-LABEL: Decision,File 0, 84:16 -> 84:42 = M:0, C:6 +// CHECK-LABEL: Decision,File 0, 84:16 -> 84:42 = M:7, C:6 // CHECK-NEXT: Branch,File 0, 84:16 -> 84:17 = #10, ((#0 + #1) - #10) [1,6,0] // CHECK: Branch,File 0, 84:21 -> 84:22 = #11, (#10 - #11) [6,5,0] // CHECK: Branch,File 0, 84:26 -> 84:27 = #9, (#8 - #9) [5,4,0] @@ -98,7 +98,7 @@ bool func_do_or(bool a, bool b, bool c, bool d, bool e, bool f) { return false; } -// CHECK-LABEL: Decision,File 0, 97:16 -> 97:42 = M:0, C:6 +// CHECK-LABEL: Decision,File 0, 97:16 -> 97:42 = M:7, C:6 // CHECK-NEXT: Branch,File 0, 97:16 -> 97:17 = ((#0 + #1) - #10), #10 [1,0,6] // CHECK: Branch,File 0, 97:21 -> 97:22 = (#10 - #11), #11 [6,0,5] // CHECK: Branch,File 0, 97:26 -> 97:27 = (#8 - #9), #9 [5,0,4] @@ -110,7 +110,7 @@ bool func_ternary_and(bool a, bool b, bool c, bool d, bool e, bool f) { return (a && b && c && d && e && f) ? true : false; } -// CHECK-LABEL: Decision,File 0, 110:11 -> 110:37 = M:0, C:6 +// CHECK-LABEL: Decision,File 0, 110:11 -> 110:37 = M:7, C:6 // CHECK-NEXT: Branch,File 0, 110:11 -> 110:12 = #10, (#0 - #10) [1,6,0] // CHECK: Branch,File 0, 110:16 -> 110:17 = #11, (#10 - #11) [6,5,0] // CHECK: Branch,File 0, 110:21 -> 110:22 = #9, (#8 - #9) [5,4,0] @@ -122,7 +122,7 @@ bool func_ternary_or(bool a, bool b, bool c, bool d, bool e, bool f) { return (a || b || c || d || e || f) ? 
true : false; } -// CHECK-LABEL: Decision,File 0, 122:11 -> 122:37 = M:0, C:6 +// CHECK-LABEL: Decision,File 0, 122:11 -> 122:37 = M:7, C:6 // CHECK-NEXT: Branch,File 0, 122:11 -> 122:12 = (#0 - #10), #10 [1,0,6] // CHECK: Branch,File 0, 122:16 -> 122:17 = (#10 - #11), #11 [6,0,5] // CHECK: Branch,File 0, 122:21 -> 122:22 = (#8 - #9), #9 [5,0,4] @@ -137,7 +137,7 @@ bool func_if_nested_if(bool a, bool b, bool c, bool d, bool e) { return false; } -// CHECK-LABEL: Decision,File 0, 134:7 -> 134:30 = M:0, C:5 +// CHECK-LABEL: Decision,File 0, 134:7 -> 134:30 = M:8, C:5 // CHECK-NEXT: Branch,File 0, 134:7 -> 134:8 = (#0 - #6), #6 [1,0,4] // CHECK: Branch,File 0, 134:13 -> 134:14 = #7, (#6 - #7) [4,5,3] // CHECK: Branch,File 0, 134:18 -> 134:19 = #8, (#7 - #8) [5,0,3] @@ -148,7 +148,7 @@ bool func_ternary_nested_if(bool a, bool b, bool c, bool d, bool e) { return (a || (b && c) || d || e) ? true : false; } -// CHECK-LABEL: Decision,File 0, 148:11 -> 148:34 = M:0, C:5 +// CHECK-LABEL: Decision,File 0, 148:11 -> 148:34 = M:8, C:5 // CHECK-NEXT: Branch,File 0, 148:11 -> 148:12 = (#0 - #6), #6 [1,0,4] // CHECK: Branch,File 0, 148:17 -> 148:18 = #7, (#6 - #7) [4,5,3] // CHECK: Branch,File 0, 148:22 -> 148:23 = #8, (#7 - #8) [5,0,3] @@ -162,7 +162,7 @@ bool func_if_nested_if_2(bool a, bool b, bool c, bool d, bool e) { return false; } -// CHECK-LABEL: Decision,File 0, 159:7 -> 159:32 = M:0, C:5 +// CHECK-LABEL: Decision,File 0, 159:7 -> 159:32 = M:9, C:5 // CHECK-NEXT: Branch,File 0, 159:7 -> 159:8 = (#0 - #2), #2 [1,0,2] // CHECK: Branch,File 0, 159:14 -> 159:15 = #7, (#2 - #7) [2,5,4] // CHECK: Branch,File 0, 159:19 -> 159:20 = #8, (#7 - #8) [5,3,4] @@ -173,7 +173,7 @@ bool func_ternary_nested_if_2(bool a, bool b, bool c, bool d, bool e) { return (a || ((b && c) || d) && e) ? 
true : false; } -// CHECK-LABEL: Decision,File 0, 173:11 -> 173:36 = M:0, C:5 +// CHECK-LABEL: Decision,File 0, 173:11 -> 173:36 = M:9, C:5 // CHECK-NEXT: Branch,File 0, 173:11 -> 173:12 = (#0 - #2), #2 [1,0,2] // CHECK: Branch,File 0, 173:18 -> 173:19 = #7, (#2 - #7) [2,5,4] // CHECK: Branch,File 0, 173:23 -> 173:24 = #8, (#7 - #8) [5,3,4] @@ -187,7 +187,7 @@ bool func_if_nested_if_3(bool a, bool b, bool c, bool d, bool e, bool f) { return false; } -// CHECK-LABEL: Decision,File 0, 184:7 -> 184:39 = M:0, C:6 +// CHECK-LABEL: Decision,File 0, 184:7 -> 184:39 = M:12, C:6 // CHECK: Branch,File 0, 184:8 -> 184:9 = #5, (#0 - #5) [1,4,3] // CHECK: Branch,File 0, 184:14 -> 184:15 = (#5 - #6), #6 [4,2,5] // CHECK: Branch,File 0, 184:19 -> 184:20 = (#6 - #7), #7 [5,2,3] @@ -199,7 +199,7 @@ bool func_ternary_nested_if_3(bool a, bool b, bool c, bool d, bool e, bool f) { return ((a && (b || c) || (d && e)) && f) ? true : false; } -// CHECK-LABEL: Decision,File 0, 199:11 -> 199:43 = M:0, C:6 +// CHECK-LABEL: Decision,File 0, 199:11 -> 199:43 = M:12, C:6 // CHECK: Branch,File 0, 199:12 -> 199:13 = #5, (#0 - #5) [1,4,3] // CHECK: Branch,File 0, 199:18 -> 199:19 = (#5 - #6), #6 [4,2,5] // CHECK: Branch,File 0, 199:23 -> 199:24 = (#6 - #7), #7 [5,2,3] diff --git a/clang/test/CoverageMapping/mcdc-logical-stmt-ids.cpp b/clang/test/CoverageMapping/mcdc-logical-stmt-ids.cpp index 99854ec27a3fbd..655bbf25ee1031 100644 --- a/clang/test/CoverageMapping/mcdc-logical-stmt-ids.cpp +++ b/clang/test/CoverageMapping/mcdc-logical-stmt-ids.cpp @@ -10,25 +10,25 @@ bool func_if_and(bool a, bool b, bool c, bool d, bool e, bool f) { return false; } -// CHECK-LABEL: Decision,File 0, 4:7 -> 4:13 = M:0, C:2 +// CHECK-LABEL: Decision,File 0, 4:7 -> 4:13 = M:3, C:2 // CHECK-NEXT: Branch,File 0, 4:7 -> 4:8 = #2, (#0 - #2) [1,2,0] // CHECK: Branch,File 0, 4:12 -> 4:13 = #3, (#2 - #3) [2,0,0] -// CHECK-LABEL: Decision,File 0, 5:9 -> 5:20 = M:1, C:3 +// CHECK-LABEL: Decision,File 0, 5:9 -> 5:20 = M:7, C:3 // 
CHECK-NEXT: Branch,File 0, 5:9 -> 5:10 = #7, (#1 - #7) [1,3,0] // CHECK: Branch,File 0, 5:14 -> 5:15 = #8, (#7 - #8) [3,2,0] // CHECK: Branch,File 0, 5:19 -> 5:20 = #6, (#5 - #6) [2,0,0] -// CHECK-LABEL: Decision,File 0, 6:11 -> 6:27 = M:2, C:4 +// CHECK-LABEL: Decision,File 0, 6:11 -> 6:27 = M:12, C:4 // CHECK-NEXT: Branch,File 0, 6:11 -> 6:12 = #14, (#4 - #14) [1,4,0] // CHECK: Branch,File 0, 6:16 -> 6:17 = #15, (#14 - #15) [4,3,0] // CHECK: Branch,File 0, 6:21 -> 6:22 = #13, (#12 - #13) [3,2,0] // CHECK: Branch,File 0, 6:26 -> 6:27 = #11, (#10 - #11) [2,0,0] -// CHECK-LABEL: Decision,File 0, 7:13 -> 7:34 = M:4, C:5 +// CHECK-LABEL: Decision,File 0, 7:13 -> 7:34 = M:18, C:5 // CHECK-NEXT: Branch,File 0, 7:13 -> 7:14 = #23, (#9 - #23) [1,5,0] // CHECK: Branch,File 0, 7:18 -> 7:19 = #24, (#23 - #24) [5,4,0] // CHECK: Branch,File 0, 7:23 -> 7:24 = #22, (#21 - #22) [4,3,0] // CHECK: Branch,File 0, 7:28 -> 7:29 = #20, (#19 - #20) [3,2,0] // CHECK: Branch,File 0, 7:33 -> 7:34 = #18, (#17 - #18) [2,0,0] -// CHECK-LABEL: Decision,File 0, 8:16 -> 8:42 = M:8, C:6 +// CHECK-LABEL: Decision,File 0, 8:16 -> 8:42 = M:25, C:6 // CHECK-NEXT: Branch,File 0, 8:16 -> 8:17 = #34, (#16 - #34) [1,6,0] // CHECK: Branch,File 0, 8:21 -> 8:22 = #35, (#34 - #35) [6,5,0] // CHECK: Branch,File 0, 8:26 -> 8:27 = #33, (#32 - #33) [5,4,0] @@ -46,25 +46,25 @@ bool func_if_or(bool a, bool b, bool c, bool d, bool e, bool f) { return false; } -// CHECK-LABEL: Decision,File 0, 40:7 -> 40:13 = M:0, C:2 +// CHECK-LABEL: Decision,File 0, 40:7 -> 40:13 = M:3, C:2 // CHECK-NEXT: Branch,File 0, 40:7 -> 40:8 = (#0 - #2), #2 [1,0,2] // CHECK: Branch,File 0, 40:12 -> 40:13 = (#2 - #3), #3 [2,0,0] -// CHECK-LABEL: Decision,File 0, 41:9 -> 41:20 = M:1, C:3 +// CHECK-LABEL: Decision,File 0, 41:9 -> 41:20 = M:7, C:3 // CHECK-NEXT: Branch,File 0, 41:9 -> 41:10 = (#1 - #7), #7 [1,0,3] // CHECK: Branch,File 0, 41:14 -> 41:15 = (#7 - #8), #8 [3,0,2] // CHECK: Branch,File 0, 41:19 -> 41:20 = (#5 - #6), #6 [2,0,0] -// 
CHECK-LABEL: Decision,File 0, 42:11 -> 42:27 = M:2, C:4 +// CHECK-LABEL: Decision,File 0, 42:11 -> 42:27 = M:12, C:4 // CHECK-NEXT: Branch,File 0, 42:11 -> 42:12 = (#4 - #14), #14 [1,0,4] // CHECK: Branch,File 0, 42:16 -> 42:17 = (#14 - #15), #15 [4,0,3] // CHECK: Branch,File 0, 42:21 -> 42:22 = (#12 - #13), #13 [3,0,2] // CHECK: Branch,File 0, 42:26 -> 42:27 = (#10 - #11), #11 [2,0,0] -// CHECK-LABEL: Decision,File 0, 43:13 -> 43:34 = M:4, C:5 +// CHECK-LABEL: Decision,File 0, 43:13 -> 43:34 = M:18, C:5 // CHECK-NEXT: Branch,File 0, 43:13 -> 43:14 = (#9 - #23), #23 [1,0,5] // CHECK: Branch,File 0, 43:18 -> 43:19 = (#23 - #24), #24 [5,0,4] // CHECK: Branch,File 0, 43:23 -> 43:24 = (#21 - #22), #22 [4,0,3] // CHECK: Branch,File 0, 43:28 -> 43:29 = (#19 - #20), #20 [3,0,2] // CHECK: Branch,File 0, 43:33 -> 43:34 = (#17 - #18), #18 [2,0,0] -// CHECK-LABEL: Decision,File 0, 44:16 -> 44:42 = M:8, C:6 +// CHECK-LABEL: Decision,File 0, 44:16 -> 44:42 = M:25, C:6 // CHECK-NEXT: Branch,File 0, 44:16 -> 44:17 = (#16 - #34), #34 [1,0,6] // CHECK: Branch,File 0, 44:21 -> 44:22 = (#34 - #35), #35 [6,0,5] // CHECK: Branch,File 0, 44:26 -> 44:27 = (#32 - #33), #33 [5,0,4] @@ -82,26 +82,26 @@ bool func_if_mix(bool a, bool b, bool c, bool d, bool e, bool f) { return false; } -// CHECK-LABEL: Decision,File 0, 76:7 -> 76:13 = M:0, C:2 +// CHECK-LABEL: Decision,File 0, 76:7 -> 76:13 = M:3, C:2 // CHECK-NEXT: Branch,File 0, 76:7 -> 76:8 = (#0 - #2), #2 [1,0,2] // CHECK: Branch,File 0, 76:12 -> 76:13 = (#2 - #3), #3 [2,0,0] -// CHECK-LABEL: Decision,File 0, 77:9 -> 77:22 = M:1, C:3 +// CHECK-LABEL: Decision,File 0, 77:9 -> 77:22 = M:7, C:3 // CHECK-NEXT: Branch,File 0, 77:9 -> 77:10 = #5, (#1 - #5) [1,2,0] // CHECK: Branch,File 0, 77:15 -> 77:16 = (#5 - #6), #6 [2,0,3] // CHECK: Branch,File 0, 77:20 -> 77:21 = (#6 - #7), #7 [3,0,0] -// CHECK-LABEL: Decision,File 0, 78:11 -> 78:31 = M:2, C:4 +// CHECK-LABEL: Decision,File 0, 78:11 -> 78:31 = M:14, C:4 // CHECK-NEXT: File 0 // CHECK-NEXT: 
Branch,File 0, 78:12 -> 78:13 = (#4 - #10), #10 [1,2,3] // CHECK: Branch,File 0, 78:17 -> 78:18 = (#10 - #11), #11 [3,2,0] // CHECK: Branch,File 0, 78:24 -> 78:25 = (#9 - #12), #12 [2,0,4] // CHECK: Branch,File 0, 78:29 -> 78:30 = (#12 - #13), #13 [4,0,0] -// CHECK-LABEL: Decision,File 0, 79:13 -> 79:38 = M:4, C:5 +// CHECK-LABEL: Decision,File 0, 79:13 -> 79:38 = M:22, C:5 // CHECK-NEXT: Branch,File 0, 79:13 -> 79:14 = #16, (#8 - #16) [1,3,0] // CHECK: Branch,File 0, 79:19 -> 79:20 = (#16 - #17), #17 [3,2,4] // CHECK: Branch,File 0, 79:24 -> 79:25 = (#17 - #18), #18 [4,2,0] // CHECK: Branch,File 0, 79:31 -> 79:32 = (#15 - #19), #19 [2,0,5] // CHECK: Branch,File 0, 79:36 -> 79:37 = (#19 - #20), #20 [5,0,0] -// CHECK-LABEL: Decision,File 0, 80:15 -> 80:47 = M:8, C:6 +// CHECK-LABEL: Decision,File 0, 80:15 -> 80:47 = M:37, C:6 // CHECK-NEXT: File 0 // CHECK-NEXT: Branch,File 0, 80:16 -> 80:17 = (#14 - #24), #24 [1,3,4] // CHECK: Branch,File 0, 80:21 -> 80:22 = (#24 - #25), #25 [4,3,0] diff --git a/clang/test/CoverageMapping/mcdc-scratch-space.c b/clang/test/CoverageMapping/mcdc-scratch-space.c index 2b5b12d9dcad65..a263e9b688faed 100644 --- a/clang/test/CoverageMapping/mcdc-scratch-space.c +++ b/clang/test/CoverageMapping/mcdc-scratch-space.c @@ -2,14 +2,14 @@ // CHECK: builtin_macro0: int builtin_macro0(int a) { - // CHECK: Decision,File 0, [[@LINE+1]]:11 -> [[@LINE+2]]:15 = M:0, C:2 + // CHECK: Decision,File 0, [[@LINE+1]]:11 -> [[@LINE+2]]:15 = M:3, C:2 return (__LINE__ // CHECK: Branch,File 0, [[@LINE]]:11 -> [[@LINE]]:11 = 0, 0 [1,2,0] && a); // CHECK: Branch,File 0, [[@LINE]]:14 -> [[@LINE]]:15 = #2, (#1 - #2) [2,0,0] } // CHECK: builtin_macro1: int builtin_macro1(int a) { - // CHECK: Decision,File 0, [[@LINE+1]]:11 -> [[@LINE+2]]:22 = M:0, C:2 + // CHECK: Decision,File 0, [[@LINE+1]]:11 -> [[@LINE+2]]:22 = M:3, C:2 return (a // CHECK: Branch,File 0, [[@LINE]]:11 -> [[@LINE]]:12 = (#0 - #1), #1 [1,0,2] || __LINE__); // CHECK: Branch,File 0, [[@LINE]]:14 -> 
[[@LINE]]:14 = 0, 0 [2,0,0] } @@ -18,7 +18,7 @@ int builtin_macro1(int a) { // CHECK: pre0: int pre0(int pre_a, int b_post) { - // CHECK: Decision,File 0, [[@LINE+2]]:11 -> [[@LINE+3]]:20 = M:0, C:2 + // CHECK: Decision,File 0, [[@LINE+2]]:11 -> [[@LINE+3]]:20 = M:3, C:2 // CHECK: Expansion,File 0, [[@LINE+1]]:11 -> [[@LINE+1]]:14 = #0 (Expanded file = 1) return (PRE(a) && b_post); @@ -30,7 +30,7 @@ int pre0(int pre_a, int b_post) { // CHECK: pre1: int pre1(int pre_a, int b_post) { - // CHECK: Decision,File 0, [[@LINE+3]]:11 -> [[@LINE+4]]:20 = M:0, C:2 + // CHECK: Decision,File 0, [[@LINE+3]]:11 -> [[@LINE+4]]:20 = M:3, C:2 // CHECK: Expansion,File 0, [[@LINE+2]]:11 -> [[@LINE+2]]:14 = #0 (Expanded file = 1) // CHECK: Branch,File 0, [[@LINE+2]]:14 -> [[@LINE+2]]:20 = #2, (#1 - #2) [2,0,0] return (PRE(foo) @@ -43,7 +43,7 @@ int pre1(int pre_a, int b_post) { // CHECK: post0: int post0(int pre_a, int b_post) { - // CHECK: Decision,File 0, [[@LINE+2]]:11 -> [[@LINE+3]]:18 = M:0, C:2 + // CHECK: Decision,File 0, [[@LINE+2]]:11 -> [[@LINE+3]]:18 = M:3, C:2 // CHECK: Branch,File 0, [[@LINE+1]]:11 -> [[@LINE+1]]:16 = (#0 - #1), #1 [1,0,2] return (pre_a || POST(b)); @@ -55,7 +55,7 @@ int post0(int pre_a, int b_post) { // CHECK: post1: int post1(int pre_a, int b_post) { - // CHECK: Decision,File 0, [[@LINE+3]]:11 -> [[@LINE+4]]:18 = M:0, C:2 + // CHECK: Decision,File 0, [[@LINE+3]]:11 -> [[@LINE+4]]:18 = M:3, C:2 // CHECK: Branch,File 0, [[@LINE+2]]:11 -> [[@LINE+2]]:16 = (#0 - #1), #1 [1,0,2] // CHECK: Expansion,File 0, [[@LINE+2]]:14 -> [[@LINE+2]]:18 = 0 (Expanded file = 1) return (pre_a diff --git a/clang/test/CoverageMapping/mcdc-system-headers.cpp b/clang/test/CoverageMapping/mcdc-system-headers.cpp index 4dfbb17c2bba81..ae26ed5fe469f2 100644 --- a/clang/test/CoverageMapping/mcdc-system-headers.cpp +++ b/clang/test/CoverageMapping/mcdc-system-headers.cpp @@ -15,7 +15,7 @@ // CHECK: _Z5func0i: int func0(int a) { - // CHECK: Decision,File 0, [[@LINE+3]]:11 -> 
[[@LINE+3]]:21 = M:0, C:2 + // CHECK: Decision,File 0, [[@LINE+3]]:11 -> [[@LINE+3]]:21 = M:3, C:2 // W_SYS: Expansion,File 0, [[@LINE+2]]:11 -> [[@LINE+2]]:16 = #0 (Expanded file = 1) // X_SYS: Branch,File 0, [[@LINE+1]]:11 -> [[@LINE+1]]:11 = 0, 0 [1,2,0] return (CONST && a); @@ -25,7 +25,7 @@ int func0(int a) { // CHECK: _Z5func1ii: int func1(int a, int b) { - // CHECK: Decision,File 0, [[@LINE+2]]:11 -> [[@LINE+2]]:21 = M:0, C:2 + // CHECK: Decision,File 0, [[@LINE+2]]:11 -> [[@LINE+2]]:21 = M:3, C:2 // CHECK: Branch,File 0, [[@LINE+1]]:11 -> [[@LINE+1]]:12 = (#0 - #1), #1 [1,0,2] return (a || EXPR1(b)); // W_SYS: Expansion,File 0, [[@LINE-1]]:16 -> [[@LINE-1]]:21 = #1 (Expanded file = 1) @@ -35,8 +35,8 @@ int func1(int a, int b) { // CHECK: _Z5func2ii: int func2(int a, int b) { - // W_SYS: Decision,File 0, [[@LINE+5]]:11 -> [[@LINE+5]]:28 = M:0, C:3 - // X_SYS: Decision,File 0, [[@LINE+4]]:11 -> [[@LINE+4]]:28 = M:0, C:2 + // W_SYS: Decision,File 0, [[@LINE+5]]:11 -> [[@LINE+5]]:28 = M:4, C:3 + // X_SYS: Decision,File 0, [[@LINE+4]]:11 -> [[@LINE+4]]:28 = M:3, C:2 // W_SYS: Expansion,File 0, [[@LINE+3]]:11 -> [[@LINE+3]]:16 = #0 (Expanded file = 1) // W_SYS: Expansion,File 0, [[@LINE+2]]:23 -> [[@LINE+2]]:28 = #1 (Expanded file = 2) // X_SYS: Branch,File 0, [[@LINE+1]]:11 -> [[@LINE+1]]:11 = #1, (#0 - #1) [1,2,0] diff --git a/clang/test/Profile/c-mcdc-class.cpp b/clang/test/Profile/c-mcdc-class.cpp index 6aab55add32807..748344194ef86d 100644 --- a/clang/test/Profile/c-mcdc-class.cpp +++ b/clang/test/Profile/c-mcdc-class.cpp @@ -36,23 +36,24 @@ Value::~Value(void) { // SHIFT FIRST CONDITION WITH ID = 0. 
// MCDCCTOR: %[[LAB1:[0-9]+]] = load i32, ptr %value, align 4 // MCDCCTOR-DAG: %[[BOOL:cmp[0-9]*]] = icmp ne i32 %[[LAB1]], 2 -// MCDCCTOR-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 -// MCDCCTOR-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[BOOL]] to i32 -// MCDCCTOR-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 0 -// MCDCCTOR-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] +// MCDCCTOR-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 +// MCDCCTOR-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 0 +// MCDCCTOR-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 0 +// MCDCCTOR-DAG: %[[LAB4:[0-9]+]] = select i1 %[[BOOL]], i32 %[[LAB2]], i32 %[[LAB3]] // MCDCCTOR-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // SHIFT SECOND CONDITION WITH ID = 1. // MCDCCTOR: %[[LAB1:[0-9]+]] = load i32, ptr %value2, align 4 // MCDCCTOR-DAG: %[[BOOL:cmp[0-9]*]] = icmp ne i32 %[[LAB1]], 6 -// MCDCCTOR-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 -// MCDCCTOR-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[BOOL]] to i32 -// MCDCCTOR-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 1 -// MCDCCTOR-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] +// MCDCCTOR-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 +// MCDCCTOR-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 2 +// MCDCCTOR-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 1 +// MCDCCTOR-DAG: %[[LAB4:[0-9]+]] = select i1 %[[BOOL]], i32 %[[LAB2]], i32 %[[LAB3]] // MCDCCTOR-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // UPDATE FINAL BITMASK WITH RESULT. 
-// MCDCCTOR-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDCCTOR-DAG: %[[TEMP0:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDCCTOR: %[[TEMP:[0-9]+]] = add i32 %[[TEMP0]], 0 // MCDCCTOR: %[[LAB1:[0-9]+]] = lshr i32 %[[TEMP]], 3 // MCDCCTOR: %[[LAB4:[0-9]+]] = getelementptr inbounds i8, ptr @__profbm__ZN5ValueC2Ev, i32 %[[LAB1]] // MCDCCTOR: %[[LAB5:[0-9]+]] = and i32 %[[TEMP]], 7 @@ -73,23 +74,24 @@ Value::~Value(void) { // SHIFT FIRST CONDITION WITH ID = 0. // MCDCDTOR: %[[LAB1:[0-9]+]] = load i32, ptr %value, align 4 // MCDCDTOR-DAG: %[[BOOL:cmp[0-9]*]] = icmp ne i32 %[[LAB1]], 2 -// MCDCDTOR-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 -// MCDCDTOR-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[BOOL]] to i32 -// MCDCDTOR-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 0 -// MCDCDTOR-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] +// MCDCDTOR-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 +// MCDCDTOR-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 0 +// MCDCDTOR-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 0 +// MCDCDTOR-DAG: %[[LAB4:[0-9]+]] = select i1 %[[BOOL]], i32 %[[LAB2]], i32 %[[LAB3]] // MCDCDTOR-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // SHIFT SECOND CONDITION WITH ID = 1. 
// MCDCDTOR: %[[LAB1:[0-9]+]] = load i32, ptr %value2, align 4 // MCDCDTOR-DAG: %[[BOOL:cmp[0-9]*]] = icmp ne i32 %[[LAB1]], 3 -// MCDCDTOR-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 -// MCDCDTOR-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[BOOL]] to i32 -// MCDCDTOR-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 1 -// MCDCDTOR-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] +// MCDCDTOR-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 +// MCDCDTOR-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 2 +// MCDCDTOR-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 1 +// MCDCDTOR-DAG: %[[LAB4:[0-9]+]] = select i1 %[[BOOL]], i32 %[[LAB2]], i32 %[[LAB3]] // MCDCDTOR-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // UPDATE FINAL BITMASK WITH RESULT. -// MCDCDTOR-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDCDTOR-DAG: %[[TEMP0:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDCDTOR: %[[TEMP:[0-9]+]] = add i32 %[[TEMP0]], 0 // MCDCDTOR: %[[LAB1:[0-9]+]] = lshr i32 %[[TEMP]], 3 // MCDCDTOR: %[[LAB4:[0-9]+]] = getelementptr inbounds i8, ptr @__profbm__ZN5ValueD2Ev, i32 %[[LAB1]] // MCDCDTOR: %[[LAB5:[0-9]+]] = and i32 %[[TEMP]], 7 diff --git a/clang/test/Profile/c-mcdc-logicalop-ternary.c b/clang/test/Profile/c-mcdc-logicalop-ternary.c index 3e6b6b1e380d1e..91174befb5fe54 100644 --- a/clang/test/Profile/c-mcdc-logicalop-ternary.c +++ b/clang/test/Profile/c-mcdc-logicalop-ternary.c @@ -9,7 +9,7 @@ int test(int a, int b, int c, int d, int e, int f) { // NOMCDC-NOT: __profbm_test // MCDC BOOKKEEPING. -// MCDC: @__profbm_test = private global [3 x i8] zeroinitializer +// MCDC: @__profbm_test = private global [2 x i8] zeroinitializer // ALLOCATE MCDC TEMP AND ZERO IT. // MCDC-LABEL: @test( @@ -18,7 +18,8 @@ int test(int a, int b, int c, int d, int e, int f) { // TERNARY TRUE SHOULD UPDATE THE BITMAP WITH RESULT AT ELEMENT 0. 
// MCDC-LABEL: cond.true: -// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[TEMP0:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC: %[[TEMP:[0-9]+]] = add i32 %[[TEMP0]], 0 // MCDC: %[[LAB1:[0-9]+]] = lshr i32 %[[TEMP]], 3 // MCDC: %[[LAB4:[0-9]+]] = getelementptr inbounds i8, ptr @__profbm_test, i32 %[[LAB1]] // MCDC: %[[LAB5:[0-9]+]] = and i32 %[[TEMP]], 7 @@ -34,9 +35,10 @@ int test(int a, int b, int c, int d, int e, int f) { // TERNARY TRUE YIELDS TERNARY LHS LOGICAL-AND. // TERNARY LHS LOGICAL-AND SHOULD UPDATE THE BITMAP WITH RESULT AT ELEMENT 1. // MCDC-LABEL: land.end: -// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[TEMP0:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC: %[[TEMP:[0-9]+]] = add i32 %[[TEMP0]], 3 // MCDC: %[[LAB1:[0-9]+]] = lshr i32 %[[TEMP]], 3 -// MCDC: %[[LAB4:[0-9]+]] = getelementptr inbounds i8, ptr getelementptr inbounds ([3 x i8], ptr @__profbm_test, i32 0, i32 1), i32 %[[LAB1]] +// MCDC: %[[LAB4:[0-9]+]] = getelementptr inbounds i8, ptr @__profbm_test, i32 %[[LAB1]] // MCDC: %[[LAB5:[0-9]+]] = and i32 %[[TEMP]], 7 // MCDC: %[[LAB6:[0-9]+]] = trunc i32 %[[LAB5]] to i8 // MCDC: %[[LAB7:[0-9]+]] = shl i8 1, %[[LAB6]] @@ -46,7 +48,8 @@ int test(int a, int b, int c, int d, int e, int f) { // TERNARY FALSE SHOULD UPDATE THE BITMAP WITH RESULT AT ELEMENT 0. // MCDC-LABEL: cond.false: -// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[TEMP0:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC: %[[TEMP:[0-9]+]] = add i32 %[[TEMP0]], 0 // MCDC: %[[LAB1:[0-9]+]] = lshr i32 %[[TEMP]], 3 // MCDC: %[[LAB4:[0-9]+]] = getelementptr inbounds i8, ptr @__profbm_test, i32 %[[LAB1]] // MCDC: %[[LAB5:[0-9]+]] = and i32 %[[TEMP]], 7 @@ -62,9 +65,10 @@ int test(int a, int b, int c, int d, int e, int f) { // TERNARY FALSE YIELDS TERNARY RHS LOGICAL-OR. 
// TERNARY RHS LOGICAL-OR SHOULD UPDATE THE BITMAP WITH RESULT AT ELEMENT 2. // MCDC-LABEL: lor.end: -// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[TEMP0:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC: %[[TEMP:[0-9]+]] = add i32 %[[TEMP0]], 6 // MCDC: %[[LAB1:[0-9]+]] = lshr i32 %[[TEMP]], 3 -// MCDC: %[[LAB4:[0-9]+]] = getelementptr inbounds i8, ptr getelementptr inbounds ([3 x i8], ptr @__profbm_test, i32 0, i32 2), i32 %[[LAB1]] +// MCDC: %[[LAB4:[0-9]+]] = getelementptr inbounds i8, ptr @__profbm_test, i32 %[[LAB1]] // MCDC: %[[LAB5:[0-9]+]] = and i32 %[[TEMP]], 7 // MCDC: %[[LAB6:[0-9]+]] = trunc i32 %[[LAB5]] to i8 // MCDC: %[[LAB7:[0-9]+]] = shl i8 1, %[[LAB6]] diff --git a/clang/test/Profile/c-mcdc-nested-ternary.c b/clang/test/Profile/c-mcdc-nested-ternary.c index ebea17ca146ae6..8576ba39e583b4 100644 --- a/clang/test/Profile/c-mcdc-nested-ternary.c +++ b/clang/test/Profile/c-mcdc-nested-ternary.c @@ -20,42 +20,43 @@ int test(int b, int c, int d, int e, int f) { // MCDC-LABEL: cond.true: // MCDC: %[[LAB1:[0-9]+]] = load i32, ptr %c.addr, align 4 // MCDC-DAG: %[[BOOL:tobool[0-9]*]] = icmp ne i32 %[[LAB1]], 0 -// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[BOOL]] to i32 -// MCDC-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 0 -// MCDC-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] +// MCDC-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 0 +// MCDC-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 0 +// MCDC-DAG: %[[LAB4:[0-9]+]] = select i1 %[[BOOL]], i32 %[[LAB2]], i32 %[[LAB3]] // MCDC-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // TERNARY FALSE SHOULD SHIFT ID = 0 FOR CONDITION 'd'. 
// MCDC-LABEL: cond.false: // MCDC: %[[LAB1:[0-9]+]] = load i32, ptr %d.addr, align 4 // MCDC-DAG: %[[BOOL:tobool[0-9]*]] = icmp ne i32 %[[LAB1]], 0 -// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[BOOL]] to i32 -// MCDC-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 0 -// MCDC-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] +// MCDC-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 0 +// MCDC-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 0 +// MCDC-DAG: %[[LAB4:[0-9]+]] = select i1 %[[BOOL]], i32 %[[LAB2]], i32 %[[LAB3]] // MCDC-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // SHIFT SECOND CONDITION WITH ID = 2. // MCDC: %[[LAB1:[0-9]+]] = load i32, ptr %e.addr, align 4 // MCDC-DAG: %[[BOOL:tobool[0-9]*]] = icmp ne i32 %[[LAB1]], 0 -// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[BOOL]] to i32 -// MCDC-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 2 -// MCDC-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] +// MCDC-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 0 +// MCDC-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 1 +// MCDC-DAG: %[[LAB4:[0-9]+]] = select i1 %[[BOOL]], i32 %[[LAB2]], i32 %[[LAB3]] // MCDC-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // SHIFT THIRD CONDITION WITH ID = 1. 
// MCDC: %[[LAB1:[0-9]+]] = load i32, ptr %f.addr, align 4 // MCDC-DAG: %[[BOOL:tobool[0-9]*]] = icmp ne i32 %[[LAB1]], 0 -// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[BOOL]] to i32 -// MCDC-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 1 -// MCDC-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] +// MCDC-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 3 +// MCDC-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 2 +// MCDC-DAG: %[[LAB4:[0-9]+]] = select i1 %[[BOOL]], i32 %[[LAB2]], i32 %[[LAB3]] // MCDC-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // UPDATE FINAL BITMASK WITH RESULT. -// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[TEMP0:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC: %[[TEMP:[0-9]+]] = add i32 %[[TEMP0]], 0 // MCDC: %[[LAB1:[0-9]+]] = lshr i32 %[[TEMP]], 3 // MCDC: %[[LAB4:[0-9]+]] = getelementptr inbounds i8, ptr @__profbm_test, i32 %[[LAB1]] // MCDC: %[[LAB5:[0-9]+]] = and i32 %[[TEMP]], 7 diff --git a/clang/test/Profile/c-mcdc-not.c b/clang/test/Profile/c-mcdc-not.c index 165bfbae3349da..ae683c3fe40fe7 100644 --- a/clang/test/Profile/c-mcdc-not.c +++ b/clang/test/Profile/c-mcdc-not.c @@ -9,7 +9,7 @@ int test(int a, int b, int c, int d, int e, int f) { // NOMCDC-NOT: __profbm_test // MCDC BOOKKEEPING. -// MCDC: @__profbm_test = private global [8 x i8] zeroinitializer +// MCDC: @__profbm_test = private global [2 x i8] zeroinitializer // MCDC: @__profc_test = private global [9 x i64] zeroinitializer // ALLOCATE MCDC TEMP AND ZERO IT. 
@@ -21,61 +21,62 @@ int test(int a, int b, int c, int d, int e, int f) { // MCDC: %[[LAB1:[0-9]+]] = load i32, ptr %a.addr, align 4 // MCDC-DAG: %[[BOOL:tobool[0-9]*]] = icmp ne i32 %[[LAB1]], 0 // MCDC-DAG: %[[LNOT:lnot[0-9]*]] = xor i1 %[[BOOL]] -// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[LNOT]] to i32 -// MCDC-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 0 -// MCDC-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] +// MCDC-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 0 +// MCDC-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 0 +// MCDC-DAG: %[[LAB4:[0-9]+]] = select i1 %[[LNOT]], i32 %[[LAB2]], i32 %[[LAB3]] // MCDC-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // SHIFT SECOND CONDITION WITH ID = 2. // MCDC: %[[LAB1:[0-9]+]] = load i32, ptr %b.addr, align 4 // MCDC-DAG: %[[BOOL:tobool[0-9]*]] = icmp ne i32 %[[LAB1]], 0 -// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[BOOL]] to i32 -// MCDC-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 2 -// MCDC-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] +// MCDC-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 14 +// MCDC-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 1 +// MCDC-DAG: %[[LAB4:[0-9]+]] = select i1 %[[BOOL]], i32 %[[LAB2]], i32 %[[LAB3]] // MCDC-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // SHIFT THIRD CONDITION WITH ID = 1. 
// MCDC: %[[LAB1:[0-9]+]] = load i32, ptr %c.addr, align 4 // MCDC-DAG: %[[BOOL:tobool[0-9]*]] = icmp ne i32 %[[LAB1]], 0 // MCDC-DAG: %[[LNOT:lnot[0-9]*]] = xor i1 %[[BOOL]] -// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[LNOT]] to i32 -// MCDC-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 1 -// MCDC-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] +// MCDC-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 0 +// MCDC-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 0 +// MCDC-DAG: %[[LAB4:[0-9]+]] = select i1 %[[LNOT]], i32 %[[LAB2]], i32 %[[LAB3]] // MCDC-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // SHIFT FOURTH CONDITION WITH ID = 4. // MCDC: %[[LAB1:[0-9]+]] = load i32, ptr %d.addr, align 4 // MCDC-DAG: %[[BOOL:tobool[0-9]*]] = icmp ne i32 %[[LAB1]], 0 -// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[BOOL]] to i32 -// MCDC-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 4 -// MCDC-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] +// MCDC-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 12 +// MCDC-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 2 +// MCDC-DAG: %[[LAB4:[0-9]+]] = select i1 %[[BOOL]], i32 %[[LAB2]], i32 %[[LAB3]] // MCDC-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // SHIFT FIFTH CONDITION WITH ID = 3. 
// MCDC: %[[LAB1:[0-9]+]] = load i32, ptr %e.addr, align 4 // MCDC-DAG: %[[BOOL:tobool[0-9]*]] = icmp ne i32 %[[LAB1]], 0 -// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[BOOL]] to i32 -// MCDC-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 3 -// MCDC-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] +// MCDC-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 0 +// MCDC-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 0 +// MCDC-DAG: %[[LAB4:[0-9]+]] = select i1 %[[BOOL]], i32 %[[LAB2]], i32 %[[LAB3]] // MCDC-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // SHIFT SIXTH CONDITION WITH ID = 5. // MCDC: %[[LAB1:[0-9]+]] = load i32, ptr %f.addr, align 4 // MCDC-DAG: %[[BOOL:tobool[0-9]*]] = icmp ne i32 %[[LAB1]], 0 // MCDC-DAG: %[[LNOT:lnot[0-9]*]] = xor i1 %[[BOOL]] -// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[LNOT]] to i32 -// MCDC-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 5 -// MCDC-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] +// MCDC-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 8 +// MCDC-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 4 +// MCDC-DAG: %[[LAB4:[0-9]+]] = select i1 %[[LNOT]], i32 %[[LAB2]], i32 %[[LAB3]] // MCDC-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // UPDATE FINAL BITMASK WITH RESULT. 
-// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[TEMP0:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC: %[[TEMP:[0-9]+]] = add i32 %[[TEMP0]], 0 // MCDC: %[[LAB1:[0-9]+]] = lshr i32 %[[TEMP]], 3 // MCDC: %[[LAB4:[0-9]+]] = getelementptr inbounds i8, ptr @__profbm_test, i32 %[[LAB1]] // MCDC: %[[LAB5:[0-9]+]] = and i32 %[[TEMP]], 7 diff --git a/clang/test/Profile/c-mcdc.c b/clang/test/Profile/c-mcdc.c index 823160329b31f8..251c18baa861dd 100644 --- a/clang/test/Profile/c-mcdc.c +++ b/clang/test/Profile/c-mcdc.c @@ -11,85 +11,80 @@ int test(int a, int b, int c, int d, int e, int f) { // NOPROFPASS-NOT: __profbm_test // MCDC BOOKKEEPING. -// MCDC: @__profbm_test = private global [8 x i8] zeroinitializer +// MCDC: @__profbm_test = private global [2 x i8] zeroinitializer // MCDC: @__profc_test = private global [9 x i64] zeroinitializer // ALLOCATE MCDC TEMP AND ZERO IT. // NOPROFPASS-LABEL: @test( -// NOPROFPASS: call void @llvm.instrprof.mcdc.parameters(ptr @__profn_test, i64 [[HASH:[0-9]+]], i32 8) +// NOPROFPASS: call void @llvm.instrprof.mcdc.parameters(ptr @__profn_test, i64 [[HASH:[0-9]+]], i32 15) // MCDC-LABEL: @test( // MCDC: %mcdc.addr = alloca i32, align 4 // MCDC: store i32 0, ptr %mcdc.addr, align 4 // SHIFT FIRST CONDITION WITH ID = 0. 
-// NOPROFPASS: call void @llvm.instrprof.mcdc.condbitmap.update(ptr @__profn_test, i64 [[HASH]], i32 0, ptr %mcdc.addr, i1 %tobool{{[0-9]*}}) // MCDC: %[[LAB1:[0-9]+]] = load i32, ptr %a.addr, align 4 // MCDC-DAG: %[[BOOL:tobool[0-9]*]] = icmp ne i32 %[[LAB1]], 0 -// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[BOOL]] to i32 -// MCDC-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 0 -// MCDC-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] +// MCDC-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 0 +// MCDC-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 0 +// MCDC-DAG: %[[LAB4:[0-9]+]] = select i1 %[[BOOL]], i32 %[[LAB2]], i32 %[[LAB3]] // MCDC-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // SHIFT SECOND CONDITION WITH ID = 2. // NOPROFPASS-LABEL: land.lhs.true: -// NOPROFPASS: call void @llvm.instrprof.mcdc.condbitmap.update(ptr @__profn_test, i64 [[HASH]], i32 2, ptr %mcdc.addr, i1 %tobool{{[0-9]*}}) // MCDC: %[[LAB1:[0-9]+]] = load i32, ptr %b.addr, align 4 // MCDC-DAG: %[[BOOL:tobool[0-9]*]] = icmp ne i32 %[[LAB1]], 0 -// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[BOOL]] to i32 -// MCDC-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 2 -// MCDC-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] +// MCDC-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 14 +// MCDC-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 1 +// MCDC-DAG: %[[LAB4:[0-9]+]] = select i1 %[[BOOL]], i32 %[[LAB2]], i32 %[[LAB3]] // MCDC-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // SHIFT THIRD CONDITION WITH ID = 1. 
// NOPROFPASS-LABEL: lor.rhs: -// NOPROFPASS: call void @llvm.instrprof.mcdc.condbitmap.update(ptr @__profn_test, i64 [[HASH]], i32 1, ptr %mcdc.addr, i1 %tobool{{[0-9]*}}) // MCDC: %[[LAB1:[0-9]+]] = load i32, ptr %c.addr, align 4 // MCDC-DAG: %[[BOOL:tobool[0-9]*]] = icmp ne i32 %[[LAB1]], 0 -// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[BOOL]] to i32 -// MCDC-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 1 -// MCDC-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] +// MCDC-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 0 +// MCDC-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 0 +// MCDC-DAG: %[[LAB4:[0-9]+]] = select i1 %[[BOOL]], i32 %[[LAB2]], i32 %[[LAB3]] // MCDC-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // SHIFT FOURTH CONDITION WITH ID = 4. // NOPROFPASS-LABEL: land.lhs.true3: -// NOPROFPASS: call void @llvm.instrprof.mcdc.condbitmap.update(ptr @__profn_test, i64 [[HASH]], i32 4, ptr %mcdc.addr, i1 %tobool{{[0-9]*}}) // MCDC: %[[LAB1:[0-9]+]] = load i32, ptr %d.addr, align 4 // MCDC-DAG: %[[BOOL:tobool[0-9]*]] = icmp ne i32 %[[LAB1]], 0 -// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[BOOL]] to i32 -// MCDC-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 4 -// MCDC-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] +// MCDC-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 12 +// MCDC-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 2 +// MCDC-DAG: %[[LAB4:[0-9]+]] = select i1 %[[BOOL]], i32 %[[LAB2]], i32 %[[LAB3]] // MCDC-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // SHIFT FIFTH CONDITION WITH ID = 3. 
// NOPROFPASS-LABEL: lor.rhs6: -// NOPROFPASS: call void @llvm.instrprof.mcdc.condbitmap.update(ptr @__profn_test, i64 [[HASH]], i32 3, ptr %mcdc.addr, i1 %tobool{{[0-9]*}}) // MCDC: %[[LAB1:[0-9]+]] = load i32, ptr %e.addr, align 4 // MCDC-DAG: %[[BOOL:tobool[0-9]*]] = icmp ne i32 %[[LAB1]], 0 -// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[BOOL]] to i32 -// MCDC-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 3 -// MCDC-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] +// MCDC-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 0 +// MCDC-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 0 +// MCDC-DAG: %[[LAB4:[0-9]+]] = select i1 %[[BOOL]], i32 %[[LAB2]], i32 %[[LAB3]] // MCDC-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // SHIFT SIXTH CONDITION WITH ID = 5. // NOPROFPASS-LABEL: land.rhs: -// NOPROFPASS: call void @llvm.instrprof.mcdc.condbitmap.update(ptr @__profn_test, i64 [[HASH]], i32 5, ptr %mcdc.addr, i1 %tobool{{[0-9]*}}) // MCDC: %[[LAB1:[0-9]+]] = load i32, ptr %f.addr, align 4 // MCDC-DAG: %[[BOOL:tobool[0-9]*]] = icmp ne i32 %[[LAB1]], 0 -// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 -// MCDC-DAG: %[[LAB2:[0-9]+]] = zext i1 %[[BOOL]] to i32 -// MCDC-DAG: %[[LAB3:[0-9]+]] = shl i32 %[[LAB2]], 5 -// MCDC-DAG: %[[LAB4:[0-9]+]] = or i32 %[[TEMP]], %[[LAB3]] +// MCDC-DAG: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC-DAG: %[[LAB2:[0-9]+]] = add i32 %[[TEMP]], 8 +// MCDC-DAG: %[[LAB3:[0-9]+]] = add i32 %[[TEMP]], 4 +// MCDC-DAG: %[[LAB4:[0-9]+]] = select i1 %[[BOOL]], i32 %[[LAB2]], i32 %[[LAB3]] // MCDC-DAG: store i32 %[[LAB4]], ptr %mcdc.addr, align 4 // UPDATE FINAL BITMASK WITH RESULT. 
// NOPROFPASS-LABEL: lor.end: -// NOPROFPASS: call void @llvm.instrprof.mcdc.tvbitmap.update(ptr @__profn_test, i64 [[HASH]], i32 8, i32 0, ptr %mcdc.addr) -// MCDC-DAG: %[[TEMP:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// NOPROFPASS: call void @llvm.instrprof.mcdc.tvbitmap.update(ptr @__profn_test, i64 [[HASH]], i32 0, i32 0, ptr %mcdc.addr) +// MCDC-DAG: %[[TEMP0:mcdc.temp[0-9]*]] = load i32, ptr %mcdc.addr, align 4 +// MCDC: %[[TEMP:[0-9]+]] = add i32 %[[TEMP0]], 0 // MCDC: %[[LAB1:[0-9]+]] = lshr i32 %[[TEMP]], 3 // MCDC: %[[LAB4:[0-9]+]] = getelementptr inbounds i8, ptr @__profbm_test, i32 %[[LAB1]] // MCDC: %[[LAB5:[0-9]+]] = and i32 %[[TEMP]], 7 diff --git a/compiler-rt/test/profile/ContinuousSyncMode/image-with-mcdc.c b/compiler-rt/test/profile/ContinuousSyncMode/image-with-mcdc.c index 748af46ee52fa5..d2afe7048f37f4 100644 --- a/compiler-rt/test/profile/ContinuousSyncMode/image-with-mcdc.c +++ b/compiler-rt/test/profile/ContinuousSyncMode/image-with-mcdc.c @@ -7,7 +7,7 @@ // CHECK: Num Bitmap Bytes: // CHECK-NEXT: $1 // CHECK-NEXT: Bitmap Byte Values: -// CHECK-NEXT: 8 +// CHECK-NEXT: 0x4 #include #include extern int __llvm_profile_is_continuous_mode_enabled(void); diff --git a/llvm/docs/CoverageMappingFormat.rst b/llvm/docs/CoverageMappingFormat.rst index f2ae8df5ad7f82..96bdf8fa71be73 100644 --- a/llvm/docs/CoverageMappingFormat.rst +++ b/llvm/docs/CoverageMappingFormat.rst @@ -148,7 +148,7 @@ There are several kinds of mapping regions: * Decision regions associate multiple branch regions with a boolean expression in the source code. This information also includes the number of - bitmap bytes needed to represent the expression's executed test vectors as + bitmap bits needed to represent the expression's executed test vectors as well as the total number of instrumentable branch conditions that comprise the expression. 
Decision regions are used to visualize Modified Condition/Decision Coverage (MC/DC) in *llvm-cov* for each boolean diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index f39b8dc6c90d47..10d53bea149ef6 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -14411,7 +14411,7 @@ Syntax: :: declare void @llvm.instrprof.mcdc.parameters(ptr , i64 , - i32 ) + i32 ) Overview: """"""""" @@ -14429,7 +14429,7 @@ name of the entity being instrumented. This should generally be the The second argument is a hash value that can be used by the consumer of the profile data to detect changes to the instrumented source. -The third argument is the number of bitmap bytes required by the function to +The third argument is the number of bitmap bits required by the function to record the number of test vectors executed for each boolean expression. Semantics: @@ -14496,7 +14496,7 @@ Syntax: :: declare void @llvm.instrprof.mcdc.tvbitmap.update(ptr , i64 , - i32 ) + i32 ) i32 , ptr ) @@ -14506,10 +14506,9 @@ Overview: The '``llvm.instrprof.mcdc.tvbitmap.update``' intrinsic is used to track MC/DC test vector execution after each boolean expression has been fully executed. The overall value of the condition bitmap, after it has been successively -updated using the '``llvm.instrprof.mcdc.condbitmap.update``' intrinsic with -the true or false evaluation of each condition, uniquely identifies an executed -MC/DC test vector and is used as a bit index into the global test vector -bitmap. +updated with the true or false evaluation of each condition, uniquely identifies +an executed MC/DC test vector and is used as a bit index into the global test +vector bitmap. Arguments: """""""""" @@ -14521,10 +14520,9 @@ name of the entity being instrumented. This should generally be the The second argument is a hash value that can be used by the consumer of the profile data to detect changes to the instrumented source. 
-The third argument is the number of bitmap bytes required by the function to -record the number of test vectors executed for each boolean expression. +The third argument is not used. -The fourth argument is the byte index into the global test vector bitmap +The fourth argument is the bit index into the global test vector bitmap corresponding to the function. The fifth argument is the address of the condition bitmap, which contains a diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h index 9010e1a1c896bf..1ac4a5fffb43bb 100644 --- a/llvm/include/llvm/IR/IntrinsicInst.h +++ b/llvm/include/llvm/IR/IntrinsicInst.h @@ -33,6 +33,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Value.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/MathExtras.h" #include #include #include @@ -1580,11 +1581,17 @@ class InstrProfMCDCBitmapInstBase : public InstrProfInstBase { return isa(V) && classof(cast(V)); } - /// \return The number of bytes used for the MCDC bitmaps for the instrumented + /// \return The number of bits used for the MCDC bitmaps for the instrumented /// function. - ConstantInt *getNumBitmapBytes() const { + ConstantInt *getNumBitmapBits() const { return cast(const_cast(getArgOperand(2))); } + + /// \return The number of bytes used for the MCDC bitmaps for the instrumented + /// function. + auto getNumBitmapBytes() const { + return alignTo(getNumBitmapBits()->getZExtValue(), CHAR_BIT) / CHAR_BIT; + } }; /// This represents the llvm.instrprof.mcdc.parameters intrinsic. diff --git a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h index da031040452491..5fc497db8df54a 100644 --- a/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h +++ b/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h @@ -677,7 +677,8 @@ class CounterMappingContext { /// pairs. 
Expected evaluateMCDCRegion(const CounterMappingRegion &Region, - ArrayRef Branches); + ArrayRef Branches, + bool IsVersion11); unsigned getMaxCounterID(const Counter &C) const; }; diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index 5cd8c3283373e3..0c899e6d84965c 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -1161,7 +1161,7 @@ enum ProfVersion { Version10 = 10, // An additional field is used for bitmap bytes. Version11 = 11, - // VTable profiling, + // VTable profiling, decision record and bitmap are modified for mcdc. Version12 = 12, // The current version is 12. CurrentVersion = INSTR_PROF_INDEX_VERSION diff --git a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp index 8c81bbe8e9c4ee..455124efd5b248 100644 --- a/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp +++ b/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp @@ -384,15 +384,18 @@ class MCDCRecordProcessor : NextIDsBuilder, mcdc::TVIdxBuilder { DenseSet TVIdxs; #endif + bool IsVersion11; + public: MCDCRecordProcessor(const BitVector &Bitmap, const CounterMappingRegion &Region, - ArrayRef Branches) + ArrayRef Branches, + bool IsVersion11) : NextIDsBuilder(Branches), TVIdxBuilder(this->NextIDs), Bitmap(Bitmap), Region(Region), DecisionParams(Region.getDecisionParams()), Branches(Branches), NumConditions(DecisionParams.NumConditions), Folded(NumConditions, false), IndependencePairs(NumConditions), - ExecVectors(ExecVectorsByCond[false]) {} + ExecVectors(ExecVectorsByCond[false]), IsVersion11(IsVersion11) {} private: // Walk the binary decision diagram and try assigning both false and true to @@ -415,7 +418,9 @@ class MCDCRecordProcessor : NextIDsBuilder, mcdc::TVIdxBuilder { assert(TVIdx < SavedNodes[ID].Width); assert(TVIdxs.insert(NextTVIdx).second && "Duplicate TVIdx"); - if (!Bitmap[DecisionParams.BitmapIdx * CHAR_BIT + TV.getIndex()]) + if 
(!Bitmap[IsVersion11 + ? DecisionParams.BitmapIdx * CHAR_BIT + TV.getIndex() + : DecisionParams.BitmapIdx - NumTestVectors + NextTVIdx]) continue; // Copy the completed test vector to the vector of testvectors. @@ -521,9 +526,9 @@ class MCDCRecordProcessor : NextIDsBuilder, mcdc::TVIdxBuilder { Expected CounterMappingContext::evaluateMCDCRegion( const CounterMappingRegion &Region, - ArrayRef Branches) { + ArrayRef Branches, bool IsVersion11) { - MCDCRecordProcessor MCDCProcessor(Bitmap, Region, Branches); + MCDCRecordProcessor MCDCProcessor(Bitmap, Region, Branches, IsVersion11); return MCDCProcessor.processMCDCRecord(); } @@ -610,8 +615,8 @@ static unsigned getMaxCounterID(const CounterMappingContext &Ctx, } /// Returns the bit count -static unsigned getMaxBitmapSize(const CounterMappingContext &Ctx, - const CoverageMappingRecord &Record) { +static unsigned getMaxBitmapSize(const CoverageMappingRecord &Record, + bool IsVersion11) { unsigned MaxBitmapIdx = 0; unsigned NumConditions = 0; // Scan max(BitmapIdx). 
@@ -626,8 +631,12 @@ static unsigned getMaxBitmapSize(const CounterMappingContext &Ctx, NumConditions = DecisionParams.NumConditions; } } - unsigned SizeInBits = llvm::alignTo(uint64_t(1) << NumConditions, CHAR_BIT); - return MaxBitmapIdx * CHAR_BIT + SizeInBits; + + if (IsVersion11) + MaxBitmapIdx = MaxBitmapIdx * CHAR_BIT + + llvm::alignTo(uint64_t(1) << NumConditions, CHAR_BIT); + + return MaxBitmapIdx; } namespace { @@ -815,6 +824,9 @@ Error CoverageMapping::loadFunctionRecord( } Ctx.setCounts(Counts); + bool IsVersion11 = + ProfileReader.getVersion() < IndexedInstrProf::ProfVersion::Version12; + BitVector Bitmap; if (Error E = ProfileReader.getFunctionBitmap(Record.FunctionName, Record.FunctionHash, Bitmap)) { @@ -826,7 +838,7 @@ Error CoverageMapping::loadFunctionRecord( } if (IPE != instrprof_error::unknown_function) return make_error(IPE); - Bitmap = BitVector(getMaxBitmapSize(Ctx, Record)); + Bitmap = BitVector(getMaxBitmapSize(Record, IsVersion11)); } Ctx.setBitmap(std::move(Bitmap)); @@ -884,7 +896,7 @@ Error CoverageMapping::loadFunctionRecord( // DecisionRegion, all of the information is now available to process. // This is where the bulk of the MC/DC progressing takes place. 
Expected Record = - Ctx.evaluateMCDCRegion(*MCDCDecision, MCDCBranches); + Ctx.evaluateMCDCRegion(*MCDCDecision, MCDCBranches, IsVersion11); if (auto E = Record.takeError()) { consumeError(std::move(E)); return Error::success(); diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index f9b58d9f278214..0c79eaa812b5fc 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -936,9 +936,6 @@ Value *InstrLowerer::getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I) { auto *Bitmaps = getOrCreateRegionBitmaps(I); IRBuilder<> Builder(I); - auto *Addr = Builder.CreateConstInBoundsGEP2_32( - Bitmaps->getValueType(), Bitmaps, 0, I->getBitmapIndex()->getZExtValue()); - if (isRuntimeCounterRelocationEnabled()) { LLVMContext &Ctx = M.getContext(); Ctx.diagnose(DiagnosticInfoPGOProfile( @@ -948,7 +945,7 @@ Value *InstrLowerer::getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I) { DS_Warning)); } - return Addr; + return Bitmaps; } void InstrLowerer::lowerCover(InstrProfCoverInst *CoverInstruction) { @@ -1018,9 +1015,11 @@ void InstrLowerer::lowerMCDCTestVectorBitmapUpdate( auto *MCDCCondBitmapAddr = Update->getMCDCCondBitmapAddr(); auto *BitmapAddr = getBitmapAddress(Update); - // Load Temp Val. + // Load Temp Val + BitmapIdx. // %mcdc.temp = load i32, ptr %mcdc.addr, align 4 - auto *Temp = Builder.CreateLoad(Int32Ty, MCDCCondBitmapAddr, "mcdc.temp"); + auto *Temp = Builder.CreateAdd( + Builder.CreateLoad(Int32Ty, MCDCCondBitmapAddr, "mcdc.temp"), + Update->getBitmapIndex()); // Calculate byte offset using div8. 
// %1 = lshr i32 %mcdc.temp, 3 @@ -1415,7 +1414,7 @@ GlobalVariable * InstrLowerer::createRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc, StringRef Name, GlobalValue::LinkageTypes Linkage) { - uint64_t NumBytes = Inc->getNumBitmapBytes()->getZExtValue(); + uint64_t NumBytes = Inc->getNumBitmapBytes(); auto *BitmapTy = ArrayType::get(Type::getInt8Ty(M.getContext()), NumBytes); auto GV = new GlobalVariable(M, BitmapTy, false, Linkage, Constant::getNullValue(BitmapTy), Name); @@ -1434,7 +1433,7 @@ InstrLowerer::getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc) { // the corresponding profile section. auto *BitmapPtr = setupProfileSection(Inc, IPSK_bitmap); PD.RegionBitmaps = BitmapPtr; - PD.NumBitmapBytes = Inc->getNumBitmapBytes()->getZExtValue(); + PD.NumBitmapBytes = Inc->getNumBitmapBytes(); return PD.RegionBitmaps; } diff --git a/llvm/test/Instrumentation/InstrProfiling/inline-data-var-create.ll b/llvm/test/Instrumentation/InstrProfiling/inline-data-var-create.ll index 7c064f547141f3..456103164378e9 100644 --- a/llvm/test/Instrumentation/InstrProfiling/inline-data-var-create.ll +++ b/llvm/test/Instrumentation/InstrProfiling/inline-data-var-create.ll @@ -27,21 +27,21 @@ declare void @llvm.instrprof.mcdc.parameters(ptr %0, i64 %1, i32 %2) define internal void @foobar() { call void @llvm.instrprof.increment(ptr @__profn_foobar, i64 123456, i32 32, i32 0) - call void @llvm.instrprof.mcdc.parameters(ptr @__profn_foobar, i64 123456, i32 99) + call void @llvm.instrprof.mcdc.parameters(ptr @__profn_foobar, i64 123456, i32 792) ret void } define void @foo() { call void @llvm.instrprof.increment(ptr @__profn_foo, i64 123456, i32 32, i32 0) - call void @llvm.instrprof.mcdc.parameters(ptr @__profn_foo, i64 123456, i32 21) + call void @llvm.instrprof.mcdc.parameters(ptr @__profn_foo, i64 123456, i32 168) call void @foobar() ret void } define void @bar() { call void @llvm.instrprof.increment(ptr @__profn_bar, i64 123456, i32 32, i32 0) - call void 
@llvm.instrprof.mcdc.parameters(ptr @__profn_bar, i64 123456, i32 23) + call void @llvm.instrprof.mcdc.parameters(ptr @__profn_bar, i64 123456, i32 184) call void @foobar() ret void } diff --git a/llvm/test/Instrumentation/InstrProfiling/mcdc.ll b/llvm/test/Instrumentation/InstrProfiling/mcdc.ll index a7f1e606e35fa0..e9ae80891ea6e6 100644 --- a/llvm/test/Instrumentation/InstrProfiling/mcdc.ll +++ b/llvm/test/Instrumentation/InstrProfiling/mcdc.ll @@ -30,7 +30,8 @@ entry: ; CHECK-NEXT: store i32 %[[LAB3]], ptr %mcdc.addr, align 4 call void @llvm.instrprof.mcdc.tvbitmap.update(ptr @__profn_test, i64 99278, i32 1, i32 0, ptr %mcdc.addr) - ; CHECK: %[[TEMP:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 + ; CHECK: %[[TEMP0:mcdc.*]] = load i32, ptr %mcdc.addr, align 4 + ; CHECK-NEXT: %[[TEMP:[0-9]+]] = add i32 %[[TEMP0]], 0 ; CHECK-NEXT: %[[LAB4:[0-9]+]] = lshr i32 %[[TEMP]], 3 ; CHECK-NEXT: %[[LAB7:[0-9]+]] = getelementptr inbounds i8, ptr @__profbm_test, i32 %[[LAB4]] ; CHECK-NEXT: %[[LAB8:[0-9]+]] = and i32 %[[TEMP]], 7 diff --git a/llvm/test/tools/llvm-cov/Inputs/mcdc-const-folding.o b/llvm/test/tools/llvm-cov/Inputs/mcdc-const-folding.o index 4f54fa7b8a1a1492a9a455bbf6fa651844345c59..7a16162f29c00777d9a05f5fed1cc4cbf333c56c 100644 GIT binary patch delta 5899 zcmai04Qx}_74|hDF*Lz(34QTOe!MP_Zf1!EB!NI3^V208h=$*kUq2ece}2FTA!&#M zH4AGy)jC3}v@*q-rh(Q;F-g~IQmY_3+D@BV5TmH9T0C_t6J;YJqNS~tedqeVC}BCqJF|7qV=1TLsie*3uUQLzlcw{l@_=AhRzH6yQs-f z9_GydABzD*`|Pvl;x{susvn=yFEB^zc$IdFrR7XV>(YJi^ov{bN!Sazzsm9rjkiKo z7r7TI9A6NxBxmwU;&$GGN$a1;9G_n9Q!0sGnc|$G$fxeJb`gcs3esgm?DUH%tGmY= z^EGAh*5<49I95B>nw?S_nlQ27<9l~6H0e;Fe_th?7g%??Z-(sE_#AXJZH_&zTFLl$ zD<4d@a!hadKhxIdl&Rcs79TJBr4`?D+PL-EXxp4*P$YvmXa2aX<-AB9QI#6G;XFO$ zJdMmy?pc1Gqm_FxB1N3nD9u$c<2iHiP)O+#ktPAO3wBXH`GD zkc&iD>n2s5Cgr{^UC}HRJ5;DAeYO|7@v5l0@^O`8DJxcwT~bVhRY@w)JBwaVH|9(} zCo6sTBDVw1)0bBI#v(T`cq^$zchXC#bgE;CzQ)OW`PJ0Gptiv)dh02evH1Va7r64# z3P%wQWjt0Ixt(%c#;5Szj@+nnp8j3G_5$f$LEmxAllx+$I>$UQg-$xwz+RJf&NLpk 
zNqb4!ho*C+LE7I;-zd^3>w(>{tBN_HWXzp23ku7u!xQ;4#R{60Ux{&T{#>z!j=~mEEWdKr zV%|es*|>*<9NLO#B!A|kYm$ycZYsoztpN6=iZ#)^0*}}{u%;mDbQP(jH`8sl2W@s+ z8I63`aPL#mGWFY`&_42%81A@=TDhePy+*Y_r>kkK+&`iJtXxGsB}PrHRkG6fp+ehK zoD1bEsN%m>@wLgYZIOy+QUY`3*^}WSh2Ba_&b5qOrG`7AP^$@N6nd2cKxa&uoV!M$ z^QooO=#D7VnpY_FeHFL1;RE=*Iu~%htV#w{yiCQd8lF?}NJ@N>ieF8}?Y5?YX}(|D zMGnR18}5RnIOeh0GI6V9TL~rR8)6B$78qg`TlW@?=~8SC?VaN-;2ht}!3hnCIv+&_N9?)Xq%H8fvC|4hDxhWJF<`(%PSRqZaX>k*CV zq$rpLbOE-6t-DfV7HiDqGy=3nL+1hgLal2(dCCm;4h>xZGzT|dO;iiCQ$rUaYW3te z^#J{G3OZY6+H9vO4m6g6^4{?E3MGKPuAz@1YAL@*u5v>ROZzbe%8kI?6sDz{Tv~jq7N1RV%;nY4%^F%p382*)Y67)-wUJzl4Y8d9u#If9 z!Wwf2buE6F!iyaOPZ=1$V(UJyF&dy{)%^xt26{z9>$N&QAW>t(h&9JSz?F~TX$Gv z9@cbvsTQ1wZ;Za#yXY-k>PZH#O9)RrLqD0Q4^!%I{)prMJoBH^hBu$EX(L zEOJ#DBA0snhC3hs=zMYC^eivM!K{$hPYEzrYRpH#wrX1^)wWOon!DM$+cc*B1`ksU z&{Gp$kxhb@Hx~2;iNeP(CcrX%*fjU%=@8T+3aqq4K!E z=MDsLTYYozF~Y@h{ry^)61QpyXz|C9WA&#t8K<`bE_YCIxWcH$X`&I--=-<(Z>f{y zS!TG;YN-B}dW~vWVuq4+Yxm+e3TE?%krwCAfhSNkGvYen9H z`^z$>iZT+_6CaQ)%NL9A-A6v4)(0e~^JSse$OLMAKn_W40Fa;av5i?N349r#MjSD=0K8s3mYqiemQmptqI z6x^6Yp>#M!aFOy;8tKydPO#Q zHUon<3a?#y!EL}cd+}`~F?gf!s>!nj^KA#V#Vh)yHw_i zi+3OauW&Ci8n~0pdqMJUQlbIKJ|Mfj&}UvByg_)^CGU>pL3q|qkm9R}Kr-xjbBucEp^#pqsC{D*~3pk;y6Fku^xFMd%!&Dpkij6 zkW8G+7#OG@wqNqv$vnZmioxEY1ngPzJi}uX_A|*jO9_lC$P@7kKbf#~6q7aqd!0Nd Nd0$LeygN?L{SRp}gY^Ia delta 6068 zcmai2dr(x@8Q*gSVHI6K;BrwBf|+1?Q(YBVFtTid5u!Fcd_Te?%0qmBg0dKetzsb6 z8OYb9WG0zNr)kr)X;@7<(SNc`G~&c*HBB;UO{daHGj@oRFt!X!?9ku2dk&m#)AY>T zZ_oYxzVrLO^WAgKy>RP{Hh4zs&SZl2Hnfh3rYOu2u?qM!L$$3(MT-1;?|$(BYqubyK)eF<7~GBrjfFFnV)20r|2bRrj(1KVfuJLU|$ z_igdWGc6%oXr4oWrZjOQe)3O_Hq8-zq!2Yc!x`|i;Rt`EGe9G(C^Cxo$*rZdeMhnp zAKr?#W;!*W$(a6#em=4L{xhy0YKYxDQ^3xv&OrFg&Y>jM*kyMe2O_CCa(>)H*GGCYQKB#8X405p5MI!q4aqn@1l_ zbGBIpat6+^@bs-GI@+8n#QhPFYK{%dMw59=RZfhXTnXP&Tv3_P=4FM6p(Vzv%=I&- zQceAinqnjtRC;V^(9}a9d*p^TS%Z$7j^{D2sU`P*!9s@(&%#=9)sDk~YR5N&cO7lk zeaFqFEHSzKXS4X4%y0i)bSH)s{%94yofNcyp0yHfzZwzB${IMlO$dd&Bc%5NyYyys zdisL0(59Qr;(f901kh$vkB@jP!qwxDZ-FE~Wb%R-1d z-xk7or{iY69tXp12gd-Lu2Pp}~ 
zAnWOu7JegjWul9(QM6cG0cLe=8I-N~tyC<-6+PNT|J$}AzL60LY zT@1Q3-DZooX||=5zF0S^1-e4~whHzoDgpb3Kr_Ve62V?4AJ_;HW~pF5#)-?7SCD_P zUbIHAGVU{itrhqhF{(n~ZwkCr;5hp!!nTtHIh1?pOc`!8Y8HbB|^HxcXVCzFS z7Mq0ry@^meLVZwp#VfBk;>jx~^Rd$aJQHaI_I_>+r&tGkUYUw#4yCWq4Y$HR1oj&-uXR)ccC*4R2b+M4 zw@UJXtykC_pfZPpp_icT~2!KagM!1i)$yrOvWQ&{HzD`G2k<6VW_rRZECC)iIE zmhW_#&*xORQZE_~W2N#x6}(Q-oKWxv1)og=h?Ojc^SbR<*!yS%>@tOI1}igpl+qv8 z*;*=r-OjC1qj(-tbPkXYK0flp_Hb+TDxR(I6t}k5Xb9|E3frRSd_ZxlboLQ>VE@8x z(HDwml;ZiNg5xDKd4~K$!TG+FD`YzLt(ZEms{3NEKg^TQDvTP zY*hT!6L=@}z^Bf#O_t}0|Ka%@4M68bg;mezpVA1}*A+HR$;xjjzoW2f4j)juTW5cy z64<|UTlAIUnW5JpwQM;x=Ht6W8MPRrWVH`aAJ`)byIf(9S(+y;&0b<2yuTDr{+%Lq0^|hy zd%^PT&nPV3Umm^aGi9s?tbEseuHfpadoK)^dm)7e5Ni=%f@nNeH!hipi$=h%QP}4c z_6L;yh;BS7Sb26y<#s9o+pUcKu`>1*@`3%0!oHxeZs8d)`M5`;A`M<*-uMB_8&8;D)#NPHjh8rtb~5xc$-Y6AV6O}G*I{e~^?+R=Ua=pA zv1@1mto*3F7skFxBVZX%0d1kskXgl3dJztH!LAEqTd1T+FS;z)rZ851RCb2pY2q+_ zDGbL)2Grz7<)tuIepG%MhRcu2FT?P?6!*B!E}UQVxFedqNI9izSTgxav+eE6LR0Og zfzoVNOTkh%+d?^IZq`6v*w?In3kA_1v-<7i-Qd;=?UY-VhZj^3H@%^oZt`w)YqzZ% z8Y@kpm`$eUngPdBI*o28@8;218JNwc{ZoD_qE(S+!&*8GB!8wv27ttDG3~ESBA$)# z0TL_DgDM&T60_B`w@f0QP4EG7YYHjLrm;b{a znLz6*5hk9-%kx+3m!| zU#DO-gm*!>#zZjhE(q7)$xZn-OJ5s#YazTF!nLMmOQ7-H5Uw-r(c&0E7JFSR!OCI@ z1?wPO1K~ZUR)K$J)s)h;J*K^O%K4~tt{`tclxv|}Z?XUdYs+ZeUei7(d^3sFO5bh@ z?uGCk2=6nsC5hC!2g2yO&^_+P9~Zp%0rKvLVm%c1o7&k#8m~vL2Qab3P!Tf8_mnH- ztbJWT;|KD{wGWyNrWTt-t^1(aVA_-UYZMI$RAp=VNtSjRc^jdCo$z9w%23Vg`a#t>w$o9UKQJ9(R+*a*cY6icV^MkqF; zSh|ewt9A+JehM~2@emXbo9sGy4?*#;seP(b>j5_o1|Iend5=J`35rLs2$E^M35qRP z1j$m7Rk?(7ECpMjco>Rall7DLFciJmCCSu!P%7q=w-t&}NKnTAyK z)w!5qg>RtXQ7C$$__V2Y8`SDWLDB6+x7jVc&XV^SBwHbQ3=1lS##eVE%Bm8zj-)g6>i2RYBepkZOn22~!KqrS$kDiuFBA>rR^X0RK2c xi?&wAU`V%{f+yW9nsPea%t77`x0X5I8g;!RlW%&!0m?bWOX`K~qzk7W_&3g0Q!xMl diff --git a/llvm/test/tools/llvm-cov/Inputs/mcdc-const-folding.proftext b/llvm/test/tools/llvm-cov/Inputs/mcdc-const-folding.proftext index 82335a8195faa4..97d78a39f43562 100644 --- a/llvm/test/tools/llvm-cov/Inputs/mcdc-const-folding.proftext +++ 
b/llvm/test/tools/llvm-cov/Inputs/mcdc-const-folding.proftext @@ -40,7 +40,7 @@ _Z5case2b # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0xa +0x6 _Z5case3b @@ -55,7 +55,7 @@ _Z5case3b # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0x9 +0x5 _Z5case4b @@ -70,7 +70,7 @@ _Z5case4b # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0x2 +0x1 _Z5case5b @@ -85,7 +85,7 @@ _Z5case5b # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0x6 +0x5 _Z5case6b @@ -100,7 +100,7 @@ _Z5case6b # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0x5 +0x6 _Z5case7b @@ -166,7 +166,7 @@ _Z5caseabb # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0xa2 +0xe _Z5casebbb @@ -183,7 +183,7 @@ _Z5casebbb # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0xa1 +0xd _Z5casecbb @@ -200,7 +200,7 @@ _Z5casecbb # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0x2 +0x1 _Z5casedbb @@ -217,7 +217,7 @@ _Z5casedbb # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0x12 +0x3 _Z5caseebb @@ -234,7 +234,7 @@ _Z5caseebb # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0x14 +0xa _Z5casefbb @@ -251,7 +251,7 @@ _Z5casefbb # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0x6 +0x9 _Z5casegbb @@ -268,7 +268,7 @@ _Z5casegbb # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0x23 +0x7 _Z5casehbb @@ -302,7 +302,7 @@ _Z5caseibb # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0x83 +0xb _Z5casejbb @@ -319,7 +319,7 @@ _Z5casejbb # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0xa1 +0xd _Z5casekbb @@ -336,7 +336,7 @@ _Z5casekbb # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0x12 +0x3 _Z5caselbb @@ -353,7 +353,7 @@ _Z5caselbb # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0x12 +0x3 _Z5casembb @@ -370,7 +370,7 @@ _Z5casembb # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0x12 +0x3 _Z5casenbb @@ -387,7 +387,7 @@ _Z5casenbb # Num Bitmap Bytes: $1 # Bitmap Byte Values: -0x6 +0x9 main diff --git a/llvm/test/tools/llvm-cov/Inputs/mcdc-const.o b/llvm/test/tools/llvm-cov/Inputs/mcdc-const.o index 1145fcc6f7125452262d0745b9de0591c1e48cda..bc38b71b5de072cedeb2172d160d6705d4855c47 100644 GIT 
binary patch delta 59 zcmcbiaYJK5A|uPhq;w#0%9@d7G8>}_khEqD2a=N+yBWDR%QHDLGBRyWWDe(HWSTsY LS$^{k{stBR?>Y~s delta 61 zcmcbiaYJK5A|unpq;w#0%9@dBG8>}_khEqD2a=N+yBQfa%QHDLPVV3p*j&Ku!NbTh Nc_Op?<{SJCEC3mc5Xk@l diff --git a/llvm/test/tools/llvm-cov/Inputs/mcdc-const.proftext b/llvm/test/tools/llvm-cov/Inputs/mcdc-const.proftext index 10253f26e0a680..16eba3a0180803 100644 --- a/llvm/test/tools/llvm-cov/Inputs/mcdc-const.proftext +++ b/llvm/test/tools/llvm-cov/Inputs/mcdc-const.proftext @@ -14,12 +14,10 @@ _Z4testbbbb 0 0 # Num Bitmap Bytes: -$4 +$2 # Bitmap Byte Values: 0x1 -0x2 -0x0 -0x0 +0x4 main diff --git a/llvm/test/tools/llvm-cov/Inputs/mcdc-general-18.o b/llvm/test/tools/llvm-cov/Inputs/mcdc-general-18.o new file mode 100644 index 0000000000000000000000000000000000000000..e802f51132c068f0f4e2921c952710cb5c5665e5 GIT binary patch literal 6456 zcmd5=eQaA-6~FJjWU*VvcC)5U(xq}rH?p!k>?X~c(jYsHvs7eLwzAQ|z;zubjTV2% zd5H!L7F4%e5a~8hCH_Df|1qHN&o~k&z2nn6Lm{Z zC|`2!B`h8~_tr|&n}B?okPBBWG95Qwd)~P8d!JeLIE)^z;#c{y(@$`+Tyb`+r=+UP zu6#LEehzeKf2kD>%(d^rx4SlCt6&s5^&LRgc0w<_nDKK){L+=UZy=0b{s0Ob+^yVo z;H!qd?i`^tRPs}3!C1Wfmr(fy&(O<@J*{|U5VU?nySdHy7yV z>nnjI)|@O~caG97wB~MFHi|ud@+ZE}CBBeIeSQs$xhw0NHt5UH3R<YH`(dXikSd zxIn%+Fo~WU&3~*SolBJb$Id@^5hev&0)(NFzP|diT;!cyZicL#_(`rD49GQH8y>0vl%uFVHie>2pOV9?C2Fu z$PD2V{%EUdSSGGoMXLx#x7=hwY*_il&0>dNY!&U%_DyD!z*W2G6kXBIAV|1s7PlCu zmX?-$%|^4>T@B*HAg#Toi7eXFW{6fR=0~TH|Xs(L9sF;)Y}A=8L-C6hIip{1FKiVEjH@AR z#E?`;?!n4HB`H-7+gKT>>=R)($UY1LnE2;gD}NfCI(zwMnFS(ns#N(?-mnM>zqB8%CQ zw2ckfQ9C*_>)@2TWPIs9VLTExb_7~{PeC%{0N+A;a)cAX@JO&@Bp68qdlSLv$>uXn zPgsJQK5xEym1bxY1G*^qj6bo^JeEk+V_%h!P`f!PLWABhl0QhG3AmL9)5I!Lk zfcU2pdyaYYyy2csc%V0l_bNJ$GQROTeGLFZY_G!Hml;S5ah92<2=65e#=nY%K;(L4 z3I7JzlKPuA;0RdNPI%iLQa|7U%F2=*Qvh08vWvC&52YBc1*`hpX>WT~F5#uFLlzuc z^~kDrt#;QE+iC%jB2ZVG@?7N10sblY)XVZsz)_#|r(Uc(!mNaPIz9aj6G4ff(oK+wa0voZpcH3XV^i{Uj9}{qvK1P{ZY+3)sUNeh0}5 z3aucVLxXy{88yuh_f0#NBDCZ zUMBoS4d*$#py1da62%(|j`>WHpI<6CURk+6zaw1l&mR?ib*|piaDFcDE4bQ+7HlLC 
z_tTYx=u&VzZ#bVG1xG(0P=Gxe9w0}16&z0?_H#hN(GTk?;_}5l|htu6!ee#Na{YWRT=kD5APKo*(7TTXm;mHe~9gj|+SLi#f@$2};3JX(tWN3&`yZ+?NO4>g)mH{ncIY z6tiOhADTQIOGW81{m<+UICt>(R;N8QK#$Y8#QT~ke@7AXZZNK9oDBY|4i{iJ|0>-H z0%CC$g!QvH+(1|5{|AKVhBdn48xha(y$~GZ>o>h96~*rXs7PX&`Qw0Yygdrh+v4hT zJdBUGK+c~z-Zx}V%m}PJic@^P&+!iXW4TfMXr1_Y+vm8<8^u3Q3Vd(mczXP2U_<6F zBCI0$yJX*6gL?gExKaN5>*SBIIdA620K@qBx4%jq^xXBia&z@G8^w=Pe16XO{P3?9 zj>~)!uzK+$6#v^^D7n)6mjL5>_Ailr{6p*yg0aj+tN$sw=ts#O^T+d>HRAJfY@HoRIctLd;U9xx7nWg2yEb*=Y;jn zkbNfu#MzGd+ko?0RsEj+;%WA*`w!Q?sEx09mh6{UkvR8{;lIHGXCcl2@ptR=zYF%* vp8J0VFl(Yx0Ja35Dn-KXlK)eI?+=n*>i__H?x=Ty<~L58XovSsJ^p_HjJhJT literal 0 HcmV?d00001 diff --git a/llvm/test/tools/llvm-cov/Inputs/mcdc-general-18.profdata b/llvm/test/tools/llvm-cov/Inputs/mcdc-general-18.profdata new file mode 100644 index 0000000000000000000000000000000000000000..d351096967dbf315b7bedf94441be0f19a936ba3 GIT binary patch literal 888 zcmeyLQ&5zjmf6V7fE`3Iqly=x@!6o#0#KR>N{c{gb~Kt5$~T140_so(8nvK}5iA1J z58*K|IPgLo05b>XoClLx!0OS(8>ApEgvc^5z-Wbq5R+m0VfGwQ=LDOBE}pQC3oH)v zA1{~(Busg*iZ9p=Q2}!|EIdBc@qyK&iyKJuV-=6E!!CZu5xcmV6LxWX9O6t2zpnN^ zH%pm%8x}GeV0SPuPiEWlv_4U0G!4GUgLxCihtV*7umr6SbtlY= cFt0(92LnShRK5XC9h3_*4rV+9oB>e)05F*>P5=M^ literal 0 HcmV?d00001 diff --git a/llvm/test/tools/llvm-cov/Inputs/mcdc-general.o b/llvm/test/tools/llvm-cov/Inputs/mcdc-general.o index e802f51132c068f0f4e2921c952710cb5c5665e5..8bed601221a1459b613300a7386e5251a3fdce1d 100644 GIT binary patch delta 1809 zcmZ`(T}&KR6rMW^+u3DzW_Nb!>=@F5Az^Eq!ctoZsT~F-(zGU}HAd04s15YNR4`jZ z`T}hi!y2pLd9a%J!;?=YAhE)OmQ8u+)-`ph1?ix+X5q<2DEr-9cvo|`{$|aQSUv_pil^A>5kzJzXJyiPqwF zac$46r4NX^cInE6kbOQUXJ*r9V1U&QQc#P>^Woyl6Kr8I$P0~djV@Yd4hsmkp)fVU zmS#{p_n}lCt7yZQ5Ls%*Uj8=)LqenwpEt_~Db;`FZ>RgjJ@bQ>`Ei{J=}s$d_=U$P z?iQ&P?N#1okM?@LN4uL_(FVD_j;~{>+2X;_kdrEvUwTK9o{~cT4XKA#7%nL@d-`d& zmb3F28Eds?Zl{k4oEFzB?w`xq+`=zPB`~?BJ)0|Taf=j{P;f(f{!kOu3yTVGvg7=m z^1OY_K7RDd1j~Ns!o}a<8F{ys73rNNcf+{6_aPe5vCp}aI%YMj%8fp35_bBIvrX`} z?*t3Nim!=r&Xa`gZ@Bos?jkd0xR&iW%&!D 
z?cyQfw;yVPhIv$2F1th6y|5fG%u^+7nXpg8S&Tg+EJCT@duxuo0h2+4y+;|?hg^Tg zktv?@vWTLJ0X%m_)c4$Tk?TLm-@N1Tq=}vo_I-#}7-kv$6j9vHA9YMA0^`VTEn!~} zY#sC?yR(F)XCm1REM)9&5p3yPZ6#6e`y74%-UVYJgY}R$U-S6T)b6g%Qa@}2Qt0_U z_x!};NmIL3CLxD;zA9ngqP9`;@FKGFCF}`@lx{#x*hu{<*wVTF^h9}kyT^O;LBDFG zbXqft`)c>at8FX6R|UQcCc}o=;$dkP_pKj?pOHP_Vg1OuJ2}GHE)PqwwRlLn!B!1+ zlDaZad3?N`55qY6T;%#=DB%55%E`Xqv0wB2a}S99iAODyhEZ)G+UJzzlT%3jlo`3h z*g1&mPqKgDIlU=3O|?-P{P2OkhqXdMx8P7b4z;lz>=1OuESQfga52`*=AjU?f)f8z zlBA>rU9ml^13KeYASpl})cCi;bY(T%j5o6eh*nzcDs-a01!JhYp@3R}Xrd1O))Nph z6?k@Yb3maTq9|ZiHn9j4Hd}Hc0=lt}dN3^Z5{#k#2nwjb#BhsELubO0J0kFIVjmlT L0zv~2HS7KbEcK>Y delta 1785 zcmZ`(eP|PB7{B)}jk%cIrEQWoH^u1`P0IdgTDK1EdUvwS2-{e7NQZN!AhUHFqwF?> zF{qQKC{`&8RlTApewwyYWY-X6K0uQo_{M()}p|79-?SDnW=qEDW)3y~x4v=ie< zzK~$j`OcAx9I^Gu>xD>@T6SlhZjM>{U$uMp zo`bjKCz2|U3^5G=&nBGZI)j_RocK(_7Vv`RrV1Xlt2ejU~8b0 z1WE@39}*G-61)=4W8LxWQGXynoSlw^5*DewRSK{NB-2&w>B{?8rYP?*h*}(6w zLpDs|U^oKbg(GAb`cV6kpjFWSr&>EjC}O_juz>a}FZUVdXU__H75#s?{_-+-g~euN z_TlmPU3b>5l3Is9!*LyEB9u^=SV@ibb()vBW-Hu8HYc#GAKp&&IcSVfW0$}NkoBkQ z0UOy~fn~dNc;@|H^>?5zN{#oI(Tu_u*x^waM0*OdQM%)sI;FqavVgM=`Bl0o;H>&S zUi}Ibqm=x@IwZfdPWl!Er8wiFfQ$XF;J+C5VtW5PBH%Fu{cWs+ZjBl(0?V>Rcvu@8 zMs~|GtPK+yCEH<8r^X9{hSl|d8@pivH~C5(>wl@Yp;4#gh-V96W1gdeQtae2R%ORY zffcKi1YC?dE#Ttze();Zf*Jke#y9w^Ov;{QX)t!5kQNwd+D`t0iKdS53BJj>2DsDo zq|&NLFcC_C)tn}27-%+$1>5u*PViC9EotjBUuMuv*= z3+v5-f;5OtU-BVSdeNuMpo|{Uo}8lx5hUo3{m$K4UPeE-=bZ2Necw6XId>*@FV+`g zQ||sq(0{C{>qDw*wd;4on!`rbxt2cE-?pVW#}b8fNHoQilM;oSegU%3(BtBcLNGcl zdi9;F?mM@rmzlO_VRl|9oFMo`t|w8t9c9?racaeV)jYJ-{nMOR+#SUbU()-^kI4^^ z_g`~%-f1c`W(3yxJ*^6E3`kc{qSFP(7+bdpHc_}U=H8r^?bz~McjKJ)-QcAzG za9qg56rD&E+=CN6_S&AqAm5CLHu*Bk|GnU;)Y&t6I@GoO-ex}o{(Y?810r?82)hV1v^IN7friKyNhp0mE(6ysh&DjPpQG%_Fm~Ollnl_ z>r_ulb=IrF#?soRleCSSsDgYT-&xhkSEI&;Tg+>2MrejUuZ`Kp0B4meP>rdL8f?XO z8e@D6B)Er)r{>SSg9EDl_=MOaMevW^EbvKuzfp@`u&@=h{=HVLE0#Xrvr^zrgbFRt zTgKW2MvM|WSjL_f_!;-YOYEm{+#KI>J-ow7a(%M6BwGIxNxquwl7;1Y?g+es=gGTP 
z#`*j3}GAU#H4^frg>T(g<5||@p6ITYB%QR1grWF?|wSzax*aw2`we*jR>LKqHg{|nzTFlzt+ delta 1412 zcmZ8hOKcNY6!m-d7*FDi$Dc5ECEBElL;_aji4aNDD#J??b=nQkRuu7fRdrFJevlU| z5HXmbSgK2+t8`a^yMt~*LItd9OI2iV+5l2j6&6euRU#J&M8!w_xc83Vq$XE-_xap& z?&n>NJUzHPIMdH{eRf;V%O8k+@9inx5yhu=Q}Lk{|CunKOf^MhZYlTf?2%U{&B{n4 zGFK6$@g+UCU8>||){(ndErq8~M`UP0Y4n+u1~1muH4{_K$Xu1x)#}0~`hD&K_NlJ= zdu}bT+beddi~5EVA(^mB(AAyAAG7yfn>_wF@|io_$C1C>`*CE_z0JzhT`Zm3Eebyc zWV&5)skJgD%MkX;JDXl?mUp|dsRubts%|pOXmpwNhB{pnrSMXr+NwFZUP$hudS6j% zqPT=z`A8#o!0X)FpewpqsooEas}DySr(gen3i(!>)$Bq=5&7S7sPW>X+bbd6t5Ntd zbWj`-M-Q*zG_)m=-?A?AcJ+Qpu{|L?WdqCyDgF)m>s+t^Jm-Ta&$t|$F$^_)+4ljp=AonSAh%u^W1-j5$0G7!$|EX8Jefu z(Sp9CAUX{5ghU?OCvjmzJS_1QC}YiQ8`yxvH{lkt^$qMHiGPM_r@?*&6aA{|ACR6s zhis>jV)U7y(IkF6-iuV|4I6K7X=CZV=kSpAwZ%ITQ#M(qElzscDw%*s*z27(ma^pX zn)|3%>Wem(a*tCY+DX_OHrUs21~vFmxaRHc%NF!~4_8t119f4)frH95siYlsyH-{K~RPxPH?e+NluzRUU$%Y&m^ zY!^7)ao7<}vl+-nEoQs;8sZz^Al}pBV`+8?vN4O*pp1UM8~qcbe?V{CV#mNq#32_? zvxiVd{0toQm!LOcvCmy?E|FG;-9HBW4q+fZ#7<(4rP?w!50^3a46@x8TZS_FY0R_O zFgV6m2t|`L-)4&s2jScJDBl%?6})^JylRZ%Q?iD|W}%G!3MN?Ue6aP@85l?o@r(u^ PCP%SjIZ0DF$*unZ3}9jl diff --git a/llvm/test/tools/llvm-cov/Inputs/mcdc-macro.proftext b/llvm/test/tools/llvm-cov/Inputs/mcdc-macro.proftext index 35ecc42b5802a6..44057ffd7d885e 100644 --- a/llvm/test/tools/llvm-cov/Inputs/mcdc-macro.proftext +++ b/llvm/test/tools/llvm-cov/Inputs/mcdc-macro.proftext @@ -27,21 +27,12 @@ func 0 0 # Num Bitmap Bytes: -$13 +$4 # Bitmap Byte Values: 0x0 +0x9 0x0 -0x0 -0x20 -0x8 -0x0 -0x20 -0x0 -0x0 -0x0 -0x0 -0x0 -0x0 +0x1 func1 diff --git a/llvm/test/tools/llvm-cov/Inputs/mcdc-maxbs.o b/llvm/test/tools/llvm-cov/Inputs/mcdc-maxbs.o index bc993b19e84733fdf7670a5aec58219519837ec2..21f9d3a17dad0d1531e38ec1fbffb7dca3085535 100644 GIT binary patch delta 404 zcmbQBFhOBLc)h>Je^v${Xy65rEKuGKMj+=fM2yk!(oSX|*P|D#(BnTt=Y`Hw2VXHa 
zJY;v+#^`YAnMW^Jvjc|-7gP^Z^MA*p<50tPfedT@A6?A1lM`Xi>un%uFQ9!gyqJEAwSW!6P7*K(G~vg&3F^j!e#EJ;mPLziTslf_WE!#7A=Mee{23WFf2JB9DElSQPzJ*ThbJpAMl&)_u3-#fWY~P3QIL_5dGbT1#f(Qcw=!R5 z6g&!21_WDyScrj%;ppT{*7J<^lQ*)7Ge%Di1d{2KKeCE5mP~GB6PN4)t7QPHV_;wh zVjU=dGDyATDl}OWD1ZOthd|jANU}UY_MiU{aAmR}yEx;`$(ro*fsQ)K{+yBV!RC#e N|A7wOypcPe1pps?Vvqm; diff --git a/llvm/test/tools/llvm-cov/mcdc-general-18.test b/llvm/test/tools/llvm-cov/mcdc-general-18.test new file mode 100644 index 00000000000000..8707238c4cdcbe --- /dev/null +++ b/llvm/test/tools/llvm-cov/mcdc-general-18.test @@ -0,0 +1,20 @@ +// Test Version11 (clang-18) files. +// mcdc-general.test is used as test patterns. + +// RUN: llvm-cov show --show-mcdc %S/Inputs/mcdc-general-18.o -instr-profile %S/Inputs/mcdc-general-18.profdata -path-equivalence=.,%S/Inputs | FileCheck %S/mcdc-general.test +// RUN: llvm-cov report --show-mcdc-summary %S/Inputs/mcdc-general-18.o -instr-profile %S/Inputs/mcdc-general-18.profdata -show-functions -path-equivalence=.,%S/Inputs %S/Inputs/mcdc-general.cpp | FileCheck %S/mcdc-general.test -check-prefix=REPORT + +// Turn off MC/DC visualization. +// RUN: llvm-cov show %S/Inputs/mcdc-general-18.o -instr-profile %S/Inputs/mcdc-general-18.profdata -path-equivalence=.,%S/Inputs | FileCheck %S/mcdc-general.test -check-prefix=NOMCDC + +// Turn off MC/DC summary. +// RUN: llvm-cov report %S/Inputs/mcdc-general-18.o -instr-profile %S/Inputs/mcdc-general-18.profdata -show-functions -path-equivalence=.,%S/Inputs %S/Inputs/mcdc-general.cpp | FileCheck %S/mcdc-general.test -check-prefix=REPORT_NOMCDC + +// Test file-level report. +// RUN: llvm-cov report --show-mcdc-summary %S/Inputs/mcdc-general-18.o -instr-profile %S/Inputs/mcdc-general-18.profdata -path-equivalence=.,%S/Inputs %S/Inputs/mcdc-general.cpp | FileCheck %S/mcdc-general.test -check-prefix=FILEREPORT + +// Test html output. 
+// RUN: rm -rf %t.html.dir +// RUN: llvm-cov show --show-mcdc-summary --show-mcdc %S/Inputs/mcdc-general-18.o -instr-profile %S/Inputs/mcdc-general-18.profdata -path-equivalence=.,%S/Inputs -format html -o %t.html.dir +// RUN: FileCheck -check-prefix=HTML -input-file=%t.html.dir/coverage/mcdc-general.cpp.html %S/mcdc-general.test +// RUN: FileCheck -check-prefix HTML-INDEX -input-file %t.html.dir/index.html %S/mcdc-general.test diff --git a/llvm/unittests/ProfileData/CoverageMappingTest.cpp b/llvm/unittests/ProfileData/CoverageMappingTest.cpp index f063a33205b30a..ef147674591c51 100644 --- a/llvm/unittests/ProfileData/CoverageMappingTest.cpp +++ b/llvm/unittests/ProfileData/CoverageMappingTest.cpp @@ -872,7 +872,7 @@ TEST_P(CoverageMappingTest, non_code_region_bitmask) { addCMR(Counter::getCounter(2), "file", 1, 1, 5, 5); addCMR(Counter::getCounter(3), "file", 1, 1, 5, 5); - addMCDCDecisionCMR(0, 2, "file", 7, 1, 7, 6); + addMCDCDecisionCMR(3, 2, "file", 7, 1, 7, 6); addMCDCBranchCMR(Counter::getCounter(0), Counter::getCounter(1), 0, {-1, 1}, "file", 7, 2, 7, 3); addMCDCBranchCMR(Counter::getCounter(2), Counter::getCounter(3), 1, {-1, -1}, @@ -895,7 +895,7 @@ TEST_P(CoverageMappingTest, decision_before_expansion) { addCMR(Counter::getCounter(0), "foo", 3, 23, 5, 2); // This(4:11) was put after Expansion(4:11) before the fix - addMCDCDecisionCMR(0, 2, "foo", 4, 11, 4, 20); + addMCDCDecisionCMR(3, 2, "foo", 4, 11, 4, 20); addExpansionCMR("foo", "A", 4, 11, 4, 12); addExpansionCMR("foo", "B", 4, 19, 4, 20); From b650764b16b5c8790325775ac5f87f0b1c0beca7 Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Fri, 14 Jun 2024 11:49:12 +0100 Subject: [PATCH 081/155] [lldb][test] Add test for completing ObjCObjectType (#95405) This is a minimal reproducer for a crash where we would try to call `DumpTypeDescription` on an incomplete type. 
This crash surfaced as part of an NFC refactor of some of the logic in `GetCompleteQualType`: ``` (lldb) expr -l objc -- *(id)0x1234 Stack dump: 0. Program arguments: ./bin/lldb a.out -o "b main" -o run -o "expr -l objc -- *(id)0x1234" Stack dump without symbol names (ensure you have llvm-symbolizer in your PATH or set the environment var LLVM_SYMBOLIZER_PATH to point to it): 0 lldb 0x0000000102ec768c llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) + 56 1 lldb 0x0000000102ec6010 llvm::sys::RunSignalHandlers() + 112 2 lldb 0x0000000102ec7fa8 SignalHandler(int) + 292 3 libsystem_platform.dylib 0x000000018c7a8c44 _sigtramp + 56 4 LLDB 0x0000000116b2030c lldb_private::TypeSystemClang::DumpTypeDescription(void*, lldb_private::Stream&, lldb::DescriptionLevel, lldb_private::ExecutionContextScope*) + 588 5 LLDB 0x00000001166b5124 lldb_private::CompilerType::DumpTypeDescription(lldb_private::Stream*, lldb::DescriptionLevel, lldb_private::ExecutionContextScope*) const + 228 6 LLDB 0x0000000116d4f08c IRForTarget::CreateResultVariable(llvm::Function&) + 2076 ``` rdar://129633122 --- lldb/test/Shell/Expr/Inputs/objc-cast.cpp | 1 + lldb/test/Shell/Expr/TestObjCIDCast.test | 9 +++++++++ 2 files changed, 10 insertions(+) create mode 100644 lldb/test/Shell/Expr/Inputs/objc-cast.cpp create mode 100644 lldb/test/Shell/Expr/TestObjCIDCast.test diff --git a/lldb/test/Shell/Expr/Inputs/objc-cast.cpp b/lldb/test/Shell/Expr/Inputs/objc-cast.cpp new file mode 100644 index 00000000000000..76e8197013aabc --- /dev/null +++ b/lldb/test/Shell/Expr/Inputs/objc-cast.cpp @@ -0,0 +1 @@ +int main() { return 0; } diff --git a/lldb/test/Shell/Expr/TestObjCIDCast.test b/lldb/test/Shell/Expr/TestObjCIDCast.test new file mode 100644 index 00000000000000..0611171da09e2e --- /dev/null +++ b/lldb/test/Shell/Expr/TestObjCIDCast.test @@ -0,0 +1,9 @@ +// UNSUPPORTED: system-linux, system-windows +// +// RUN: %clangxx_host %p/Inputs/objc-cast.cpp -g -o %t +// RUN: %lldb %t \ +// RUN: -o "b main" -o run -o 
"expression --language objc -- *(id)0x1" \ +// RUN: 2>&1 | FileCheck %s + +// CHECK: (lldb) expression --language objc -- *(id)0x1 +// CHECK: error: Couldn't apply expression side effects : Couldn't dematerialize a result variable: couldn't read its memory From 90fd99c0795711e1cf762a02b29b0a702f86a264 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 14 Jun 2024 12:28:51 +0100 Subject: [PATCH 082/155] Recommit "[VPlan] First step towards VPlan cost modeling. (#92555)" This reverts commit 46080abe9b136821eda2a1a27d8a13ceac349f8c. Extra tests have been added in 52d29eb287. Original message: This adds a new interface to compute the cost of recipes, VPBasicBlocks, VPRegionBlocks and VPlan, initially falling back to the legacy cost model for all recipes. Follow-up patches will gradually migrate recipes to compute their own costs step-by-step. It also adds getBestPlan function to LVP which computes the cost of all VPlans and picks the most profitable one together with the most profitable VF. The VPlan selected by the VPlan cost model is executed and there is an assert to catch cases where the VPlan cost model and the legacy cost model disagree. Even though I checked a number of different build configurations on AArch64 and X86, there may be some differences that have been missed. Additional discussions and context can be found in @arcbbb's https://github.com/llvm/llvm-project/pull/67647 and https://github.com/llvm/llvm-project/pull/67934 which is an earlier version of the current PR. 
PR: https://github.com/llvm/llvm-project/pull/92555 --- .../Vectorize/LoopVectorizationPlanner.h | 17 +- .../Transforms/Vectorize/LoopVectorize.cpp | 222 ++++++++++++++++-- llvm/lib/Transforms/Vectorize/VPlan.cpp | 86 +++++++ llvm/lib/Transforms/Vectorize/VPlan.h | 71 +++++- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 35 +++ .../Transforms/Vectorize/VPlanTransforms.cpp | 5 + llvm/lib/Transforms/Vectorize/VPlanValue.h | 3 +- .../RISCV/riscv-vector-reverse.ll | 2 + 8 files changed, 414 insertions(+), 27 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index c03c278fcebe78..6011e160762202 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -344,6 +344,16 @@ class LoopVectorizationPlanner { /// A builder used to construct the current plan. VPBuilder Builder; + /// Computes the cost of \p Plan for vectorization factor \p VF. + /// + /// The current implementation requires access to the + /// LoopVectorizationLegality to handle inductions and reductions, which is + /// why it is kept separate from the VPlan-only cost infrastructure. + /// + /// TODO: Move to VPlan::cost once the use of LoopVectorizationLegality has + /// been retired. + InstructionCost cost(VPlan &Plan, ElementCount VF) const; + public: LoopVectorizationPlanner( Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, @@ -365,6 +375,9 @@ class LoopVectorizationPlanner { /// Return the best VPlan for \p VF. VPlan &getBestPlanFor(ElementCount VF) const; + /// Return the most profitable plan and fix its VF to the most profitable one. + VPlan &getBestPlan() const; + /// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan /// according to the best selected \p VF and \p UF. 
/// @@ -443,7 +456,9 @@ class LoopVectorizationPlanner { ElementCount MinVF); /// \return The most profitable vectorization factor and the cost of that VF. - /// This method checks every VF in \p CandidateVFs. + /// This method checks every VF in \p CandidateVFs. This is now only used to + /// verify the decisions by the new VPlan-based cost-model and will be retired + /// once the VPlan-based cost-model is stabilized. VectorizationFactor selectVectorizationFactor(const ElementCountSet &CandidateVFs); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 37b8023e1fcf2f..5b652068a7ba9a 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -290,7 +290,7 @@ static cl::opt ForceTargetMaxVectorInterleaveFactor( cl::desc("A flag that overrides the target's max interleave factor for " "vectorized loops.")); -static cl::opt ForceTargetInstructionCost( +cl::opt ForceTargetInstructionCost( "force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for " "an instruction to a single constant value. Mostly " @@ -412,14 +412,6 @@ static bool hasIrregularType(Type *Ty, const DataLayout &DL) { return DL.getTypeAllocSizeInBits(Ty) != DL.getTypeSizeInBits(Ty); } -/// A helper function that returns the reciprocal of the block probability of -/// predicated blocks. If we return X, we are assuming the predicated block -/// will execute once for every X iterations of the loop header. -/// -/// TODO: We should use actual block probability here, if available. Currently, -/// we always assume predicated blocks have a 50% chance of executing. -static unsigned getReciprocalPredBlockProb() { return 2; } - /// Returns "best known" trip count for the specified loop \p L as defined by /// the following procedure: /// 1) Returns exact trip count if it is known. 
@@ -1621,6 +1613,16 @@ class LoopVectorizationCostModel { /// \p VF is the vectorization factor chosen for the original loop. bool isEpilogueVectorizationProfitable(const ElementCount VF) const; + /// Return the cost of instructions in an inloop reduction pattern, if I is + /// part of that pattern. + std::optional + getReductionPatternCost(Instruction *I, ElementCount VF, Type *VectorTy, + TTI::TargetCostKind CostKind) const; + + /// Returns the execution time cost of an instruction for a given vector + /// width. Vector width of one means scalar. + VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF); + private: unsigned NumPredStores = 0; @@ -1646,21 +1648,11 @@ class LoopVectorizationCostModel { /// of elements. ElementCount getMaxLegalScalableVF(unsigned MaxSafeElements); - /// Returns the execution time cost of an instruction for a given vector - /// width. Vector width of one means scalar. - VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF); - /// The cost-computation logic from getInstructionCost which provides /// the vector type as an output parameter. InstructionCost getInstructionCost(Instruction *I, ElementCount VF, Type *&VectorTy); - /// Return the cost of instructions in an inloop reduction pattern, if I is - /// part of that pattern. - std::optional - getReductionPatternCost(Instruction *I, ElementCount VF, Type *VectorTy, - TTI::TargetCostKind CostKind) const; - /// Calculate vectorization cost of memory instruction \p I. InstructionCost getMemoryInstructionCost(Instruction *I, ElementCount VF); @@ -7288,7 +7280,10 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) { if (!MaxFactors.hasVector()) return VectorizationFactor::Disabled(); - // Select the optimal vectorization factor. + // Select the optimal vectorization factor according to the legacy cost-model. 
+ // This is now only used to verify the decisions by the new VPlan-based + // cost-model and will be retired once the VPlan-based cost-model is + // stabilized. VectorizationFactor VF = selectVectorizationFactor(VFCandidates); assert((VF.Width.isScalar() || VF.ScalarCost > 0) && "when vectorizing, the scalar cost must be non-zero."); if (!hasPlanWithVF(VF.Width)) { @@ -7299,6 +7294,182 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) { return VF; } +InstructionCost VPCostContext::getLegacyCost(Instruction *UI, + ElementCount VF) const { + return CM.getInstructionCost(UI, VF).first; +} + +bool VPCostContext::skipCostComputation(Instruction *UI, bool IsVector) const { + return (IsVector && CM.VecValuesToIgnore.contains(UI)) || + SkipCostComputation.contains(UI); +} + +InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan, + ElementCount VF) const { + InstructionCost Cost = 0; + LLVMContext &LLVMCtx = OrigLoop->getHeader()->getContext(); + VPCostContext CostCtx(CM.TTI, Legal->getWidestInductionType(), LLVMCtx, CM); + + // Cost modeling for inductions is inaccurate in the legacy cost model + // compared to the recipes that are generated. To match here initially during + // VPlan cost model bring up directly use the induction costs from the legacy + // cost model. Note that we do this as pre-processing; the VPlan may not have + // any recipes associated with the original induction increment instruction + // and may replace truncates with VPWidenIntOrFpInductionRecipe. We precompute + // the cost of both induction increment instructions that are represented by + // recipes and those that are not, to avoid distinguishing between them here, + // and skip all recipes that represent induction increments (the former case) + // later on, if they exist, to avoid counting them twice. Similarly we + // pre-compute the cost of any optimized truncates. + // TODO: Switch to more accurate costing based on VPlan. 
+ for (const auto &[IV, IndDesc] : Legal->getInductionVars()) { + Instruction *IVInc = cast( + IV->getIncomingValueForBlock(OrigLoop->getLoopLatch())); + if (CostCtx.SkipCostComputation.insert(IVInc).second) { + InstructionCost InductionCost = CostCtx.getLegacyCost(IVInc, VF); + LLVM_DEBUG({ + dbgs() << "Cost of " << InductionCost << " for VF " << VF + << ":\n induction increment " << *IVInc << "\n"; + IVInc->dump(); + }); + Cost += InductionCost; + } + for (User *U : IV->users()) { + auto *CI = cast(U); + if (!CostCtx.CM.isOptimizableIVTruncate(CI, VF)) + continue; + assert(!CostCtx.SkipCostComputation.contains(CI) && + "Same cast for multiple inductions?"); + CostCtx.SkipCostComputation.insert(CI); + InstructionCost CastCost = CostCtx.getLegacyCost(CI, VF); + LLVM_DEBUG({ + dbgs() << "Cost of " << CastCost << " for VF " << VF + << ":\n induction cast " << *CI << "\n"; + CI->dump(); + }); + Cost += CastCost; + } + } + + /// Compute the cost of all exiting conditions of the loop using the legacy + /// cost model. This is to match the legacy behavior, which adds the cost of + /// all exit conditions. Note that this over-estimates the cost, as there will + /// be a single condition to control the vector loop. + SmallVector Exiting; + CM.TheLoop->getExitingBlocks(Exiting); + // Add the cost of all exit conditions. + for (BasicBlock *EB : Exiting) { + auto *Term = dyn_cast(EB->getTerminator()); + if (!Term) + continue; + if (auto *CondI = dyn_cast(Term->getOperand(0))) { + assert(!CostCtx.SkipCostComputation.contains(CondI) && + "Condition already skipped?"); + CostCtx.SkipCostComputation.insert(CondI); + Cost += CostCtx.getLegacyCost(CondI, VF); + } + } + + // The legacy cost model has special logic to compute the cost of in-loop + // reductions, which may be smaller than the sum of all instructions involved + // in the reduction. For AnyOf reductions, VPlan codegen may remove the select + // which the legacy cost model uses to assign cost. 
Pre-compute their costs + // for now. + // TODO: Switch to costing based on VPlan once the logic has been ported. + for (const auto &[RedPhi, RdxDesc] : Legal->getReductionVars()) { + if (!CM.isInLoopReduction(RedPhi) && + !RecurrenceDescriptor::isAnyOfRecurrenceKind( + RdxDesc.getRecurrenceKind())) + continue; + + // AnyOf reduction codegen may remove the select. To match the legacy cost + // model, pre-compute the cost for AnyOf reductions here. + if (RecurrenceDescriptor::isAnyOfRecurrenceKind( + RdxDesc.getRecurrenceKind())) { + auto *Select = cast(*find_if( + RedPhi->users(), [](User *U) { return isa(U); })); + assert(!CostCtx.SkipCostComputation.contains(Select) && + "reduction op visited multiple times"); + CostCtx.SkipCostComputation.insert(Select); + auto ReductionCost = CostCtx.getLegacyCost(Select, VF); + LLVM_DEBUG(dbgs() << "Cost of " << ReductionCost << " for VF " << VF + << ":\n any-of reduction " << *Select << "\n"); + Cost += ReductionCost; + continue; + } + + const auto &ChainOps = RdxDesc.getReductionOpChain(RedPhi, OrigLoop); + SetVector ChainOpsAndOperands(ChainOps.begin(), + ChainOps.end()); + // Also include the operands of instructions in the chain, as the cost-model + // may mark extends as free. + for (auto *ChainOp : ChainOps) { + for (Value *Op : ChainOp->operands()) { + if (auto *I = dyn_cast(Op)) + ChainOpsAndOperands.insert(I); + } + } + + // Pre-compute the cost for I, if it has a reduction pattern cost. + for (Instruction *I : ChainOpsAndOperands) { + auto ReductionCost = CM.getReductionPatternCost( + I, VF, ToVectorTy(I->getType(), VF), TTI::TCK_RecipThroughput); + if (!ReductionCost) + continue; + + assert(!CostCtx.SkipCostComputation.contains(I) && + "reduction op visited multiple times"); + CostCtx.SkipCostComputation.insert(I); + LLVM_DEBUG(dbgs() << "Cost of " << ReductionCost << " for VF " << VF + << ":\n in-loop reduction " << *I << "\n"); + Cost += *ReductionCost; + } + } + + // Now compute and add the VPlan-based cost. 
+ Cost += Plan.cost(VF, CostCtx); + LLVM_DEBUG(dbgs() << "Cost for VF " << VF << ": " << Cost << "\n"); + return Cost; +} + +VPlan &LoopVectorizationPlanner::getBestPlan() const { + // If there is a single VPlan with a single VF, return it directly. + VPlan &FirstPlan = *VPlans[0]; + if (VPlans.size() == 1 && size(FirstPlan.vectorFactors()) == 1) + return FirstPlan; + + VPlan *BestPlan = &FirstPlan; + ElementCount ScalarVF = ElementCount::getFixed(1); + assert(hasPlanWithVF(ScalarVF) && + "More than a single plan/VF w/o any plan having scalar VF"); + + InstructionCost ScalarCost = cost(getBestPlanFor(ScalarVF), ScalarVF); + VectorizationFactor BestFactor(ScalarVF, ScalarCost, ScalarCost); + + bool ForceVectorization = Hints.getForce() == LoopVectorizeHints::FK_Enabled; + if (ForceVectorization) { + // Ignore scalar width, because the user explicitly wants vectorization. + // Initialize cost to max so that VF = 2 is, at least, chosen during cost + // evaluation. + BestFactor.Cost = InstructionCost::getMax(); + } + + for (auto &P : VPlans) { + for (ElementCount VF : P->vectorFactors()) { + if (VF.isScalar()) + continue; + InstructionCost Cost = cost(*P, VF); + VectorizationFactor CurrentFactor(VF, Cost, ScalarCost); + if (isMoreProfitable(CurrentFactor, BestFactor)) { + BestFactor = CurrentFactor; + BestPlan = &*P; + } + } + } + BestPlan->setVF(BestFactor.Width); + return *BestPlan; +} + VPlan &LoopVectorizationPlanner::getBestPlanFor(ElementCount VF) const { assert(count_if(VPlans, [VF](const VPlanPtr &Plan) { return Plan->hasVF(VF); }) == @@ -10157,8 +10328,15 @@ bool LoopVectorizePass::processLoop(Loop *L) { VF.MinProfitableTripCount, IC, &LVL, &CM, BFI, PSI, Checks); - VPlan &BestPlan = LVP.getBestPlanFor(VF.Width); - LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false); + VPlan &BestPlan = LVP.getBestPlan(); + assert(size(BestPlan.vectorFactors()) == 1 && + "Plan should have a single VF"); + ElementCount Width = *BestPlan.vectorFactors().begin(); + 
LLVM_DEBUG(dbgs() << "VF picked by VPlan cost model: " << Width + << "\n"); + assert(VF.Width == Width && + "VPlan cost model and legacy cost model disagreed"); + LVP.executePlan(Width, IC, BestPlan, LB, DT, false); ++LoopsVectorized; // Add metadata to disable runtime unrolling a scalar loop when there diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index f17be451e6846a..ad6a7183208300 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -747,6 +747,64 @@ void VPRegionBlock::execute(VPTransformState *State) { State->Instance.reset(); } +InstructionCost VPBasicBlock::cost(ElementCount VF, VPCostContext &Ctx) { + InstructionCost Cost = 0; + for (VPRecipeBase &R : Recipes) + Cost += R.cost(VF, Ctx); + return Cost; +} + +InstructionCost VPRegionBlock::cost(ElementCount VF, VPCostContext &Ctx) { + if (!isReplicator()) { + InstructionCost Cost = 0; + for (VPBlockBase *Block : vp_depth_first_shallow(getEntry())) + Cost += Block->cost(VF, Ctx); + return Cost; + } + + // Compute the cost of a replicate region. Replicating isn't supported for + // scalable vectors, return an invalid cost for them. + // TODO: Discard scalable VPlans with replicate recipes earlier after + // construction. + if (VF.isScalable()) + return InstructionCost::getInvalid(); + + // First compute the cost of the conditionally executed recipes, followed by + // account for the branching cost, except if the mask is a header mask or + // uniform condition. + using namespace llvm::VPlanPatternMatch; + VPBasicBlock *Then = cast(getEntry()->getSuccessors()[0]); + InstructionCost ThenCost = Then->cost(VF, Ctx); + + // Note the cost estimates below closely match the current legacy cost model. + auto *BOM = cast(&getEntryBasicBlock()->front()); + VPValue *Cond = BOM->getOperand(0); + + // Check if Cond is a uniform compare or a header mask and don't account for + // branching costs. 
A uniform condition corresponding to a single branch per + // VF, and the header mask will always be true except in the last iteration. + if (vputils::isUniformBoolean(Cond) || + vputils::isHeaderMask(Cond, *getPlan())) + return ThenCost; + + // For the scalar case, we may not always execute the original predicated + // block, Thus, scale the block's cost by the probability of executing it. + if (VF.isScalar()) + return ThenCost / getReciprocalPredBlockProb(); + + // Add the cost for branches around scalarized and predicated blocks. + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; + + auto *Vec_i1Ty = VectorType::get(IntegerType::getInt1Ty(Ctx.LLVMCtx), VF); + auto FixedVF = VF.getFixedValue(); // Known to be non scalable. + InstructionCost Cost = ThenCost; + Cost += Ctx.TTI.getScalarizationOverhead(Vec_i1Ty, APInt::getAllOnes(FixedVF), + /*Insert*/ false, /*Extract*/ true, + CostKind); + Cost += Ctx.TTI.getCFInstrCost(Instruction::Br, CostKind) * FixedVF; + return Cost; +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPRegionBlock::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { @@ -920,6 +978,12 @@ void VPlan::execute(VPTransformState *State) { "DT not preserved correctly"); } +InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) { + // For now only return the cost of the vector loop region, ignoring any other + // blocks, like the preheader or middle blocks. 
+ return getVectorLoopRegion()->cost(VF, Ctx); +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPlan::printLiveIns(raw_ostream &O) const { VPSlotTracker SlotTracker(this); @@ -1454,3 +1518,25 @@ VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr, Plan.addSCEVExpansion(Expr, Expanded); return Expanded; } + +bool vputils::isUniformBoolean(VPValue *Cond) { + if (match(Cond, m_Not(m_VPValue()))) + Cond = Cond->getDefiningRecipe()->getOperand(0); + auto *R = Cond->getDefiningRecipe(); + if (!R) + return true; + // TODO: match additional patterns preserving uniformity of booleans, e.g., + // AND/OR/etc. + return match(R, m_Binary(m_VPValue(), m_VPValue())) && + all_of(R->operands(), [](VPValue *Op) { + return vputils::isUniformAfterVectorization(Op); + }); +} + +bool vputils::isHeaderMask(VPValue *V, VPlan &Plan) { + VPValue *Op; + return isa(V) || + match(V, m_ActiveLaneMask(m_VPValue(), m_VPValue())) || + (match(V, m_Binary(m_VPValue(), m_VPValue(Op))) && + Op == Plan.getOrCreateBackedgeTakenCount()); +} diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 5bb88e4a57dc37..4c0972e517263c 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -42,6 +42,7 @@ #include "llvm/IR/DebugLoc.h" #include "llvm/IR/FMF.h" #include "llvm/IR/Operator.h" +#include "llvm/Support/InstructionCost.h" #include #include #include @@ -64,8 +65,11 @@ class VPlan; class VPReplicateRecipe; class VPlanSlp; class Value; +class LoopVectorizationCostModel; class LoopVersioning; +struct VPCostContext; + namespace Intrinsic { typedef unsigned ID; } @@ -82,6 +86,14 @@ Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE, Loop *CurLoop = nullptr); +/// A helper function that returns the reciprocal of the block probability of +/// predicated blocks. 
If we return X, we are assuming the predicated block +/// will execute once for every X iterations of the loop header. +/// +/// TODO: We should use actual block probability here, if available. Currently, +/// we always assume predicated blocks have a 50% chance of executing. +inline unsigned getReciprocalPredBlockProb() { return 2; } + /// A range of powers-of-2 vectorization factors with fixed start and /// adjustable end. The range includes start and excludes end, e.g.,: /// [1, 16) = {1, 2, 4, 8} @@ -624,6 +636,9 @@ class VPBlockBase { /// VPBlockBase, thereby "executing" the VPlan. virtual void execute(VPTransformState *State) = 0; + /// Return the cost of the block. + virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx) = 0; + /// Delete all blocks reachable from a given VPBlockBase, inclusive. static void deleteCFG(VPBlockBase *Entry); @@ -707,6 +722,27 @@ class VPLiveOut : public VPUser { #endif }; +/// Struct to hold various analysis needed for cost computations. +struct VPCostContext { + const TargetTransformInfo &TTI; + VPTypeAnalysis Types; + LLVMContext &LLVMCtx; + LoopVectorizationCostModel &CM; + SmallPtrSet SkipCostComputation; + + VPCostContext(const TargetTransformInfo &TTI, Type *CanIVTy, + LLVMContext &LLVMCtx, LoopVectorizationCostModel &CM) + : TTI(TTI), Types(CanIVTy, LLVMCtx), LLVMCtx(LLVMCtx), CM(CM) {} + + /// Return the cost for \p UI with \p VF using the legacy cost model as + /// fallback until computing the cost of all recipes migrates to VPlan. + InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const; + + /// Return true if the cost for \p UI shouldn't be computed, e.g. because it + /// has already been pre-computed. + bool skipCostComputation(Instruction *UI, bool IsVector) const; +}; + /// VPRecipeBase is a base class modeling a sequence of one or more output IR /// instructions. VPRecipeBase owns the VPValues it defines through VPDef /// and is responsible for deleting its defined values. 
Single-value @@ -746,6 +782,11 @@ class VPRecipeBase : public ilist_node_with_parent, /// this VPRecipe, thereby "executing" the VPlan. virtual void execute(VPTransformState &State) = 0; + /// Return the cost of this recipe, taking into account if the cost + /// computation should be skipped and the ForceTargetInstructionCost flag. + /// Also takes care of printing the cost for debugging. + virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx); + /// Insert an unlinked recipe into a basic block immediately before /// the specified recipe. void insertBefore(VPRecipeBase *InsertPos); @@ -806,6 +847,11 @@ class VPRecipeBase : public ilist_node_with_parent, /// Returns the debug location of the recipe. DebugLoc getDebugLoc() const { return DL; } + +protected: + /// Compute the cost of this recipe using the legacy cost model and the + /// underlying instructions. + InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const; }; // Helper macro to define common classof implementations for recipes. @@ -1381,8 +1427,6 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags { ResultTy(ResultTy) { assert(UI.getOpcode() == Opcode && "opcode of underlying cast doesn't match"); - assert(UI.getType() == ResultTy && - "result type of underlying cast doesn't match"); } VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy) @@ -2096,6 +2140,8 @@ class VPInterleaveRecipe : public VPRecipeBase { "Op must be an operand of the recipe"); return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op); } + + Instruction *getInsertPos() const { return IG->getInsertPos(); } }; /// A recipe to represent inloop reduction operations, performing a reduction on @@ -2910,6 +2956,9 @@ class VPBasicBlock : public VPBlockBase { /// this VPBasicBlock, thereby "executing" the VPlan. void execute(VPTransformState *State) override; + /// Return the cost of this VPBasicBlock. 
+ InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override; + /// Return the position of the first non-phi node recipe in the block. iterator getFirstNonPhi(); @@ -3084,6 +3133,9 @@ class VPRegionBlock : public VPBlockBase { /// this VPRegionBlock, thereby "executing" the VPlan. void execute(VPTransformState *State) override; + // Return the cost of this region. + InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override; + void dropAllReferences(VPValue *NewValue) override; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -3203,6 +3255,9 @@ class VPlan { /// Generate the IR code for this VPlan. void execute(VPTransformState *State); + /// Return the cost of this plan. + InstructionCost cost(ElementCount VF, VPCostContext &Ctx); + VPBasicBlock *getEntry() { return Entry; } const VPBasicBlock *getEntry() const { return Entry; } @@ -3246,6 +3301,11 @@ class VPlan { return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); }); } + iterator_range::iterator> + vectorFactors() const { + return {VFs.begin(), VFs.end()}; + } + bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); } bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); } @@ -3665,6 +3725,13 @@ inline bool isUniformAfterVectorization(VPValue *VPV) { return VPI->isVectorToScalar(); return false; } + +/// Return true if \p Cond is a uniform boolean. +bool isUniformBoolean(VPValue *Cond); + +/// Return true if \p V is a header mask in \p Plan. 
+bool isHeaderMask(VPValue *V, VPlan &Plan); + } // end namespace vputils } // end namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 7a482455473e40..b491ea5a18b543 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -39,6 +39,7 @@ using VectorParts = SmallVector; namespace llvm { extern cl::opt EnableVPlanNativePath; } +extern cl::opt ForceTargetInstructionCost; #define LV_NAME "loop-vectorize" #define DEBUG_TYPE LV_NAME @@ -255,6 +256,40 @@ void VPRecipeBase::moveBefore(VPBasicBlock &BB, insertBefore(BB, I); } +InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) { + if (auto *S = dyn_cast(this)) { + auto *UI = dyn_cast_or_null(S->getUnderlyingValue()); + if (UI && Ctx.skipCostComputation(UI, VF.isVector())) + return 0; + } + + InstructionCost RecipeCost = computeCost(VF, Ctx); + if (ForceTargetInstructionCost.getNumOccurrences() > 0 && + RecipeCost.isValid()) + RecipeCost = InstructionCost(ForceTargetInstructionCost); + + LLVM_DEBUG({ + dbgs() << "Cost of " << RecipeCost << " for VF " << VF << ": "; + dump(); + }); + return RecipeCost; +} + +InstructionCost VPRecipeBase::computeCost(ElementCount VF, + VPCostContext &Ctx) const { + // Compute the cost for the recipe falling back to the legacy cost model using + // the underlying instruction. If there is no underlying instruction, returns + // 0. + Instruction *UI = nullptr; + if (auto *S = dyn_cast(this)) + UI = dyn_cast_or_null(S->getUnderlyingValue()); + else if (auto *IG = dyn_cast(this)) + UI = IG->getInsertPos(); + else if (auto *WidenMem = dyn_cast(this)) + UI = &WidenMem->getIngredient(); + return UI ? 
Ctx.getLegacyCost(UI, VF) : 0; +} + FastMathFlags VPRecipeWithIRFlags::getFastMathFlags() const { assert(OpType == OperationType::FPMathOp && "recipe doesn't have fast math flags"); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 8ec67eb2f54bda..082a442bf399d2 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -999,6 +999,10 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { : Instruction::ZExt; auto *VPC = new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy); + if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) { + // UnderlyingExt has distinct return type, used to retain legacy cost. + VPC->setUnderlyingValue(UnderlyingExt); + } VPC->insertBefore(&R); Trunc->replaceAllUsesWith(VPC); } else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) { @@ -1518,6 +1522,7 @@ void VPlanTransforms::dropPoisonGeneratingRecipes( VPInstruction *New = Builder.createOverflowingOp( Instruction::Add, {A, B}, {false, false}, RecWithFlags->getDebugLoc()); + New->setUnderlyingValue(RecWithFlags->getUnderlyingValue()); RecWithFlags->replaceAllUsesWith(New); RecWithFlags->eraseFromParent(); CurRec = New; diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 8d945f6f2b8ea8..fa6a65ff2f3ada 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -74,8 +74,7 @@ class VPValue { public: /// Return the underlying Value attached to this VPValue. - Value *getUnderlyingValue() { return UnderlyingVal; } - const Value *getUnderlyingValue() const { return UnderlyingVal; } + Value *getUnderlyingValue() const { return UnderlyingVal; } /// An enumeration for keeping track of the concrete subclass of VPValue that /// are actually instantiated. 
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll index b5aa96eb23f5e5..41879f3ebef5a5 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -119,6 +119,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: LV: Interleaving is not beneficial. ; CHECK-NEXT: LV: Found a vectorizable loop (vscale x 4) in ; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop +; CHECK-NEXT: VF picked by VPlan cost model: vscale x 4 ; CHECK-NEXT: Executing best plan with VF=vscale x 4, UF=1 ; CHECK: LV: Interleaving disabled by the pass manager ; CHECK-NEXT: LV: Vectorizing: innermost loop. @@ -260,6 +261,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: LV: Interleaving is not beneficial. ; CHECK-NEXT: LV: Found a vectorizable loop (vscale x 4) in ; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop +; CHECK-NEXT: VF picked by VPlan cost model: vscale x 4 ; CHECK-NEXT: Executing best plan with VF=vscale x 4, UF=1 ; CHECK: LV: Interleaving disabled by the pass manager ; CHECK-NEXT: LV: Vectorizing: innermost loop. From ad702e057cf7fc1ffdc0f78f563b416170ea7d57 Mon Sep 17 00:00:00 2001 From: Harald van Dijk Date: Fri, 14 Jun 2024 12:56:11 +0100 Subject: [PATCH 083/155] [AArch64] Extend SVE diagnostics. (#94976) The SVE diagnostics were guarded by a FD->hasBody() check that prevented the diagnostic from being emitted for code that still triggered the backend crashes that the errors were meant to avoid, because FD->hasBody() returns false for a function that Clang is currently processing. This is not done for the equivalent RISC-V code, and is not needed for AArch64 either, so remove it. 
Errors were also emitted in the wrong location, errors were emitted at the called function's location, rather than at the caller's, which meant that just removing the FD->hasBody() check resulted in incomprehensible errors. Change this as well. The aarch64-mangle-sve-vectors.cpp test was using -target-feature wrong which was exposed as a result of these changes. Different target features need to be passed in as different -target-feature options. aarch64-targetattr-arch.c has a test_errors() function that needs to be split in two. Now that svundef_s8() is diagnosed for its use of svint8_t, the "needs target feature sve" diagnostic is no longer emitted, but this affects all calls in the same function. To ensure we still check this for its __crc32cd call, move that into a separate function. Fixes #94766. --- clang/lib/Sema/Sema.cpp | 7 ++--- clang/test/CodeGen/aarch64-targetattr-arch.c | 8 ++++-- .../target.c | 28 ++++++++++++------- .../CodeGenCXX/aarch64-mangle-sve-vectors.cpp | 4 +-- .../Sema/aarch64-sme2-sve2p1-diagnostics.c | 3 ++ clang/test/Sema/arm-sve-target.cpp | 2 +- 6 files changed, 33 insertions(+), 19 deletions(-) diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index a612dcd4b4d031..907a05a5d1b493 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -2093,16 +2093,15 @@ void Sema::checkTypeSupport(QualType Ty, SourceLocation Loc, ValueDecl *D) { } // Don't allow SVE types in functions without a SVE target. 
- if (Ty->isSVESizelessBuiltinType() && FD && FD->hasBody()) { + if (Ty->isSVESizelessBuiltinType() && FD) { llvm::StringMap CallerFeatureMap; Context.getFunctionFeatureMap(CallerFeatureMap, FD); if (!Builtin::evaluateRequiredTargetFeatures("sve", CallerFeatureMap)) { if (!Builtin::evaluateRequiredTargetFeatures("sme", CallerFeatureMap)) - Diag(D->getLocation(), diag::err_sve_vector_in_non_sve_target) << Ty; + Diag(Loc, diag::err_sve_vector_in_non_sve_target) << Ty; else if (!IsArmStreamingFunction(FD, /*IncludeLocallyStreaming=*/true)) { - Diag(D->getLocation(), diag::err_sve_vector_in_non_streaming_function) - << Ty; + Diag(Loc, diag::err_sve_vector_in_non_streaming_function) << Ty; } } } diff --git a/clang/test/CodeGen/aarch64-targetattr-arch.c b/clang/test/CodeGen/aarch64-targetattr-arch.c index ed731d0378625d..5de73d6027845e 100644 --- a/clang/test/CodeGen/aarch64-targetattr-arch.c +++ b/clang/test/CodeGen/aarch64-targetattr-arch.c @@ -29,14 +29,18 @@ float16_t test_fp16_on_v9(float16_t x, float16_t y) return vabdh_f16(x, y); } -void test_errors() +void test_error1() { #ifdef HAS8 // expected-error@+2{{always_inline function '__crc32cd' requires target feature 'crc'}} #endif __crc32cd(1, 1); +} + +void test_error2() +{ #if defined(HAS8) || defined(HAS81) -// expected-error@+2{{'svundef_s8' needs target feature sve}} +// expected-error@+2{{SVE vector type 'svint8_t' (aka '__SVInt8_t') cannot be used in a target without sve}} #endif svundef_s8(); } diff --git a/clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/target.c b/clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/target.c index a08c452fdc7fe9..bc5f01e7ce0ffd 100644 --- a/clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/target.c +++ b/clang/test/CodeGen/aarch64_neon_sve_bridge_intrinsics/target.c @@ -18,15 +18,23 @@ void target_svebf16(svbfloat16_t t, bfloat16x8_t m) { } void base(int8x16_t n, bfloat16x8_t m) { - // expected-error@+1 {{'svundef_s8' needs target feature sve}} - 
svset_neonq_s8(svundef_s8(), n); // expected-error {{'svset_neonq_s8' needs target feature sve}} - // expected-error@+1 {{'svundef_s8' needs target feature sve}} - svget_neonq_s8(svundef_s8()); // expected-error {{'svget_neonq_s8' needs target feature sve}} - svdup_neonq_s8(n); // expected-error {{'svdup_neonq_s8' needs target feature sve}} + // expected-error@+3 {{SVE vector type 'svint8_t' (aka '__SVInt8_t') cannot be used in a target without sve}} + // expected-error@+2 {{SVE vector type 'svint8_t' (aka '__SVInt8_t') cannot be used in a target without sve}} + // expected-error@+1 {{SVE vector type 'svint8_t' (aka '__SVInt8_t') cannot be used in a target without sve}} + svset_neonq_s8(svundef_s8(), n); + // expected-error@+2 {{SVE vector type 'svint8_t' (aka '__SVInt8_t') cannot be used in a target without sve}} + // expected-error@+1 {{SVE vector type 'svint8_t' (aka '__SVInt8_t') cannot be used in a target without sve}} + svget_neonq_s8(svundef_s8()); + // expected-error@+1 {{SVE vector type 'svint8_t' (aka '__SVInt8_t') cannot be used in a target without sve}} + svdup_neonq_s8(n); - // expected-error@+1 {{'svundef_bf16' needs target feature sve}} - svset_neonq_bf16(svundef_bf16(), m); // expected-error {{'svset_neonq_bf16' needs target feature sve,bf16}} - // expected-error@+1 {{'svundef_bf16' needs target feature sve}} - svget_neonq_bf16(svundef_bf16()); // expected-error {{'svget_neonq_bf16' needs target feature sve,bf16}} - svdup_neonq_bf16(m); // expected-error {{'svdup_neonq_bf16' needs target feature sve,bf16}} + // expected-error@+3 {{SVE vector type 'svbfloat16_t' (aka '__SVBfloat16_t') cannot be used in a target without sve}} + // expected-error@+2 {{SVE vector type 'svbfloat16_t' (aka '__SVBfloat16_t') cannot be used in a target without sve}} + // expected-error@+1 {{SVE vector type 'svbfloat16_t' (aka '__SVBfloat16_t') cannot be used in a target without sve}} + svset_neonq_bf16(svundef_bf16(), m); + // expected-error@+2 {{SVE vector type 
'svbfloat16_t' (aka '__SVBfloat16_t') cannot be used in a target without sve}} + // expected-error@+1 {{SVE vector type 'svbfloat16_t' (aka '__SVBfloat16_t') cannot be used in a target without sve}} + svget_neonq_bf16(svundef_bf16()); + // expected-error@+1 {{SVE vector type 'svbfloat16_t' (aka '__SVBfloat16_t') cannot be used in a target without sve}} + svdup_neonq_bf16(m); } diff --git a/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors.cpp b/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors.cpp index f87a254f5ae720..752b2beca3881b 100644 --- a/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors.cpp +++ b/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors.cpp @@ -1,8 +1,8 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu %s -emit-llvm -o - \ -// RUN: -target-feature +sve,+bf16 | FileCheck %s +// RUN: -target-feature +sve -target-feature +bf16 | FileCheck %s // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu %s -emit-llvm -o - \ -// RUN: -target-feature +sve,+bf16 -fclang-abi-compat=17 | FileCheck %s --check-prefix=COMPAT_17 +// RUN: -target-feature +sve -target-feature +bf16 -fclang-abi-compat=17 | FileCheck %s --check-prefix=COMPAT_17 void f(__SVInt8_t, __SVInt8_t); void f(__SVInt16_t, __SVInt16_t); diff --git a/clang/test/Sema/aarch64-sme2-sve2p1-diagnostics.c b/clang/test/Sema/aarch64-sme2-sve2p1-diagnostics.c index 2012221b480412..97cdd6d9fb3a80 100644 --- a/clang/test/Sema/aarch64-sme2-sve2p1-diagnostics.c +++ b/clang/test/Sema/aarch64-sme2-sve2p1-diagnostics.c @@ -10,6 +10,9 @@ // expected-error@+2 {{SVE vector type 'svuint8x2_t' (aka '__clang_svuint8x2_t') cannot be used in a non-streaming function}} __attribute__((target("+sme2"))) svuint8x2_t sme2_or_sve2p1_intrinsic_test_sme2_invalid(svcount_t png, const uint8_t *rn) { + // expected-error@+4 {{SVE vector type 'svcount_t' (aka '__SVCount_t') cannot be used in 
a non-streaming function}} + // expected-error@+3 {{SVE vector type 'svuint8x2_t' (aka '__clang_svuint8x2_t') cannot be used in a non-streaming function}} + // expected-error@+2 {{SVE vector type 'svcount_t' (aka '__SVCount_t') cannot be used in a non-streaming function}} // expected-warning@+1 {{builtin call has undefined behaviour when called from a non-streaming function}} return svldnt1_u8_x2(png, rn); } diff --git a/clang/test/Sema/arm-sve-target.cpp b/clang/test/Sema/arm-sve-target.cpp index a753f772cc3720..1567475e681da3 100644 --- a/clang/test/Sema/arm-sve-target.cpp +++ b/clang/test/Sema/arm-sve-target.cpp @@ -23,7 +23,7 @@ void test_var_target3() { __SVFloat32_t other_ret(); __SVFloat32_t test_ret() { // expected-error {{SVE vector type '__SVFloat32_t' cannot be used in a target without sve}} - return other_ret(); + return other_ret(); // expected-error {{SVE vector type '__SVFloat32_t' cannot be used in a target without sve}} } __attribute__((target("sve"))) From 0113f26fad00e4798883b02eb7a049ea545a13de Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Fri, 14 Jun 2024 13:12:01 +0100 Subject: [PATCH 084/155] [AArch64][SME] Enable subreg liveness tracking for AArch64 (#92142) The SME dot instructions in these tests operate on contiguous register tuples which use one subregister from each of the loads. When using the strided register form for all loads, enabling subreg liveness tracking will allow us to recognise that there is no overlap between the register tuples used by each of the dot instructions. This is the first in a series of patches to improve the allocation of strided and contiguous registers for SME. 
--- llvm/lib/Target/AArch64/AArch64Subtarget.h | 1 + .../Atomics/aarch64-atomicrmw-lse2_lse128.ll | 90 +- .../Atomics/aarch64-atomicrmw-v8_1a.ll | 110 +- .../aarch64_be-atomicrmw-lse2_lse128.ll | 150 +- .../Atomics/aarch64_be-atomicrmw-v8_1a.ll | 170 +- .../AArch64/GlobalISel/arm64-atomic-128.ll | 20 - .../aarch64-interleaved-access-w-undef.ll | 29 +- .../aarch64-neon-vector-insert-uaddlv.ll | 24 +- .../test/CodeGen/AArch64/aarch64-sysreg128.ll | 2 - llvm/test/CodeGen/AArch64/arm64-atomic-128.ll | 58 +- llvm/test/CodeGen/AArch64/arm64-dup.ll | 15 +- .../AArch64/arm64-indexed-vector-ldst.ll | 1872 +---------------- llvm/test/CodeGen/AArch64/arm64-ld1.ll | 228 +- llvm/test/CodeGen/AArch64/arm64-neon-copy.ll | 18 +- .../AArch64/arm64-neon-copyPhysReg-tuple.ll | 35 +- llvm/test/CodeGen/AArch64/arm64-tbl.ll | 300 +-- llvm/test/CodeGen/AArch64/arm64-zip.ll | 28 +- .../test/CodeGen/AArch64/atomicrmw-xchg-fp.ll | 4 +- llvm/test/CodeGen/AArch64/bf16-shuffle.ll | 27 +- .../CodeGen/AArch64/build-vector-two-dup.ll | 2 +- .../complex-deinterleaving-multiuses.ll | 28 +- .../CodeGen/AArch64/extract-vector-elt.ll | 2 - .../CodeGen/AArch64/fp-conversion-to-tbl.ll | 34 +- llvm/test/CodeGen/AArch64/fptoi.ll | 246 +-- .../test/CodeGen/AArch64/fptosi-sat-vector.ll | 100 +- .../test/CodeGen/AArch64/fptoui-sat-vector.ll | 152 +- llvm/test/CodeGen/AArch64/insert-subvector.ll | 16 +- .../AArch64/neon-bitwise-instructions.ll | 12 +- .../CodeGen/AArch64/neon-extracttruncate.ll | 4 - .../CodeGen/AArch64/neon-reverseshuffle.ll | 4 - .../CodeGen/AArch64/neon-widen-shuffle.ll | 6 - llvm/test/CodeGen/AArch64/seqpairspill.mir | 12 +- llvm/test/CodeGen/AArch64/shuffle-tbl34.ll | 83 +- llvm/test/CodeGen/AArch64/shuffles.ll | 60 +- llvm/test/CodeGen/AArch64/shufflevector.ll | 115 +- .../AArch64/sme2-intrinsics-add-sub-za16.ll | 24 - .../CodeGen/AArch64/sme2-intrinsics-add.ll | 60 - .../CodeGen/AArch64/sme2-intrinsics-cvtn.ll | 4 - .../CodeGen/AArch64/sme2-intrinsics-fmlas.ll | 100 +- 
.../AArch64/sme2-intrinsics-fp-dots.ll | 52 +- .../AArch64/sme2-intrinsics-insert-mova.ll | 144 -- .../AArch64/sme2-intrinsics-int-dots.ll | 576 +++-- .../CodeGen/AArch64/sme2-intrinsics-max.ll | 400 ++-- .../CodeGen/AArch64/sme2-intrinsics-min.ll | 400 ++-- .../CodeGen/AArch64/sme2-intrinsics-mlall.ll | 244 +-- .../CodeGen/AArch64/sme2-intrinsics-mlals.ll | 193 +- .../CodeGen/AArch64/sme2-intrinsics-rshl.ll | 208 +- .../sme2-intrinsics-select-sme-tileslice.ll | 2 - .../AArch64/sme2-intrinsics-sqdmulh.ll | 104 +- .../CodeGen/AArch64/sme2-intrinsics-sub.ll | 60 - .../CodeGen/AArch64/sme2-intrinsics-vdot.ll | 382 +++- .../AArch64/sve-fixed-length-shuffles.ll | 2 +- .../sve-intrinsics-stN-reg-imm-addr-mode.ll | 119 -- .../sve-intrinsics-stN-reg-reg-addr-mode.ll | 63 - .../CodeGen/AArch64/sve-intrinsics-stores.ll | 81 - .../CodeGen/AArch64/sve-merging-stores.ll | 13 +- ...-streaming-mode-fixed-length-ld2-alloca.ll | 8 +- ...sve-streaming-mode-fixed-length-shuffle.ll | 11 +- .../AArch64/sve2-intrinsics-perm-tb.ll | 40 +- .../AArch64/sve2p1-intrinsics-bfclamp.ll | 6 - .../AArch64/sve2p1-intrinsics-fclamp.ll | 18 - .../sve2p1-intrinsics-multivec-stores.ll | 153 -- .../AArch64/sve2p1-intrinsics-sclamp.ll | 24 - .../AArch64/sve2p1-intrinsics-selx4.ll | 128 +- .../AArch64/sve2p1-intrinsics-stores.ll | 96 +- .../AArch64/sve2p1-intrinsics-uclamp.ll | 24 - .../AArch64/sve2p1-intrinsics-uzpx4.ll | 20 +- .../AArch64/sve2p1-intrinsics-while-pp.ll | 32 - .../AArch64/swift-error-unreachable-use.ll | 1 + llvm/test/CodeGen/AArch64/tbl-loops.ll | 79 +- llvm/test/CodeGen/AArch64/trunc-to-tbl.ll | 12 +- llvm/test/CodeGen/AArch64/vldn_shuffle.ll | 84 +- 72 files changed, 2559 insertions(+), 5465 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index 7ef7a89b5749fe..9912190e1bcede 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -149,6 +149,7 @@ class AArch64Subtarget 
final : public AArch64GenSubtargetInfo { const Triple &getTargetTriple() const { return TargetTriple; } bool enableMachineScheduler() const override { return true; } bool enablePostRAScheduler() const override { return usePostRAScheduler(); } + bool enableSubRegLiveness() const override { return true; } bool enableMachinePipeliner() const override; bool useDFAforSMS() const override { return false; } diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll index a1712a5ec7a27c..444f579f232420 100644 --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll @@ -2273,10 +2273,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: casp x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value monotonic, align 16 @@ -2298,10 +2298,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: caspa x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value acquire, align 16 @@ -2323,10 +2323,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; 
-O1: mvn x11, x9 -; -O1: caspl x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value release, align 16 @@ -2348,10 +2348,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: caspal x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value acq_rel, align 16 @@ -2373,10 +2373,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: caspal x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value seq_cst, align 16 @@ -3406,7 +3406,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3427,7 +3427,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3448,7 +3448,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, 
i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3469,7 +3469,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3490,7 +3490,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3947,7 +3947,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu ; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lt +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -3975,7 +3975,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_max_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lt +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4003,7 +4003,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_max_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lt +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4031,7 +4031,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; -O1-LABEL: 
atomicrmw_max_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lt +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4059,7 +4059,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lt +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4592,7 +4592,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu ; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, ge +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4620,7 +4620,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_min_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, ge +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4648,7 +4648,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_min_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, ge +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4676,7 +4676,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, ge +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4704,7 +4704,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: 
atomicrmw_min_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, ge +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5237,7 +5237,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val ; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lo +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5265,7 +5265,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lo +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5293,7 +5293,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umax_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lo +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5321,7 +5321,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lo +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5349,7 +5349,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lo +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5877,7 +5877,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val ; -O1-LABEL: 
atomicrmw_umin_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, hs +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5905,7 +5905,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, hs +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5933,7 +5933,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umin_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, hs +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5961,7 +5961,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, hs +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5989,7 +5989,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, hs +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll index ee5fbe39b4492c..62af028defde56 100644 --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll @@ -1616,7 +1616,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_monotonic(ptr %ptr, i128 %valu ; -O1-LABEL: 
atomicrmw_and_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 +; -O1: and x9, x5, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -1637,7 +1637,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_and_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 +; -O1: and x9, x5, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -1658,7 +1658,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_release(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_and_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 +; -O1: and x9, x5, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -1679,7 +1679,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 +; -O1: and x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -1700,7 +1700,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 +; -O1: and x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -2343,10 +2343,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: casp x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value monotonic, 
align 16 @@ -2368,10 +2368,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: caspa x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value acquire, align 16 @@ -2393,10 +2393,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: caspl x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value release, align 16 @@ -2418,10 +2418,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: caspal x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value acq_rel, align 16 @@ -2443,10 +2443,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: caspal x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq %r = atomicrmw nand ptr %ptr, i128 
%value seq_cst, align 16 @@ -2996,7 +2996,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_monotonic(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: orr x8, x4, x2 -; -O1: orr x9, x7, x3 +; -O1: orr x9, x5, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3017,7 +3017,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_or_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: orr x8, x4, x2 -; -O1: orr x9, x7, x3 +; -O1: orr x9, x5, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3038,7 +3038,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_release(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_or_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: orr x8, x4, x2 -; -O1: orr x9, x7, x3 +; -O1: orr x9, x5, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3059,7 +3059,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: orr x8, x4, x2 -; -O1: orr x9, x7, x3 +; -O1: orr x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3080,7 +3080,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: orr x8, x4, x2 -; -O1: orr x9, x7, x3 +; -O1: orr x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3531,7 +3531,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3552,7 +3552,7 @@ define dso_local 
i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3573,7 +3573,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3594,7 +3594,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3615,7 +3615,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -4072,7 +4072,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu ; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lt +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4100,7 +4100,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_max_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lt +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4128,7 +4128,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr 
%ptr, i128 %value) ; -O1-LABEL: atomicrmw_max_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lt +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4156,7 +4156,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lt +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4184,7 +4184,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lt +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4717,7 +4717,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu ; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, ge +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4745,7 +4745,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_min_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, ge +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4773,7 +4773,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_min_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, ge +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4801,7 +4801,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; 
-O1-LABEL: atomicrmw_min_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, ge +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4829,7 +4829,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, ge +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5362,7 +5362,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val ; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lo +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5390,7 +5390,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lo +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5418,7 +5418,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umax_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lo +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5446,7 +5446,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lo +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5474,7 +5474,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value ; -O1-LABEL: 
atomicrmw_umax_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, lo +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -6002,7 +6002,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val ; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, hs +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -6030,7 +6030,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, hs +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -6058,7 +6058,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umin_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, hs +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -6086,7 +6086,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, hs +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -6114,7 +6114,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x7, x3, hs +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll 
b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll index 83e383f335637c..f043f99327308b 100644 --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll @@ -517,7 +517,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu ; ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] -; -O1: adds x9, x7, x3 +; -O1: adds x9, x5, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -534,7 +534,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: adds x9, x7, x3 +; -O1: adds x9, x5, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -551,7 +551,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_add_i128_aligned_release: ; -O1: ldp x4, x5, [x0] -; -O1: adds x9, x7, x3 +; -O1: adds x9, x5, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -568,7 +568,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] -; -O1: adds x9, x7, x3 +; -O1: adds x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -585,7 +585,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: adds x9, x7, x3 +; -O1: adds x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1102,7 +1102,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] -; -O1: subs x9, x7, x3 +; -O1: 
subs x9, x5, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1119,7 +1119,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: subs x9, x7, x3 +; -O1: subs x9, x5, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1136,7 +1136,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_release: ; -O1: ldp x4, x5, [x0] -; -O1: subs x9, x7, x3 +; -O1: subs x9, x5, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1153,7 +1153,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] -; -O1: subs x9, x7, x3 +; -O1: subs x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1170,7 +1170,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: subs x9, x7, x3 +; -O1: subs x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -2356,10 +2356,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: casp x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value monotonic, align 16 @@ -2379,10 +2379,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] 
; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: caspa x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value acquire, align 16 @@ -2402,10 +2402,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: caspl x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value release, align 16 @@ -2425,10 +2425,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: caspal x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value acq_rel, align 16 @@ -2448,10 +2448,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: caspal x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value seq_cst, align 16 @@ -3479,7 +3479,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic: ; -O1: ldp 
x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3498,7 +3498,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3517,7 +3517,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3536,7 +3536,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3555,7 +3555,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -4004,8 +4004,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu ; ; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lt +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4025,8 +4025,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_max_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, 
x7 -; -O1: csel x9, x7, x3, lt +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4046,8 +4046,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_max_i128_aligned_release: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lt +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4067,8 +4067,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lt +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4088,8 +4088,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lt +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4589,8 +4589,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu ; ; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, ge +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4610,8 +4610,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_min_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, ge +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4631,8 +4631,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr 
%ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_min_i128_aligned_release: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, ge +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4652,8 +4652,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, ge +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4673,8 +4673,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, ge +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5174,8 +5174,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val ; ; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lo +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5195,8 +5195,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lo +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5216,8 +5216,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umax_i128_aligned_release: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lo +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspl x4, x5, 
x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5237,8 +5237,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lo +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5258,8 +5258,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lo +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5759,8 +5759,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val ; ; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, hs +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5780,8 +5780,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, hs +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5801,8 +5801,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umin_i128_aligned_release: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, hs +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5822,8 +5822,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: 
csel x9, x7, x3, hs +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5843,8 +5843,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, hs +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll index 0c3ed9b0f1de0f..df7b57e7e18f46 100644 --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll @@ -542,7 +542,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu ; ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] -; -O1: adds x9, x7, x3 +; -O1: adds x9, x5, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -559,7 +559,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: adds x9, x7, x3 +; -O1: adds x9, x5, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -576,7 +576,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_add_i128_aligned_release: ; -O1: ldp x4, x5, [x0] -; -O1: adds x9, x7, x3 +; -O1: adds x9, x5, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -593,7 +593,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] -; -O1: adds x9, x7, x3 +; -O1: adds x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; 
-O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -610,7 +610,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: adds x9, x7, x3 +; -O1: adds x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1127,7 +1127,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] -; -O1: subs x9, x7, x3 +; -O1: subs x9, x5, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1144,7 +1144,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: subs x9, x7, x3 +; -O1: subs x9, x5, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1161,7 +1161,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_release: ; -O1: ldp x4, x5, [x0] -; -O1: subs x9, x7, x3 +; -O1: subs x9, x5, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1178,7 +1178,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] -; -O1: subs x9, x7, x3 +; -O1: subs x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1195,7 +1195,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: subs x9, x7, x3 +; -O1: subs x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1674,7 +1674,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_monotonic(ptr %ptr, i128 %valu ; -O1-LABEL: 
atomicrmw_and_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 +; -O1: and x9, x5, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1693,7 +1693,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_and_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 +; -O1: and x9, x5, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1712,7 +1712,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_release(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_and_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 +; -O1: and x9, x5, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1731,7 +1731,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 +; -O1: and x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1750,7 +1750,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 +; -O1: and x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -2406,10 +2406,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: casp x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value monotonic, 
align 16 @@ -2429,10 +2429,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: caspa x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value acquire, align 16 @@ -2452,10 +2452,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: caspl x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value release, align 16 @@ -2475,10 +2475,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: caspal x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value acq_rel, align 16 @@ -2498,10 +2498,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x7, x3 -; -O1: mvn x10, x8 -; -O1: mvn x11, x9 -; -O1: caspal x4, x5, x10, x11, [x0] +; -O1: and x9, x5, x3 +; -O1: mvn x8, x8 +; -O1: mvn x9, x9 +; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq %r = atomicrmw nand ptr %ptr, i128 
%value seq_cst, align 16 @@ -3049,7 +3049,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_monotonic(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: orr x8, x4, x2 -; -O1: orr x9, x7, x3 +; -O1: orr x9, x5, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3068,7 +3068,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_or_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: orr x8, x4, x2 -; -O1: orr x9, x7, x3 +; -O1: orr x9, x5, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3087,7 +3087,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_release(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_or_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: orr x8, x4, x2 -; -O1: orr x9, x7, x3 +; -O1: orr x9, x5, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3106,7 +3106,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: orr x8, x4, x2 -; -O1: orr x9, x7, x3 +; -O1: orr x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3125,7 +3125,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: orr x8, x4, x2 -; -O1: orr x9, x7, x3 +; -O1: orr x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3574,7 +3574,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3593,7 +3593,7 @@ define dso_local 
i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3612,7 +3612,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3631,7 +3631,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3650,7 +3650,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x7, x3 +; -O1: eor x9, x5, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -4099,8 +4099,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu ; ; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lt +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4120,8 +4120,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_max_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lt +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4141,8 +4141,8 @@ define dso_local 
i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_max_i128_aligned_release: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lt +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4162,8 +4162,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lt +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4183,8 +4183,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lt +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4684,8 +4684,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu ; ; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, ge +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4705,8 +4705,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_min_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, ge +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4726,8 +4726,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_min_i128_aligned_release: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, ge +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, ge ; -O1: 
csel x8, x4, x2, ge ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4747,8 +4747,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, ge +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4768,8 +4768,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, ge +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5269,8 +5269,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val ; ; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lo +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5290,8 +5290,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lo +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5311,8 +5311,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umax_i128_aligned_release: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lo +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5332,8 +5332,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel: ; -O1: ldp 
x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lo +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5353,8 +5353,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, lo +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5854,8 +5854,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val ; ; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, hs +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5875,8 +5875,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, hs +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5896,8 +5896,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umin_i128_aligned_release: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, hs +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5917,8 +5917,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, hs +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5938,8 +5938,8 @@ define dso_local i128 
@atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x7 -; -O1: csel x9, x7, x3, hs +; -O1: cmp x3, x5 +; -O1: csel x9, x5, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll index 1fe63c9be8c629..80310a11add697 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll @@ -53,10 +53,6 @@ define void @val_compare_and_swap(ptr %p, i128 %oldval, i128 %newval) { ; ; CHECK-CAS-O1-LABEL: val_compare_and_swap: ; CHECK-CAS-O1: // %bb.0: -; CHECK-CAS-O1-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 -; CHECK-CAS-O1-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5 -; CHECK-CAS-O1-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 -; CHECK-CAS-O1-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5 ; CHECK-CAS-O1-NEXT: caspa x2, x3, x4, x5, [x0] ; CHECK-CAS-O1-NEXT: mov v0.d[0], x2 ; CHECK-CAS-O1-NEXT: mov v0.d[1], x3 @@ -180,10 +176,6 @@ define void @val_compare_and_swap_monotonic_seqcst(ptr %p, i128 %oldval, i128 %n ; ; CHECK-CAS-O1-LABEL: val_compare_and_swap_monotonic_seqcst: ; CHECK-CAS-O1: // %bb.0: -; CHECK-CAS-O1-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 -; CHECK-CAS-O1-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5 -; CHECK-CAS-O1-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 -; CHECK-CAS-O1-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5 ; CHECK-CAS-O1-NEXT: caspal x2, x3, x4, x5, [x0] ; CHECK-CAS-O1-NEXT: mov v0.d[0], x2 ; CHECK-CAS-O1-NEXT: mov v0.d[1], x3 @@ -307,10 +299,6 @@ define void @val_compare_and_swap_release_acquire(ptr %p, i128 %oldval, i128 %ne ; ; CHECK-CAS-O1-LABEL: val_compare_and_swap_release_acquire: ; CHECK-CAS-O1: // %bb.0: -; 
CHECK-CAS-O1-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 -; CHECK-CAS-O1-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5 -; CHECK-CAS-O1-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 -; CHECK-CAS-O1-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5 ; CHECK-CAS-O1-NEXT: caspal x2, x3, x4, x5, [x0] ; CHECK-CAS-O1-NEXT: mov v0.d[0], x2 ; CHECK-CAS-O1-NEXT: mov v0.d[1], x3 @@ -434,10 +422,6 @@ define void @val_compare_and_swap_monotonic(ptr %p, i128 %oldval, i128 %newval) ; ; CHECK-CAS-O1-LABEL: val_compare_and_swap_monotonic: ; CHECK-CAS-O1: // %bb.0: -; CHECK-CAS-O1-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 -; CHECK-CAS-O1-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5 -; CHECK-CAS-O1-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 -; CHECK-CAS-O1-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5 ; CHECK-CAS-O1-NEXT: caspal x2, x3, x4, x5, [x0] ; CHECK-CAS-O1-NEXT: mov v0.d[0], x2 ; CHECK-CAS-O1-NEXT: mov v0.d[1], x3 @@ -658,10 +642,6 @@ define i128 @val_compare_and_swap_return(ptr %p, i128 %oldval, i128 %newval) { ; ; CHECK-CAS-O1-LABEL: val_compare_and_swap_return: ; CHECK-CAS-O1: // %bb.0: -; CHECK-CAS-O1-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 -; CHECK-CAS-O1-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5 -; CHECK-CAS-O1-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 -; CHECK-CAS-O1-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5 ; CHECK-CAS-O1-NEXT: caspa x2, x3, x4, x5, [x0] ; CHECK-CAS-O1-NEXT: mov x0, x2 ; CHECK-CAS-O1-NEXT: mov x1, x3 diff --git a/llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll b/llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll index 07fbe5d7310f60..7141f53802bff7 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll @@ -27,9 +27,8 @@ BB: define void @f_undef_15(<8 x i64> 
%a, ptr %dst) { ; CHECK-LABEL: f_undef_15: ; CHECK: // %bb.0: // %BB -; CHECK-NEXT: // kill: def $q0 killed $q0 def $q0_q1 -; CHECK-NEXT: mov x8, x0 ; CHECK-NEXT: mov v1.16b, v0.16b +; CHECK-NEXT: mov x8, x0 ; CHECK-NEXT: st2 { v0.2d, v1.2d }, [x8], #32 ; CHECK-NEXT: st2 { v0.2d, v1.2d }, [x8] ; CHECK-NEXT: add x8, x0, #64 @@ -46,19 +45,17 @@ BB: define void @f_undef_1(<8 x i64> %a, ptr %dst) { ; CHECK-LABEL: f_undef_1: ; CHECK: // %bb.0: // %BB -; CHECK-NEXT: mov v16.16b, v0.16b -; CHECK-NEXT: mov v5.16b, v2.16b -; CHECK-NEXT: // kill: def $q1 killed $q1 def $q1_q2 -; CHECK-NEXT: // kill: def $q3 killed $q3 def $q3_q4 +; CHECK-NEXT: mov v4.16b, v2.16b +; CHECK-NEXT: mov v5.16b, v0.16b ; CHECK-NEXT: mov x8, x0 +; CHECK-NEXT: mov v6.16b, v0.16b ; CHECK-NEXT: mov v2.16b, v1.16b -; CHECK-NEXT: mov v4.16b, v3.16b -; CHECK-NEXT: mov v17.16b, v16.16b -; CHECK-NEXT: mov v6.16b, v5.16b -; CHECK-NEXT: st2 { v16.2d, v17.2d }, [x8], #32 +; CHECK-NEXT: st2 { v5.2d, v6.2d }, [x8], #32 +; CHECK-NEXT: mov v5.16b, v4.16b ; CHECK-NEXT: st2 { v1.2d, v2.2d }, [x8] ; CHECK-NEXT: add x8, x0, #64 -; CHECK-NEXT: st2 { v5.2d, v6.2d }, [x8] +; CHECK-NEXT: st2 { v4.2d, v5.2d }, [x8] +; CHECK-NEXT: mov v4.16b, v3.16b ; CHECK-NEXT: add x8, x0, #96 ; CHECK-NEXT: st2 { v3.2d, v4.2d }, [x8] ; CHECK-NEXT: ret @@ -73,11 +70,10 @@ define void @noundefs(<8 x i32> %a, <8 x i32> %b, ptr %dst) { ; CHECK-LABEL: noundefs: ; CHECK: // %bb.0: // %BB ; CHECK-NEXT: mov v5.16b, v2.16b -; CHECK-NEXT: // kill: def $q3 killed $q3 def $q2_q3 ; CHECK-NEXT: mov v4.16b, v0.16b -; CHECK-NEXT: mov v2.16b, v1.16b +; CHECK-NEXT: mov v2.16b, v3.16b ; CHECK-NEXT: st2 { v4.4s, v5.4s }, [x0], #32 -; CHECK-NEXT: st2 { v2.4s, v3.4s }, [x0] +; CHECK-NEXT: st2 { v1.4s, v2.4s }, [x0] ; CHECK-NEXT: ret BB: %S = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> @@ -89,11 +85,10 @@ define void @undefs(<8 x i32> %a, <8 x i32> %b, ptr %dst) { ; CHECK-LABEL: undefs: ; CHECK: // %bb.0: // %BB ; CHECK-NEXT: mov v5.16b, v2.16b -; 
CHECK-NEXT: // kill: def $q3 killed $q3 def $q2_q3 ; CHECK-NEXT: mov v4.16b, v0.16b -; CHECK-NEXT: mov v2.16b, v1.16b +; CHECK-NEXT: mov v2.16b, v3.16b ; CHECK-NEXT: st2 { v4.4s, v5.4s }, [x0], #32 -; CHECK-NEXT: st2 { v2.4s, v3.4s }, [x0] +; CHECK-NEXT: st2 { v1.4s, v2.4s }, [x0] ; CHECK-NEXT: ret BB: %S = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> diff --git a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll index 3c8aca5145261d..f0fcafa5302e6d 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll @@ -146,11 +146,11 @@ define void @insert_vec_v6i64_uaddlv_from_v4i32(ptr %0) { ; CHECK-LABEL: insert_vec_v6i64_uaddlv_from_v4i32: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.2d v0, #0000000000000000 -; CHECK-NEXT: movi.2d v2, #0000000000000000 ; CHECK-NEXT: uaddlv.4s d1, v0 -; CHECK-NEXT: str d2, [x0, #16] ; CHECK-NEXT: mov.d v0[0], v1[0] +; CHECK-NEXT: movi.2d v1, #0000000000000000 ; CHECK-NEXT: ucvtf.2d v0, v0 +; CHECK-NEXT: str d1, [x0, #16] ; CHECK-NEXT: fcvtn v0.2s, v0.2d ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret @@ -210,9 +210,9 @@ define void @insert_vec_v8i16_uaddlv_from_v8i16(ptr %0) { ; CHECK-NEXT: stp xzr, xzr, [x0, #16] ; CHECK-NEXT: uaddlv.8h s0, v0 ; CHECK-NEXT: mov.h v1[0], v0[0] -; CHECK-NEXT: ushll.4s v1, v1, #0 -; CHECK-NEXT: ucvtf.4s v1, v1 -; CHECK-NEXT: str q1, [x0] +; CHECK-NEXT: ushll.4s v0, v1, #0 +; CHECK-NEXT: ucvtf.4s v0, v0 +; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret entry: @@ -232,10 +232,10 @@ define void @insert_vec_v3i16_uaddlv_from_v8i16(ptr %0) { ; CHECK-NEXT: add x8, x0, #8 ; CHECK-NEXT: uaddlv.8h s0, v0 ; CHECK-NEXT: mov.h v1[0], v0[0] -; CHECK-NEXT: ushll.4s v1, v1, #0 -; CHECK-NEXT: ucvtf.4s v1, v1 -; CHECK-NEXT: st1.s { v1 }[2], [x8] -; CHECK-NEXT: str d1, [x0] +; CHECK-NEXT: ushll.4s v0, v1, #0 +; CHECK-NEXT: ucvtf.4s v0, v0 +; CHECK-NEXT: st1.s 
{ v0 }[2], [x8] +; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret entry: @@ -278,9 +278,9 @@ define void @insert_vec_v16i8_uaddlv_from_v8i8(ptr %0) { ; CHECK-NEXT: stp q0, q0, [x0, #32] ; CHECK-NEXT: mov.h v2[0], v1[0] ; CHECK-NEXT: bic.4h v2, #255, lsl #8 -; CHECK-NEXT: ushll.4s v2, v2, #0 -; CHECK-NEXT: ucvtf.4s v2, v2 -; CHECK-NEXT: stp q2, q0, [x0] +; CHECK-NEXT: ushll.4s v1, v2, #0 +; CHECK-NEXT: ucvtf.4s v1, v1 +; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/aarch64-sysreg128.ll b/llvm/test/CodeGen/AArch64/aarch64-sysreg128.ll index 7f20b5e5ee4df5..75a96be9b435e2 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-sysreg128.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-sysreg128.ll @@ -24,8 +24,6 @@ declare i128 @llvm.read_volatile_register.i128(metadata) #1 define void @test_wsr128(i128 noundef %v) #0 { ; CHECK-LE-LABEL: test_wsr128: ; CHECK-LE: // %bb.0: // %entry -; CHECK-LE-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1 -; CHECK-LE-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1 ; CHECK-LE-NEXT: msrr S1_2_C3_C4_5, x0, x1 ; CHECK-LE-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll b/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll index 37c61d0a4a0fb6..4a84c673af8cfc 100644 --- a/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll +++ b/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll @@ -43,10 +43,6 @@ define i128 @val_compare_and_swap(ptr %p, i128 %oldval, i128 %newval) { ; ; LSE-LABEL: val_compare_and_swap: ; LSE: // %bb.0: -; LSE-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5 -; LSE-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 -; LSE-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5 -; LSE-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 ; LSE-NEXT: caspa x2, x3, x4, x5, [x0] ; LSE-NEXT: mov x0, x2 ; LSE-NEXT: mov x1, x3 @@ -94,10 +90,6 @@ define i128 @val_compare_and_swap_seqcst(ptr %p, i128 %oldval, i128 %newval) { ; ; LSE-LABEL: 
val_compare_and_swap_seqcst: ; LSE: // %bb.0: -; LSE-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5 -; LSE-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 -; LSE-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5 -; LSE-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 ; LSE-NEXT: caspal x2, x3, x4, x5, [x0] ; LSE-NEXT: mov x0, x2 ; LSE-NEXT: mov x1, x3 @@ -145,10 +137,6 @@ define i128 @val_compare_and_swap_release(ptr %p, i128 %oldval, i128 %newval) { ; ; LSE-LABEL: val_compare_and_swap_release: ; LSE: // %bb.0: -; LSE-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5 -; LSE-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 -; LSE-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5 -; LSE-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 ; LSE-NEXT: caspl x2, x3, x4, x5, [x0] ; LSE-NEXT: mov x0, x2 ; LSE-NEXT: mov x1, x3 @@ -196,10 +184,6 @@ define i128 @val_compare_and_swap_monotonic(ptr %p, i128 %oldval, i128 %newval) ; ; LSE-LABEL: val_compare_and_swap_monotonic: ; LSE: // %bb.0: -; LSE-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5 -; LSE-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 -; LSE-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5 -; LSE-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 ; LSE-NEXT: casp x2, x3, x4, x5, [x0] ; LSE-NEXT: mov x0, x2 ; LSE-NEXT: mov x1, x3 @@ -251,7 +235,7 @@ define void @fetch_and_nand(ptr %p, i128 %bits) { ; LSE-NEXT: // =>This Inner Loop Header: Depth=1 ; LSE-NEXT: mov x7, x5 ; LSE-NEXT: mov x6, x4 -; LSE-NEXT: and x8, x7, x3 +; LSE-NEXT: and x8, x5, x3 ; LSE-NEXT: and x9, x4, x2 ; LSE-NEXT: mvn x10, x9 ; LSE-NEXT: mvn x11, x8 @@ -311,7 +295,7 @@ define void @fetch_and_or(ptr %p, i128 %bits) { ; LSE-NEXT: mov x7, x5 ; LSE-NEXT: mov x6, x4 ; LSE-NEXT: orr x8, x4, x2 -; LSE-NEXT: orr x9, x7, x3 +; LSE-NEXT: orr x9, x5, x3 ; LSE-NEXT: mov x4, x6 ; LSE-NEXT: mov x5, x7 ; LSE-NEXT: caspal x4, x5, x8, x9, [x0] 
@@ -368,7 +352,7 @@ define void @fetch_and_add(ptr %p, i128 %bits) { ; LSE-NEXT: mov x7, x5 ; LSE-NEXT: mov x6, x4 ; LSE-NEXT: adds x8, x4, x2 -; LSE-NEXT: adc x9, x7, x3 +; LSE-NEXT: adc x9, x5, x3 ; LSE-NEXT: mov x4, x6 ; LSE-NEXT: mov x5, x7 ; LSE-NEXT: caspal x4, x5, x8, x9, [x0] @@ -424,7 +408,7 @@ define void @fetch_and_sub(ptr %p, i128 %bits) { ; LSE-NEXT: mov x7, x5 ; LSE-NEXT: mov x6, x4 ; LSE-NEXT: subs x8, x4, x2 -; LSE-NEXT: sbc x9, x7, x3 +; LSE-NEXT: sbc x9, x5, x3 ; LSE-NEXT: mov x4, x6 ; LSE-NEXT: mov x5, x7 ; LSE-NEXT: caspal x4, x5, x8, x9, [x0] @@ -484,8 +468,8 @@ define void @fetch_and_min(ptr %p, i128 %bits) { ; LSE-NEXT: mov x7, x5 ; LSE-NEXT: mov x6, x4 ; LSE-NEXT: cmp x2, x4 -; LSE-NEXT: sbcs xzr, x3, x7 -; LSE-NEXT: csel x9, x7, x3, ge +; LSE-NEXT: sbcs xzr, x3, x5 +; LSE-NEXT: csel x9, x5, x3, ge ; LSE-NEXT: csel x8, x4, x2, ge ; LSE-NEXT: mov x4, x6 ; LSE-NEXT: mov x5, x7 @@ -546,8 +530,8 @@ define void @fetch_and_max(ptr %p, i128 %bits) { ; LSE-NEXT: mov x7, x5 ; LSE-NEXT: mov x6, x4 ; LSE-NEXT: cmp x2, x4 -; LSE-NEXT: sbcs xzr, x3, x7 -; LSE-NEXT: csel x9, x7, x3, lt +; LSE-NEXT: sbcs xzr, x3, x5 +; LSE-NEXT: csel x9, x5, x3, lt ; LSE-NEXT: csel x8, x4, x2, lt ; LSE-NEXT: mov x4, x6 ; LSE-NEXT: mov x5, x7 @@ -608,8 +592,8 @@ define void @fetch_and_umin(ptr %p, i128 %bits) { ; LSE-NEXT: mov x7, x5 ; LSE-NEXT: mov x6, x4 ; LSE-NEXT: cmp x2, x4 -; LSE-NEXT: sbcs xzr, x3, x7 -; LSE-NEXT: csel x9, x7, x3, hs +; LSE-NEXT: sbcs xzr, x3, x5 +; LSE-NEXT: csel x9, x5, x3, hs ; LSE-NEXT: csel x8, x4, x2, hs ; LSE-NEXT: mov x4, x6 ; LSE-NEXT: mov x5, x7 @@ -670,8 +654,8 @@ define void @fetch_and_umax(ptr %p, i128 %bits) { ; LSE-NEXT: mov x7, x5 ; LSE-NEXT: mov x6, x4 ; LSE-NEXT: cmp x2, x4 -; LSE-NEXT: sbcs xzr, x3, x7 -; LSE-NEXT: csel x9, x7, x3, lo +; LSE-NEXT: sbcs xzr, x3, x5 +; LSE-NEXT: csel x9, x5, x3, lo ; LSE-NEXT: csel x8, x4, x2, lo ; LSE-NEXT: mov x4, x6 ; LSE-NEXT: mov x5, x7 @@ -714,8 +698,8 @@ define i128 @atomic_load_seq_cst(ptr 
%p) { ; ; LSE-LABEL: atomic_load_seq_cst: ; LSE: // %bb.0: -; LSE-NEXT: mov x2, #0 -; LSE-NEXT: mov x3, #0 +; LSE-NEXT: mov x2, #0 // =0x0 +; LSE-NEXT: mov x3, #0 // =0x0 ; LSE-NEXT: caspal x2, x3, x2, x3, [x0] ; LSE-NEXT: mov x0, x2 ; LSE-NEXT: mov x1, x3 @@ -747,8 +731,8 @@ define i128 @atomic_load_relaxed(i64, i64, ptr %p) { ; ; LSE-LABEL: atomic_load_relaxed: ; LSE: // %bb.0: -; LSE-NEXT: mov x0, #0 -; LSE-NEXT: mov x1, #0 +; LSE-NEXT: mov x0, #0 // =0x0 +; LSE-NEXT: mov x1, #0 // =0x0 ; LSE-NEXT: casp x0, x1, x0, x1, [x2] ; LSE-NEXT: ret %r = load atomic i128, ptr %p monotonic, align 16 @@ -779,9 +763,7 @@ define void @atomic_store_seq_cst(i128 %in, ptr %p) { ; ; LSE-LABEL: atomic_store_seq_cst: ; LSE: // %bb.0: -; LSE-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1 ; LSE-NEXT: ldp x4, x5, [x2] -; LSE-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1 ; LSE-NEXT: .LBB14_1: // %atomicrmw.start ; LSE-NEXT: // =>This Inner Loop Header: Depth=1 ; LSE-NEXT: mov x6, x4 @@ -821,9 +803,7 @@ define void @atomic_store_release(i128 %in, ptr %p) { ; ; LSE-LABEL: atomic_store_release: ; LSE: // %bb.0: -; LSE-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1 ; LSE-NEXT: ldp x4, x5, [x2] -; LSE-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1 ; LSE-NEXT: .LBB15_1: // %atomicrmw.start ; LSE-NEXT: // =>This Inner Loop Header: Depth=1 ; LSE-NEXT: mov x6, x4 @@ -863,9 +843,7 @@ define void @atomic_store_relaxed(i128 %in, ptr %p) { ; ; LSE-LABEL: atomic_store_relaxed: ; LSE: // %bb.0: -; LSE-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1 ; LSE-NEXT: ldp x4, x5, [x2] -; LSE-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1 ; LSE-NEXT: .LBB16_1: // %atomicrmw.start ; LSE-NEXT: // =>This Inner Loop Header: Depth=1 ; LSE-NEXT: mov x6, x4 @@ -921,10 +899,6 @@ define void @cmpxchg_dead(ptr %ptr, i128 %desired, i128 %new) { ; ; LSE-LABEL: cmpxchg_dead: ; LSE: // %bb.0: -; LSE-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def 
$x4_x5 -; LSE-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 -; LSE-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5 -; LSE-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 ; LSE-NEXT: casp x2, x3, x4, x5, [x0] ; LSE-NEXT: ret cmpxchg ptr %ptr, i128 %desired, i128 %new monotonic monotonic diff --git a/llvm/test/CodeGen/AArch64/arm64-dup.ll b/llvm/test/CodeGen/AArch64/arm64-dup.ll index 2bf5419e54830b..979a8b16f4217b 100644 --- a/llvm/test/CodeGen/AArch64/arm64-dup.ll +++ b/llvm/test/CodeGen/AArch64/arm64-dup.ll @@ -463,9 +463,7 @@ define <4 x i32> @test_perfectshuffle_dupext_v4i32(<4 x i32> %a, <4 x i32> %b) n ; CHECK-GI-LABEL: test_perfectshuffle_dupext_v4i32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI35_0 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI35_0] -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2 ; CHECK-GI-NEXT: ret %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> @@ -482,9 +480,7 @@ define <4 x float> @test_perfectshuffle_dupext_v4f32(<4 x float> %a, <4 x float> ; CHECK-GI-LABEL: test_perfectshuffle_dupext_v4f32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI36_0 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI36_0] -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2 ; CHECK-GI-NEXT: ret %r = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> @@ -504,14 +500,13 @@ define void @disguised_dup(<4 x float> %x, ptr %p1, ptr %p2) { ; CHECK-GI-LABEL: disguised_dup: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI37_1 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI37_1] +; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI37_1] ; CHECK-GI-NEXT: adrp x8, .LCPI37_0 -; CHECK-GI-NEXT: tbl.16b v0, { v0, 
v1 }, v2 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI37_0] -; CHECK-GI-NEXT: tbl.16b v2, { v0, v1 }, v2 +; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v1 +; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI37_0] +; CHECK-GI-NEXT: tbl.16b v1, { v0, v1 }, v1 ; CHECK-GI-NEXT: str q0, [x0] -; CHECK-GI-NEXT: str q2, [x1] +; CHECK-GI-NEXT: str q1, [x1] ; CHECK-GI-NEXT: ret %shuf = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %dup = shufflevector <4 x float> %shuf, <4 x float> undef, <4 x i32> diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll index 628fb550a0532b..fc469a3169deb2 100644 --- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll +++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll @@ -5490,18 +5490,14 @@ declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64 define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.b { v0, v1 }[0], [x0], #2 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.b { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, i64 0, ptr %A) @@ -5513,18 +5509,14 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2lane(ptr %A, ptr %ptr, < define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <16 x i8> 
%B, <16 x i8> %C) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.b { v0, v1 }[0], [x0], x2 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, x2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.b { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, i64 0, ptr %A) @@ -5539,18 +5531,14 @@ declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8>, define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.b { v0, v1 }[0], [x0], #2 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.b { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, i64 0, ptr %A) @@ -5562,18 +5550,14 @@ define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2lane(ptr %A, ptr %ptr, <8 x define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2lane(ptr %A, ptr 
%ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.b { v0, v1 }[0], [x0], x2 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, x2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.b { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, i64 0, ptr %A) @@ -5588,18 +5572,14 @@ declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8>, <8 x define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.h { v0, v1 }[0], [x0], #4 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, #4 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.h { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #4 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, i64 0, ptr %A) @@ -5611,19 +5591,15 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2lane(ptr %A, ptr %ptr, < define { <8 x i16>, <8 x i16> } 
@test_v8i16_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.h { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_reg_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.h { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #1 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, i64 0, ptr %A) @@ -5638,18 +5614,14 @@ declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16>, define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.h { v0, v1 }[0], [x0], #4 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, #4 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.h { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #4 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, i64 0, ptr %A) @@ -5661,19 +5633,15 @@ define { <4 x i16>, <4 x 
i16> } @test_v4i16_post_imm_ld2lane(ptr %A, ptr %ptr, < define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.h { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_reg_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.h { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #1 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, i64 0, ptr %A) @@ -5688,18 +5656,14 @@ declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0(<4 x i16>, define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.s { v0, v1 }[0], [x0], #8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, #8 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #8 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x 
i32> %B, <4 x i32> %C, i64 0, ptr %A) @@ -5711,19 +5675,15 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2lane(ptr %A, ptr %ptr, < define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.s { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_reg_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, i64 0, ptr %A) @@ -5738,18 +5698,14 @@ declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32>, define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.s { v0, v1 }[0], [x0], #8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, #8 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #8 ; CHECK-GI-NEXT: str x8, [x1] ; 
CHECK-GI-NEXT: ret %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, i64 0, ptr %A) @@ -5761,19 +5717,15 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2lane(ptr %A, ptr %ptr, < define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.s { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_reg_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, i64 0, ptr %A) @@ -5788,18 +5740,14 @@ declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0(<2 x i32>, define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.d { v0, v1 }[0], [x0], #16 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, #16 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: 
ld2.d { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #16 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, i64 0, ptr %A) @@ -5811,19 +5759,15 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2lane(ptr %A, ptr %ptr, < define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.d { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_reg_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, i64 0, ptr %A) @@ -5838,18 +5782,14 @@ declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64>, define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.d { v0, v1 }[0], [x0], #16 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, 
#16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #16 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, i64 0, ptr %A) @@ -5861,19 +5801,15 @@ define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2lane(ptr %A, ptr %ptr, < define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.d { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_reg_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, i64 0, ptr %A) @@ -5888,18 +5824,14 @@ declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0(<1 x i64>, define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.s { v0, v1 }[0], [x0], #8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; 
CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, #8 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #8 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0(<4 x float> %B, <4 x float> %C, i64 0, ptr %A) @@ -5911,19 +5843,15 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2lane(ptr %A, ptr %pt define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <4 x float> %B, <4 x float> %C) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.s { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_reg_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0(<4 x float> %B, <4 x float> %C, i64 0, ptr %A) @@ -5938,18 +5866,14 @@ declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0(<4 x fl define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.s { v0, v1 }[0], [x0], #8 ; 
CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, #8 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #8 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0(<2 x float> %B, <2 x float> %C, i64 0, ptr %A) @@ -5961,19 +5885,15 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2lane(ptr %A, ptr %pt define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x float> %B, <2 x float> %C) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.s { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_reg_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0(<2 x float> %B, <2 x float> %C, i64 0, ptr %A) @@ -5988,18 +5908,14 @@ declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0(<2 x fl define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def 
$q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.d { v0, v1 }[0], [x0], #16 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, #16 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #16 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0(<2 x double> %B, <2 x double> %C, i64 0, ptr %A) @@ -6011,19 +5927,15 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2lane(ptr %A, ptr % define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x double> %B, <2 x double> %C) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.d { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_reg_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0(<2 x double> %B, <2 x double> %C, i64 0, ptr %A) @@ -6038,18 +5950,14 @@ declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0(<2 x define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) 
nounwind { ; CHECK-SD-LABEL: test_v1f64_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.d { v0, v1 }[0], [x0], #16 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #16 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0(<1 x double> %B, <1 x double> %C, i64 0, ptr %A) @@ -6061,19 +5969,15 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2lane(ptr %A, ptr % define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <1 x double> %B, <1 x double> %C) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.d { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_reg_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0(<1 x double> %B, <1 x double> %C, i64 0, ptr %A) @@ -6088,20 +5992,14 @@ declare { <1 x double>, <1 x double> } 
@llvm.aarch64.neon.ld2lane.v1f64.p0(<1 x define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.b { v0, v1, v2 }[0], [x0], #3 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.b { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #3 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, ptr %A) @@ -6113,20 +6011,14 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3lane(ptr %A, define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.b { v0, v1, v2 }[0], [x0], x2 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; 
CHECK-GI-NEXT: add x8, x0, x2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.b { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, ptr %A) @@ -6141,20 +6033,14 @@ declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0( define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.b { v0, v1, v2 }[0], [x0], #3 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_imm_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.b { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #3 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, ptr %A) @@ -6166,20 +6052,14 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3lane(ptr %A, ptr define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_ld3lane: ; CHECK-SD: ; 
%bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.b { v0, v1, v2 }[0], [x0], x2 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, x2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.b { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, ptr %A) @@ -6194,20 +6074,14 @@ declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.h { v0, v1, v2 }[0], [x0], #6 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_imm_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, #6 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.h { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #6 ; 
CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, ptr %A) @@ -6219,21 +6093,15 @@ define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3lane(ptr %A, define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_reg_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.h { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_reg_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.h { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #1 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, ptr %A) @@ -6248,20 +6116,14 @@ declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0( define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 
killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.h { v0, v1, v2 }[0], [x0], #6 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, #6 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.h { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #6 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, ptr %A) @@ -6273,21 +6135,15 @@ define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3lane(ptr %A, define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_reg_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.h { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_reg_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.h { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #1 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0(<4 
x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, ptr %A) @@ -6302,20 +6158,14 @@ declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0( define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[0], [x0], #12 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_imm_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, #12 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #12 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, ptr %A) @@ -6327,21 +6177,15 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3lane(ptr %A, define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_reg_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; 
CHECK-GI-LABEL: test_v4i32_post_reg_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, ptr %A) @@ -6356,20 +6200,14 @@ declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0( define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[0], [x0], #12 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, #12 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #12 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, ptr %A) @@ -6381,21 +6219,15 @@ define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3lane(ptr %A, define { <2 x i32>, <2 x 
i32>, <2 x i32> } @test_v2i32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_reg_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_reg_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, ptr %A) @@ -6410,20 +6242,14 @@ declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0( define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[0], [x0], #24 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_imm_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, 
#24 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #24 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, ptr %A) @@ -6435,21 +6261,15 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3lane(ptr %A, define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_reg_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_reg_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, ptr %A) @@ -6464,20 +6284,14 @@ declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0( define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind { ; 
CHECK-SD-LABEL: test_v1i64_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[0], [x0], #24 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, #24 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #24 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, ptr %A) @@ -6489,21 +6303,15 @@ define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3lane(ptr %A, define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_reg_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_reg_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 
killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, ptr %A) @@ -6518,20 +6326,14 @@ declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0( define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[0], [x0], #12 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_imm_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, #12 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #12 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, ptr %A) @@ -6543,21 +6345,15 @@ define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3lane(pt define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_reg_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def 
$q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_reg_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, ptr %A) @@ -6572,20 +6368,14 @@ declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[0], [x0], #12 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, #12 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #12 
; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, ptr %A) @@ -6597,21 +6387,15 @@ define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3lane(pt define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_reg_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_reg_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, ptr %A) @@ -6626,20 +6410,14 @@ declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed 
$q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[0], [x0], #24 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, #24 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #24 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, ptr %A) @@ -6651,21 +6429,15 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3lane define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_reg_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_reg_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 ; CHECK-GI-NEXT: str x8, [x1] ; 
CHECK-GI-NEXT: ret %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, ptr %A) @@ -6680,20 +6452,14 @@ declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane. define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[0], [x0], #24 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, #24 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #24 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, ptr %A) @@ -6705,21 +6471,15 @@ define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3lane define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_reg_ld3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; 
CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_reg_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, ptr %A) @@ -6734,22 +6494,14 @@ declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane. define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0], #4 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, #4 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed 
$q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #4 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, ptr %A) @@ -6761,22 +6513,14 @@ define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4la define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0], x2 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, x2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, ptr %A) @@ -6791,22 +6535,14 @@ declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lan define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x 
i8> } @test_v8i8_post_imm_ld4lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_imm_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0], #4 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, #4 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #4 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, ptr %A) @@ -6818,22 +6554,14 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4lane(pt define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def 
$q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0], x2 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, x2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, ptr %A) @@ -6848,22 +6576,14 @@ declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0], #8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, #8 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 
killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #8 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, ptr %A) @@ -6875,23 +6595,15 @@ define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4la define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #1 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, ptr %A) @@ -6906,22 +6618,14 @@ declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } 
@llvm.aarch64.neon.ld4lan define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_imm_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0], #8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, #8 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #8 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, ptr %A) @@ -6933,23 +6637,15 @@ define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4la define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; 
kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #1 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, ptr %A) @@ -6964,22 +6660,14 @@ declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lan define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, #16 -; CHECK-GI-NEXT: ; kill: def $q1 
killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #16 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, ptr %A) @@ -6991,23 +6679,15 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4la define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } 
@llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, ptr %A) @@ -7022,22 +6702,14 @@ declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lan define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #16 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, ptr %A) @@ -7049,23 +6721,15 @@ define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4la define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def 
$d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, ptr %A) @@ -7080,22 +6744,14 @@ declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lan define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: 
test_v2i64_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, #32 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #32 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, ptr %A) @@ -7107,23 +6763,15 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4la define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.d { v0, 
v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, ptr %A) @@ -7138,22 +6786,14 @@ declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lan define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_imm_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, #32 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #32 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, ptr %A) @@ -7165,23 +6805,15 @@ define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4la define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 
%inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, ptr %A) @@ -7196,22 +6828,14 @@ declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lan define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed 
$q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, #16 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #16 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, ptr %A) @@ -7223,23 +6847,15 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_i define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def 
$q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, ptr %A) @@ -7254,22 +6870,14 @@ declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neo define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_imm_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #16 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 
x float> %D, <2 x float> %E, i64 0, ptr %A) @@ -7281,23 +6889,15 @@ define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_i define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, ptr %A) @@ -7312,22 +6912,14 @@ declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neo define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_imm_ld4lane: ; CHECK-SD: ; 
%bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, #32 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #32 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, ptr %A) @@ -7339,23 +6931,15 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], x8 ; 
CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, ptr %A) @@ -7370,22 +6954,14 @@ declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_imm_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, #32 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: 
def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, #32 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, ptr %A) @@ -7397,23 +6973,15 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, ptr %A) @@ 
-7428,17 +6996,13 @@ declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64 define ptr @test_v16i8_post_imm_st2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_st2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.16b { v0, v1 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.16b { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> %B, <16 x i8> %C, ptr %A) @@ -7449,17 +7013,13 @@ define ptr @test_v16i8_post_imm_st2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C define ptr @test_v16i8_post_reg_st2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_st2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.16b { v0, v1 }, [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.16b { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> %B, <16 x i8> %C, ptr %A) @@ -7473,17 +7033,13 @@ declare void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8>, <16 x i8>, ptr) define ptr @test_v8i8_post_imm_st2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_imm_st2: ; 
CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st2.8b { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st2.8b { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> %B, <8 x i8> %C, ptr %A) @@ -7494,17 +7050,13 @@ define ptr @test_v8i8_post_imm_st2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) n define ptr @test_v8i8_post_reg_st2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_st2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st2.8b { v0, v1 }, [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st2.8b { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> %B, <8 x i8> %C, ptr %A) @@ -7518,17 +7070,13 @@ declare void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8>, <8 x i8>, ptr) define ptr @test_v8i16_post_imm_st2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_st2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.8h { v0, v1 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: 
test_v8i16_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.8h { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> %B, <8 x i16> %C, ptr %A) @@ -7540,8 +7088,6 @@ define ptr @test_v8i16_post_reg_st2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C ; CHECK-SD-LABEL: test_v8i16_post_reg_st2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.8h { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -7549,8 +7095,6 @@ define ptr @test_v8i16_post_reg_st2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.8h { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> %B, <8 x i16> %C, ptr %A) @@ -7564,17 +7108,13 @@ declare void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16>, <8 x i16>, ptr) define ptr @test_v4i16_post_imm_st2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_imm_st2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st2.4h { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; 
CHECK-GI-NEXT: st2.4h { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> %B, <4 x i16> %C, ptr %A) @@ -7586,8 +7126,6 @@ define ptr @test_v4i16_post_reg_st2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C ; CHECK-SD-LABEL: test_v4i16_post_reg_st2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st2.4h { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -7595,8 +7133,6 @@ define ptr @test_v4i16_post_reg_st2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st2.4h { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> %B, <4 x i16> %C, ptr %A) @@ -7610,17 +7146,13 @@ declare void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16>, <4 x i16>, ptr) define ptr @test_v4i32_post_imm_st2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_st2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.4s { v0, v1 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.4s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> %B, <4 x i32> %C, ptr %A) @@ -7632,8 +7164,6 @@ define ptr @test_v4i32_post_reg_st2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C ; 
CHECK-SD-LABEL: test_v4i32_post_reg_st2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.4s { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -7641,8 +7171,6 @@ define ptr @test_v4i32_post_reg_st2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.4s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> %B, <4 x i32> %C, ptr %A) @@ -7656,17 +7184,13 @@ declare void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32>, <4 x i32>, ptr) define ptr @test_v2i32_post_imm_st2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_st2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st2.2s { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st2.2s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> %B, <2 x i32> %C, ptr %A) @@ -7678,8 +7202,6 @@ define ptr @test_v2i32_post_reg_st2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C ; CHECK-SD-LABEL: test_v2i32_post_reg_st2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: 
st2.2s { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -7687,8 +7209,6 @@ define ptr @test_v2i32_post_reg_st2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st2.2s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> %B, <2 x i32> %C, ptr %A) @@ -7702,17 +7222,13 @@ declare void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32>, <2 x i32>, ptr) define ptr @test_v2i64_post_imm_st2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_st2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.2d { v0, v1 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.2d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> %B, <2 x i64> %C, ptr %A) @@ -7724,8 +7240,6 @@ define ptr @test_v2i64_post_reg_st2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C ; CHECK-SD-LABEL: test_v2i64_post_reg_st2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.2d { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -7733,8 +7247,6 @@ define ptr @test_v2i64_post_reg_st2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; 
CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.2d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> %B, <2 x i64> %C, ptr %A) @@ -7748,17 +7260,13 @@ declare void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64>, <2 x i64>, ptr) define ptr @test_v1i64_post_imm_st2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_imm_st2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.1d { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.1d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> %B, <1 x i64> %C, ptr %A) @@ -7770,8 +7278,6 @@ define ptr @test_v1i64_post_reg_st2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C ; CHECK-SD-LABEL: test_v1i64_post_reg_st2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.1d { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -7779,8 +7285,6 @@ define ptr @test_v1i64_post_reg_st2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.1d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> 
%B, <1 x i64> %C, ptr %A) @@ -7794,17 +7298,13 @@ declare void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64>, <1 x i64>, ptr) define ptr @test_v4f32_post_imm_st2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_st2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.4s { v0, v1 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.4s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float> %B, <4 x float> %C, ptr %A) @@ -7816,8 +7316,6 @@ define ptr @test_v4f32_post_reg_st2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float ; CHECK-SD-LABEL: test_v4f32_post_reg_st2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.4s { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -7825,8 +7323,6 @@ define ptr @test_v4f32_post_reg_st2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.4s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float> %B, <4 x float> %C, ptr %A) @@ -7840,17 +7336,13 @@ declare void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float>, <4 x float>, ptr) define ptr @test_v2f32_post_imm_st2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind { ; CHECK-SD-LABEL: 
test_v2f32_post_imm_st2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st2.2s { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st2.2s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v2f32.p0(<2 x float> %B, <2 x float> %C, ptr %A) @@ -7862,8 +7354,6 @@ define ptr @test_v2f32_post_reg_st2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float ; CHECK-SD-LABEL: test_v2f32_post_reg_st2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st2.2s { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -7871,8 +7361,6 @@ define ptr @test_v2f32_post_reg_st2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st2.2s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v2f32.p0(<2 x float> %B, <2 x float> %C, ptr %A) @@ -7886,17 +7374,13 @@ declare void @llvm.aarch64.neon.st2.v2f32.p0(<2 x float>, <2 x float>, ptr) define ptr @test_v2f64_post_imm_st2(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_imm_st2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.2d { v0, v1 }, [x0], #32 ; 
CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.2d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v2f64.p0(<2 x double> %B, <2 x double> %C, ptr %A) @@ -7908,8 +7392,6 @@ define ptr @test_v2f64_post_reg_st2(ptr %A, ptr %ptr, <2 x double> %B, <2 x doub ; CHECK-SD-LABEL: test_v2f64_post_reg_st2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.2d { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -7917,8 +7399,6 @@ define ptr @test_v2f64_post_reg_st2(ptr %A, ptr %ptr, <2 x double> %B, <2 x doub ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.2d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v2f64.p0(<2 x double> %B, <2 x double> %C, ptr %A) @@ -7932,17 +7412,13 @@ declare void @llvm.aarch64.neon.st2.v2f64.p0(<2 x double>, <2 x double>, ptr) define ptr @test_v1f64_post_imm_st2(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_imm_st2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.1d { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, #16 -; 
CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.1d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v1f64.p0(<1 x double> %B, <1 x double> %C, ptr %A) @@ -7954,8 +7430,6 @@ define ptr @test_v1f64_post_reg_st2(ptr %A, ptr %ptr, <1 x double> %B, <1 x doub ; CHECK-SD-LABEL: test_v1f64_post_reg_st2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.1d { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -7963,8 +7437,6 @@ define ptr @test_v1f64_post_reg_st2(ptr %A, ptr %ptr, <1 x double> %B, <1 x doub ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.1d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v1f64.p0(<1 x double> %B, <1 x double> %C, ptr %A) @@ -7978,19 +7450,13 @@ declare void @llvm.aarch64.neon.st2.v1f64.p0(<1 x double>, <1 x double>, ptr) define ptr @test_v16i8_post_imm_st3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.16b { v0, v1, v2 }, [x0], #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_st3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 
killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.16b { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %A) @@ -8001,19 +7467,13 @@ define ptr @test_v16i8_post_imm_st3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C define ptr @test_v16i8_post_reg_st3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.16b { v0, v1, v2 }, [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_st3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.16b { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %A) @@ -8027,19 +7487,13 @@ declare void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, pt define ptr @test_v8i8_post_imm_st3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_imm_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st3.8b { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_imm_st3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 
; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st3.8b { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %A) @@ -8050,19 +7504,13 @@ define ptr @test_v8i8_post_imm_st3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, < define ptr @test_v8i8_post_reg_st3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st3.8b { v0, v1, v2 }, [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_st3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st3.8b { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %A) @@ -8076,19 +7524,13 @@ declare void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, ptr) define ptr @test_v8i16_post_imm_st3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.8h { v0, v1, v2 }, 
[x0], #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_imm_st3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.8h { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %A) @@ -8099,20 +7541,14 @@ define ptr @test_v8i16_post_imm_st3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C define ptr @test_v8i16_post_reg_st3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_reg_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.8h { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_reg_st3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.8h { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %A) @@ -8126,19 +7562,13 @@ declare void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, pt define ptr @test_v4i16_post_imm_st3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_imm_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed 
$d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st3.4h { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_st3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st3.4h { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %A) @@ -8149,20 +7579,14 @@ define ptr @test_v4i16_post_imm_st3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C define ptr @test_v4i16_post_reg_st3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_reg_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st3.4h { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_reg_st3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st3.4h { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %A) @@ -8176,19 +7600,13 @@ declare void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16>, <4 x i16>, 
<4 x i16>, pt define ptr @test_v4i32_post_imm_st3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.4s { v0, v1, v2 }, [x0], #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_imm_st3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.4s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %A) @@ -8199,20 +7617,14 @@ define ptr @test_v4i32_post_imm_st3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C define ptr @test_v4i32_post_reg_st3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_reg_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.4s { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_reg_st3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.4s 
{ v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %A) @@ -8226,19 +7638,13 @@ declare void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, pt define ptr @test_v2i32_post_imm_st3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st3.2s { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_st3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st3.2s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %A) @@ -8249,20 +7655,14 @@ define ptr @test_v2i32_post_imm_st3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C define ptr @test_v2i32_post_reg_st3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_reg_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st3.2s { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_reg_st3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def 
$d0_d1_d2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st3.2s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %A) @@ -8276,19 +7676,13 @@ declare void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, pt define ptr @test_v2i64_post_imm_st3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.2d { v0, v1, v2 }, [x0], #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_imm_st3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.2d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %A) @@ -8299,20 +7693,14 @@ define ptr @test_v2i64_post_imm_st3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C define ptr @test_v2i64_post_reg_st3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_reg_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; 
CHECK-SD-NEXT: st3.2d { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_reg_st3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.2d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %A) @@ -8326,19 +7714,13 @@ declare void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, pt define ptr @test_v1i64_post_imm_st3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_imm_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_st3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %A) @@ -8349,20 +7731,14 @@ define ptr @test_v1i64_post_imm_st3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C define ptr @test_v1i64_post_reg_st3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_reg_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 
killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_reg_st3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %A) @@ -8376,19 +7752,13 @@ declare void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>, pt define ptr @test_v4f32_post_imm_st3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.4s { v0, v1, v2 }, [x0], #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_imm_st3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.4s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %A) @@ -8399,20 +7769,14 @@ define ptr @test_v4f32_post_imm_st3(ptr %A, ptr %ptr, <4 x 
float> %B, <4 x float define ptr @test_v4f32_post_reg_st3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_reg_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.4s { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_reg_st3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.4s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %A) @@ -8426,19 +7790,13 @@ declare void @llvm.aarch64.neon.st3.v4f32.p0(<4 x float>, <4 x float>, <4 x floa define ptr @test_v2f32_post_imm_st3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_imm_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st3.2s { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_st3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def 
$d0_d1_d2 ; CHECK-GI-NEXT: st3.2s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %A) @@ -8449,20 +7807,14 @@ define ptr @test_v2f32_post_imm_st3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float define ptr @test_v2f32_post_reg_st3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_reg_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st3.2s { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_reg_st3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st3.2s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %A) @@ -8476,19 +7828,13 @@ declare void @llvm.aarch64.neon.st3.v2f32.p0(<2 x float>, <2 x float>, <2 x floa define ptr @test_v2f64_post_imm_st3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_imm_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.2d { v0, v1, v2 }, [x0], #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_st3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: 
def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.2d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %A) @@ -8499,20 +7845,14 @@ define ptr @test_v2f64_post_imm_st3(ptr %A, ptr %ptr, <2 x double> %B, <2 x doub define ptr @test_v2f64_post_reg_st3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_reg_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.2d { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_reg_st3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.2d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %A) @@ -8526,19 +7866,13 @@ declare void @llvm.aarch64.neon.st3.v2f64.p0(<2 x double>, <2 x double>, <2 x do define ptr @test_v1f64_post_imm_st3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_imm_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed 
$d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_st3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %A) @@ -8549,20 +7883,14 @@ define ptr @test_v1f64_post_imm_st3(ptr %A, ptr %ptr, <1 x double> %B, <1 x doub define ptr @test_v1f64_post_reg_st3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_reg_st3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_reg_st3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %A) @@ -8576,21 +7904,13 @@ declare void @llvm.aarch64.neon.st3.v1f64.p0(<1 x double>, <1 x double>, <1 x do define ptr @test_v16i8_post_imm_st4(ptr %A, ptr 
%ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.16b { v0, v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.16b { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, ptr %A) @@ -8601,21 +7921,13 @@ define ptr @test_v16i8_post_imm_st4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C define ptr @test_v16i8_post_reg_st4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.16b { v0, v1, v2, v3 }, [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed 
$q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.16b { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, ptr %A) @@ -8629,21 +7941,13 @@ declare void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, <1 define ptr @test_v8i8_post_imm_st4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_imm_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st4.8b { v0, v1, v2, v3 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_imm_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st4.8b { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, ptr %A) @@ -8654,21 +7958,13 @@ define ptr @test_v8i8_post_imm_st4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, < define ptr @test_v8i8_post_reg_st4(ptr %A, ptr %ptr, <8 x i8> %B, <8 
x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st4.8b { v0, v1, v2, v3 }, [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st4.8b { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, ptr %A) @@ -8682,21 +7978,13 @@ declare void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i define ptr @test_v8i16_post_imm_st4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.8h { v0, v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_imm_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; 
CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.8h { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, ptr %A) @@ -8707,22 +7995,14 @@ define ptr @test_v8i16_post_imm_st4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C define ptr @test_v8i16_post_reg_st4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_reg_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.8h { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_reg_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.8h { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, ptr %A) @@ -8736,21 +8016,13 @@ declare void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, <8 define ptr @test_v4i16_post_imm_st4(ptr 
%A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_imm_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st4.4h { v0, v1, v2, v3 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st4.4h { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, ptr %A) @@ -8761,22 +8033,14 @@ define ptr @test_v4i16_post_imm_st4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C define ptr @test_v4i16_post_reg_st4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_reg_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st4.4h { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_reg_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; 
kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st4.4h { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, ptr %A) @@ -8790,21 +8054,13 @@ declare void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>,<4 define ptr @test_v4i32_post_imm_st4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.4s { v0, v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_imm_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.4s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, ptr %A) @@ -8815,22 +8071,14 @@ define ptr @test_v4i32_post_imm_st4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C define ptr 
@test_v4i32_post_reg_st4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_reg_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.4s { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_reg_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.4s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, ptr %A) @@ -8844,21 +8092,13 @@ declare void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>,<4 define ptr @test_v2i32_post_imm_st4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st4.2s { v0, v1, v2, v3 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_st4: ; 
CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st4.2s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, ptr %A) @@ -8869,22 +8109,14 @@ define ptr @test_v2i32_post_imm_st4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C define ptr @test_v2i32_post_reg_st4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_reg_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st4.2s { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_reg_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st4.2s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, ptr %A) @@ -8898,21 +8130,13 @@ declare void 
@llvm.aarch64.neon.st4.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, <2 define ptr @test_v2i64_post_imm_st4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.2d { v0, v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_imm_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.2d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, ptr %A) @@ -8923,22 +8147,14 @@ define ptr @test_v2i64_post_imm_st4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C define ptr @test_v2i64_post_reg_st4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_reg_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.2d { v0, v1, v2, v3 }, [x0], x8 
; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_reg_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.2d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, ptr %A) @@ -8952,21 +8168,13 @@ declare void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>,<2 define ptr @test_v1i64_post_imm_st4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_imm_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, ptr %A) @@ -8977,22 
+8185,14 @@ define ptr @test_v1i64_post_imm_st4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C define ptr @test_v1i64_post_reg_st4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_reg_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_reg_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, ptr %A) @@ -9006,21 +8206,13 @@ declare void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>,<1 define ptr @test_v4f32_post_imm_st4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; 
CHECK-SD-NEXT: st4.4s { v0, v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_imm_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.4s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, ptr %A) @@ -9031,22 +8223,14 @@ define ptr @test_v4f32_post_imm_st4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float define ptr @test_v4f32_post_reg_st4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_reg_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.4s { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_reg_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.4s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void 
@llvm.aarch64.neon.st4.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, ptr %A) @@ -9060,21 +8244,13 @@ declare void @llvm.aarch64.neon.st4.v4f32.p0(<4 x float>, <4 x float>, <4 x floa define ptr @test_v2f32_post_imm_st4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_imm_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st4.2s { v0, v1, v2, v3 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st4.2s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, ptr %A) @@ -9085,22 +8261,14 @@ define ptr @test_v2f32_post_imm_st4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float define ptr @test_v2f32_post_reg_st4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_reg_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed 
$d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st4.2s { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_reg_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st4.2s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, ptr %A) @@ -9114,21 +8282,13 @@ declare void @llvm.aarch64.neon.st4.v2f32.p0(<2 x float>, <2 x float>, <2 x floa define ptr @test_v2f64_post_imm_st4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_imm_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.2d { v0, v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; 
CHECK-GI-NEXT: st4.2d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, ptr %A) @@ -9139,22 +8299,14 @@ define ptr @test_v2f64_post_imm_st4(ptr %A, ptr %ptr, <2 x double> %B, <2 x doub define ptr @test_v2f64_post_reg_st4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_reg_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.2d { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_reg_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.2d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, ptr %A) @@ -9168,21 +8320,13 @@ declare void @llvm.aarch64.neon.st4.v2f64.p0(<2 x double>, <2 x double>, <2 x do define ptr @test_v1f64_post_imm_st4(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_imm_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d2 
killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, ptr %A) @@ -9193,22 +8337,14 @@ define ptr @test_v1f64_post_imm_st4(ptr %A, ptr %ptr, <1 x double> %B, <1 x doub define ptr @test_v1f64_post_reg_st4(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_reg_st4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_reg_st4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: 
def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, ptr %A) @@ -9222,17 +8358,13 @@ declare void @llvm.aarch64.neon.st4.v1f64.p0(<1 x double>, <1 x double>, <1 x do define ptr @test_v16i8_post_imm_st1x2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_st1x2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.16b { v0, v1 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.16b { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8> %B, <16 x i8> %C, ptr %A) @@ -9243,17 +8375,13 @@ define ptr @test_v16i8_post_imm_st1x2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> define ptr @test_v16i8_post_reg_st1x2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_st1x2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.16b { v0, v1 }, [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.16b { v0, v1 }, 
[x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8> %B, <16 x i8> %C, ptr %A) @@ -9267,17 +8395,13 @@ declare void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8>, <16 x i8>, ptr) define ptr @test_v8i8_post_imm_st1x2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_imm_st1x2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.8b { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.8b { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8> %B, <8 x i8> %C, ptr %A) @@ -9288,17 +8412,13 @@ define ptr @test_v8i8_post_imm_st1x2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) define ptr @test_v8i8_post_reg_st1x2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_st1x2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.8b { v0, v1 }, [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.8b { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8> %B, <8 x i8> %C, ptr %A) @@ -9312,17 +8432,13 @@ declare void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8>, <8 x i8>, ptr) define ptr @test_v8i16_post_imm_st1x2(ptr %A, 
ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_st1x2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.8h { v0, v1 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.8h { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16> %B, <8 x i16> %C, ptr %A) @@ -9334,8 +8450,6 @@ define ptr @test_v8i16_post_reg_st1x2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> ; CHECK-SD-LABEL: test_v8i16_post_reg_st1x2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.8h { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -9343,8 +8457,6 @@ define ptr @test_v8i16_post_reg_st1x2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.8h { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16> %B, <8 x i16> %C, ptr %A) @@ -9358,17 +8470,13 @@ declare void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16>, <8 x i16>, ptr) define ptr @test_v4i16_post_imm_st1x2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_imm_st1x2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed 
$d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.4h { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.4h { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16> %B, <4 x i16> %C, ptr %A) @@ -9380,8 +8488,6 @@ define ptr @test_v4i16_post_reg_st1x2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> ; CHECK-SD-LABEL: test_v4i16_post_reg_st1x2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.4h { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -9389,8 +8495,6 @@ define ptr @test_v4i16_post_reg_st1x2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.4h { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16> %B, <4 x i16> %C, ptr %A) @@ -9404,17 +8508,13 @@ declare void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16>, <4 x i16>, ptr) define ptr @test_v4i32_post_imm_st1x2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_st1x2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.4s { v0, v1 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 
def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.4s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32> %B, <4 x i32> %C, ptr %A) @@ -9426,8 +8526,6 @@ define ptr @test_v4i32_post_reg_st1x2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> ; CHECK-SD-LABEL: test_v4i32_post_reg_st1x2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.4s { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -9435,8 +8533,6 @@ define ptr @test_v4i32_post_reg_st1x2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.4s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32> %B, <4 x i32> %C, ptr %A) @@ -9450,17 +8546,13 @@ declare void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32>, <4 x i32>, ptr) define ptr @test_v2i32_post_imm_st1x2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_st1x2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.2s { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.2s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32> %B, <2 x 
i32> %C, ptr %A) @@ -9472,8 +8564,6 @@ define ptr @test_v2i32_post_reg_st1x2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> ; CHECK-SD-LABEL: test_v2i32_post_reg_st1x2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.2s { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -9481,8 +8571,6 @@ define ptr @test_v2i32_post_reg_st1x2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.2s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32> %B, <2 x i32> %C, ptr %A) @@ -9496,17 +8584,13 @@ declare void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32>, <2 x i32>, ptr) define ptr @test_v2i64_post_imm_st1x2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_st1x2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.2d { v0, v1 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.2d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64> %B, <2 x i64> %C, ptr %A) @@ -9518,8 +8602,6 @@ define ptr @test_v2i64_post_reg_st1x2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> ; CHECK-SD-LABEL: test_v2i64_post_reg_st1x2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; 
kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.2d { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -9527,8 +8609,6 @@ define ptr @test_v2i64_post_reg_st1x2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.2d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64> %B, <2 x i64> %C, ptr %A) @@ -9542,17 +8622,13 @@ declare void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64>, <2 x i64>, ptr) define ptr @test_v1i64_post_imm_st1x2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_imm_st1x2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.1d { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.1d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64> %B, <1 x i64> %C, ptr %A) @@ -9564,8 +8640,6 @@ define ptr @test_v1i64_post_reg_st1x2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> ; CHECK-SD-LABEL: test_v1i64_post_reg_st1x2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.1d { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -9573,8 +8647,6 @@ define ptr 
@test_v1i64_post_reg_st1x2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.1d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64> %B, <1 x i64> %C, ptr %A) @@ -9588,17 +8660,13 @@ declare void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64>, <1 x i64>, ptr) define ptr @test_v4f32_post_imm_st1x2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_st1x2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.4s { v0, v1 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.4s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float> %B, <4 x float> %C, ptr %A) @@ -9610,8 +8678,6 @@ define ptr @test_v4f32_post_reg_st1x2(ptr %A, ptr %ptr, <4 x float> %B, <4 x flo ; CHECK-SD-LABEL: test_v4f32_post_reg_st1x2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.4s { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -9619,8 +8685,6 @@ define ptr @test_v4f32_post_reg_st1x2(ptr %A, ptr %ptr, <4 x float> %B, <4 x flo ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; 
CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.4s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float> %B, <4 x float> %C, ptr %A) @@ -9634,17 +8698,13 @@ declare void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float>, <4 x float>, ptr) define ptr @test_v2f32_post_imm_st1x2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_imm_st1x2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.2s { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.2s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v2f32.p0(<2 x float> %B, <2 x float> %C, ptr %A) @@ -9656,8 +8716,6 @@ define ptr @test_v2f32_post_reg_st1x2(ptr %A, ptr %ptr, <2 x float> %B, <2 x flo ; CHECK-SD-LABEL: test_v2f32_post_reg_st1x2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.2s { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -9665,8 +8723,6 @@ define ptr @test_v2f32_post_reg_st1x2(ptr %A, ptr %ptr, <2 x float> %B, <2 x flo ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.2s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v2f32.p0(<2 x float> %B, <2 x float> %C, ptr %A) @@ 
-9680,17 +8736,13 @@ declare void @llvm.aarch64.neon.st1x2.v2f32.p0(<2 x float>, <2 x float>, ptr) define ptr @test_v2f64_post_imm_st1x2(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_imm_st1x2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.2d { v0, v1 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.2d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double> %B, <2 x double> %C, ptr %A) @@ -9702,8 +8754,6 @@ define ptr @test_v2f64_post_reg_st1x2(ptr %A, ptr %ptr, <2 x double> %B, <2 x do ; CHECK-SD-LABEL: test_v2f64_post_reg_st1x2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.2d { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -9711,8 +8761,6 @@ define ptr @test_v2f64_post_reg_st1x2(ptr %A, ptr %ptr, <2 x double> %B, <2 x do ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.2d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double> %B, <2 x double> %C, ptr %A) @@ -9726,17 +8774,13 @@ declare void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double>, <2 x double>, ptr) define ptr @test_v1f64_post_imm_st1x2(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) nounwind { ; 
CHECK-SD-LABEL: test_v1f64_post_imm_st1x2: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.1d { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.1d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double> %B, <1 x double> %C, ptr %A) @@ -9748,8 +8792,6 @@ define ptr @test_v1f64_post_reg_st1x2(ptr %A, ptr %ptr, <1 x double> %B, <1 x do ; CHECK-SD-LABEL: test_v1f64_post_reg_st1x2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.1d { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -9757,8 +8799,6 @@ define ptr @test_v1f64_post_reg_st1x2(ptr %A, ptr %ptr, <1 x double> %B, <1 x do ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.1d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double> %B, <1 x double> %C, ptr %A) @@ -9772,19 +8812,13 @@ declare void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double>, <1 x double>, ptr) define ptr @test_v16i8_post_imm_st1x3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def 
$q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.16b { v0, v1, v2 }, [x0], #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_st1x3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st1.16b { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %A) @@ -9795,19 +8829,13 @@ define ptr @test_v16i8_post_imm_st1x3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> define ptr @test_v16i8_post_reg_st1x3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.16b { v0, v1, v2 }, [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_st1x3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st1.16b { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %A) @@ -9821,19 +8849,13 @@ declare void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, define ptr @test_v8i8_post_imm_st1x3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { ; 
CHECK-SD-LABEL: test_v8i8_post_imm_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.8b { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_imm_st1x3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.8b { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %A) @@ -9844,19 +8866,13 @@ define ptr @test_v8i8_post_imm_st1x3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, define ptr @test_v8i8_post_reg_st1x3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.8b { v0, v1, v2 }, [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_st1x3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.8b { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %A) @@ -9870,19 +8886,13 @@ declare 
void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, ptr) define ptr @test_v8i16_post_imm_st1x3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.8h { v0, v1, v2 }, [x0], #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_imm_st1x3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st1.8h { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %A) @@ -9893,20 +8903,14 @@ define ptr @test_v8i16_post_imm_st1x3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> define ptr @test_v8i16_post_reg_st1x3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_reg_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.8h { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_reg_st1x3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: 
def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st1.8h { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %A) @@ -9920,19 +8924,13 @@ declare void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, define ptr @test_v4i16_post_imm_st1x3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_imm_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.4h { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_st1x3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.4h { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %A) @@ -9943,20 +8941,14 @@ define ptr @test_v4i16_post_imm_st1x3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> define ptr @test_v4i16_post_reg_st1x3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_reg_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.4h { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_reg_st1x3: ; CHECK-GI: ; %bb.0: ; 
CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.4h { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %A) @@ -9970,19 +8962,13 @@ declare void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>, define ptr @test_v4i32_post_imm_st1x3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.4s { v0, v1, v2 }, [x0], #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_imm_st1x3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st1.4s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %A) @@ -9993,20 +8979,14 @@ define ptr @test_v4i32_post_imm_st1x3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> define ptr @test_v4i32_post_reg_st1x3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_reg_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed 
$q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.4s { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_reg_st1x3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st1.4s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %A) @@ -10020,19 +9000,13 @@ declare void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, define ptr @test_v2i32_post_imm_st1x3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.2s { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_st1x3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.2s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %A) @@ -10043,20 +9017,14 @@ define ptr @test_v2i32_post_imm_st1x3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> define ptr @test_v2i32_post_reg_st1x3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 
%inc) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_reg_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.2s { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_reg_st1x3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.2s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %A) @@ -10070,19 +9038,13 @@ declare void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, define ptr @test_v2i64_post_imm_st1x3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.2d { v0, v1, v2 }, [x0], #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_imm_st1x3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st1.2d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64> %B, <2 
x i64> %C, <2 x i64> %D, ptr %A) @@ -10093,20 +9055,14 @@ define ptr @test_v2i64_post_imm_st1x3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> define ptr @test_v2i64_post_reg_st1x3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_reg_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.2d { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_reg_st1x3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st1.2d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %A) @@ -10120,19 +9076,13 @@ declare void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, define ptr @test_v1i64_post_imm_st1x3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_imm_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_st1x3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 -; CHECK-GI-NEXT: ; kill: def $d1 
killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %A) @@ -10143,20 +9093,14 @@ define ptr @test_v1i64_post_imm_st1x3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> define ptr @test_v1i64_post_reg_st1x3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_reg_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_reg_st1x3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %A) @@ -10170,19 +9114,13 @@ declare void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>, define ptr @test_v4f32_post_imm_st1x3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.4s { v0, v1, v2 }, [x0], #48 ; 
CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_imm_st1x3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st1.4s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %A) @@ -10193,20 +9131,14 @@ define ptr @test_v4f32_post_imm_st1x3(ptr %A, ptr %ptr, <4 x float> %B, <4 x flo define ptr @test_v4f32_post_reg_st1x3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_reg_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.4s { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_reg_st1x3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st1.4s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %A) @@ -10220,19 +9152,13 @@ declare void @llvm.aarch64.neon.st1x3.v4f32.p0(<4 x float>, <4 x float>, <4 x fl define ptr @test_v2f32_post_imm_st1x3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_imm_st1x3: ; CHECK-SD: ; %bb.0: -; 
CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.2s { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_st1x3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.2s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %A) @@ -10243,20 +9169,14 @@ define ptr @test_v2f32_post_imm_st1x3(ptr %A, ptr %ptr, <2 x float> %B, <2 x flo define ptr @test_v2f32_post_reg_st1x3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_reg_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.2s { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_reg_st1x3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.2s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %A) @@ -10270,19 +9190,13 
@@ declare void @llvm.aarch64.neon.st1x3.v2f32.p0(<2 x float>, <2 x float>, <2 x fl define ptr @test_v2f64_post_imm_st1x3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_imm_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.2d { v0, v1, v2 }, [x0], #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_st1x3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st1.2d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %A) @@ -10293,20 +9207,14 @@ define ptr @test_v2f64_post_imm_st1x3(ptr %A, ptr %ptr, <2 x double> %B, <2 x do define ptr @test_v2f64_post_reg_st1x3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_reg_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.2d { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_reg_st1x3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 
def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st1.2d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %A) @@ -10320,19 +9228,13 @@ declare void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double>, <2 x double>, <2 x define ptr @test_v1f64_post_imm_st1x3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_imm_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_st1x3: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %A) @@ -10343,20 +9245,14 @@ define ptr @test_v1f64_post_imm_st1x3(ptr %A, ptr %ptr, <1 x double> %B, <1 x do define ptr @test_v1f64_post_reg_st1x3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_reg_st1x3: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: 
ret ; ; CHECK-GI-LABEL: test_v1f64_post_reg_st1x3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %A) @@ -10370,21 +9266,13 @@ declare void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double>, <1 x double>, <1 x define ptr @test_v16i8_post_imm_st1x4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st1.16b { v0, v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.16b { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, ptr %A) @@ -10395,21 +9283,13 @@ define ptr @test_v16i8_post_imm_st1x4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> define ptr 
@test_v16i8_post_reg_st1x4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st1.16b { v0, v1, v2, v3 }, [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.16b { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, ptr %A) @@ -10423,21 +9303,13 @@ declare void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, define ptr @test_v8i8_post_imm_st1x4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_imm_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.8b { v0, v1, v2, v3 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_imm_st1x4: ; CHECK-GI: ; %bb.0: -; 
CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.8b { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, ptr %A) @@ -10448,21 +9320,13 @@ define ptr @test_v8i8_post_imm_st1x4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, define ptr @test_v8i8_post_reg_st1x4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.8b { v0, v1, v2, v3 }, [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.8b { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, ptr %A) @@ -10476,21 +9340,13 @@ declare void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, <8 x 
define ptr @test_v8i16_post_imm_st1x4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st1.8h { v0, v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_imm_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.8h { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, ptr %A) @@ -10501,22 +9357,14 @@ define ptr @test_v8i16_post_imm_st1x4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> define ptr @test_v8i16_post_reg_st1x4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_reg_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st1.8h { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: 
test_v8i16_post_reg_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.8h { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, ptr %A) @@ -10530,21 +9378,13 @@ declare void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, define ptr @test_v4i16_post_imm_st1x4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_imm_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.4h { v0, v1, v2, v3 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.4h { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, ptr %A) @@ -10555,22 +9395,14 @@ define ptr 
@test_v4i16_post_imm_st1x4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> define ptr @test_v4i16_post_reg_st1x4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_reg_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.4h { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_reg_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.4h { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, ptr %A) @@ -10584,21 +9416,13 @@ declare void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>,< define ptr @test_v4i32_post_imm_st1x4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st1.4s { v0, 
v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_imm_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.4s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, ptr %A) @@ -10609,22 +9433,14 @@ define ptr @test_v4i32_post_imm_st1x4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> define ptr @test_v4i32_post_reg_st1x4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_reg_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st1.4s { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_reg_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.4s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 
x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, ptr %A) @@ -10638,21 +9454,13 @@ declare void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>,< define ptr @test_v2i32_post_imm_st1x4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.2s { v0, v1, v2, v3 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.2s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, ptr %A) @@ -10663,22 +9471,14 @@ define ptr @test_v2i32_post_imm_st1x4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> define ptr @test_v2i32_post_reg_st1x4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_reg_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; 
kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.2s { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_reg_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.2s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, ptr %A) @@ -10692,21 +9492,13 @@ declare void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, define ptr @test_v2i64_post_imm_st1x4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st1.2d { v0, v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_imm_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.2d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: 
ret call void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, ptr %A) @@ -10717,22 +9509,14 @@ define ptr @test_v2i64_post_imm_st1x4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> define ptr @test_v2i64_post_reg_st1x4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_reg_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st1.2d { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_reg_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.2d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, ptr %A) @@ -10746,21 +9530,13 @@ declare void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>,< define ptr @test_v1i64_post_imm_st1x4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_imm_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 
killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, ptr %A) @@ -10771,22 +9547,14 @@ define ptr @test_v1i64_post_imm_st1x4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> define ptr @test_v1i64_post_reg_st1x4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_reg_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_reg_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 
killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, ptr %A) @@ -10800,21 +9568,13 @@ declare void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>,< define ptr @test_v4f32_post_imm_st1x4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st1.4s { v0, v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_imm_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.4s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, ptr %A) @@ -10825,22 +9585,14 @@ define ptr @test_v4f32_post_imm_st1x4(ptr %A, ptr %ptr, <4 x float> %B, <4 x flo define ptr @test_v4f32_post_reg_st1x4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_reg_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; 
CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st1.4s { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_reg_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.4s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, ptr %A) @@ -10854,21 +9606,13 @@ declare void @llvm.aarch64.neon.st1x4.v4f32.p0(<4 x float>, <4 x float>, <4 x fl define ptr @test_v2f32_post_imm_st1x4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_imm_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.2s { v0, v1, v2, v3 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 
killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.2s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, ptr %A) @@ -10879,22 +9623,14 @@ define ptr @test_v2f32_post_imm_st1x4(ptr %A, ptr %ptr, <2 x float> %B, <2 x flo define ptr @test_v2f32_post_reg_st1x4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_reg_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.2s { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_reg_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.2s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, ptr %A) @@ -10908,21 +9644,13 @@ declare void @llvm.aarch64.neon.st1x4.v2f32.p0(<2 x float>, <2 x float>, <2 x fl define ptr @test_v2f64_post_imm_st1x4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_imm_st1x4: 
; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st1.2d { v0, v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.2d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, ptr %A) @@ -10933,22 +9661,14 @@ define ptr @test_v2f64_post_imm_st1x4(ptr %A, ptr %ptr, <2 x double> %B, <2 x do define ptr @test_v2f64_post_reg_st1x4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_reg_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st1.2d { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_reg_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 
; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.2d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, ptr %A) @@ -10962,21 +9682,13 @@ declare void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double>, <2 x double>, <2 x define ptr @test_v1f64_post_imm_st1x4(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_imm_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, ptr %A) @@ -10987,22 +9699,14 @@ define ptr @test_v1f64_post_imm_st1x4(ptr %A, ptr %ptr, <1 x double> %B, <1 x do define ptr @test_v1f64_post_reg_st1x4(ptr %A, ptr %ptr, <1 x 
double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_reg_st1x4: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_reg_st1x4: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, ptr %A) @@ -11015,17 +9719,13 @@ declare void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double>, <1 x double>, <1 x define ptr @test_v16i8_post_imm_st2lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_st2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.b { v0, v1 }[0], [x0], #2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; 
CHECK-GI-NEXT: st2.b { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, i64 0, ptr %A) @@ -11036,17 +9736,13 @@ define ptr @test_v16i8_post_imm_st2lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8 define ptr @test_v16i8_post_reg_st2lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_st2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.b { v0, v1 }[0], [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.b { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, i64 0, ptr %A) @@ -11060,17 +9756,13 @@ declare void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8>, <16 x i8>, i64, ptr) define ptr @test_v8i8_post_imm_st2lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_imm_st2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.b { v0, v1 }[0], [x0], #2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_imm_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.b { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, i64 0, ptr %A) @@ -11081,17 +9773,13 @@ define ptr 
@test_v8i8_post_imm_st2lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> % define ptr @test_v8i8_post_reg_st2lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_st2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.b { v0, v1 }[0], [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.b { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, i64 0, ptr %A) @@ -11105,17 +9793,13 @@ declare void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8>, <8 x i8>, i64, ptr) define ptr @test_v8i16_post_imm_st2lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_st2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.h { v0, v1 }[0], [x0], #4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_imm_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #4 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.h { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, i64 0, ptr %A) @@ -11127,8 +9811,6 @@ define ptr @test_v8i16_post_reg_st2lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16 ; CHECK-SD-LABEL: test_v8i16_post_reg_st2lane: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: 
def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.h { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -11136,8 +9818,6 @@ define ptr @test_v8i16_post_reg_st2lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16 ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.h { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, i64 0, ptr %A) @@ -11151,17 +9831,13 @@ declare void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16>, <8 x i16>, i64, ptr) define ptr @test_v4i16_post_imm_st2lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_imm_st2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.h { v0, v1 }[0], [x0], #4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #4 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.h { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, i64 0, ptr %A) @@ -11173,8 +9849,6 @@ define ptr @test_v4i16_post_reg_st2lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16 ; CHECK-SD-LABEL: test_v4i16_post_reg_st2lane: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.h { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -11182,8 
+9856,6 @@ define ptr @test_v4i16_post_reg_st2lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16 ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.h { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, i64 0, ptr %A) @@ -11197,17 +9869,13 @@ declare void @llvm.aarch64.neon.st2lane.v4i16.p0(<4 x i16>, <4 x i16>, i64, ptr) define ptr @test_v4i32_post_imm_st2lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_st2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.s { v0, v1 }[0], [x0], #8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_imm_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #8 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.s { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, i64 0, ptr %A) @@ -11219,8 +9887,6 @@ define ptr @test_v4i32_post_reg_st2lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32 ; CHECK-SD-LABEL: test_v4i32_post_reg_st2lane: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.s { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -11228,8 +9894,6 @@ define ptr @test_v4i32_post_reg_st2lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32 ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; 
CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.s { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, i64 0, ptr %A) @@ -11243,17 +9907,13 @@ declare void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32>, <4 x i32>, i64, ptr) define ptr @test_v2i32_post_imm_st2lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_st2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.s { v0, v1 }[0], [x0], #8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #8 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.s { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, i64 0, ptr %A) @@ -11265,8 +9925,6 @@ define ptr @test_v2i32_post_reg_st2lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32 ; CHECK-SD-LABEL: test_v2i32_post_reg_st2lane: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.s { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -11274,8 +9932,6 @@ define ptr @test_v2i32_post_reg_st2lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32 ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.s { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: 
ret call void @llvm.aarch64.neon.st2lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, i64 0, ptr %A) @@ -11289,17 +9945,13 @@ declare void @llvm.aarch64.neon.st2lane.v2i32.p0(<2 x i32>, <2 x i32>, i64, ptr) define ptr @test_v2i64_post_imm_st2lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_st2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.d { v0, v1 }[0], [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_imm_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.d { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, i64 0, ptr %A) @@ -11311,8 +9963,6 @@ define ptr @test_v2i64_post_reg_st2lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64 ; CHECK-SD-LABEL: test_v2i64_post_reg_st2lane: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.d { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -11320,8 +9970,6 @@ define ptr @test_v2i64_post_reg_st2lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64 ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.d { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, i64 0, ptr %A) @@ -11335,17 +9983,13 @@ declare void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64>, <2 x i64>, i64, 
ptr) define ptr @test_v1i64_post_imm_st2lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_imm_st2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.d { v0, v1 }[0], [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.d { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, i64 0, ptr %A) @@ -11357,8 +10001,6 @@ define ptr @test_v1i64_post_reg_st2lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64 ; CHECK-SD-LABEL: test_v1i64_post_reg_st2lane: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.d { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -11366,8 +10008,6 @@ define ptr @test_v1i64_post_reg_st2lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64 ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.d { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, i64 0, ptr %A) @@ -11381,17 +10021,13 @@ declare void @llvm.aarch64.neon.st2lane.v1i64.p0(<1 x i64>, <1 x i64>, i64, ptr) define ptr @test_v4f32_post_imm_st2lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_st2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: 
; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.s { v0, v1 }[0], [x0], #8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_imm_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #8 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.s { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v4f32.p0(<4 x float> %B, <4 x float> %C, i64 0, ptr %A) @@ -11403,8 +10039,6 @@ define ptr @test_v4f32_post_reg_st2lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x f ; CHECK-SD-LABEL: test_v4f32_post_reg_st2lane: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.s { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -11412,8 +10046,6 @@ define ptr @test_v4f32_post_reg_st2lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x f ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.s { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v4f32.p0(<4 x float> %B, <4 x float> %C, i64 0, ptr %A) @@ -11427,17 +10059,13 @@ declare void @llvm.aarch64.neon.st2lane.v4f32.p0(<4 x float>, <4 x float>, i64, define ptr @test_v2f32_post_imm_st2lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_imm_st2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.s { v0, v1 }[0], [x0], #8 ; 
CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #8 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.s { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v2f32.p0(<2 x float> %B, <2 x float> %C, i64 0, ptr %A) @@ -11449,8 +10077,6 @@ define ptr @test_v2f32_post_reg_st2lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x f ; CHECK-SD-LABEL: test_v2f32_post_reg_st2lane: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.s { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -11458,8 +10084,6 @@ define ptr @test_v2f32_post_reg_st2lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x f ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.s { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v2f32.p0(<2 x float> %B, <2 x float> %C, i64 0, ptr %A) @@ -11473,17 +10097,13 @@ declare void @llvm.aarch64.neon.st2lane.v2f32.p0(<2 x float>, <2 x float>, i64, define ptr @test_v2f64_post_imm_st2lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_imm_st2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.d { v0, v1 }[0], [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def 
$q0_q1 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.d { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v2f64.p0(<2 x double> %B, <2 x double> %C, i64 0, ptr %A) @@ -11495,8 +10115,6 @@ define ptr @test_v2f64_post_reg_st2lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x ; CHECK-SD-LABEL: test_v2f64_post_reg_st2lane: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.d { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -11504,8 +10122,6 @@ define ptr @test_v2f64_post_reg_st2lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.d { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v2f64.p0(<2 x double> %B, <2 x double> %C, i64 0, ptr %A) @@ -11519,17 +10135,13 @@ declare void @llvm.aarch64.neon.st2lane.v2f64.p0(<2 x double>, <2 x double>, i64 define ptr @test_v1f64_post_imm_st2lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_imm_st2lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.d { v0, v1 }[0], [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.d { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret 
call void @llvm.aarch64.neon.st2lane.v1f64.p0(<1 x double> %B, <1 x double> %C, i64 0, ptr %A) @@ -11541,8 +10153,6 @@ define ptr @test_v1f64_post_reg_st2lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x ; CHECK-SD-LABEL: test_v1f64_post_reg_st2lane: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.d { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -11550,8 +10160,6 @@ define ptr @test_v1f64_post_reg_st2lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.d { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v1f64.p0(<1 x double> %B, <1 x double> %C, i64 0, ptr %A) @@ -11565,19 +10173,13 @@ declare void @llvm.aarch64.neon.st2lane.v1f64.p0(<1 x double>, <1 x double>, i64 define ptr @test_v16i8_post_imm_st3lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.b { v0, v1, v2 }[0], [x0], #3 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_st3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.b { v0, v1, v2 }[0], [x8] ; 
CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, ptr %A) @@ -11588,19 +10190,13 @@ define ptr @test_v16i8_post_imm_st3lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8 define ptr @test_v16i8_post_reg_st3lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.b { v0, v1, v2 }[0], [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_st3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.b { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, ptr %A) @@ -11614,19 +10210,13 @@ declare void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8> define ptr @test_v8i8_post_imm_st3lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_imm_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.b { v0, v1, v2 }[0], [x0], #3 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_imm_st3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, 
x0 ; CHECK-GI-NEXT: add x0, x0, #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.b { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, ptr %A) @@ -11637,19 +10227,13 @@ define ptr @test_v8i8_post_imm_st3lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> % define ptr @test_v8i8_post_reg_st3lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.b { v0, v1, v2 }[0], [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_st3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.b { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, ptr %A) @@ -11663,19 +10247,13 @@ declare void @llvm.aarch64.neon.st3lane.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, i6 define ptr @test_v8i16_post_imm_st3lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: 
st3.h { v0, v1, v2 }[0], [x0], #6 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_imm_st3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #6 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.h { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, ptr %A) @@ -11686,20 +10264,14 @@ define ptr @test_v8i16_post_imm_st3lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16 define ptr @test_v8i16_post_reg_st3lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_reg_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.h { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_reg_st3lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.h { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, ptr %A) @@ -11713,19 +10285,13 @@ declare void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16> define ptr @test_v4i16_post_imm_st3lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind { ; CHECK-SD-LABEL: 
test_v4i16_post_imm_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.h { v0, v1, v2 }[0], [x0], #6 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_st3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #6 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.h { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, ptr %A) @@ -11736,20 +10302,14 @@ define ptr @test_v4i16_post_imm_st3lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16 define ptr @test_v4i16_post_reg_st3lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_reg_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.h { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_reg_st3lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.h { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v4i16.p0(<4 x i16> 
%B, <4 x i16> %C, <4 x i16> %D, i64 0, ptr %A) @@ -11763,19 +10323,13 @@ declare void @llvm.aarch64.neon.st3lane.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16> define ptr @test_v4i32_post_imm_st3lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.s { v0, v1, v2 }[0], [x0], #12 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_imm_st3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #12 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.s { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, ptr %A) @@ -11786,20 +10340,14 @@ define ptr @test_v4i32_post_imm_st3lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32 define ptr @test_v4i32_post_reg_st3lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_reg_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_reg_st3lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, 
lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.s { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, ptr %A) @@ -11813,19 +10361,13 @@ declare void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32> define ptr @test_v2i32_post_imm_st3lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.s { v0, v1, v2 }[0], [x0], #12 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_st3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #12 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.s { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, ptr %A) @@ -11836,20 +10378,14 @@ define ptr @test_v2i32_post_imm_st3lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32 define ptr @test_v2i32_post_reg_st3lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_reg_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def 
$q0_q1_q2 ; CHECK-SD-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_reg_st3lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.s { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, ptr %A) @@ -11863,19 +10399,13 @@ declare void @llvm.aarch64.neon.st3lane.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32> define ptr @test_v2i64_post_imm_st3lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.d { v0, v1, v2 }[0], [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_imm_st3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.d { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, ptr %A) @@ -11886,20 +10416,14 @@ define ptr @test_v2i64_post_imm_st3lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64 define ptr @test_v2i64_post_reg_st3lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: 
test_v2i64_post_reg_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.d { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_reg_st3lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.d { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, ptr %A) @@ -11913,19 +10437,13 @@ declare void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64> define ptr @test_v1i64_post_imm_st3lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_imm_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.d { v0, v1, v2 }[0], [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_st3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.d { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v1i64.p0(<1 x i64> %B, <1 x 
i64> %C, <1 x i64> %D, i64 0, ptr %A) @@ -11936,20 +10454,14 @@ define ptr @test_v1i64_post_imm_st3lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64 define ptr @test_v1i64_post_reg_st3lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_reg_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.d { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_reg_st3lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.d { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, ptr %A) @@ -11963,19 +10475,13 @@ declare void @llvm.aarch64.neon.st3lane.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64> define ptr @test_v4f32_post_imm_st3lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.s { v0, v1, v2 }[0], [x0], #12 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_imm_st3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, 
#12 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.s { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, ptr %A) @@ -11986,20 +10492,14 @@ define ptr @test_v4f32_post_imm_st3lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x f define ptr @test_v4f32_post_reg_st3lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_reg_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_reg_st3lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.s { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, ptr %A) @@ -12013,19 +10513,13 @@ declare void @llvm.aarch64.neon.st3lane.v4f32.p0(<4 x float>, <4 x float>, <4 x define ptr @test_v2f32_post_imm_st3lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_imm_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 
killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.s { v0, v1, v2 }[0], [x0], #12 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_st3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #12 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.s { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, ptr %A) @@ -12036,20 +10530,14 @@ define ptr @test_v2f32_post_imm_st3lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x f define ptr @test_v2f32_post_reg_st3lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_reg_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_reg_st3lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.s { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, ptr %A) @@ -12063,19 +10551,13 @@ declare void @llvm.aarch64.neon.st3lane.v2f32.p0(<2 x float>, <2 x float>, <2 x define ptr @test_v2f64_post_imm_st3lane(ptr %A, ptr %ptr, <2 x double> %B, 
<2 x double> %C, <2 x double> %D) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_imm_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.d { v0, v1, v2 }[0], [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_st3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.d { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, ptr %A) @@ -12086,20 +10568,14 @@ define ptr @test_v2f64_post_imm_st3lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x define ptr @test_v2f64_post_reg_st3lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_reg_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.d { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_reg_st3lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.d { v0, v1, v2 }[0], 
[x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, ptr %A) @@ -12113,19 +10589,13 @@ declare void @llvm.aarch64.neon.st3lane.v2f64.p0(<2 x double>, <2 x double>, <2 define ptr @test_v1f64_post_imm_st3lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_imm_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.d { v0, v1, v2 }[0], [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_st3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.d { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, ptr %A) @@ -12136,20 +10606,14 @@ define ptr @test_v1f64_post_imm_st3lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x define ptr @test_v1f64_post_reg_st3lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_reg_st3lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.d { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_reg_st3lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov 
x8, x0 -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.d { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, ptr %A) @@ -12163,21 +10627,13 @@ declare void @llvm.aarch64.neon.st3lane.v1f64.p0(<1 x double>, <1 x double>, <1 define ptr @test_v16i8_post_imm_st4lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.b { v0, v1, v2, v3 }[0], [x0], #4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #4 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.b { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, ptr %A) @@ -12188,21 +10644,13 @@ define ptr @test_v16i8_post_imm_st4lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8 define ptr @test_v16i8_post_reg_st4lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, 
<16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.b { v0, v1, v2, v3 }[0], [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.b { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, ptr %A) @@ -12216,21 +10664,13 @@ declare void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8> define ptr @test_v8i8_post_imm_st4lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_imm_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.b { v0, v1, v2, v3 }[0], [x0], #4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_imm_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 
def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #4 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.b { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, ptr %A) @@ -12241,21 +10681,13 @@ define ptr @test_v8i8_post_imm_st4lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> % define ptr @test_v8i8_post_reg_st4lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.b { v0, v1, v2, v3 }[0], [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.b { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, ptr %A) @@ -12269,21 +10701,13 @@ declare void @llvm.aarch64.neon.st4lane.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, <8 define ptr 
@test_v8i16_post_imm_st4lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.h { v0, v1, v2, v3 }[0], [x0], #8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_imm_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #8 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.h { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, ptr %A) @@ -12294,22 +10718,14 @@ define ptr @test_v8i16_post_imm_st4lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16 define ptr @test_v8i16_post_reg_st4lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_reg_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.h { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: 
test_v8i16_post_reg_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.h { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, ptr %A) @@ -12323,21 +10739,13 @@ declare void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16> define ptr @test_v4i16_post_imm_st4lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_imm_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.h { v0, v1, v2, v3 }[0], [x0], #8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #8 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.h { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, ptr %A) @@ -12348,22 
+10756,14 @@ define ptr @test_v4i16_post_imm_st4lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16 define ptr @test_v4i16_post_reg_st4lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_reg_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #1 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.h { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_reg_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.h { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, ptr %A) @@ -12377,21 +10777,13 @@ declare void @llvm.aarch64.neon.st4lane.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16> define ptr @test_v4i32_post_imm_st4lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 
def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_imm_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.s { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, ptr %A) @@ -12402,22 +10794,14 @@ define ptr @test_v4i32_post_imm_st4lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32 define ptr @test_v4i32_post_reg_st4lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_reg_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_reg_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.s { v0, v1, v2, v3 }[0], 
[x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, ptr %A) @@ -12431,21 +10815,13 @@ declare void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32> define ptr @test_v2i32_post_imm_st4lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.s { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, ptr %A) @@ -12456,22 +10832,14 @@ define ptr @test_v2i32_post_imm_st4lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32 define ptr @test_v2i32_post_reg_st4lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_reg_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def 
$q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_reg_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.s { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, ptr %A) @@ -12485,21 +10853,13 @@ declare void @llvm.aarch64.neon.st4lane.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32> define ptr @test_v2i64_post_imm_st4lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_imm_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; 
CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.d { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, ptr %A) @@ -12510,22 +10870,14 @@ define ptr @test_v2i64_post_imm_st4lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64 define ptr @test_v2i64_post_reg_st4lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_reg_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_reg_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.d { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, ptr %A) @@ -12539,21 +10891,13 @@ declare void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64> define ptr @test_v1i64_post_imm_st4lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_imm_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def 
$d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.d { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, ptr %A) @@ -12564,22 +10908,14 @@ define ptr @test_v1i64_post_imm_st4lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64 define ptr @test_v1i64_post_reg_st4lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_reg_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_reg_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; 
CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.d { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, ptr %A) @@ -12593,21 +10929,13 @@ declare void @llvm.aarch64.neon.st4lane.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64> define ptr @test_v4f32_post_imm_st4lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_imm_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.s { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, ptr %A) @@ -12618,22 +10946,14 @@ define ptr @test_v4f32_post_imm_st4lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x f define ptr @test_v4f32_post_reg_st4lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, 
<4 x float> %D, <4 x float> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_reg_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_reg_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.s { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, ptr %A) @@ -12647,21 +10967,13 @@ declare void @llvm.aarch64.neon.st4lane.v4f32.p0(<4 x float>, <4 x float>, <4 x define ptr @test_v2f32_post_imm_st4lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_imm_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_st4lane: ; CHECK-GI: ; 
%bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #16 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.s { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, ptr %A) @@ -12672,22 +10984,14 @@ define ptr @test_v2f32_post_imm_st4lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x f define ptr @test_v2f32_post_reg_st4lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_reg_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_reg_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.s { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 
0, ptr %A) @@ -12701,21 +11005,13 @@ declare void @llvm.aarch64.neon.st4lane.v2f32.p0(<2 x float>, <2 x float>, <2 x define ptr @test_v2f64_post_imm_st4lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_imm_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.d { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, ptr %A) @@ -12726,22 +11022,14 @@ define ptr @test_v2f64_post_imm_st4lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x define ptr @test_v2f64_post_reg_st4lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_reg_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; 
CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_reg_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.d { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, ptr %A) @@ -12755,21 +11043,13 @@ declare void @llvm.aarch64.neon.st4lane.v2f64.p0(<2 x double>, <2 x double>, <2 define ptr @test_v1f64_post_imm_st4lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_imm_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; 
CHECK-GI-NEXT: st4.d { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, ptr %A) @@ -12780,22 +11060,14 @@ define ptr @test_v1f64_post_imm_st4lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x define ptr @test_v1f64_post_reg_st4lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_reg_st4lane: ; CHECK-SD: ; %bb.0: -; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 -; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_reg_st4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 -; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.d { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, ptr %A) diff --git a/llvm/test/CodeGen/AArch64/arm64-ld1.ll b/llvm/test/CodeGen/AArch64/arm64-ld1.ll index 54b96520dce41d..c9d94f945f7af2 100644 --- a/llvm/test/CodeGen/AArch64/arm64-ld1.ll +++ b/llvm/test/CodeGen/AArch64/arm64-ld1.ll @@ -351,63 +351,30 @@ declare %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld4.v1f64.p0(ptr) nounwi define %struct.__neon_int8x16x2_t 
@ld2lane_16b(<16 x i8> %L1, <16 x i8> %L2, ptr %A) nounwind { ; Make sure we are using the operands defined by the ABI -; CHECK-SD-LABEL: ld2lane_16b: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ld2.b { v0, v1 }[1], [x0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ld2lane_16b: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ld2.b { v0, v1 }[1], [x0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld2lane_16b: +; CHECK: // %bb.0: +; CHECK-NEXT: ld2.b { v0, v1 }[1], [x0] +; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> %L1, <16 x i8> %L2, i64 1, ptr %A) ret %struct.__neon_int8x16x2_t %tmp2 } define %struct.__neon_int8x16x3_t @ld3lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, ptr %A) nounwind { ; Make sure we are using the operands defined by the ABI -; CHECK-SD-LABEL: ld3lane_16b: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ld3.b { v0, v1, v2 }[1], [x0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ld3lane_16b: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ld3.b { v0, v1, v2 }[1], [x0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld3lane_16b: +; CHECK: // %bb.0: +; CHECK-NEXT: ld3.b { v0, v1, v2 }[1], [x0] +; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> 
%L1, <16 x i8> %L2, <16 x i8> %L3, i64 1, ptr %A) ret %struct.__neon_int8x16x3_t %tmp2 } define %struct.__neon_int8x16x4_t @ld4lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, ptr %A) nounwind { ; Make sure we are using the operands defined by the ABI -; CHECK-SD-LABEL: ld4lane_16b: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ld4.b { v0, v1, v2, v3 }[1], [x0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ld4lane_16b: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ld4.b { v0, v1, v2, v3 }[1], [x0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld4lane_16b: +; CHECK: // %bb.0: +; CHECK-NEXT: ld4.b { v0, v1, v2, v3 }[1], [x0] +; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, i64 1, ptr %A) ret %struct.__neon_int8x16x4_t %tmp2 } @@ -418,63 +385,30 @@ declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> define %struct.__neon_int16x8x2_t @ld2lane_8h(<8 x i16> %L1, <8 x i16> %L2, ptr %A) nounwind { ; Make sure we are using the operands defined by the ABI -; CHECK-SD-LABEL: ld2lane_8h: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ld2.h { v0, v1 }[1], [x0] 
-; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ld2lane_8h: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ld2.h { v0, v1 }[1], [x0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld2lane_8h: +; CHECK: // %bb.0: +; CHECK-NEXT: ld2.h { v0, v1 }[1], [x0] +; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, i64 1, ptr %A) ret %struct.__neon_int16x8x2_t %tmp2 } define %struct.__neon_int16x8x3_t @ld3lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, ptr %A) nounwind { ; Make sure we are using the operands defined by the ABI -; CHECK-SD-LABEL: ld3lane_8h: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ld3.h { v0, v1, v2 }[1], [x0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ld3lane_8h: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ld3.h { v0, v1, v2 }[1], [x0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld3lane_8h: +; CHECK: // %bb.0: +; CHECK-NEXT: ld3.h { v0, v1, v2 }[1], [x0] +; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, i64 1, ptr %A) ret %struct.__neon_int16x8x3_t %tmp2 } define %struct.__neon_int16x8x4_t @ld4lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, ptr %A) nounwind { ; Make sure we are using the operands defined by the ABI -; CHECK-SD-LABEL: ld4lane_8h: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q3 killed 
$q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ld4.h { v0, v1, v2, v3 }[1], [x0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ld4lane_8h: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ld4.h { v0, v1, v2, v3 }[1], [x0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld4lane_8h: +; CHECK: // %bb.0: +; CHECK-NEXT: ld4.h { v0, v1, v2, v3 }[1], [x0] +; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, i64 1, ptr %A) ret %struct.__neon_int16x8x4_t %tmp2 } @@ -485,63 +419,30 @@ declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> define %struct.__neon_int32x4x2_t @ld2lane_4s(<4 x i32> %L1, <4 x i32> %L2, ptr %A) nounwind { ; Make sure we are using the operands defined by the ABI -; CHECK-SD-LABEL: ld2lane_4s: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ld2.s { v0, v1 }[1], [x0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ld2lane_4s: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ld2.s { v0, v1 }[1], [x0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld2lane_4s: +; CHECK: // %bb.0: +; CHECK-NEXT: ld2.s { v0, v1 }[1], 
[x0] +; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, i64 1, ptr %A) ret %struct.__neon_int32x4x2_t %tmp2 } define %struct.__neon_int32x4x3_t @ld3lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, ptr %A) nounwind { ; Make sure we are using the operands defined by the ABI -; CHECK-SD-LABEL: ld3lane_4s: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[1], [x0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ld3lane_4s: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[1], [x0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld3lane_4s: +; CHECK: // %bb.0: +; CHECK-NEXT: ld3.s { v0, v1, v2 }[1], [x0] +; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i64 1, ptr %A) ret %struct.__neon_int32x4x3_t %tmp2 } define %struct.__neon_int32x4x4_t @ld4lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, ptr %A) nounwind { ; Make sure we are using the operands defined by the ABI -; CHECK-SD-LABEL: ld4lane_4s: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[1], [x0] -; CHECK-SD-NEXT: ret -; -; 
CHECK-GI-LABEL: ld4lane_4s: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[1], [x0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld4lane_4s: +; CHECK: // %bb.0: +; CHECK-NEXT: ld4.s { v0, v1, v2, v3 }[1], [x0] +; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, i64 1, ptr %A) ret %struct.__neon_int32x4x4_t %tmp2 } @@ -552,63 +453,30 @@ declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> define %struct.__neon_int64x2x2_t @ld2lane_2d(<2 x i64> %L1, <2 x i64> %L2, ptr %A) nounwind { ; Make sure we are using the operands defined by the ABI -; CHECK-SD-LABEL: ld2lane_2d: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ld2.d { v0, v1 }[1], [x0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ld2lane_2d: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ld2.d { v0, v1 }[1], [x0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld2lane_2d: +; CHECK: // %bb.0: +; CHECK-NEXT: ld2.d { v0, v1 }[1], [x0] +; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, i64 1, ptr %A) ret %struct.__neon_int64x2x2_t %tmp2 } define %struct.__neon_int64x2x3_t @ld3lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, ptr %A) nounwind { ; Make sure we are using the operands defined by the ABI -; CHECK-SD-LABEL: 
ld3lane_2d: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[1], [x0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ld3lane_2d: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[1], [x0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld3lane_2d: +; CHECK: // %bb.0: +; CHECK-NEXT: ld3.d { v0, v1, v2 }[1], [x0] +; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, i64 1, ptr %A) ret %struct.__neon_int64x2x3_t %tmp2 } define %struct.__neon_int64x2x4_t @ld4lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, ptr %A) nounwind { ; Make sure we are using the operands defined by the ABI -; CHECK-SD-LABEL: ld4lane_2d: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[1], [x0] -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: ld4lane_2d: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def 
$q0_q1_q2_q3 -; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[1], [x0] -; CHECK-GI-NEXT: ret +; CHECK-LABEL: ld4lane_2d: +; CHECK: // %bb.0: +; CHECK-NEXT: ld4.d { v0, v1, v2, v3 }[1], [x0] +; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, i64 1, ptr %A) ret %struct.__neon_int64x2x4_t %tmp2 } diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll index 43d5ab5ab54e10..ad4b0f377627d5 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll @@ -1780,9 +1780,7 @@ define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 { ; CHECK-GI-LABEL: test_concat_v16i8_v16i8_v16i8: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: adrp x8, .LCPI126_0 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI126_0] -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-GI-NEXT: ret entry: @@ -1799,11 +1797,9 @@ define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 { ; ; CHECK-GI-LABEL: test_concat_v16i8_v8i8_v16i8: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: adrp x8, .LCPI127_0 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: mov b2, v0.b[1] ; CHECK-GI-NEXT: mov b3, v0.b[2] +; CHECK-GI-NEXT: adrp x8, .LCPI127_0 ; CHECK-GI-NEXT: mov b4, v0.b[3] ; CHECK-GI-NEXT: mov b5, v0.b[4] ; CHECK-GI-NEXT: mov b6, v0.b[5] @@ -2003,9 +1999,7 @@ define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 { ; CHECK-GI-LABEL: test_concat_v8i16_v8i16_v8i16: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: adrp x8, .LCPI130_0 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; 
CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI130_0] -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-GI-NEXT: ret entry: @@ -2022,11 +2016,9 @@ define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 { ; ; CHECK-GI-LABEL: test_concat_v8i16_v4i16_v8i16: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: adrp x8, .LCPI131_0 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: mov h2, v0.h[1] ; CHECK-GI-NEXT: mov h3, v0.h[2] +; CHECK-GI-NEXT: adrp x8, .LCPI131_0 ; CHECK-GI-NEXT: mov h4, v0.h[3] ; CHECK-GI-NEXT: mov v0.h[1], v2.h[0] ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI131_0] @@ -2146,9 +2138,7 @@ define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 { ; CHECK-GI-LABEL: test_concat_v4i32_v4i32_v4i32: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: adrp x8, .LCPI134_0 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI134_0] -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-GI-NEXT: ret entry: @@ -2165,10 +2155,8 @@ define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 { ; ; CHECK-GI-LABEL: test_concat_v4i32_v2i32_v4i32: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: adrp x8, .LCPI135_0 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: mov s2, v0.s[1] +; CHECK-GI-NEXT: adrp x8, .LCPI135_0 ; CHECK-GI-NEXT: mov v0.s[1], v2.s[0] ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI135_0] ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll 
index 6327679756739a..d04bac78377bfb 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll @@ -7,12 +7,11 @@ define <4 x i32> @copyTuple.QPair(ptr %a, ptr %b) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v3.4s, #2 ; CHECK-NEXT: movi v2.2d, #0xffffffffffffffff -; CHECK-NEXT: mov v1.16b, v3.16b ; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: mov v1.16b, v3.16b ; CHECK-NEXT: ld2 { v0.s, v1.s }[1], [x0] ; CHECK-NEXT: mov v1.16b, v2.16b ; CHECK-NEXT: ld2 { v0.s, v1.s }[1], [x1] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 ; CHECK-NEXT: ret entry: %vld = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> , <4 x i32> , i64 1, ptr %a) @@ -25,17 +24,16 @@ entry: define <4 x i32> @copyTuple.QTriple(ptr %a, ptr %b, <4 x i32> %c) { ; CHECK-LABEL: copyTuple.QTriple: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $q0 killed $q0 def $q31_q0_q1 ; CHECK-NEXT: movi v31.2d, #0xffffffffffffffff ; CHECK-NEXT: mov v1.16b, v0.16b +; CHECK-NEXT: mov v2.16b, v0.16b +; CHECK-NEXT: mov v3.16b, v1.16b +; CHECK-NEXT: mov v1.16b, v31.16b +; CHECK-NEXT: ld3 { v1.s, v2.s, v3.s }[1], [x0] ; CHECK-NEXT: mov v2.16b, v31.16b ; CHECK-NEXT: mov v3.16b, v0.16b -; CHECK-NEXT: mov v4.16b, v1.16b -; CHECK-NEXT: ld3 { v2.s, v3.s, v4.s }[1], [x0] -; CHECK-NEXT: mov v3.16b, v31.16b -; CHECK-NEXT: mov v4.16b, v0.16b -; CHECK-NEXT: ld3 { v2.s, v3.s, v4.s }[1], [x1] -; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ld3 { v1.s, v2.s, v3.s }[1], [x1] +; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: ret entry: %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> , <4 x i32> %c, <4 x i32> %c, i64 1, ptr %a) @@ -48,20 +46,19 @@ entry: define <4 x i32> @copyTuple.QQuad(ptr %a, ptr %b, <4 x i32> %c) { ; CHECK-LABEL: copyTuple.QQuad: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $q0 killed $q0 def $q31_q0_q1_q2 ; CHECK-NEXT: movi 
v31.2d, #0xffffffffffffffff ; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: mov v2.16b, v0.16b -; CHECK-NEXT: mov v3.16b, v31.16b +; CHECK-NEXT: mov v4.16b, v2.16b +; CHECK-NEXT: mov v3.16b, v1.16b +; CHECK-NEXT: mov v2.16b, v0.16b +; CHECK-NEXT: mov v1.16b, v31.16b +; CHECK-NEXT: ld4 { v1.s, v2.s, v3.s, v4.s }[1], [x0] +; CHECK-NEXT: mov v2.16b, v31.16b +; CHECK-NEXT: mov v3.16b, v0.16b ; CHECK-NEXT: mov v4.16b, v0.16b -; CHECK-NEXT: mov v5.16b, v1.16b -; CHECK-NEXT: mov v6.16b, v2.16b -; CHECK-NEXT: ld4 { v3.s, v4.s, v5.s, v6.s }[1], [x0] -; CHECK-NEXT: mov v4.16b, v31.16b -; CHECK-NEXT: mov v5.16b, v0.16b -; CHECK-NEXT: mov v6.16b, v0.16b -; CHECK-NEXT: ld4 { v3.s, v4.s, v5.s, v6.s }[1], [x1] -; CHECK-NEXT: mov v0.16b, v3.16b +; CHECK-NEXT: ld4 { v1.s, v2.s, v3.s, v4.s }[1], [x1] +; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: ret entry: %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> , <4 x i32> %c, <4 x i32> %c, <4 x i32> %c, i64 1, ptr %a) diff --git a/llvm/test/CodeGen/AArch64/arm64-tbl.ll b/llvm/test/CodeGen/AArch64/arm64-tbl.ll index 44b92e6ccd088f..2044a866b830aa 100644 --- a/llvm/test/CodeGen/AArch64/arm64-tbl.ll +++ b/llvm/test/CodeGen/AArch64/arm64-tbl.ll @@ -21,121 +21,55 @@ define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind { } define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) { -; CHECK-SD-LABEL: tbl2_8b: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: tbl.8b v0, { v0, v1 }, v2 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: tbl2_8b: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: tbl.8b v0, { v0, v1 }, v2 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: tbl2_8b: +; CHECK: // %bb.0: +; CHECK-NEXT: tbl.8b v0, { 
v0, v1 }, v2 +; CHECK-NEXT: ret %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) ret <8 x i8> %tmp3 } define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) { -; CHECK-SD-LABEL: tbl2_16b: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: tbl.16b v0, { v0, v1 }, v2 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: tbl2_16b: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: tbl2_16b: +; CHECK: // %bb.0: +; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2 +; CHECK-NEXT: ret %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) ret <16 x i8> %tmp3 } define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) { -; CHECK-SD-LABEL: tbl3_8b: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: tbl.8b v0, { v0, v1, v2 }, v3 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: tbl3_8b: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: tbl.8b v0, { v0, v1, v2 }, v3 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: tbl3_8b: +; CHECK: // %bb.0: +; CHECK-NEXT: tbl.8b v0, { v0, v1, v2 }, v3 +; CHECK-NEXT: ret %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) ret <8 x i8> %tmp3 } define 
<16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) { -; CHECK-SD-LABEL: tbl3_16b: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2 }, v3 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: tbl3_16b: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-GI-NEXT: tbl.16b v0, { v0, v1, v2 }, v3 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: tbl3_16b: +; CHECK: // %bb.0: +; CHECK-NEXT: tbl.16b v0, { v0, v1, v2 }, v3 +; CHECK-NEXT: ret %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) ret <16 x i8> %tmp3 } define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) { -; CHECK-SD-LABEL: tbl4_8b: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: tbl.8b v0, { v0, v1, v2, v3 }, v4 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: tbl4_8b: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: tbl.8b v0, 
{ v0, v1, v2, v3 }, v4 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: tbl4_8b: +; CHECK: // %bb.0: +; CHECK-NEXT: tbl.8b v0, { v0, v1, v2, v3 }, v4 +; CHECK-NEXT: ret %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) ret <8 x i8> %tmp3 } define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) { -; CHECK-SD-LABEL: tbl4_16b: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: tbl4_16b: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-GI-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: tbl4_16b: +; CHECK: // %bb.0: +; CHECK-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 +; CHECK-NEXT: ret %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) ret <16 x i8> %tmp3 } @@ -173,11 +107,7 @@ define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8 ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_v8i8: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: adrp x8, .LCPI8_0 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-SD-NEXT: ldr d4, [x8, :lo12:.LCPI8_0] -; CHECK-SD-NEXT: // kill: 
def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-SD-NEXT: tbl.8b v0, { v0, v1 }, v4 ; CHECK-SD-NEXT: tbl.8b v1, { v2, v3 }, v4 ; CHECK-SD-NEXT: mov.s v0[1], v1[1] @@ -187,11 +117,7 @@ define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8 ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_v8i8: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI8_1 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: ldr d4, [x8, :lo12:.LCPI8_1] -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: adrp x8, .LCPI8_0 ; CHECK-GI-NEXT: tbl.8b v0, { v0, v1 }, v4 ; CHECK-GI-NEXT: tbl.8b v1, { v2, v3 }, v4 @@ -262,23 +188,15 @@ define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8 define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: adrp x8, .LCPI9_0 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI9_0] -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI9_1 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI9_1] -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def 
$q0_q1 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: adrp x8, .LCPI9_0 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4 ; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v4 @@ -331,11 +249,7 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fmov s4, w0 ; CHECK-SD-NEXT: mov w8, #32 // =0x20 -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: mov.b v4[1], w0 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: mov.b v4[2], w0 ; CHECK-SD-NEXT: mov.b v4[3], w0 ; CHECK-SD-NEXT: mov.b v4[4], w0 @@ -364,10 +278,6 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: fmov s4, w0 ; CHECK-GI-NEXT: mov w8, #255 // =0xff -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: mov.16b v5, v4 ; CHECK-GI-NEXT: mov.b v5[1], v4[0] ; CHECK-GI-NEXT: mov.b v5[2], v4[0] @@ -454,11 +364,7 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask2: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: mov w8, #1 // =0x1 -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: fmov s4, w8 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 
def $q0_q1_q2_q3 ; CHECK-SD-NEXT: mov.b v4[1], w8 ; CHECK-SD-NEXT: mov.b v4[2], w8 ; CHECK-SD-NEXT: mov.b v4[3], w8 @@ -489,12 +395,8 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: mov w8, #1 // =0x1 ; CHECK-GI-NEXT: fmov s6, w0 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: fmov s4, w8 ; CHECK-GI-NEXT: mov w8, #255 // =0xff -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: mov.16b v5, v4 ; CHECK-GI-NEXT: mov.b v5[1], v4[0] ; CHECK-GI-NEXT: mov.b v5[2], v4[0] @@ -600,11 +502,7 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: movi.2d v4, #0xffffffffffffffff ; CHECK-SD-NEXT: adrp x8, .LCPI12_0 -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ldr q5, [x8, :lo12:.LCPI12_0] -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: tbl.16b v2, { v2, v3 }, v5 ; CHECK-SD-NEXT: mov.b v4[0], w0 ; CHECK-SD-NEXT: mov.b v4[1], w0 @@ -623,10 +521,6 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: fmov s4, w0 ; CHECK-GI-NEXT: mov w8, #255 // =0xff -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: mov.16b v5, v4 ; CHECK-GI-NEXT: mov.b v5[1], v4[0] ; CHECK-GI-NEXT: mov.b v5[2], v4[0] @@ 
-749,10 +643,6 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: dup.16b v4, w0 ; CHECK-SD-NEXT: mov w8, #255 // =0xff -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: mov.b v4[8], w8 ; CHECK-SD-NEXT: mov.b v4[9], w8 ; CHECK-SD-NEXT: mov.b v4[10], w8 @@ -772,12 +662,8 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: fmov s4, w0 ; CHECK-GI-NEXT: mov w8, #255 // =0xff -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: fmov s6, w8 ; CHECK-GI-NEXT: adrp x8, .LCPI13_1 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: mov.16b v5, v4 ; CHECK-GI-NEXT: mov.b v5[1], v4[0] ; CHECK-GI-NEXT: mov.b v5[2], v4[0] @@ -879,23 +765,15 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_shuffle: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: adrp x8, .LCPI14_0 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI14_0] -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 ; CHECK-SD-NEXT: ret ; 
; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_shuffle: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI14_1 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI14_1] -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: adrp x8, .LCPI14_0 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4 ; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v4 @@ -981,24 +859,16 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle(<16 x i8> %a, <16 x i8> %b define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask1: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: adrp x8, .LCPI15_0 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI15_0] -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask1: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI15_2 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI15_2] ; CHECK-GI-NEXT: adrp x8, .LCPI15_1 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI15_1] ; CHECK-GI-NEXT: adrp x8, .LCPI15_0 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4 @@ -1085,24 +955,16 @@ define <16 
x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1(<16 x i8> %a, <16 x i8> define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask2: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: adrp x8, .LCPI16_0 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI16_0] -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask2: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI16_2 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI16_2] ; CHECK-GI-NEXT: adrp x8, .LCPI16_1 -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI16_1] ; CHECK-GI-NEXT: adrp x8, .LCPI16_0 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4 @@ -1144,121 +1006,55 @@ define <16 x i8> @tbx1_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) nounwind { } define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) { -; CHECK-SD-LABEL: tbx2_8b: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2 -; CHECK-SD-NEXT: tbx.8b v0, { v1, v2 }, v3 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: tbx2_8b: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 
def $q1_q2 -; CHECK-GI-NEXT: tbx.8b v0, { v1, v2 }, v3 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: tbx2_8b: +; CHECK: // %bb.0: +; CHECK-NEXT: tbx.8b v0, { v1, v2 }, v3 +; CHECK-NEXT: ret %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) ret <8 x i8> %tmp3 } define <16 x i8> @tbx2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) { -; CHECK-SD-LABEL: tbx2_16b: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2 -; CHECK-SD-NEXT: tbx.16b v0, { v1, v2 }, v3 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: tbx2_16b: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2 -; CHECK-GI-NEXT: tbx.16b v0, { v1, v2 }, v3 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: tbx2_16b: +; CHECK: // %bb.0: +; CHECK-NEXT: tbx.16b v0, { v1, v2 }, v3 +; CHECK-NEXT: ret %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) ret <16 x i8> %tmp3 } define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) { -; CHECK-SD-LABEL: tbx3_8b: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3 -; CHECK-SD-NEXT: tbx.8b v0, { v1, v2, v3 }, v4 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: tbx3_8b: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3 -; CHECK-GI-NEXT: tbx.8b v0, { v1, v2, v3 }, v4 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: tbx3_8b: +; CHECK: // 
%bb.0: +; CHECK-NEXT: tbx.8b v0, { v1, v2, v3 }, v4 +; CHECK-NEXT: ret %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(< 8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) ret <8 x i8> %tmp3 } define <16 x i8> @tbx3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) { -; CHECK-SD-LABEL: tbx3_16b: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3 -; CHECK-SD-NEXT: tbx.16b v0, { v1, v2, v3 }, v4 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: tbx3_16b: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3 -; CHECK-GI-NEXT: tbx.16b v0, { v1, v2, v3 }, v4 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: tbx3_16b: +; CHECK: // %bb.0: +; CHECK-NEXT: tbx.16b v0, { v1, v2, v3 }, v4 +; CHECK-NEXT: ret %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) ret <16 x i8> %tmp3 } define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) { -; CHECK-SD-LABEL: tbx4_8b: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-SD-NEXT: tbx.8b v0, { v1, v2, v3, v4 }, v5 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: tbx4_8b: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-GI-NEXT: 
// kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-GI-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-GI-NEXT: tbx.8b v0, { v1, v2, v3, v4 }, v5 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: tbx4_8b: +; CHECK: // %bb.0: +; CHECK-NEXT: tbx.8b v0, { v1, v2, v3, v4 }, v5 +; CHECK-NEXT: ret %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) ret <8 x i8> %tmp3 } define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) { -; CHECK-SD-LABEL: tbx4_16b: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-SD-NEXT: tbx.16b v0, { v1, v2, v3, v4 }, v5 -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: tbx4_16b: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-GI-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 -; CHECK-GI-NEXT: tbx.16b v0, { v1, v2, v3, v4 }, v5 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: tbx4_16b: +; CHECK: // %bb.0: +; CHECK-NEXT: tbx.16b v0, { v1, v2, v3, v4 }, v5 +; CHECK-NEXT: ret %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) ret <16 x i8> %tmp3 } diff --git a/llvm/test/CodeGen/AArch64/arm64-zip.ll b/llvm/test/CodeGen/AArch64/arm64-zip.ll index 
9955b253f563e9..fd862dfcbd693a 100644 --- a/llvm/test/CodeGen/AArch64/arm64-zip.ll +++ b/llvm/test/CodeGen/AArch64/arm64-zip.ll @@ -359,20 +359,18 @@ define <8 x i16> @combine_v8i16_undef(<4 x i16> %0, <4 x i16> %1) { define <16 x i8> @combine_v8i16_8first(<8 x i8> %0, <8 x i8> %1) { ; CHECK-SD-LABEL: combine_v8i16_8first: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1_q2 +; CHECK-SD-NEXT: fmov d31, d1 ; CHECK-SD-NEXT: adrp x8, .LCPI25_0 -; CHECK-SD-NEXT: fmov d2, d0 -; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI25_0] -; CHECK-SD-NEXT: tbl.16b v0, { v1, v2 }, v3 +; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI25_0] +; CHECK-SD-NEXT: tbl.16b v0, { v31, v0 }, v1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: combine_v8i16_8first: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q31_q0 +; CHECK-GI-NEXT: fmov d2, d0 ; CHECK-GI-NEXT: adrp x8, .LCPI25_0 -; CHECK-GI-NEXT: fmov d31, d1 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI25_0] -; CHECK-GI-NEXT: tbl.16b v0, { v31, v0 }, v2 +; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI25_0] +; CHECK-GI-NEXT: tbl.16b v0, { v1, v2 }, v0 ; CHECK-GI-NEXT: ret %3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> ret <16 x i8> %3 @@ -383,20 +381,18 @@ define <16 x i8> @combine_v8i16_8first(<8 x i8> %0, <8 x i8> %1) { define <16 x i8> @combine_v8i16_8firstundef(<8 x i8> %0, <8 x i8> %1) { ; CHECK-SD-LABEL: combine_v8i16_8firstundef: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1_q2 +; CHECK-SD-NEXT: fmov d31, d1 ; CHECK-SD-NEXT: adrp x8, .LCPI26_0 -; CHECK-SD-NEXT: fmov d2, d0 -; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI26_0] -; CHECK-SD-NEXT: tbl.16b v0, { v1, v2 }, v3 +; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI26_0] +; CHECK-SD-NEXT: tbl.16b v0, { v31, v0 }, v1 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: combine_v8i16_8firstundef: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q31_q0 +; CHECK-GI-NEXT: fmov d2, d0 ; CHECK-GI-NEXT: adrp x8, .LCPI26_0 -; 
CHECK-GI-NEXT: fmov d31, d1 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI26_0] -; CHECK-GI-NEXT: tbl.16b v0, { v31, v0 }, v2 +; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI26_0] +; CHECK-GI-NEXT: tbl.16b v0, { v1, v2 }, v0 ; CHECK-GI-NEXT: ret %3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> ret <16 x i8> %3 diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll b/llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll index 98033a8e449ffb..66f3c5c93fcbf1 100644 --- a/llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll +++ b/llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --force-update +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mtriple=aarch64-- -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s -check-prefix=NOLSE ; RUN: llc -verify-machineinstrs -mtriple=aarch64-- -mattr=+lse -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s -check-prefix=LSE @@ -103,8 +103,8 @@ define fp128 @test_rmw_xchg_f128(ptr %dst, fp128 %new) { ; LSE-NEXT: // =>This Inner Loop Header: Depth=1 ; LSE-NEXT: mov x7, x5 ; LSE-NEXT: mov x6, x4 -; LSE-NEXT: mov x5, x7 ; LSE-NEXT: mov x4, x6 +; LSE-NEXT: mov x5, x7 ; LSE-NEXT: caspal x4, x5, x2, x3, [x0] ; LSE-NEXT: cmp x5, x7 ; LSE-NEXT: ccmp x4, x6, #0, eq diff --git a/llvm/test/CodeGen/AArch64/bf16-shuffle.ll b/llvm/test/CodeGen/AArch64/bf16-shuffle.ll index d59de3c56f4ee2..f10b7282669ae6 100644 --- a/llvm/test/CodeGen/AArch64/bf16-shuffle.ll +++ b/llvm/test/CodeGen/AArch64/bf16-shuffle.ll @@ -224,14 +224,11 @@ define <8 x bfloat> @shuffle3step0_bf16(<32 x bfloat> %src) { ; CHECK-LABEL: shuffle3step0_bf16: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, .LCPI16_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: mov v3.16b, v2.16b -; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI16_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed 
$q0_q1 def $q0_q1 +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI16_0] ; CHECK-NEXT: adrp x8, .LCPI16_1 -; CHECK-NEXT: tbl v2.16b, { v0.16b, v1.16b }, v4.16b +; CHECK-NEXT: tbl v1.16b, { v0.16b, v1.16b }, v3.16b ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI16_1] -; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b +; CHECK-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b ; CHECK-NEXT: ret entry: %s1 = shufflevector <32 x bfloat> %src, <32 x bfloat> undef, <8 x i32> @@ -242,14 +239,11 @@ define <8 x bfloat> @shuffle3step1_bf16(<32 x bfloat> %src) { ; CHECK-LABEL: shuffle3step1_bf16: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, .LCPI17_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: mov v3.16b, v2.16b -; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI17_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI17_0] ; CHECK-NEXT: adrp x8, .LCPI17_1 -; CHECK-NEXT: tbl v2.16b, { v0.16b, v1.16b }, v4.16b +; CHECK-NEXT: tbl v1.16b, { v0.16b, v1.16b }, v3.16b ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI17_1] -; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b +; CHECK-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b ; CHECK-NEXT: ret entry: %s1 = shufflevector <32 x bfloat> %src, <32 x bfloat> undef, <8 x i32> @@ -260,14 +254,11 @@ define <8 x bfloat> @shuffle3step2_bf16(<32 x bfloat> %src) { ; CHECK-LABEL: shuffle3step2_bf16: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, .LCPI18_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: mov v3.16b, v2.16b -; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI18_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI18_0] ; CHECK-NEXT: adrp x8, .LCPI18_1 -; CHECK-NEXT: tbl v2.16b, { v0.16b, v1.16b }, v4.16b +; CHECK-NEXT: tbl v1.16b, { v0.16b, v1.16b }, v3.16b ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI18_1] -; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b +; CHECK-NEXT: tbl v0.16b, { v1.16b, 
v2.16b }, v0.16b ; CHECK-NEXT: ret entry: %s1 = shufflevector <32 x bfloat> %src, <32 x bfloat> undef, <8 x i32> diff --git a/llvm/test/CodeGen/AArch64/build-vector-two-dup.ll b/llvm/test/CodeGen/AArch64/build-vector-two-dup.ll index 5cfa59a3022394..dbbfbea9176f6e 100644 --- a/llvm/test/CodeGen/AArch64/build-vector-two-dup.ll +++ b/llvm/test/CodeGen/AArch64/build-vector-two-dup.ll @@ -78,9 +78,9 @@ entry: define <16 x i8> @test5(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) { ; CHECK-LABEL: test5: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldr b0, [x0] ; CHECK-NEXT: adrp x8, .LCPI4_0 ; CHECK-NEXT: ld1r { v1.16b }, [x1] +; CHECK-NEXT: ldr b0, [x0] ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_0] ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll index 039025dafa0d6e..eae724870fb9dd 100644 --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll @@ -111,8 +111,8 @@ define <4 x float> @multiple_muls_shuffle_external(<4 x float> %a, <4 x float> % ; CHECK-NEXT: fmul v17.2s, v6.2s, v5.2s ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: fmul v5.2s, v4.2s, v5.2s -; CHECK-NEXT: fmla v17.2s, v1.2s, v4.2s ; CHECK-NEXT: fcmla v0.4s, v2.4s, v3.4s, #0 +; CHECK-NEXT: fmla v17.2s, v1.2s, v4.2s ; CHECK-NEXT: str d1, [x0] ; CHECK-NEXT: fneg v16.2s, v5.2s ; CHECK-NEXT: fcmla v0.4s, v2.4s, v3.4s, #90 @@ -162,19 +162,19 @@ define <4 x float> @multiple_muls_shuffle_external_with_loads(ptr %ptr_a, ptr %p ; CHECK-NEXT: ld2 { v0.2s, v1.2s }, [x0] ; CHECK-NEXT: ld2 { v2.2s, v3.2s }, [x1] ; CHECK-NEXT: fmul v4.2s, v3.2s, v1.2s -; CHECK-NEXT: fmul v6.2s, v2.2s, v1.2s +; CHECK-NEXT: fmul v1.2s, v2.2s, v1.2s ; CHECK-NEXT: fneg v4.2s, v4.2s -; CHECK-NEXT: fmla v6.2s, v0.2s, v3.2s +; CHECK-NEXT: fmla v1.2s, v0.2s, v3.2s ; CHECK-NEXT: 
fmla v4.2s, v0.2s, v2.2s ; CHECK-NEXT: str d4, [x4] ; CHECK-NEXT: ldr q5, [x2] -; CHECK-NEXT: ext v7.16b, v5.16b, v5.16b, #8 -; CHECK-NEXT: zip1 v0.2s, v5.2s, v7.2s -; CHECK-NEXT: zip2 v1.2s, v5.2s, v7.2s -; CHECK-NEXT: fmul v3.2s, v0.2s, v6.2s -; CHECK-NEXT: fmul v6.2s, v1.2s, v6.2s -; CHECK-NEXT: fmla v3.2s, v4.2s, v1.2s -; CHECK-NEXT: fneg v2.2s, v6.2s +; CHECK-NEXT: ext v2.16b, v5.16b, v5.16b, #8 +; CHECK-NEXT: zip1 v0.2s, v5.2s, v2.2s +; CHECK-NEXT: zip2 v2.2s, v5.2s, v2.2s +; CHECK-NEXT: fmul v3.2s, v0.2s, v1.2s +; CHECK-NEXT: fmul v1.2s, v2.2s, v1.2s +; CHECK-NEXT: fmla v3.2s, v4.2s, v2.2s +; CHECK-NEXT: fneg v2.2s, v1.2s ; CHECK-NEXT: fmla v2.2s, v4.2s, v0.2s ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: st2 { v2.2s, v3.2s }, [x5] @@ -241,20 +241,20 @@ define <4 x float> @multiple_muls_mul_external(<4 x float> %a, <4 x float> %b, < ; CHECK-NEXT: zip1 v3.2s, v3.2s, v17.2s ; CHECK-NEXT: fmul v18.2s, v6.2s, v7.2s ; CHECK-NEXT: fmul v5.2s, v19.2s, v16.2s -; CHECK-NEXT: fmul v16.2s, v2.2s, v16.2s ; CHECK-NEXT: fmul v7.2s, v0.2s, v7.2s +; CHECK-NEXT: fmul v16.2s, v2.2s, v16.2s ; CHECK-NEXT: fneg v4.2s, v18.2s ; CHECK-NEXT: fmla v5.2s, v3.2s, v2.2s -; CHECK-NEXT: fneg v2.2s, v16.2s ; CHECK-NEXT: fmla v7.2s, v1.2s, v6.2s +; CHECK-NEXT: fneg v2.2s, v16.2s ; CHECK-NEXT: fmla v4.2s, v1.2s, v0.2s -; CHECK-NEXT: fmla v2.2s, v3.2s, v19.2s ; CHECK-NEXT: fmul v0.2s, v7.2s, v5.2s +; CHECK-NEXT: fmla v2.2s, v3.2s, v19.2s ; CHECK-NEXT: fmul v17.2s, v4.2s, v5.2s ; CHECK-NEXT: str d4, [x0] -; CHECK-NEXT: fmla v17.2s, v2.2s, v7.2s ; CHECK-NEXT: fneg v16.2s, v0.2s ; CHECK-NEXT: zip1 v0.4s, v2.4s, v5.4s +; CHECK-NEXT: fmla v17.2s, v2.2s, v7.2s ; CHECK-NEXT: fmla v16.2s, v2.2s, v4.2s ; CHECK-NEXT: st2 { v16.2s, v17.2s }, [x1] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll index 0481d997d24faf..c8dc092bb05e43 100644 --- a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll +++ 
b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll @@ -920,10 +920,8 @@ define i32 @extract_v4i32_shuffle(<4 x i32> %a, <4 x i32> %b, i32 %c) { ; CHECK-GI-NEXT: sub sp, sp, #16 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 ; CHECK-GI-NEXT: adrp x8, .LCPI35_0 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: mov x9, sp ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI35_0] -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: mov w8, w0 ; CHECK-GI-NEXT: and x8, x8, #0x3 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b diff --git a/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll b/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll index 0a3b9a070c2b32..4253b06e1f1aca 100644 --- a/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll +++ b/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll @@ -39,9 +39,9 @@ define void @fptoui_v8f32_to_v8i8_in_loop(ptr %A, ptr %dst) { ; CHECK-NEXT: add x8, x8, #1 ; CHECK-NEXT: cmp x8, #1000 ; CHECK-NEXT: ldp q2, q1, [x9] -; CHECK-NEXT: fcvtzu.4s v4, v1 -; CHECK-NEXT: fcvtzu.4s v3, v2 -; CHECK-NEXT: tbl.16b v1, { v3, v4 }, v0 +; CHECK-NEXT: fcvtzu.4s v3, v1 +; CHECK-NEXT: fcvtzu.4s v2, v2 +; CHECK-NEXT: tbl.16b v1, { v2, v3 }, v0 ; CHECK-NEXT: str d1, [x1], #16 ; CHECK-NEXT: b.eq LBB0_1 ; CHECK-NEXT: ; %bb.2: ; %exit @@ -252,12 +252,12 @@ define void @fptoui_v16f32_to_v16i8_in_loop(ptr %A, ptr %dst) { ; CHECK-NEXT: add x8, x8, #1 ; CHECK-NEXT: cmp x8, #1000 ; CHECK-NEXT: ldp q2, q1, [x9, #32] -; CHECK-NEXT: fcvtzu.4s v7, v1 +; CHECK-NEXT: fcvtzu.4s v5, v1 ; CHECK-NEXT: ldp q1, q3, [x9] -; CHECK-NEXT: fcvtzu.4s v6, v2 -; CHECK-NEXT: fcvtzu.4s v5, v3 -; CHECK-NEXT: fcvtzu.4s v4, v1 -; CHECK-NEXT: tbl.16b v1, { v4, v5, v6, v7 }, v0 +; CHECK-NEXT: fcvtzu.4s v4, v2 +; CHECK-NEXT: fcvtzu.4s v3, v3 +; CHECK-NEXT: fcvtzu.4s v2, v1 +; CHECK-NEXT: tbl.16b v1, { v2, v3, v4, v5 }, v0 ; CHECK-NEXT: str q1, [x1], #32 ; CHECK-NEXT: b.eq LBB4_1 ; CHECK-NEXT: ; %bb.2: ; %exit @@ -316,20 
+316,20 @@ define void @fptoui_2x_v16f32_to_v16i8_in_loop(ptr %A, ptr %B, ptr %dst) { ; CHECK-NEXT: ldp q3, q4, [x9, #32] ; CHECK-NEXT: ldp q5, q6, [x10] ; CHECK-NEXT: fcvtzu.4s v19, v1 +; CHECK-NEXT: ldp q7, q1, [x9] +; CHECK-NEXT: fcvtzu.4s v4, v4 ; CHECK-NEXT: fcvtzu.4s v18, v2 -; CHECK-NEXT: ldp q2, q1, [x9] -; CHECK-NEXT: fcvtzu.4s v23, v4 +; CHECK-NEXT: fcvtzu.4s v3, v3 ; CHECK-NEXT: fcvtzu.4s v17, v6 -; CHECK-NEXT: add x9, x2, x8, lsl #5 -; CHECK-NEXT: fcvtzu.4s v22, v3 ; CHECK-NEXT: fcvtzu.4s v16, v5 +; CHECK-NEXT: add x9, x2, x8, lsl #5 +; CHECK-NEXT: fcvtzu.4s v2, v1 +; CHECK-NEXT: fcvtzu.4s v1, v7 ; CHECK-NEXT: add x8, x8, #1 -; CHECK-NEXT: fcvtzu.4s v21, v1 ; CHECK-NEXT: cmp x8, #1000 -; CHECK-NEXT: fcvtzu.4s v20, v2 -; CHECK-NEXT: tbl.16b v1, { v16, v17, v18, v19 }, v0 -; CHECK-NEXT: tbl.16b v2, { v20, v21, v22, v23 }, v0 -; CHECK-NEXT: stp q2, q1, [x9] +; CHECK-NEXT: tbl.16b v5, { v16, v17, v18, v19 }, v0 +; CHECK-NEXT: tbl.16b v1, { v1, v2, v3, v4 }, v0 +; CHECK-NEXT: stp q1, q5, [x9] ; CHECK-NEXT: b.eq LBB5_1 ; CHECK-NEXT: ; %bb.2: ; %exit ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll index 3b8054a635bcda..e38394f2b05338 100644 --- a/llvm/test/CodeGen/AArch64/fptoi.ll +++ b/llvm/test/CodeGen/AArch64/fptoi.ll @@ -1483,12 +1483,12 @@ define <8 x i16> @fptos_v8f64_v8i16(<8 x double> %a) { ; CHECK-SD-NEXT: adrp x8, .LCPI70_0 ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-SD-NEXT: xtn v6.2s, v3.2d -; CHECK-SD-NEXT: xtn v5.2s, v2.2d -; CHECK-SD-NEXT: xtn v4.2s, v1.2d -; CHECK-SD-NEXT: xtn v3.2s, v0.2d -; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI70_0] -; CHECK-SD-NEXT: tbl v0.16b, { v3.16b, v4.16b, v5.16b, v6.16b }, v0.16b +; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI70_0] +; CHECK-SD-NEXT: xtn v3.2s, v3.2d +; CHECK-SD-NEXT: xtn v2.2s, v2.2d +; CHECK-SD-NEXT: xtn v1.2s, v1.2d +; CHECK-SD-NEXT: xtn v0.2s, v0.2d +; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, 
v3.16b }, v4.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptos_v8f64_v8i16: @@ -1514,12 +1514,12 @@ define <8 x i16> @fptou_v8f64_v8i16(<8 x double> %a) { ; CHECK-SD-NEXT: adrp x8, .LCPI71_0 ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-SD-NEXT: xtn v6.2s, v3.2d -; CHECK-SD-NEXT: xtn v5.2s, v2.2d -; CHECK-SD-NEXT: xtn v4.2s, v1.2d -; CHECK-SD-NEXT: xtn v3.2s, v0.2d -; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI71_0] -; CHECK-SD-NEXT: tbl v0.16b, { v3.16b, v4.16b, v5.16b, v6.16b }, v0.16b +; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI71_0] +; CHECK-SD-NEXT: xtn v3.2s, v3.2d +; CHECK-SD-NEXT: xtn v2.2s, v2.2d +; CHECK-SD-NEXT: xtn v1.2s, v1.2d +; CHECK-SD-NEXT: xtn v0.2s, v0.2d +; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptou_v8f64_v8i16: @@ -1545,21 +1545,21 @@ define <16 x i16> @fptos_v16f64_v16i16(<16 x double> %a) { ; CHECK-SD-NEXT: adrp x8, .LCPI72_0 ; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d ; CHECK-SD-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-SD-NEXT: ldr q16, [x8, :lo12:.LCPI72_0] ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-SD-NEXT: fcvtzs v5.2d, v5.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d ; CHECK-SD-NEXT: fcvtzs v4.2d, v4.2d -; CHECK-SD-NEXT: xtn v19.2s, v3.2d -; CHECK-SD-NEXT: xtn v23.2s, v7.2d -; CHECK-SD-NEXT: xtn v18.2s, v2.2d -; CHECK-SD-NEXT: xtn v22.2s, v6.2d -; CHECK-SD-NEXT: xtn v17.2s, v1.2d -; CHECK-SD-NEXT: xtn v21.2s, v5.2d -; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI72_0] -; CHECK-SD-NEXT: xtn v16.2s, v0.2d -; CHECK-SD-NEXT: xtn v20.2s, v4.2d -; CHECK-SD-NEXT: tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b -; CHECK-SD-NEXT: tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b +; CHECK-SD-NEXT: xtn v3.2s, v3.2d +; CHECK-SD-NEXT: xtn v7.2s, v7.2d +; CHECK-SD-NEXT: xtn v2.2s, v2.2d +; CHECK-SD-NEXT: xtn v6.2s, v6.2d +; CHECK-SD-NEXT: xtn v1.2s, v1.2d +; CHECK-SD-NEXT: xtn v5.2s, v5.2d +; CHECK-SD-NEXT: xtn v0.2s, v0.2d +; CHECK-SD-NEXT: xtn 
v4.2s, v4.2d +; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b +; CHECK-SD-NEXT: tbl v1.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptos_v16f64_v16i16: @@ -1592,21 +1592,21 @@ define <16 x i16> @fptou_v16f64_v16i16(<16 x double> %a) { ; CHECK-SD-NEXT: adrp x8, .LCPI73_0 ; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d ; CHECK-SD-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-SD-NEXT: ldr q16, [x8, :lo12:.LCPI73_0] ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-SD-NEXT: fcvtzs v5.2d, v5.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d ; CHECK-SD-NEXT: fcvtzs v4.2d, v4.2d -; CHECK-SD-NEXT: xtn v19.2s, v3.2d -; CHECK-SD-NEXT: xtn v23.2s, v7.2d -; CHECK-SD-NEXT: xtn v18.2s, v2.2d -; CHECK-SD-NEXT: xtn v22.2s, v6.2d -; CHECK-SD-NEXT: xtn v17.2s, v1.2d -; CHECK-SD-NEXT: xtn v21.2s, v5.2d -; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI73_0] -; CHECK-SD-NEXT: xtn v16.2s, v0.2d -; CHECK-SD-NEXT: xtn v20.2s, v4.2d -; CHECK-SD-NEXT: tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b -; CHECK-SD-NEXT: tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b +; CHECK-SD-NEXT: xtn v3.2s, v3.2d +; CHECK-SD-NEXT: xtn v7.2s, v7.2d +; CHECK-SD-NEXT: xtn v2.2s, v2.2d +; CHECK-SD-NEXT: xtn v6.2s, v6.2d +; CHECK-SD-NEXT: xtn v1.2s, v1.2d +; CHECK-SD-NEXT: xtn v5.2s, v5.2d +; CHECK-SD-NEXT: xtn v0.2s, v0.2d +; CHECK-SD-NEXT: xtn v4.2s, v4.2d +; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b +; CHECK-SD-NEXT: tbl v1.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptou_v16f64_v16i16: @@ -1634,65 +1634,48 @@ entry: define <32 x i16> @fptos_v32f64_v32i16(<32 x double> %a) { ; CHECK-SD-LABEL: fptos_v32f64_v32i16: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: stp d15, d14, [sp, #-64]! 
// 16-byte Folded Spill -; CHECK-SD-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 -; CHECK-SD-NEXT: .cfi_offset b8, -8 -; CHECK-SD-NEXT: .cfi_offset b9, -16 -; CHECK-SD-NEXT: .cfi_offset b10, -24 -; CHECK-SD-NEXT: .cfi_offset b11, -32 -; CHECK-SD-NEXT: .cfi_offset b12, -40 -; CHECK-SD-NEXT: .cfi_offset b13, -48 -; CHECK-SD-NEXT: .cfi_offset b14, -56 -; CHECK-SD-NEXT: .cfi_offset b15, -64 +; CHECK-SD-NEXT: ldp q16, q17, [sp, #64] ; CHECK-SD-NEXT: fcvtzs v3.2d, v3.2d -; CHECK-SD-NEXT: fcvtzs v18.2d, v2.2d -; CHECK-SD-NEXT: adrp x8, .LCPI74_0 -; CHECK-SD-NEXT: fcvtzs v19.2d, v1.2d -; CHECK-SD-NEXT: ldp q20, q21, [sp, #160] -; CHECK-SD-NEXT: fcvtzs v22.2d, v0.2d -; CHECK-SD-NEXT: ldp q23, q24, [sp, #96] +; CHECK-SD-NEXT: ldp q18, q19, [sp, #96] +; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-SD-NEXT: ldp q20, q21, [sp, #32] +; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-SD-NEXT: ldp q22, q23, [sp] +; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d ; CHECK-SD-NEXT: fcvtzs v7.2d, v7.2d -; CHECK-SD-NEXT: ldp q16, q17, [sp, #128] -; CHECK-SD-NEXT: xtn v3.2s, v3.2d +; CHECK-SD-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-SD-NEXT: fcvtzs v19.2d, v19.2d ; CHECK-SD-NEXT: fcvtzs v21.2d, v21.2d +; CHECK-SD-NEXT: fcvtzs v5.2d, v5.2d ; CHECK-SD-NEXT: fcvtzs v20.2d, v20.2d -; CHECK-SD-NEXT: xtn v2.2s, v18.2d -; CHECK-SD-NEXT: ldp q18, q25, [sp, #64] -; CHECK-SD-NEXT: xtn v1.2s, v19.2d -; CHECK-SD-NEXT: fcvtzs v19.2d, v24.2d -; CHECK-SD-NEXT: fcvtzs v17.2d, v17.2d -; CHECK-SD-NEXT: xtn v0.2s, v22.2d -; CHECK-SD-NEXT: fcvtzs v22.2d, v23.2d -; CHECK-SD-NEXT: xtn v29.2s, v7.2d -; CHECK-SD-NEXT: fcvtzs v7.2d, v25.2d -; CHECK-SD-NEXT: fcvtzs v6.2d, v6.2d ; CHECK-SD-NEXT: fcvtzs v18.2d, v18.2d -; CHECK-SD-NEXT: fcvtzs v16.2d, v16.2d -; CHECK-SD-NEXT: fcvtzs v5.2d, v5.2d -; CHECK-SD-NEXT: xtn v15.2s, v21.2d -; CHECK-SD-NEXT: xtn v11.2s, 
v19.2d +; CHECK-SD-NEXT: fcvtzs v23.2d, v23.2d +; CHECK-SD-NEXT: fcvtzs v17.2d, v17.2d ; CHECK-SD-NEXT: fcvtzs v4.2d, v4.2d -; CHECK-SD-NEXT: xtn v14.2s, v20.2d -; CHECK-SD-NEXT: xtn v10.2s, v22.2d -; CHECK-SD-NEXT: xtn v13.2s, v17.2d -; CHECK-SD-NEXT: xtn v9.2s, v7.2d -; CHECK-SD-NEXT: xtn v28.2s, v6.2d -; CHECK-SD-NEXT: xtn v8.2s, v18.2d -; CHECK-SD-NEXT: xtn v12.2s, v16.2d -; CHECK-SD-NEXT: xtn v27.2s, v5.2d -; CHECK-SD-NEXT: xtn v26.2s, v4.2d -; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI74_0] -; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b -; CHECK-SD-NEXT: tbl v2.16b, { v8.16b, v9.16b, v10.16b, v11.16b }, v4.16b -; CHECK-SD-NEXT: tbl v3.16b, { v12.16b, v13.16b, v14.16b, v15.16b }, v4.16b -; CHECK-SD-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NEXT: tbl v1.16b, { v26.16b, v27.16b, v28.16b, v29.16b }, v4.16b -; CHECK-SD-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload +; CHECK-SD-NEXT: fcvtzs v22.2d, v22.2d +; CHECK-SD-NEXT: fcvtzs v16.2d, v16.2d +; CHECK-SD-NEXT: xtn v3.2s, v3.2d +; CHECK-SD-NEXT: xtn v2.2s, v2.2d +; CHECK-SD-NEXT: adrp x8, .LCPI74_0 +; CHECK-SD-NEXT: xtn v1.2s, v1.2d +; CHECK-SD-NEXT: xtn v0.2s, v0.2d +; CHECK-SD-NEXT: xtn v7.2s, v7.2d +; CHECK-SD-NEXT: xtn v6.2s, v6.2d +; CHECK-SD-NEXT: xtn v21.2s, v21.2d +; CHECK-SD-NEXT: xtn v25.2s, v19.2d +; CHECK-SD-NEXT: xtn v5.2s, v5.2d +; CHECK-SD-NEXT: xtn v20.2s, v20.2d +; CHECK-SD-NEXT: xtn v24.2s, v18.2d +; CHECK-SD-NEXT: xtn v19.2s, v23.2d +; CHECK-SD-NEXT: xtn v23.2s, v17.2d +; CHECK-SD-NEXT: xtn v4.2s, v4.2d +; CHECK-SD-NEXT: xtn v18.2s, v22.2d +; CHECK-SD-NEXT: xtn v22.2s, v16.2d +; CHECK-SD-NEXT: ldr q16, [x8, :lo12:.LCPI74_0] +; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b +; CHECK-SD-NEXT: tbl v1.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b +; CHECK-SD-NEXT: tbl v2.16b, { v18.16b, 
v19.16b, v20.16b, v21.16b }, v16.16b +; CHECK-SD-NEXT: tbl v3.16b, { v22.16b, v23.16b, v24.16b, v25.16b }, v16.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptos_v32f64_v32i16: @@ -1738,65 +1721,48 @@ entry: define <32 x i16> @fptou_v32f64_v32i16(<32 x double> %a) { ; CHECK-SD-LABEL: fptou_v32f64_v32i16: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: stp d15, d14, [sp, #-64]! // 16-byte Folded Spill -; CHECK-SD-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill -; CHECK-SD-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill -; CHECK-SD-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 -; CHECK-SD-NEXT: .cfi_offset b8, -8 -; CHECK-SD-NEXT: .cfi_offset b9, -16 -; CHECK-SD-NEXT: .cfi_offset b10, -24 -; CHECK-SD-NEXT: .cfi_offset b11, -32 -; CHECK-SD-NEXT: .cfi_offset b12, -40 -; CHECK-SD-NEXT: .cfi_offset b13, -48 -; CHECK-SD-NEXT: .cfi_offset b14, -56 -; CHECK-SD-NEXT: .cfi_offset b15, -64 +; CHECK-SD-NEXT: ldp q16, q17, [sp, #64] ; CHECK-SD-NEXT: fcvtzs v3.2d, v3.2d -; CHECK-SD-NEXT: fcvtzs v18.2d, v2.2d -; CHECK-SD-NEXT: adrp x8, .LCPI75_0 -; CHECK-SD-NEXT: fcvtzs v19.2d, v1.2d -; CHECK-SD-NEXT: ldp q20, q21, [sp, #160] -; CHECK-SD-NEXT: fcvtzs v22.2d, v0.2d -; CHECK-SD-NEXT: ldp q23, q24, [sp, #96] +; CHECK-SD-NEXT: ldp q18, q19, [sp, #96] +; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-SD-NEXT: ldp q20, q21, [sp, #32] +; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d +; CHECK-SD-NEXT: ldp q22, q23, [sp] +; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d ; CHECK-SD-NEXT: fcvtzs v7.2d, v7.2d -; CHECK-SD-NEXT: ldp q16, q17, [sp, #128] -; CHECK-SD-NEXT: xtn v3.2s, v3.2d +; CHECK-SD-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-SD-NEXT: fcvtzs v19.2d, v19.2d ; CHECK-SD-NEXT: fcvtzs v21.2d, v21.2d +; CHECK-SD-NEXT: fcvtzs v5.2d, v5.2d ; CHECK-SD-NEXT: fcvtzs v20.2d, v20.2d -; CHECK-SD-NEXT: xtn v2.2s, v18.2d -; CHECK-SD-NEXT: ldp q18, q25, [sp, #64] -; CHECK-SD-NEXT: xtn v1.2s, v19.2d -; CHECK-SD-NEXT: fcvtzs v19.2d, v24.2d -; CHECK-SD-NEXT: fcvtzs 
v17.2d, v17.2d -; CHECK-SD-NEXT: xtn v0.2s, v22.2d -; CHECK-SD-NEXT: fcvtzs v22.2d, v23.2d -; CHECK-SD-NEXT: xtn v29.2s, v7.2d -; CHECK-SD-NEXT: fcvtzs v7.2d, v25.2d -; CHECK-SD-NEXT: fcvtzs v6.2d, v6.2d ; CHECK-SD-NEXT: fcvtzs v18.2d, v18.2d -; CHECK-SD-NEXT: fcvtzs v16.2d, v16.2d -; CHECK-SD-NEXT: fcvtzs v5.2d, v5.2d -; CHECK-SD-NEXT: xtn v15.2s, v21.2d -; CHECK-SD-NEXT: xtn v11.2s, v19.2d +; CHECK-SD-NEXT: fcvtzs v23.2d, v23.2d +; CHECK-SD-NEXT: fcvtzs v17.2d, v17.2d ; CHECK-SD-NEXT: fcvtzs v4.2d, v4.2d -; CHECK-SD-NEXT: xtn v14.2s, v20.2d -; CHECK-SD-NEXT: xtn v10.2s, v22.2d -; CHECK-SD-NEXT: xtn v13.2s, v17.2d -; CHECK-SD-NEXT: xtn v9.2s, v7.2d -; CHECK-SD-NEXT: xtn v28.2s, v6.2d -; CHECK-SD-NEXT: xtn v8.2s, v18.2d -; CHECK-SD-NEXT: xtn v12.2s, v16.2d -; CHECK-SD-NEXT: xtn v27.2s, v5.2d -; CHECK-SD-NEXT: xtn v26.2s, v4.2d -; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI75_0] -; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b -; CHECK-SD-NEXT: tbl v2.16b, { v8.16b, v9.16b, v10.16b, v11.16b }, v4.16b -; CHECK-SD-NEXT: tbl v3.16b, { v12.16b, v13.16b, v14.16b, v15.16b }, v4.16b -; CHECK-SD-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-SD-NEXT: tbl v1.16b, { v26.16b, v27.16b, v28.16b, v29.16b }, v4.16b -; CHECK-SD-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-SD-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload +; CHECK-SD-NEXT: fcvtzs v22.2d, v22.2d +; CHECK-SD-NEXT: fcvtzs v16.2d, v16.2d +; CHECK-SD-NEXT: xtn v3.2s, v3.2d +; CHECK-SD-NEXT: xtn v2.2s, v2.2d +; CHECK-SD-NEXT: adrp x8, .LCPI75_0 +; CHECK-SD-NEXT: xtn v1.2s, v1.2d +; CHECK-SD-NEXT: xtn v0.2s, v0.2d +; CHECK-SD-NEXT: xtn v7.2s, v7.2d +; CHECK-SD-NEXT: xtn v6.2s, v6.2d +; CHECK-SD-NEXT: xtn v21.2s, v21.2d +; CHECK-SD-NEXT: xtn v25.2s, v19.2d +; CHECK-SD-NEXT: xtn v5.2s, v5.2d +; CHECK-SD-NEXT: xtn v20.2s, v20.2d +; CHECK-SD-NEXT: xtn v24.2s, v18.2d +; CHECK-SD-NEXT: xtn v19.2s, 
v23.2d +; CHECK-SD-NEXT: xtn v23.2s, v17.2d +; CHECK-SD-NEXT: xtn v4.2s, v4.2d +; CHECK-SD-NEXT: xtn v18.2s, v22.2d +; CHECK-SD-NEXT: xtn v22.2s, v16.2d +; CHECK-SD-NEXT: ldr q16, [x8, :lo12:.LCPI75_0] +; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b +; CHECK-SD-NEXT: tbl v1.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b +; CHECK-SD-NEXT: tbl v2.16b, { v18.16b, v19.16b, v20.16b, v21.16b }, v16.16b +; CHECK-SD-NEXT: tbl v3.16b, { v22.16b, v23.16b, v24.16b, v25.16b }, v16.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptou_v32f64_v32i16: diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll index d620a8851ee449..2d0931fb4f5257 100644 --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -3365,111 +3365,111 @@ define <16 x i8> @test_signed_v16f64_v16i8(<16 x double> %f) { ; CHECK-NEXT: cmn w10, #128 ; CHECK-NEXT: csel w10, w10, w9, gt ; CHECK-NEXT: cmp w13, #127 -; CHECK-NEXT: csel w12, w13, w8, lt ; CHECK-NEXT: mov v0.s[1], w11 +; CHECK-NEXT: csel w12, w13, w8, lt ; CHECK-NEXT: fcvtzs w11, d1 ; CHECK-NEXT: cmn w12, #128 ; CHECK-NEXT: csel w12, w12, w9, gt ; CHECK-NEXT: fmov s1, w12 ; CHECK-NEXT: fcvtzs w12, d2 ; CHECK-NEXT: mov d2, v3.d[1] -; CHECK-NEXT: cmp w11, #127 ; CHECK-NEXT: mov w13, v0.s[1] +; CHECK-NEXT: cmp w11, #127 ; CHECK-NEXT: mov v1.s[1], w10 ; CHECK-NEXT: csel w10, w11, w8, lt ; CHECK-NEXT: cmn w10, #128 ; CHECK-NEXT: fcvtzs w11, d2 +; CHECK-NEXT: mov d2, v4.d[1] +; CHECK-NEXT: mov v0.b[1], w13 ; CHECK-NEXT: csel w10, w10, w9, gt ; CHECK-NEXT: cmp w12, #127 -; CHECK-NEXT: mov v0.b[1], w13 ; CHECK-NEXT: csel w12, w12, w8, lt ; CHECK-NEXT: cmn w12, #128 ; CHECK-NEXT: mov w13, v1.s[1] ; CHECK-NEXT: csel w12, w12, w9, gt ; CHECK-NEXT: cmp w11, #127 -; CHECK-NEXT: fmov s2, w12 -; CHECK-NEXT: fcvtzs w12, d3 -; CHECK-NEXT: mov d3, v4.d[1] ; CHECK-NEXT: mov v0.b[2], v1.b[0] -; CHECK-NEXT: mov v2.s[1], w10 +; CHECK-NEXT: fmov 
s1, w12 +; CHECK-NEXT: fcvtzs w12, d3 +; CHECK-NEXT: mov v1.s[1], w10 ; CHECK-NEXT: csel w10, w11, w8, lt +; CHECK-NEXT: fcvtzs w11, d2 ; CHECK-NEXT: cmn w10, #128 -; CHECK-NEXT: fcvtzs w11, d3 +; CHECK-NEXT: mov v0.b[3], w13 +; CHECK-NEXT: mov d2, v5.d[1] ; CHECK-NEXT: csel w10, w10, w9, gt ; CHECK-NEXT: cmp w12, #127 -; CHECK-NEXT: mov v0.b[3], w13 ; CHECK-NEXT: csel w12, w12, w8, lt ; CHECK-NEXT: cmn w12, #128 -; CHECK-NEXT: mov w13, v2.s[1] +; CHECK-NEXT: mov w13, v1.s[1] ; CHECK-NEXT: csel w12, w12, w9, gt +; CHECK-NEXT: mov v0.b[4], v1.b[0] ; CHECK-NEXT: cmp w11, #127 -; CHECK-NEXT: fmov s3, w12 +; CHECK-NEXT: fmov s1, w12 ; CHECK-NEXT: fcvtzs w12, d4 -; CHECK-NEXT: mov v0.b[4], v2.b[0] -; CHECK-NEXT: mov d4, v5.d[1] -; CHECK-NEXT: mov v3.s[1], w10 +; CHECK-NEXT: mov v1.s[1], w10 ; CHECK-NEXT: csel w10, w11, w8, lt -; CHECK-NEXT: cmn w10, #128 ; CHECK-NEXT: mov v0.b[5], w13 +; CHECK-NEXT: cmn w10, #128 +; CHECK-NEXT: fcvtzs w11, d2 +; CHECK-NEXT: mov d2, v6.d[1] ; CHECK-NEXT: csel w10, w10, w9, gt ; CHECK-NEXT: cmp w12, #127 -; CHECK-NEXT: fcvtzs w11, d4 ; CHECK-NEXT: csel w12, w12, w8, lt ; CHECK-NEXT: cmn w12, #128 -; CHECK-NEXT: mov w13, v3.s[1] +; CHECK-NEXT: mov w13, v1.s[1] +; CHECK-NEXT: mov v0.b[6], v1.b[0] ; CHECK-NEXT: csel w12, w12, w9, gt -; CHECK-NEXT: mov v0.b[6], v3.b[0] -; CHECK-NEXT: fmov s4, w12 -; CHECK-NEXT: fcvtzs w12, d5 ; CHECK-NEXT: cmp w11, #127 -; CHECK-NEXT: mov d5, v6.d[1] -; CHECK-NEXT: mov v4.s[1], w10 -; CHECK-NEXT: csel w10, w11, w8, lt +; CHECK-NEXT: fmov s1, w12 +; CHECK-NEXT: fcvtzs w12, d5 ; CHECK-NEXT: mov v0.b[7], w13 +; CHECK-NEXT: fcvtzs w13, d2 +; CHECK-NEXT: mov d2, v7.d[1] +; CHECK-NEXT: mov v1.s[1], w10 +; CHECK-NEXT: csel w10, w11, w8, lt ; CHECK-NEXT: cmn w10, #128 ; CHECK-NEXT: csel w10, w10, w9, gt ; CHECK-NEXT: cmp w12, #127 -; CHECK-NEXT: fcvtzs w13, d5 ; CHECK-NEXT: csel w11, w12, w8, lt ; CHECK-NEXT: cmn w11, #128 -; CHECK-NEXT: mov w12, v4.s[1] -; CHECK-NEXT: mov v0.b[8], v4.b[0] +; CHECK-NEXT: mov w12, 
v1.s[1] +; CHECK-NEXT: mov v0.b[8], v1.b[0] ; CHECK-NEXT: csel w11, w11, w9, gt -; CHECK-NEXT: fmov s5, w11 -; CHECK-NEXT: fcvtzs w11, d6 ; CHECK-NEXT: cmp w13, #127 -; CHECK-NEXT: mov d6, v7.d[1] +; CHECK-NEXT: fmov s1, w11 +; CHECK-NEXT: fcvtzs w11, d6 ; CHECK-NEXT: mov v0.b[9], w12 -; CHECK-NEXT: mov v5.s[1], w10 +; CHECK-NEXT: mov v1.s[1], w10 ; CHECK-NEXT: csel w10, w13, w8, lt +; CHECK-NEXT: fcvtzs w13, d2 ; CHECK-NEXT: cmn w10, #128 ; CHECK-NEXT: csel w10, w10, w9, gt ; CHECK-NEXT: cmp w11, #127 -; CHECK-NEXT: fcvtzs w13, d6 ; CHECK-NEXT: csel w11, w11, w8, lt ; CHECK-NEXT: cmn w11, #128 -; CHECK-NEXT: mov v0.b[10], v5.b[0] -; CHECK-NEXT: mov w12, v5.s[1] +; CHECK-NEXT: mov v0.b[10], v1.b[0] +; CHECK-NEXT: mov w12, v1.s[1] ; CHECK-NEXT: csel w11, w11, w9, gt -; CHECK-NEXT: fmov s6, w11 -; CHECK-NEXT: fcvtzs w11, d7 ; CHECK-NEXT: cmp w13, #127 +; CHECK-NEXT: fmov s1, w11 +; CHECK-NEXT: fcvtzs w11, d7 ; CHECK-NEXT: mov v0.b[11], w12 -; CHECK-NEXT: mov v6.s[1], w10 +; CHECK-NEXT: mov v1.s[1], w10 ; CHECK-NEXT: csel w10, w13, w8, lt ; CHECK-NEXT: cmn w10, #128 ; CHECK-NEXT: csel w10, w10, w9, gt ; CHECK-NEXT: cmp w11, #127 ; CHECK-NEXT: csel w8, w11, w8, lt ; CHECK-NEXT: cmn w8, #128 -; CHECK-NEXT: mov v0.b[12], v6.b[0] -; CHECK-NEXT: mov w11, v6.s[1] +; CHECK-NEXT: mov v0.b[12], v1.b[0] +; CHECK-NEXT: mov w11, v1.s[1] ; CHECK-NEXT: csel w8, w8, w9, gt -; CHECK-NEXT: fmov s7, w8 +; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: mov v0.b[13], w11 -; CHECK-NEXT: mov v7.s[1], w10 -; CHECK-NEXT: mov v0.b[14], v7.b[0] -; CHECK-NEXT: mov w8, v7.s[1] +; CHECK-NEXT: mov v1.s[1], w10 +; CHECK-NEXT: mov v0.b[14], v1.b[0] +; CHECK-NEXT: mov w8, v1.s[1] ; CHECK-NEXT: mov v0.b[15], w8 ; CHECK-NEXT: ret %x = call <16 x i8> @llvm.fptosi.sat.v16f64.v16i8(<16 x double> %f) @@ -3575,26 +3575,32 @@ define <16 x i16> @test_signed_v16f64_v16i16(<16 x double> %f) { ; CHECK-NEXT: cmp w13, w9 ; CHECK-NEXT: csel w11, w13, w9, lt ; CHECK-NEXT: fcvtzs w13, d3 +; CHECK-NEXT: fmov s3, w12 ; 
CHECK-NEXT: cmn w11, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w11, w11, w8, gt ; CHECK-NEXT: cmp w14, w9 ; CHECK-NEXT: csel w14, w14, w9, lt +; CHECK-NEXT: mov v3.s[1], w10 ; CHECK-NEXT: cmn w14, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w14, w14, w8, gt ; CHECK-NEXT: cmp w13, w9 ; CHECK-NEXT: csel w13, w13, w9, lt +; CHECK-NEXT: fmov s2, w14 ; CHECK-NEXT: cmn w13, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w13, w13, w8, gt ; CHECK-NEXT: cmp w15, w9 ; CHECK-NEXT: csel w15, w15, w9, lt +; CHECK-NEXT: mov v2.s[1], w11 ; CHECK-NEXT: cmn w15, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w16, w15, w8, gt ; CHECK-NEXT: cmp w17, w9 ; CHECK-NEXT: csel w15, w17, w9, lt +; CHECK-NEXT: fmov s1, w16 ; CHECK-NEXT: cmn w15, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w15, w15, w8, gt ; CHECK-NEXT: cmp w18, w9 ; CHECK-NEXT: csel w17, w18, w9, lt +; CHECK-NEXT: mov v1.s[1], w13 ; CHECK-NEXT: cmn w17, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w17, w17, w8, gt ; CHECK-NEXT: cmp w0, w9 @@ -3617,38 +3623,32 @@ define <16 x i16> @test_signed_v16f64_v16i16(<16 x double> %f) { ; CHECK-NEXT: cmp w2, w9 ; CHECK-NEXT: fcvtzs w5, d0 ; CHECK-NEXT: csel w2, w2, w9, lt -; CHECK-NEXT: fmov s3, w12 +; CHECK-NEXT: fmov s0, w17 ; CHECK-NEXT: mov v7.s[1], w18 ; CHECK-NEXT: cmn w2, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w2, w2, w8, gt ; CHECK-NEXT: cmp w3, w9 ; CHECK-NEXT: csel w3, w3, w9, lt -; CHECK-NEXT: mov v3.s[1], w10 ; CHECK-NEXT: fmov s6, w2 +; CHECK-NEXT: mov v0.s[1], w15 ; CHECK-NEXT: cmn w3, #8, lsl #12 // =32768 -; CHECK-NEXT: fmov s2, w14 ; CHECK-NEXT: csel w3, w3, w8, gt ; CHECK-NEXT: cmp w4, w9 ; CHECK-NEXT: csel w4, w4, w9, lt ; CHECK-NEXT: mov v6.s[1], w0 ; CHECK-NEXT: cmn w4, #8, lsl #12 // =32768 -; CHECK-NEXT: mov v2.s[1], w11 ; CHECK-NEXT: csel w12, w4, w8, gt ; CHECK-NEXT: cmp w5, w9 -; CHECK-NEXT: fmov s1, w16 ; CHECK-NEXT: csel w10, w5, w9, lt ; CHECK-NEXT: fmov s5, w12 ; CHECK-NEXT: cmn w10, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w10, w10, w8, gt ; CHECK-NEXT: cmp w6, w9 -; 
CHECK-NEXT: mov v1.s[1], w13 ; CHECK-NEXT: csel w9, w6, w9, lt ; CHECK-NEXT: mov v5.s[1], w3 -; CHECK-NEXT: fmov s0, w17 ; CHECK-NEXT: cmn w9, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w8, w9, w8, gt ; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: mov v0.s[1], w15 ; CHECK-NEXT: adrp x8, .LCPI85_0 ; CHECK-NEXT: ldr q16, [x8, :lo12:.LCPI85_0] ; CHECK-NEXT: mov v4.s[1], w10 diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll index 16e04070b65439..62f5e0fe2dcaa5 100644 --- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -2751,8 +2751,8 @@ define <8 x i8> @test_unsigned_v8f64_v8i8(<8 x double> %f) { ; CHECK-NEXT: fcvtzu w12, d2 ; CHECK-NEXT: fcvtzu w14, d1 ; CHECK-NEXT: fcvtzu w8, d4 -; CHECK-NEXT: mov d4, v0.d[1] ; CHECK-NEXT: fcvtzu w10, d5 +; CHECK-NEXT: mov d4, v0.d[1] ; CHECK-NEXT: fcvtzu w13, d3 ; CHECK-NEXT: cmp w8, #255 ; CHECK-NEXT: fcvtzu w15, d4 @@ -2760,29 +2760,29 @@ define <8 x i8> @test_unsigned_v8f64_v8i8(<8 x double> %f) { ; CHECK-NEXT: cmp w9, #255 ; CHECK-NEXT: csel w9, w9, w11, lo ; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: fmov s4, w9 +; CHECK-NEXT: fmov s3, w9 ; CHECK-NEXT: csel w9, w10, w11, lo ; CHECK-NEXT: cmp w12, #255 ; CHECK-NEXT: fcvtzu w10, d0 -; CHECK-NEXT: mov v4.s[1], w8 +; CHECK-NEXT: mov v3.s[1], w8 ; CHECK-NEXT: csel w8, w12, w11, lo ; CHECK-NEXT: cmp w13, #255 -; CHECK-NEXT: fmov s3, w8 +; CHECK-NEXT: fmov s2, w8 ; CHECK-NEXT: csel w8, w13, w11, lo ; CHECK-NEXT: cmp w14, #255 -; CHECK-NEXT: mov v3.s[1], w9 +; CHECK-NEXT: mov v2.s[1], w9 ; CHECK-NEXT: csel w9, w14, w11, lo ; CHECK-NEXT: cmp w15, #255 -; CHECK-NEXT: fmov s2, w9 +; CHECK-NEXT: fmov s1, w9 ; CHECK-NEXT: csel w9, w15, w11, lo ; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: mov v2.s[1], w8 +; CHECK-NEXT: mov v1.s[1], w8 ; CHECK-NEXT: csel w8, w10, w11, lo -; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: adrp x8, .LCPI82_0 -; CHECK-NEXT: ldr d0, [x8, 
:lo12:.LCPI82_0] -; CHECK-NEXT: mov v1.s[1], w9 -; CHECK-NEXT: tbl v0.8b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.8b +; CHECK-NEXT: ldr d4, [x8, :lo12:.LCPI82_0] +; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.8b ; CHECK-NEXT: ret %x = call <8 x i8> @llvm.fptoui.sat.v8f64.v8i8(<8 x double> %f) ret <8 x i8> %x @@ -2802,29 +2802,29 @@ define <16 x i8> @test_unsigned_v16f64_v16i8(<16 x double> %f) { ; CHECK-NEXT: csel w10, w10, w8, lo ; CHECK-NEXT: fmov s0, w10 ; CHECK-NEXT: fcvtzu w10, d16 -; CHECK-NEXT: mov d16, v2.d[1] ; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: fcvtzu w9, d1 +; CHECK-NEXT: mov d1, v2.d[1] ; CHECK-NEXT: cmp w10, #255 ; CHECK-NEXT: csel w10, w10, w8, lo ; CHECK-NEXT: cmp w9, #255 ; CHECK-NEXT: mov w11, v0.s[1] ; CHECK-NEXT: csel w9, w9, w8, lo -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: fcvtzu w9, d16 -; CHECK-NEXT: mov d16, v3.d[1] +; CHECK-NEXT: fmov s16, w9 +; CHECK-NEXT: fcvtzu w9, d1 +; CHECK-NEXT: mov d1, v3.d[1] ; CHECK-NEXT: mov v0.b[1], w11 -; CHECK-NEXT: mov v1.s[1], w10 +; CHECK-NEXT: mov v16.s[1], w10 ; CHECK-NEXT: fcvtzu w10, d2 ; CHECK-NEXT: cmp w9, #255 ; CHECK-NEXT: csel w9, w9, w8, lo ; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: mov w11, v1.s[1] -; CHECK-NEXT: mov v0.b[2], v1.b[0] +; CHECK-NEXT: mov w11, v16.s[1] +; CHECK-NEXT: mov v0.b[2], v16.b[0] ; CHECK-NEXT: csel w10, w10, w8, lo ; CHECK-NEXT: fmov s2, w10 -; CHECK-NEXT: fcvtzu w10, d16 -; CHECK-NEXT: mov d16, v4.d[1] +; CHECK-NEXT: fcvtzu w10, d1 +; CHECK-NEXT: mov d1, v4.d[1] ; CHECK-NEXT: mov v0.b[3], w11 ; CHECK-NEXT: mov v2.s[1], w9 ; CHECK-NEXT: fcvtzu w9, d3 @@ -2834,58 +2834,58 @@ define <16 x i8> @test_unsigned_v16f64_v16i8(<16 x double> %f) { ; CHECK-NEXT: mov w11, v2.s[1] ; CHECK-NEXT: mov v0.b[4], v2.b[0] ; CHECK-NEXT: csel w9, w9, w8, lo -; CHECK-NEXT: fmov s3, w9 -; CHECK-NEXT: fcvtzu w9, d16 -; CHECK-NEXT: mov d16, v5.d[1] +; CHECK-NEXT: fmov s2, w9 +; CHECK-NEXT: fcvtzu w9, d1 +; CHECK-NEXT: mov d1, v5.d[1] ; 
CHECK-NEXT: mov v0.b[5], w11 -; CHECK-NEXT: mov v3.s[1], w10 +; CHECK-NEXT: mov v2.s[1], w10 ; CHECK-NEXT: fcvtzu w10, d4 ; CHECK-NEXT: cmp w9, #255 ; CHECK-NEXT: csel w9, w9, w8, lo ; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: mov w11, v3.s[1] -; CHECK-NEXT: mov v0.b[6], v3.b[0] +; CHECK-NEXT: mov w11, v2.s[1] +; CHECK-NEXT: mov v0.b[6], v2.b[0] ; CHECK-NEXT: csel w10, w10, w8, lo -; CHECK-NEXT: fmov s4, w10 -; CHECK-NEXT: fcvtzu w10, d16 +; CHECK-NEXT: fmov s2, w10 +; CHECK-NEXT: fcvtzu w10, d1 +; CHECK-NEXT: mov d1, v6.d[1] ; CHECK-NEXT: mov v0.b[7], w11 -; CHECK-NEXT: mov v4.s[1], w9 +; CHECK-NEXT: mov v2.s[1], w9 ; CHECK-NEXT: fcvtzu w9, d5 -; CHECK-NEXT: mov d5, v6.d[1] ; CHECK-NEXT: cmp w10, #255 ; CHECK-NEXT: csel w10, w10, w8, lo ; CHECK-NEXT: cmp w9, #255 -; CHECK-NEXT: mov w11, v4.s[1] -; CHECK-NEXT: mov v0.b[8], v4.b[0] +; CHECK-NEXT: mov w11, v2.s[1] +; CHECK-NEXT: mov v0.b[8], v2.b[0] ; CHECK-NEXT: csel w9, w9, w8, lo -; CHECK-NEXT: fmov s16, w9 -; CHECK-NEXT: fcvtzu w9, d5 -; CHECK-NEXT: mov d5, v7.d[1] +; CHECK-NEXT: fmov s2, w9 +; CHECK-NEXT: fcvtzu w9, d1 +; CHECK-NEXT: mov d1, v7.d[1] ; CHECK-NEXT: mov v0.b[9], w11 -; CHECK-NEXT: mov v16.s[1], w10 +; CHECK-NEXT: mov v2.s[1], w10 ; CHECK-NEXT: fcvtzu w10, d6 ; CHECK-NEXT: cmp w9, #255 ; CHECK-NEXT: csel w9, w9, w8, lo ; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: mov v0.b[10], v16.b[0] -; CHECK-NEXT: mov w11, v16.s[1] +; CHECK-NEXT: mov v0.b[10], v2.b[0] +; CHECK-NEXT: mov w11, v2.s[1] ; CHECK-NEXT: csel w10, w10, w8, lo -; CHECK-NEXT: fmov s6, w10 +; CHECK-NEXT: fmov s2, w10 ; CHECK-NEXT: fcvtzu w10, d7 ; CHECK-NEXT: mov v0.b[11], w11 -; CHECK-NEXT: mov v6.s[1], w9 -; CHECK-NEXT: fcvtzu w9, d5 +; CHECK-NEXT: mov v2.s[1], w9 +; CHECK-NEXT: fcvtzu w9, d1 ; CHECK-NEXT: cmp w9, #255 -; CHECK-NEXT: mov v0.b[12], v6.b[0] -; CHECK-NEXT: mov w11, v6.s[1] +; CHECK-NEXT: mov v0.b[12], v2.b[0] +; CHECK-NEXT: mov w11, v2.s[1] ; CHECK-NEXT: csel w9, w9, w8, lo ; CHECK-NEXT: cmp w10, #255 ; CHECK-NEXT: csel w8, 
w10, w8, lo -; CHECK-NEXT: fmov s5, w8 +; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: mov v0.b[13], w11 -; CHECK-NEXT: mov v5.s[1], w9 -; CHECK-NEXT: mov v0.b[14], v5.b[0] -; CHECK-NEXT: mov w8, v5.s[1] +; CHECK-NEXT: mov v1.s[1], w9 +; CHECK-NEXT: mov v0.b[14], v1.b[0] +; CHECK-NEXT: mov w8, v1.s[1] ; CHECK-NEXT: mov v0.b[15], w8 ; CHECK-NEXT: ret %x = call <16 x i8> @llvm.fptoui.sat.v16f64.v16i8(<16 x double> %f) @@ -2903,8 +2903,8 @@ define <8 x i16> @test_unsigned_v8f64_v8i16(<8 x double> %f) { ; CHECK-NEXT: fcvtzu w12, d2 ; CHECK-NEXT: fcvtzu w14, d1 ; CHECK-NEXT: fcvtzu w8, d4 -; CHECK-NEXT: mov d4, v0.d[1] ; CHECK-NEXT: fcvtzu w11, d5 +; CHECK-NEXT: mov d4, v0.d[1] ; CHECK-NEXT: fcvtzu w13, d3 ; CHECK-NEXT: cmp w8, w10 ; CHECK-NEXT: fcvtzu w15, d4 @@ -2912,29 +2912,29 @@ define <8 x i16> @test_unsigned_v8f64_v8i16(<8 x double> %f) { ; CHECK-NEXT: cmp w9, w10 ; CHECK-NEXT: csel w9, w9, w10, lo ; CHECK-NEXT: cmp w11, w10 -; CHECK-NEXT: fmov s4, w9 +; CHECK-NEXT: fmov s3, w9 ; CHECK-NEXT: csel w9, w11, w10, lo ; CHECK-NEXT: cmp w12, w10 ; CHECK-NEXT: fcvtzu w11, d0 -; CHECK-NEXT: mov v4.s[1], w8 +; CHECK-NEXT: mov v3.s[1], w8 ; CHECK-NEXT: csel w8, w12, w10, lo ; CHECK-NEXT: cmp w13, w10 -; CHECK-NEXT: fmov s3, w8 +; CHECK-NEXT: fmov s2, w8 ; CHECK-NEXT: csel w8, w13, w10, lo ; CHECK-NEXT: cmp w14, w10 -; CHECK-NEXT: mov v3.s[1], w9 +; CHECK-NEXT: mov v2.s[1], w9 ; CHECK-NEXT: csel w9, w14, w10, lo ; CHECK-NEXT: cmp w15, w10 -; CHECK-NEXT: fmov s2, w9 +; CHECK-NEXT: fmov s1, w9 ; CHECK-NEXT: csel w9, w15, w10, lo ; CHECK-NEXT: cmp w11, w10 -; CHECK-NEXT: mov v2.s[1], w8 +; CHECK-NEXT: mov v1.s[1], w8 ; CHECK-NEXT: csel w8, w11, w10, lo -; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: adrp x8, .LCPI84_0 -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI84_0] -; CHECK-NEXT: mov v1.s[1], w9 -; CHECK-NEXT: tbl v0.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b +; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI84_0] +; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: tbl v0.16b, { 
v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b ; CHECK-NEXT: ret %x = call <8 x i16> @llvm.fptoui.sat.v8f64.v8i16(<8 x double> %f) ret <8 x i16> %x @@ -2973,53 +2973,53 @@ define <16 x i16> @test_unsigned_v16f64_v16i16(<16 x double> %f) { ; CHECK-NEXT: fcvtzu w16, d0 ; CHECK-NEXT: csel w11, w11, w8, lo ; CHECK-NEXT: cmp w17, w8 +; CHECK-NEXT: fmov s18, w11 ; CHECK-NEXT: mov v19.s[1], w13 ; CHECK-NEXT: csel w13, w17, w8, lo ; CHECK-NEXT: cmp w10, w8 ; CHECK-NEXT: csel w10, w10, w8, lo ; CHECK-NEXT: cmp w18, w8 -; CHECK-NEXT: fmov s18, w11 +; CHECK-NEXT: fcvtzu w17, d2 ; CHECK-NEXT: csel w11, w18, w8, lo ; CHECK-NEXT: cmp w12, w8 -; CHECK-NEXT: fcvtzu w17, d2 +; CHECK-NEXT: mov v18.s[1], w9 ; CHECK-NEXT: csel w12, w12, w8, lo ; CHECK-NEXT: cmp w16, w8 -; CHECK-NEXT: fcvtzu w18, d6 -; CHECK-NEXT: mov v18.s[1], w9 +; CHECK-NEXT: fmov s17, w10 ; CHECK-NEXT: csel w9, w16, w8, lo ; CHECK-NEXT: cmp w14, w8 -; CHECK-NEXT: fmov s17, w10 -; CHECK-NEXT: csel w10, w14, w8, lo ; CHECK-NEXT: fcvtzu w16, d5 -; CHECK-NEXT: fmov s23, w10 +; CHECK-NEXT: csel w10, w14, w8, lo +; CHECK-NEXT: fcvtzu w18, d6 ; CHECK-NEXT: cmp w17, w8 -; CHECK-NEXT: fcvtzu w14, d3 +; CHECK-NEXT: fmov s5, w10 ; CHECK-NEXT: csel w10, w17, w8, lo ; CHECK-NEXT: cmp w15, w8 +; CHECK-NEXT: fcvtzu w14, d3 ; CHECK-NEXT: fcvtzu w17, d4 +; CHECK-NEXT: fmov s16, w12 ; CHECK-NEXT: mov v17.s[1], w13 -; CHECK-NEXT: mov v23.s[1], w9 +; CHECK-NEXT: mov v5.s[1], w9 ; CHECK-NEXT: csel w9, w15, w8, lo ; CHECK-NEXT: cmp w18, w8 -; CHECK-NEXT: fmov s22, w9 +; CHECK-NEXT: fmov s4, w9 ; CHECK-NEXT: csel w9, w18, w8, lo ; CHECK-NEXT: cmp w16, w8 -; CHECK-NEXT: fmov s16, w12 -; CHECK-NEXT: mov v22.s[1], w10 +; CHECK-NEXT: mov v16.s[1], w11 +; CHECK-NEXT: mov v4.s[1], w10 ; CHECK-NEXT: csel w10, w16, w8, lo ; CHECK-NEXT: cmp w14, w8 -; CHECK-NEXT: fmov s21, w10 +; CHECK-NEXT: fmov s3, w10 ; CHECK-NEXT: csel w10, w14, w8, lo ; CHECK-NEXT: cmp w17, w8 ; CHECK-NEXT: csel w8, w17, w8, lo -; CHECK-NEXT: mov v16.s[1], w11 -; CHECK-NEXT: mov 
v21.s[1], w9 -; CHECK-NEXT: fmov s20, w8 +; CHECK-NEXT: fmov s2, w8 ; CHECK-NEXT: adrp x8, .LCPI85_0 +; CHECK-NEXT: mov v3.s[1], w9 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI85_0] -; CHECK-NEXT: mov v20.s[1], w10 +; CHECK-NEXT: mov v2.s[1], w10 ; CHECK-NEXT: tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b -; CHECK-NEXT: tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b +; CHECK-NEXT: tbl v1.16b, { v2.16b, v3.16b, v4.16b, v5.16b }, v1.16b ; CHECK-NEXT: ret %x = call <16 x i16> @llvm.fptoui.sat.v16f64.v16i16(<16 x double> %f) ret <16 x i16> %x diff --git a/llvm/test/CodeGen/AArch64/insert-subvector.ll b/llvm/test/CodeGen/AArch64/insert-subvector.ll index 6828fa9f1508c8..d664421086fef0 100644 --- a/llvm/test/CodeGen/AArch64/insert-subvector.ll +++ b/llvm/test/CodeGen/AArch64/insert-subvector.ll @@ -47,11 +47,10 @@ define <16 x i8> @insert_v16i8_4_1(float %tmp, <16 x i8> %b, <16 x i8> %a) { define <16 x i8> @insert_v16i8_4_15(float %tmp, <16 x i8> %b, <16 x i8> %a) { ; CHECK-LABEL: insert_v16i8_4_15: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q2 killed $q2 def $q2_q3 +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: adrp x8, .LCPI4_0 -; CHECK-NEXT: mov v3.16b, v1.16b -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI4_0] -; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_0] +; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-NEXT: ret %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %s2 @@ -146,11 +145,10 @@ define <8 x i16> @insert_v8i16_2_1(float %tmp, <8 x i16> %b, <8 x i16> %a) { define <8 x i16> @insert_v8i16_2_15(float %tmp, <8 x i16> %b, <8 x i16> %a) { ; CHECK-LABEL: insert_v8i16_2_15: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q2 killed $q2 def $q2_q3 +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: adrp x8, .LCPI13_0 -; CHECK-NEXT: mov v3.16b, v1.16b -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI13_0] -; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b +; CHECK-NEXT: ldr q2, 
[x8, :lo12:.LCPI13_0] +; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-NEXT: ret %s2 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %s2 @@ -272,7 +270,6 @@ define <16 x i8> @load_v16i8_4_1(float %tmp, <16 x i8> %b, ptr %a) { define <16 x i8> @load_v16i8_4_15(float %tmp, <16 x i8> %b, ptr %a) { ; CHECK-LABEL: load_v16i8_4_15: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q1 killed $q1 def $q0_q1 ; CHECK-NEXT: adrp x8, .LCPI24_0 ; CHECK-NEXT: ldr s0, [x0] ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI24_0] @@ -493,7 +490,6 @@ define <8 x i16> @load_v8i16_2_1(float %tmp, <8 x i16> %b, ptr %a) { define <8 x i16> @load_v8i16_2_15(float %tmp, <8 x i16> %b, ptr %a) { ; CHECK-LABEL: load_v8i16_2_15: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q1 killed $q1 def $q0_q1 ; CHECK-NEXT: adrp x8, .LCPI40_0 ; CHECK-NEXT: ldr s0, [x0] ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI40_0] diff --git a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll index 50c0c8b11e7517..4907abc6e946ec 100644 --- a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll +++ b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll @@ -1349,18 +1349,14 @@ define <8 x i16> @vselect_equivalent_shuffle_v8i16(<8 x i16> %a, <8 x i16> %b) { ; CHECK-SD-LABEL: vselect_equivalent_shuffle_v8i16: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: adrp x8, .LCPI92_0 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI92_0] -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: vselect_equivalent_shuffle_v8i16: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI92_0 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI92_0] -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl 
v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-GI-NEXT: ret %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> @@ -1386,9 +1382,8 @@ define <8 x i16> @vselect_equivalent_shuffle_v8i16_zero(<8 x i16> %a) { ; ; CHECK-GI-LABEL: vselect_equivalent_shuffle_v8i16_zero: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1 -; CHECK-GI-NEXT: adrp x8, .LCPI93_0 ; CHECK-GI-NEXT: movi v1.2d, #0000000000000000 +; CHECK-GI-NEXT: adrp x8, .LCPI93_0 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI93_0] ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-GI-NEXT: ret @@ -1422,9 +1417,8 @@ define <8 x i16> @vselect_equivalent_shuffle_v8i16_zeroswap(<8 x i16> %a) { ; ; CHECK-GI-LABEL: vselect_equivalent_shuffle_v8i16_zeroswap: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q31_q0 -; CHECK-GI-NEXT: adrp x8, .LCPI94_0 ; CHECK-GI-NEXT: movi v31.2d, #0000000000000000 +; CHECK-GI-NEXT: adrp x8, .LCPI94_0 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI94_0] ; CHECK-GI-NEXT: tbl v0.16b, { v31.16b, v0.16b }, v1.16b ; CHECK-GI-NEXT: ret @@ -1466,9 +1460,7 @@ define <4 x i32> @vselect_equivalent_shuffle_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-GI-LABEL: vselect_equivalent_shuffle_v4i32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI96_0 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI96_0] -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-GI-NEXT: ret %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> diff --git a/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll b/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll index 3f590226c47150..8c88d3c33e07ce 100644 --- a/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll +++ b/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll @@ -267,12 +267,8 @@ entry: define <16 x i8> @extract_4_v4i32_badindex(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, 
<4 x i32> %d) { ; CHECK-LABEL: extract_4_v4i32_badindex: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: adrp x8, .LCPI5_0 -; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI5_0] -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll b/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll index de90024a4a2571..1ed9e7cc5254d3 100644 --- a/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll +++ b/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll @@ -47,9 +47,7 @@ define <8 x i16> @v8i16_2(<4 x i16> %a, <4 x i16> %b) { ; CHECK-LABEL: v8i16_2: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, .LCPI4_0 -; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_0] -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-NEXT: ret entry: @@ -82,9 +80,7 @@ define <16 x i8> @v16i8_2(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: v16i8_2: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, .LCPI7_0 -; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI7_0] -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll b/llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll index afcced5dcb9ab5..d315c306aa37a0 100644 --- a/llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll +++ b/llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll @@ -137,9 +137,7 @@ define <8 x i16> @shuffle_widen_faili1(<4 x 
i16> %a, <4 x i16> %b) { ; CHECK-LABEL: shuffle_widen_faili1: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, .LCPI12_0 -; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI12_0] -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-NEXT: ret entry: @@ -152,9 +150,7 @@ define <8 x i16> @shuffle_widen_fail2(<4 x i16> %a, <4 x i16> %b) { ; CHECK-LABEL: shuffle_widen_fail2: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, .LCPI13_0 -; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_0] -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-NEXT: ret entry: @@ -167,9 +163,7 @@ define <8 x i16> @shuffle_widen_fail3(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: shuffle_widen_fail3: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, .LCPI14_0 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_0] -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/seqpairspill.mir b/llvm/test/CodeGen/AArch64/seqpairspill.mir index 0e6c94c44712c7..b29ab7727f65d6 100644 --- a/llvm/test/CodeGen/AArch64/seqpairspill.mir +++ b/llvm/test/CodeGen/AArch64/seqpairspill.mir @@ -7,11 +7,11 @@ body: | bb.0: ; Check the spill/reload sequence for the %0 register ; CHECK: renamable $[[REG0:[a-z0-9]+]]_[[REG1:[a-z0-9]+]] = CASPALX - ; CHECK-NEXT: STPXi renamable $[[REG0]], renamable $[[REG1]], %stack.0, 0, implicit killed $[[REG0]]_[[REG1]] :: (store (s128) into %stack.0, align 8) + ; CHECK-NEXT: STPXi killed renamable $[[REG0]], renamable $[[REG1]], %stack.0, 0 :: (store (s128) into %stack.0, align 8) ; CHECK: INLINEASM - ; CHECK: renamable 
$[[REG2:[a-z0-9]+]], renamable $[[REG3:[a-z0-9]+]] = LDPXi %stack.0, 0, implicit-def $[[REG2]]_[[REG3]] :: (load (s128) from %stack.0, align 8) + ; CHECK: renamable $[[REG2:[a-z0-9]+]], renamable $[[REG3:[a-z0-9]+]] = LDPXi %stack.0, 0 :: (load (s128) from %stack.0, align 8) ; CHECK-NEXT: $xzr = COPY renamable $[[REG2]] - ; CHECK-NEXT: $xzr = COPY renamable $[[REG3]] + ; CHECK-NEXT: $xzr = COPY killed renamable $[[REG3]] %0 : xseqpairsclass = IMPLICIT_DEF %1 : xseqpairsclass = IMPLICIT_DEF %2 : gpr64common = IMPLICIT_DEF @@ -27,11 +27,11 @@ body: | bb.0: ; Check the spill/reload sequence for the %0 register ; CHECK: $[[REG0:[a-z0-9]+]]_[[REG1:[a-z0-9]+]] = CASPALW - ; CHECK-NEXT: STPWi renamable $[[REG0]], renamable $[[REG1]], %stack.0, 0, implicit killed $[[REG0]]_[[REG1]] :: (store (s64) into %stack.0, align 4) + ; CHECK-NEXT: STPWi killed renamable $[[REG0]], renamable $[[REG1]], %stack.0, 0 :: (store (s64) into %stack.0, align 4) ; CHECK: INLINEASM - ; CHECK: renamable $[[REG2:[a-z0-9]+]], renamable $[[REG3:[a-z0-9]+]] = LDPWi %stack.0, 0, implicit-def $[[REG2]]_[[REG3]] :: (load (s64) from %stack.0, align 4) + ; CHECK: renamable $[[REG2:[a-z0-9]+]], renamable $[[REG3:[a-z0-9]+]] = LDPWi %stack.0, 0 :: (load (s64) from %stack.0, align 4) ; CHECK-NEXT: $xzr = COPY renamable $[[REG2]] - ; CHECK-NEXT: $xzr = COPY renamable $[[REG3]] + ; CHECK-NEXT: $xzr = COPY killed renamable $[[REG3]] %0 : wseqpairsclass = IMPLICIT_DEF %1 : wseqpairsclass = IMPLICIT_DEF %2 : gpr64common = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll b/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll index fb571eff39fe50..4e49a055067847 100644 --- a/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll +++ b/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll @@ -21,12 +21,8 @@ define <16 x i8> @shuffle4_v4i8_16(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> %d) { ; CHECK-LABEL: shuffle4_v4i8_16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; 
CHECK-NEXT: adrp x8, .LCPI0_0 -; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI0_0] -; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b ; CHECK-NEXT: ret %x = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> @@ -47,12 +43,8 @@ define <16 x i8> @shuffle4_v4i8_16(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i define <8 x i8> @shuffle4_v4i8_8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> %d) { ; CHECK-LABEL: shuffle4_v4i8_8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: adrp x8, .LCPI1_0 -; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: ldr d4, [x8, :lo12:.LCPI1_0] -; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.8b ; CHECK-NEXT: ret %x = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> @@ -101,10 +93,10 @@ define <8 x i8> @shuffle4_v4i8_8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> define <16 x i8> @shuffle4_v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) { ; CHECK-LABEL: shuffle4_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: adrp x8, .LCPI2_0 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: mov v2.d[1], v3.d[0] @@ -214,10 +206,10 @@ define <8 x i16> @shuffle4_v8i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x ; CHECK-LABEL: 
shuffle4_v8i16: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov d5, d2 +; CHECK-NEXT: fmov d4, d0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 ; CHECK-NEXT: adrp x8, .LCPI4_0 -; CHECK-NEXT: fmov d4, d0 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI4_0] ; CHECK-NEXT: mov v4.d[1], v1.d[0] ; CHECK-NEXT: mov v5.d[1], v3.d[0] @@ -282,10 +274,10 @@ define <16 x i8> @shuffle4_v8i8_v16i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 ; CHECK-NEXT: mov v0.d[1], v0.d[0] ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI6_0] ; CHECK-NEXT: adrp x8, .LCPI6_1 -; CHECK-NEXT: tbl v3.8b, { v2.16b }, v1.8b -; CHECK-NEXT: tbl v2.8b, { v0.16b }, v1.8b +; CHECK-NEXT: tbl v2.8b, { v2.16b }, v1.8b +; CHECK-NEXT: tbl v1.8b, { v0.16b }, v1.8b ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI6_1] -; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b +; CHECK-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b ; CHECK-NEXT: ret %x = shufflevector <8 x i8> %a, <8 x i8> %b, <4 x i32> %y = shufflevector <8 x i8> %c, <8 x i8> %d, <4 x i32> @@ -354,10 +346,10 @@ define <8 x i16> @shuffle4_v4i8_zext(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x ; CHECK-LABEL: shuffle4_v4i8_zext: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov d5, d2 -; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: fmov d4, d0 ; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: adrp x8, .LCPI8_0 -; CHECK-NEXT: fmov d4, d0 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI8_0] ; CHECK-NEXT: mov v4.d[1], v1.d[0] ; CHECK-NEXT: mov v5.d[1], v3.d[0] @@ -393,12 +385,8 @@ define <8 x i16> @shuffle4_v4i8_zext(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x define <16 x i8> @shuffle4_v4i16_trunc(<4 x i16> %ae, <4 x i16> %be, <4 x i16> %ce, <4 x i16> %de) { ; CHECK-LABEL: shuffle4_v4i16_trunc: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: adrp x8, .LCPI9_0 -; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def 
$q0_q1_q2_q3 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI9_0] -; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b ; CHECK-NEXT: ret %a = trunc <4 x i16> %ae to <4 x i8> @@ -432,13 +420,13 @@ define <16 x i8> @shuffle4_v4i16_trunc(<4 x i16> %ae, <4 x i16> %be, <4 x i16> % define <16 x i8> @shuffle4_v4i32_trunc(<4 x i32> %ae, <4 x i32> %be, <4 x i32> %ce, <4 x i32> %de) { ; CHECK-LABEL: shuffle4_v4i32_trunc: ; CHECK: // %bb.0: -; CHECK-NEXT: xtn v4.4h, v0.4s +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: xtn v1.4h, v1.4s ; CHECK-NEXT: adrp x8, .LCPI10_0 -; CHECK-NEXT: xtn v5.4h, v1.4s -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI10_0] -; CHECK-NEXT: xtn v6.4h, v2.4s -; CHECK-NEXT: xtn v7.4h, v3.4s -; CHECK-NEXT: tbl v0.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v0.16b +; CHECK-NEXT: xtn v2.4h, v2.4s +; CHECK-NEXT: xtn v3.4h, v3.4s +; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI10_0] +; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b ; CHECK-NEXT: ret %a = trunc <4 x i32> %ae to <4 x i8> %b = trunc <4 x i32> %be to <4 x i8> @@ -470,11 +458,8 @@ define <16 x i8> @shuffle4_v4i32_trunc(<4 x i32> %ae, <4 x i32> %be, <4 x i32> % define <12 x i8> @shuffle3_v4i8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c) { ; CHECK-LABEL: shuffle3_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: adrp x8, .LCPI11_0 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI11_0] -; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v3.16b ; CHECK-NEXT: ret %x = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> @@ -504,9 +489,9 @@ define <8 x i16> @shuffle3_v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) { ; CHECK-LABEL: shuffle3_v4i16: ; 
CHECK: // %bb.0: ; CHECK-NEXT: fmov d3, d2 +; CHECK-NEXT: fmov d2, d0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: adrp x8, .LCPI12_0 -; CHECK-NEXT: fmov d2, d0 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI12_0] ; CHECK-NEXT: mov v2.d[1], v1.d[0] ; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b @@ -563,12 +548,12 @@ define <8 x i8> @insert4_v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c, <16 x i8> ; CHECK-LABEL: insert4_v8i8: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov v4.16b, v3.16b ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mov v4.16b, v3.16b +; CHECK-NEXT: mov v3.16b, v1.16b ; CHECK-NEXT: adrp x8, .LCPI14_0 -; CHECK-NEXT: adrp x9, .LCPI14_1 ; CHECK-NEXT: mov v0.d[1], v2.d[0] -; CHECK-NEXT: mov v3.16b, v1.16b +; CHECK-NEXT: adrp x9, .LCPI14_1 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI14_0] ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI14_1] ; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b @@ -632,16 +617,14 @@ define <8 x i8> @insert4_v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c, <16 x i8> define <16 x i8> @insert4_v16i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c, <16 x i8> %d) { ; CHECK-LABEL: insert4_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov v4.16b, v3.16b -; CHECK-NEXT: adrp x8, .LCPI15_0 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q31_q0 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: mov v3.16b, v1.16b -; CHECK-NEXT: ldr q5, [x8, :lo12:.LCPI15_0] +; CHECK-NEXT: adrp x8, .LCPI15_0 ; CHECK-NEXT: mov v0.d[1], v2.d[0] +; CHECK-NEXT: mov v2.16b, v1.16b +; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI15_0] ; CHECK-NEXT: adrp x8, .LCPI15_1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_1] -; CHECK-NEXT: tbl v31.16b, { v3.16b, v4.16b }, v5.16b +; CHECK-NEXT: tbl v31.16b, { v2.16b, v3.16b }, v4.16b ; CHECK-NEXT: tbl v0.16b, { v31.16b, v0.16b }, v1.16b ; CHECK-NEXT: ret %e1 = extractelement <8 x i8> %a, i32 4 @@ -705,6 +688,7 @@ define <16 x i16> @test(<2 x double> %l213, <2 x double> %l231, <2 x double> 
%l2 ; CHECK-NEXT: adrp x8, .LCPI16_0 ; CHECK-NEXT: frintm v1.2d, v1.2d ; CHECK-NEXT: frintm v5.2d, v5.2d +; CHECK-NEXT: ldr q16, [x8, :lo12:.LCPI16_0] ; CHECK-NEXT: frintm v2.2d, v2.2d ; CHECK-NEXT: frintm v6.2d, v6.2d ; CHECK-NEXT: frintm v3.2d, v3.2d @@ -717,17 +701,16 @@ define <16 x i16> @test(<2 x double> %l213, <2 x double> %l231, <2 x double> %l2 ; CHECK-NEXT: fcvtzs v6.2d, v6.2d ; CHECK-NEXT: fcvtzs v3.2d, v3.2d ; CHECK-NEXT: fcvtzs v7.2d, v7.2d -; CHECK-NEXT: xtn v16.2s, v0.2d -; CHECK-NEXT: xtn v20.2s, v4.2d -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI16_0] -; CHECK-NEXT: xtn v17.2s, v1.2d -; CHECK-NEXT: xtn v21.2s, v5.2d -; CHECK-NEXT: xtn v18.2s, v2.2d -; CHECK-NEXT: xtn v22.2s, v6.2d -; CHECK-NEXT: xtn v19.2s, v3.2d -; CHECK-NEXT: xtn v23.2s, v7.2d -; CHECK-NEXT: tbl v1.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v0.16b -; CHECK-NEXT: tbl v2.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v0.16b +; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: xtn v4.2s, v4.2d +; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: xtn v5.2s, v5.2d +; CHECK-NEXT: xtn v2.2s, v2.2d +; CHECK-NEXT: xtn v6.2s, v6.2d +; CHECK-NEXT: xtn v3.2s, v3.2d +; CHECK-NEXT: xtn v7.2s, v7.2d +; CHECK-NEXT: tbl v1.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b +; CHECK-NEXT: tbl v2.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b ; CHECK-NEXT: uzp1 v0.8h, v1.8h, v2.8h ; CHECK-NEXT: uzp2 v1.8h, v1.8h, v2.8h ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/shuffles.ll b/llvm/test/CodeGen/AArch64/shuffles.ll index 41dd7f06712d24..00057ea3359b74 100644 --- a/llvm/test/CodeGen/AArch64/shuffles.ll +++ b/llvm/test/CodeGen/AArch64/shuffles.ll @@ -366,9 +366,7 @@ define <8 x i16> @test_shuf9(<8 x i16> %a, <8 x i16> %b) ; CHECKLE-LABEL: test_shuf9: ; CHECKLE: // %bb.0: ; CHECKLE-NEXT: adrp x8, .LCPI13_0 -; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI13_0] -; CHECKLE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: tbl 
v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECKLE-NEXT: ret ; @@ -378,10 +376,10 @@ define <8 x i16> @test_shuf9(<8 x i16> %a, <8 x i16> %b) ; CHECKBE-NEXT: rev64 v0.16b, v0.16b ; CHECKBE-NEXT: adrp x8, .LCPI13_0 ; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI13_0 -; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8 -; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECKBE-NEXT: ld1 { v0.16b }, [x8] -; CHECKBE-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b +; CHECKBE-NEXT: ld1 { v2.16b }, [x8] +; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 +; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECKBE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECKBE-NEXT: rev64 v0.16b, v0.16b ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECKBE-NEXT: ret @@ -418,9 +416,7 @@ define <8 x half> @test_shuf11(<8 x half> %a, <8 x half> %b) ; CHECKLE-LABEL: test_shuf11: ; CHECKLE: // %bb.0: ; CHECKLE-NEXT: adrp x8, .LCPI15_0 -; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI15_0] -; CHECKLE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECKLE-NEXT: ret ; @@ -430,10 +426,10 @@ define <8 x half> @test_shuf11(<8 x half> %a, <8 x half> %b) ; CHECKBE-NEXT: rev64 v0.16b, v0.16b ; CHECKBE-NEXT: adrp x8, .LCPI15_0 ; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI15_0 -; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8 -; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECKBE-NEXT: ld1 { v0.16b }, [x8] -; CHECKBE-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b +; CHECKBE-NEXT: ld1 { v2.16b }, [x8] +; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 +; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECKBE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECKBE-NEXT: rev64 v0.16b, v0.16b ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECKBE-NEXT: ret @@ -446,9 +442,7 @@ define <8 x half> @test_shuf12(<8 x half> %a, <8 x half> %b) ; CHECKLE-LABEL: test_shuf12: ; CHECKLE: // %bb.0: ; CHECKLE-NEXT: 
adrp x8, .LCPI16_0 -; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI16_0] -; CHECKLE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECKLE-NEXT: ret ; @@ -458,10 +452,10 @@ define <8 x half> @test_shuf12(<8 x half> %a, <8 x half> %b) ; CHECKBE-NEXT: rev64 v0.16b, v0.16b ; CHECKBE-NEXT: adrp x8, .LCPI16_0 ; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI16_0 -; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8 -; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECKBE-NEXT: ld1 { v0.16b }, [x8] -; CHECKBE-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b +; CHECKBE-NEXT: ld1 { v2.16b }, [x8] +; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 +; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECKBE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECKBE-NEXT: rev64 v0.16b, v0.16b ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECKBE-NEXT: ret @@ -474,9 +468,7 @@ define <8 x half> @test_shuf13(<8 x half> %a, <8 x half> %b) ; CHECKLE-LABEL: test_shuf13: ; CHECKLE: // %bb.0: ; CHECKLE-NEXT: adrp x8, .LCPI17_0 -; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI17_0] -; CHECKLE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECKLE-NEXT: ret ; @@ -486,10 +478,10 @@ define <8 x half> @test_shuf13(<8 x half> %a, <8 x half> %b) ; CHECKBE-NEXT: rev64 v0.16b, v0.16b ; CHECKBE-NEXT: adrp x8, .LCPI17_0 ; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI17_0 -; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8 -; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECKBE-NEXT: ld1 { v0.16b }, [x8] -; CHECKBE-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b +; CHECKBE-NEXT: ld1 { v2.16b }, [x8] +; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 +; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECKBE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECKBE-NEXT: rev64 
v0.16b, v0.16b ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECKBE-NEXT: ret @@ -502,9 +494,7 @@ define <8 x half> @test_shuf14(<8 x half> %a, <8 x half> %b) ; CHECKLE-LABEL: test_shuf14: ; CHECKLE: // %bb.0: ; CHECKLE-NEXT: adrp x8, .LCPI18_0 -; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI18_0] -; CHECKLE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECKLE-NEXT: ret ; @@ -514,10 +504,10 @@ define <8 x half> @test_shuf14(<8 x half> %a, <8 x half> %b) ; CHECKBE-NEXT: rev64 v0.16b, v0.16b ; CHECKBE-NEXT: adrp x8, .LCPI18_0 ; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI18_0 -; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8 -; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECKBE-NEXT: ld1 { v0.16b }, [x8] -; CHECKBE-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b +; CHECKBE-NEXT: ld1 { v2.16b }, [x8] +; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 +; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECKBE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECKBE-NEXT: rev64 v0.16b, v0.16b ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECKBE-NEXT: ret @@ -530,9 +520,7 @@ define <8 x half> @test_shuf15(<8 x half> %a, <8 x half> %b) ; CHECKLE-LABEL: test_shuf15: ; CHECKLE: // %bb.0: ; CHECKLE-NEXT: adrp x8, .LCPI19_0 -; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI19_0] -; CHECKLE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECKLE-NEXT: ret ; @@ -542,10 +530,10 @@ define <8 x half> @test_shuf15(<8 x half> %a, <8 x half> %b) ; CHECKBE-NEXT: rev64 v0.16b, v0.16b ; CHECKBE-NEXT: adrp x8, .LCPI19_0 ; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI19_0 -; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8 -; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECKBE-NEXT: ld1 { v0.16b }, [x8] -; CHECKBE-NEXT: tbl v0.16b, { v1.16b, 
v2.16b }, v0.16b +; CHECKBE-NEXT: ld1 { v2.16b }, [x8] +; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 +; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECKBE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECKBE-NEXT: rev64 v0.16b, v0.16b ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECKBE-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/shufflevector.ll b/llvm/test/CodeGen/AArch64/shufflevector.ll index b1131f287fe9a9..1f81a2e4bbb824 100644 --- a/llvm/test/CodeGen/AArch64/shufflevector.ll +++ b/llvm/test/CodeGen/AArch64/shufflevector.ll @@ -33,23 +33,12 @@ define <8 x i8> @shufflevector_v8i8(<8 x i8> %a, <8 x i8> %b) { } define <16 x i8> @shufflevector_v16i8(<16 x i8> %a, <16 x i8> %b) { -; CHECK-SD-LABEL: shufflevector_v16i8: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: adrp x8, .LCPI1_0 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI1_0] -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: shufflevector_v16i8: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: adrp x8, .LCPI1_0 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI1_0] -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: shufflevector_v16i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI1_0 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI1_0] +; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-NEXT: ret %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %c } @@ -64,23 +53,12 @@ define <4 x i16> @shufflevector_v4i16(<4 x i16> %a, <4 x i16> %b) { } define <8 x i16> @shufflevector_v8i16(<8 x i16> %a, <8 x i16> %b) { -; CHECK-SD-LABEL: shufflevector_v8i16: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: adrp x8, .LCPI3_0 -; 
CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI3_0] -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: shufflevector_v8i16: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: adrp x8, .LCPI3_0 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI3_0] -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: shufflevector_v8i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI3_0 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_0] +; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-NEXT: ret %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %c } @@ -237,26 +215,25 @@ define i32 @shufflevector_v4i8(<4 x i8> %a, <4 x i8> %b){ define <32 x i8> @shufflevector_v32i8(<32 x i8> %a, <32 x i8> %b){ ; CHECK-SD-LABEL: shufflevector_v32i8: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 def $q1_q2 +; CHECK-SD-NEXT: mov v1.16b, v2.16b ; CHECK-SD-NEXT: adrp x8, .LCPI16_0 ; CHECK-SD-NEXT: adrp x9, .LCPI16_1 -; CHECK-SD-NEXT: mov v1.16b, v0.16b -; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI16_0] -; CHECK-SD-NEXT: ldr q4, [x9, :lo12:.LCPI16_1] -; CHECK-SD-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v3.16b -; CHECK-SD-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v4.16b +; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI16_0] +; CHECK-SD-NEXT: ldr q3, [x9, :lo12:.LCPI16_1] +; CHECK-SD-NEXT: tbl v2.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-SD-NEXT: tbl v1.16b, { v0.16b, v1.16b }, v3.16b +; CHECK-SD-NEXT: mov v0.16b, v2.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shufflevector_v32i8: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: mov v3.16b, v0.16b +; CHECK-GI-NEXT: mov v1.16b, v0.16b ; CHECK-GI-NEXT: adrp x8, .LCPI16_1 ; CHECK-GI-NEXT: 
adrp x9, .LCPI16_0 -; CHECK-GI-NEXT: mov v4.16b, v2.16b ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI16_1] -; CHECK-GI-NEXT: ldr q1, [x9, :lo12:.LCPI16_0] -; CHECK-GI-NEXT: tbl v0.16b, { v3.16b, v4.16b }, v0.16b -; CHECK-GI-NEXT: tbl v1.16b, { v3.16b, v4.16b }, v1.16b +; CHECK-GI-NEXT: ldr q3, [x9, :lo12:.LCPI16_0] +; CHECK-GI-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b +; CHECK-GI-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v3.16b ; CHECK-GI-NEXT: ret %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ret <32 x i8> %c @@ -298,26 +275,25 @@ define i32 @shufflevector_v2i16(<2 x i16> %a, <2 x i16> %b){ define <16 x i16> @shufflevector_v16i16(<16 x i16> %a, <16 x i16> %b){ ; CHECK-SD-LABEL: shufflevector_v16i16: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: // kill: def $q2 killed $q2 def $q1_q2 +; CHECK-SD-NEXT: mov v1.16b, v2.16b ; CHECK-SD-NEXT: adrp x8, .LCPI18_0 ; CHECK-SD-NEXT: adrp x9, .LCPI18_1 -; CHECK-SD-NEXT: mov v1.16b, v0.16b -; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI18_0] -; CHECK-SD-NEXT: ldr q4, [x9, :lo12:.LCPI18_1] -; CHECK-SD-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v3.16b -; CHECK-SD-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v4.16b +; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI18_0] +; CHECK-SD-NEXT: ldr q3, [x9, :lo12:.LCPI18_1] +; CHECK-SD-NEXT: tbl v2.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-SD-NEXT: tbl v1.16b, { v0.16b, v1.16b }, v3.16b +; CHECK-SD-NEXT: mov v0.16b, v2.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shufflevector_v16i16: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: mov v3.16b, v0.16b +; CHECK-GI-NEXT: mov v1.16b, v0.16b ; CHECK-GI-NEXT: adrp x8, .LCPI18_1 ; CHECK-GI-NEXT: adrp x9, .LCPI18_0 -; CHECK-GI-NEXT: mov v4.16b, v2.16b ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI18_1] -; CHECK-GI-NEXT: ldr q1, [x9, :lo12:.LCPI18_0] -; CHECK-GI-NEXT: tbl v0.16b, { v3.16b, v4.16b }, v0.16b -; CHECK-GI-NEXT: tbl v1.16b, { v3.16b, v4.16b }, v1.16b +; CHECK-GI-NEXT: ldr q3, [x9, :lo12:.LCPI18_0] +; CHECK-GI-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b +; CHECK-GI-NEXT: 
tbl v1.16b, { v1.16b, v2.16b }, v3.16b ; CHECK-GI-NEXT: ret %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %c @@ -344,10 +320,8 @@ define <8 x i32> @shufflevector_v8i32(<8 x i32> %a, <8 x i32> %b) { ; CHECK-GI-LABEL: shufflevector_v8i32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI20_0 -; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: uzp2 v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI20_0] -; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: tbl v1.16b, { v2.16b, v3.16b }, v4.16b ; CHECK-GI-NEXT: ret %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> @@ -563,23 +537,12 @@ define <3 x i16> @shufflevector_v3i16(<3 x i16> %a, <3 x i16> %b) { } define <7 x i16> @shufflevector_v7i16(<7 x i16> %a, <7 x i16> %b) { -; CHECK-SD-LABEL: shufflevector_v7i16: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: adrp x8, .LCPI33_0 -; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI33_0] -; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: shufflevector_v7i16: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: adrp x8, .LCPI33_0 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI33_0] -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: shufflevector_v7i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI33_0 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI33_0] +; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-NEXT: ret %c = shufflevector <7 x i16> %a, <7 x i16> %b, <7 x i32> ret <7 x i16> %c } @@ -594,9 +557,7 @@ define <3 x i32> @shufflevector_v3i32(<3 x i32> %a, <3 x i32> %b) { ; CHECK-GI-LABEL: 
shufflevector_v3i32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI34_0 -; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI34_0] -; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-GI-NEXT: ret %c = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32> diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll index e7a6c0d6c549be..52a161ba78525a 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll @@ -6,9 +6,7 @@ target triple = "aarch64-linux" define void @add_f16_vg1x2(i32 %slice, %zn0, %zn1) #0 { ; CHECK-LABEL: add_f16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fadd za.h[w8, 0, vgx2], { z0.h, z1.h } ; CHECK-NEXT: fadd za.h[w8, 7, vgx2], { z0.h, z1.h } ; CHECK-NEXT: ret @@ -21,11 +19,7 @@ define void @add_f16_vg1x2(i32 %slice, %zn0, %zn0, %zn1, ; CHECK-LABEL: add_f16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fadd za.h[w8, 0, vgx4], { z0.h - z3.h } ; CHECK-NEXT: fadd za.h[w8, 7, vgx4], { z0.h - z3.h } ; CHECK-NEXT: ret @@ -41,9 +35,7 @@ define void @add_f16_vg1x4(i32 %slice, %zn0, %zn0, %zn1) #1 { ; CHECK-LABEL: sub_f16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed 
$z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fsub za.h[w8, 0, vgx2], { z0.h, z1.h } ; CHECK-NEXT: fsub za.h[w8, 7, vgx2], { z0.h, z1.h } ; CHECK-NEXT: ret @@ -56,11 +48,7 @@ define void @sub_f16_vg1x2(i32 %slice, %zn0, %zn0, %zn1, ; CHECK-LABEL: sub_f16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fsub za.h[w8, 0, vgx4], { z0.h - z3.h } ; CHECK-NEXT: fsub za.h[w8, 7, vgx4], { z0.h - z3.h } ; CHECK-NEXT: ret @@ -76,9 +64,7 @@ define void @sub_f16_vg1x4(i32 %slice, %zn0, %zn0, %zn1) #2 { ; CHECK-LABEL: add_bf16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfadd za.h[w8, 0, vgx2], { z0.h, z1.h } ; CHECK-NEXT: bfadd za.h[w8, 7, vgx2], { z0.h, z1.h } ; CHECK-NEXT: ret @@ -91,11 +77,7 @@ define void @add_bf16_vg1x2(i32 %slice, %zn0, %zn0, %zn1, ; CHECK-LABEL: add_bf16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfadd za.h[w8, 0, vgx4], { z0.h - z3.h } ; CHECK-NEXT: bfadd za.h[w8, 7, vgx4], { z0.h - z3.h } ; CHECK-NEXT: ret @@ -111,9 +93,7 @@ define void @add_bf16_vg1x4(i32 %slice, %zn0, %zn0, %zn1) #2 { ; CHECK-LABEL: sub_bf16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; 
CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfsub za.h[w8, 0, vgx2], { z0.h, z1.h } ; CHECK-NEXT: bfsub za.h[w8, 7, vgx2], { z0.h, z1.h } ; CHECK-NEXT: ret @@ -126,11 +106,7 @@ define void @sub_bf16_vg1x2(i32 %slice, %zn0, %zn0, %zn1, ; CHECK-LABEL: sub_bf16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfsub za.h[w8, 0, vgx4], { z0.h - z3.h } ; CHECK-NEXT: bfsub za.h[w8, 7, vgx4], { z0.h - z3.h } ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll index ecaf8bccb71fb6..402183ab123728 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll @@ -8,9 +8,7 @@ define void @multi_vector_add_write_single_za_vg1x2_i32(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_write_single_za_vg1x2_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: add za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s ; CHECK-NEXT: add za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s ; CHECK-NEXT: ret @@ -27,9 +25,7 @@ define void @multi_vector_add_write_single_za_vg1x2_i32(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_write_single_za_vg1x2_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: add za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d ; CHECK-NEXT: add za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d ; 
CHECK-NEXT: ret @@ -50,11 +46,7 @@ define void @multi_vector_add_write_single_za_vg1x2_i64(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_add_write_single_za_vg1x4_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: add za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s ; CHECK-NEXT: add za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s ; CHECK-NEXT: ret @@ -75,11 +67,7 @@ define void @multi_vector_add_write_single_za_vg1x4_i32(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_add_write_za_vg1x2_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: add za.s[w8, 0, vgx2], { z0.s, z1.s }, { z2.s, z3.s } ; CHECK-NEXT: add za.s[w8, 7, vgx2], { z0.s, z1.s }, { z2.s, z3.s } ; CHECK-NEXT: ret @@ -128,11 +112,7 @@ define void @multi_vector_add_write_za_vg1x2_i32(i32 %slice, define void @multi_vector_add_write_za_vg1x2_i64(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_add_write_za_vg1x2_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: add za.d[w8, 0, vgx2], { z0.d, z1.d }, { z2.d, z3.d } ; CHECK-NEXT: add za.d[w8, 7, vgx2], { z0.d, z1.d }, { z2.d, z3.d } ; CHECK-NEXT: ret @@ -155,15 +135,7 @@ define 
void @multi_vector_add_write_za_vg1x2_i64(i32 %slice, define void @multi_vector_add_write_za_vg1x4_i32(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_add_write_za_vg1x4_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: add za.s[w8, 0, vgx4], { z0.s - z3.s }, { z4.s - z7.s } ; CHECK-NEXT: add za.s[w8, 7, vgx4], { z0.s - z3.s }, { z4.s - z7.s } ; CHECK-NEXT: ret @@ -187,15 +159,7 @@ define void @multi_vector_add_write_za_vg1x4_i32(i32 %slice, define void @multi_vector_add_write_za_vg1x4_i64(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_add_write_za_vg1x4_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: add za.d[w8, 0, vgx4], { z0.d - z3.d }, { z4.d - z7.d } ; CHECK-NEXT: add 
za.d[w8, 7, vgx4], { z0.d - z3.d }, { z4.d - z7.d } ; CHECK-NEXT: ret @@ -223,9 +187,7 @@ define void @multi_vector_add_write_za_vg1x4_i64(i32 %slice, define void @multi_vector_add_za_vg1x2_i32(i32 %slice, %zn0, %zn1) { ; CHECK-LABEL: multi_vector_add_za_vg1x2_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: add za.s[w8, 0, vgx2], { z0.s, z1.s } ; CHECK-NEXT: add za.s[w8, 7, vgx2], { z0.s, z1.s } ; CHECK-NEXT: ret @@ -238,9 +200,7 @@ define void @multi_vector_add_za_vg1x2_i32(i32 %slice, %zn0, define void @multi_vector_add_za_vg1x2_i64(i32 %slice, %zn0, %zn1) { ; CHECK-LABEL: multi_vector_add_za_vg1x2_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: add za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: add za.d[w8, 7, vgx2], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -253,9 +213,7 @@ define void @multi_vector_add_za_vg1x2_i64(i32 %slice, %zn0, define void @multi_vector_add_za_vg1x2_f32(i32 %slice, %zn0, %zn1) { ; CHECK-LABEL: multi_vector_add_za_vg1x2_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fadd za.s[w8, 0, vgx2], { z0.s, z1.s } ; CHECK-NEXT: fadd za.s[w8, 7, vgx2], { z0.s, z1.s } ; CHECK-NEXT: ret @@ -270,9 +228,7 @@ define void @multi_vector_add_za_vg1x2_f32(i32 %slice, %zn0 define void @multi_vector_add_za_vg1x2_f64(i32 %slice, %zn0, %zn1) { ; CHECK-LABEL: multi_vector_add_za_vg1x2_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fadd za.d[w8, 0, vgx2], { z0.d, z1.d } ; 
CHECK-NEXT: fadd za.d[w8, 7, vgx2], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -289,11 +245,7 @@ define void @multi_vector_add_za_vg1x2_f64(i32 %slice, %zn define void @multi_vector_add_za_vg1x4_i32(i32 %slice, %zn0, %zn1, %zn2, %zn3) { ; CHECK-LABEL: multi_vector_add_za_vg1x4_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: add za.s[w8, 0, vgx4], { z0.s - z3.s } ; CHECK-NEXT: add za.s[w8, 7, vgx4], { z0.s - z3.s } ; CHECK-NEXT: ret @@ -310,11 +262,7 @@ define void @multi_vector_add_za_vg1x4_i32(i32 %slice, %zn0, define void @multi_vector_add_za_vg1x4_i64(i32 %slice, %zn0, %zn1, %zn2, %zn3) { ; CHECK-LABEL: multi_vector_add_za_vg1x4_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: add za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: add za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret @@ -331,11 +279,7 @@ define void @multi_vector_add_za_vg1x4_i64(i32 %slice, %zn0, define void @multi_vector_add_za_vg1x4_f32(i32 %slice, %zn0, %zn1, %zn2, %zn3) { ; CHECK-LABEL: multi_vector_add_za_vg1x4_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 
killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fadd za.s[w8, 0, vgx4], { z0.s - z3.s } ; CHECK-NEXT: fadd za.s[w8, 7, vgx4], { z0.s - z3.s } ; CHECK-NEXT: ret @@ -352,11 +296,7 @@ define void @multi_vector_add_za_vg1x4_f32(i32 %slice, %zn0 define void @multi_vector_add_za_vg1x4_f64(i32 %slice, %zn0, %zn1, %zn2, %zn3) { ; CHECK-LABEL: multi_vector_add_za_vg1x4_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fadd za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: fadd za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll index 3a73ff7cdc29ce..613fba4a73838a 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll @@ -7,8 +7,6 @@ define @multi_vector_cvtn_x2_f16( %zn1, %zn2) { ; CHECK-LABEL: multi_vector_cvtn_x2_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fcvtn z0.h, { z0.s, z1.s } ; CHECK-NEXT: ret %res = call @llvm.aarch64.sve.fcvtn.x2.nxv4f32( %zn1, %zn2) @@ -22,8 +20,6 @@ define @multi_vector_cvtn_x2_f16( %zn1, define @multi_vector_bfcvtn_x2( %zn1, %zn2) { ; CHECK-LABEL: multi_vector_bfcvtn_x2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfcvtn z0.h, { z0.s, z1.s } ; CHECK-NEXT: ret %res = call @llvm.aarch64.sve.bfcvtn.x2( %zn1, %zn2) diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll 
b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll index 401cdd0b9dfb79..07b10fdc8eeb21 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll @@ -6,9 +6,7 @@ define void @multi_vector_add_single_vg1x2_s(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_single_vg1x2_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmla za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s ; CHECK-NEXT: fmla za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s ; CHECK-NEXT: ret @@ -25,9 +23,7 @@ define void @multi_vector_add_single_vg1x2_s(i32 %slice, %z define void @multi_vector_add_single_vg1x2_d(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_single_vg1x2_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmla za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d ; CHECK-NEXT: fmla za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d ; CHECK-NEXT: ret @@ -44,11 +40,7 @@ define void @multi_vector_add_single_vg1x2_d(i32 %slice, % define void @multi_vector_add_single_vg1x4_s(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_add_single_vg1x4_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmla za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s ; CHECK-NEXT: fmla za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s ; CHECK-NEXT: ret @@ -68,11 +60,7 @@ define void @multi_vector_add_single_vg1x4_s(i32 %slice, %z define void 
@multi_vector_add_single_vg1x4_d(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_add_single_vg1x4_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmla za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d ; CHECK-NEXT: fmla za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d ; CHECK-NEXT: ret @@ -94,9 +82,7 @@ define void @multi_vector_add_single_vg1x4_d(i32 %slice, % define void @multi_vector_sub_single_vg1x2_s(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_single_vg1x2_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmls za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s ; CHECK-NEXT: fmls za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s ; CHECK-NEXT: ret @@ -113,9 +99,7 @@ define void @multi_vector_sub_single_vg1x2_s(i32 %slice, %z define void @multi_vector_sub_single_vg1x2_d(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_single_vg1x2_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmls za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d ; CHECK-NEXT: fmls za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d ; CHECK-NEXT: ret @@ -132,11 +116,7 @@ define void @multi_vector_sub_single_vg1x2_d(i32 %slice, % define void @multi_vector_sub_single_vg1x4_s(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_sub_single_vg1x4_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; 
CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmls za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s ; CHECK-NEXT: fmls za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s ; CHECK-NEXT: ret @@ -156,11 +136,7 @@ define void @multi_vector_sub_single_vg1x4_s(i32 %slice, %z define void @multi_vector_sub_single_vg1x4_d(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_sub_single_vg1x4_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmls za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d ; CHECK-NEXT: fmls za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d ; CHECK-NEXT: ret @@ -182,11 +158,7 @@ define void @multi_vector_sub_single_vg1x4_d(i32 %slice, % define void @multi_vector_add_vg1x2_s(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_add_vg1x2_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmla za.s[w8, 0, vgx2], { z0.s, z1.s }, { z2.s, z3.s } ; CHECK-NEXT: fmla za.s[w8, 7, vgx2], { z0.s, z1.s }, { z2.s, z3.s } ; CHECK-NEXT: ret @@ -204,11 +176,7 @@ define void @multi_vector_add_vg1x2_s(i32 %slice, %zn0, %zn0, %zn1, ; CHECK-LABEL: multi_vector_add_vg1x2_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z1 killed 
$z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmla za.d[w8, 0, vgx2], { z0.d, z1.d }, { z2.d, z3.d } ; CHECK-NEXT: fmla za.d[w8, 7, vgx2], { z0.d, z1.d }, { z2.d, z3.d } ; CHECK-NEXT: ret @@ -244,15 +212,7 @@ define void @multi_vector_add_vg1x2_s_regclass(i32 %slice, define void @multi_vector_add_vg1x4_s(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_add_vg1x4_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmla za.s[w8, 0, vgx4], { z0.s - z3.s }, { z4.s - z7.s } ; CHECK-NEXT: fmla za.s[w8, 7, vgx4], { z0.s - z3.s }, { z4.s - z7.s } ; CHECK-NEXT: ret @@ -270,15 +230,7 @@ define void @multi_vector_add_vg1x4_s(i32 %slice, %zn0, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_add_vg1x4_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def 
$z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmla za.d[w8, 0, vgx4], { z0.d - z3.d }, { z4.d - z7.d } ; CHECK-NEXT: fmla za.d[w8, 7, vgx4], { z0.d - z3.d }, { z4.d - z7.d } ; CHECK-NEXT: ret @@ -320,11 +272,7 @@ define void @multi_vector_add_vg1x4_s_regclass(i32 %slice, define void @multi_vector_sub_vg1x2_s(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_sub_vg1x2_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmls za.s[w8, 0, vgx2], { z0.s, z1.s }, { z2.s, z3.s } ; CHECK-NEXT: fmls za.s[w8, 7, vgx2], { z0.s, z1.s }, { z2.s, z3.s } ; CHECK-NEXT: ret @@ -342,11 +290,7 @@ define void @multi_vector_sub_vg1x2_s(i32 %slice, %zn0, %zn0, %zn1, ; CHECK-LABEL: multi_vector_sub_vg1x2_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmls za.d[w8, 0, vgx2], { z0.d, z1.d }, { z2.d, z3.d } ; CHECK-NEXT: fmls za.d[w8, 7, vgx2], { z0.d, z1.d }, { z2.d, z3.d } ; CHECK-NEXT: ret @@ -364,15 +308,7 @@ define void @multi_vector_sub_vg1x2_d(i32 %slice, %zn0, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_sub_vg1x4_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: 
def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmls za.s[w8, 0, vgx4], { z0.s - z3.s }, { z4.s - z7.s } ; CHECK-NEXT: fmls za.s[w8, 7, vgx4], { z0.s - z3.s }, { z4.s - z7.s } ; CHECK-NEXT: ret @@ -390,15 +326,7 @@ define void @multi_vector_sub_vg1x4_s(i32 %slice, %zn0, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_sub_vg1x4_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmls za.d[w8, 0, vgx4], { z0.d - z3.d }, { z4.d - z7.d } ; CHECK-NEXT: fmls za.d[w8, 7, vgx4], { z0.d - z3.d }, { z4.d - z7.d } ; CHECK-NEXT: ret @@ -418,9 +346,7 @@ define void @multi_vector_sub_vg1x4_d(i32 %slice, %zn0, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_lane_vg1x2_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmla za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s[3] ; CHECK-NEXT: 
fmla za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s[3] ; CHECK-NEXT: ret @@ -437,9 +363,7 @@ define void @multi_vector_add_lane_vg1x2_s(i32 %slice, %zn0 define void @multi_vector_add_lane_vg1x2_d(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_lane_vg1x2_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmla za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d[1] ; CHECK-NEXT: fmla za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d[1] ; CHECK-NEXT: ret @@ -458,8 +382,8 @@ define void @multi_vector_add_lane_vg1x2_s_regclass(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_add_lane_vg1x4_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmla za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s[3] ; CHECK-NEXT: fmla za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s[3] ; CHECK-NEXT: ret @@ -495,11 +415,7 @@ define void @multi_vector_add_lane_vg1x4_s(i32 %slice, %zn0 define void @multi_vector_add_lane_vg1x4_d(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_add_lane_vg1x4_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmla za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d[1] ; CHECK-NEXT: fmla za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d[1] ; CHECK-NEXT: ret @@ -521,8 +437,8 @@ define void 
@multi_vector_add_lane_vg1x4_s_regclass(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_lane_vg1x2_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmls za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s[3] ; CHECK-NEXT: fmls za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s[3] ; CHECK-NEXT: ret @@ -559,9 +473,7 @@ define void @multi_vector_sub_lane_vg1x2_s(i32 %slice, %zn0 define void @multi_vector_sub_lane_vg1x2_d(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_lane_vg1x2_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmls za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d[1] ; CHECK-NEXT: fmls za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d[1] ; CHECK-NEXT: ret @@ -578,11 +490,7 @@ define void @multi_vector_sub_lane_vg1x2_d(i32 %slice, %zn define void @multi_vector_sub_lane_vg1x4_s(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_sub_lane_vg1x4_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmls za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s[3] ; CHECK-NEXT: fmls za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s[3] ; CHECK-NEXT: ret @@ -602,11 +510,7 @@ define void @multi_vector_sub_lane_vg1x4_s(i32 %slice, %zn0 define void @multi_vector_sub_lane_vg1x4_d(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_sub_lane_vg1x4_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov 
w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmls za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d[1] ; CHECK-NEXT: fmls za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d[1] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp-dots.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp-dots.ll index b4fd5a2272e7ea..cd8d22441eaa23 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp-dots.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp-dots.ll @@ -26,18 +26,18 @@ define void @fdot_multi_za32_f16_vg1x2(i32 %slice, %unused, < define void @fdot_multi_za32_f16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: fdot_multi_za32_f16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: fdot za.s[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h } -; CHECK-NEXT: fdot za.s[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fdot za.s[w8, 0, vgx4], { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: fdot za.s[w8, 7, vgx4], { z4.h - z7.h }, { z24.h - z27.h } ; CHECK-NEXT: ret %zn4, %zn5, %zn6, %zn7) #0 { call void @llvm.aarch64.sme.fdot.za32.vg1x4.nxv8f16(i32 %slice, %zn0, %zn1, %zn2, %zn3, @@ -71,18 +71,18 @@ define void @bfdot_multi_za32_bf16_vg1x2(i32 %slice, %unused, define void @fdot_multi_za32_bf16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, ; 
CHECK-LABEL: fdot_multi_za32_bf16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: bfdot za.s[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h } -; CHECK-NEXT: bfdot za.s[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: bfdot za.s[w8, 0, vgx4], { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: bfdot za.s[w8, 7, vgx4], { z4.h - z7.h }, { z24.h - z27.h } ; CHECK-NEXT: ret %zn4, %zn5, %zn6, %zn7) #0 { call void @llvm.aarch64.sme.fdot.za32.vg1x4.nxv8bf16(i32 %slice, %zn0, %zn1, %zn2, %zn3, @@ -99,9 +99,7 @@ define void @fdot_multi_za32_bf16_vg1x4(i32 %slice, %unused, define void @fdot_single_za32_f16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: fdot_single_za32_f16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: fdot za.s[w8, 0, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: fdot za.s[w8, 7, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: ret @@ -114,11 +112,7 @@ define void @fdot_single_za32_f16_vg1x2(i32 %slice, %unused, define void @fdot_single_za32_f16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, %zn4) #0 { ; CHECK-LABEL: fdot_single_za32_f16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def 
$z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: fdot za.s[w8, 0, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: fdot za.s[w8, 7, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: ret @@ -134,9 +128,7 @@ define void @fdot_single_za32_f16_vg1x4(i32 %slice, %unused, define void @bfdot_single_za32_bf16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: bfdot_single_za32_bf16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: bfdot za.s[w8, 0, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: bfdot za.s[w8, 7, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: ret @@ -149,11 +141,7 @@ define void @bfdot_single_za32_bf16_vg1x2(i32 %slice, %unused define void @bfdot_single_za32_bf16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, %zn4) #0 { ; CHECK-LABEL: bfdot_single_za32_bf16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: bfdot za.s[w8, 0, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: bfdot za.s[w8, 7, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: ret @@ -170,8 +158,8 @@ define void @fdot_lane_za32_f16_vg1x2(i32 %slice, %unused, %unused, %unused, ; CHECK-LABEL: bfdot_lane_za32_bf16_vg1x2: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: bfdot za.s[w8, 0, vgx2], { z4.h, z5.h }, z3.h[3] ; CHECK-NEXT: bfdot za.s[w8, 7, vgx2], { z4.h, z5.h }, z3.h[3] ; CHECK-NEXT: ret @@ -222,8 +210,8 @@ define void @bfdot_lane_za32_bf16_vg1x4(i32 %slice, %unused, ; CHECK-LABEL: 
bfdot_lane_za32_bf16_vg1x4: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z25.d, z2.d ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: bfdot za.s[w8, 0, vgx4], { z24.h - z27.h }, z5.h[3] diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll index 99de6f832a3c91..f144e33793fe80 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll @@ -10,9 +10,7 @@ define void @za_write_vg2_horiz_b(i32 %slice, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_horiz_b: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0h.b[w12, 0:1], { z0.b, z1.b } ; CHECK-NEXT: mov za0h.b[w12, 14:15], { z0.b, z1.b } ; CHECK-NEXT: ret @@ -25,9 +23,7 @@ define void @za_write_vg2_horiz_b(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_horiz_h: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0h.h[w12, 0:1], { z0.h, z1.h } ; CHECK-NEXT: mov za1h.h[w12, 6:7], { z0.h, z1.h } ; CHECK-NEXT: ret @@ -40,9 +36,7 @@ define void @za_write_vg2_horiz_h(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_horiz_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0h.h[w12, 0:1], { z0.h, z1.h } ; CHECK-NEXT: mov za1h.h[w12, 6:7], { z0.h, z1.h } ; CHECK-NEXT: ret @@ -55,9 +49,7 @@ define void @za_write_vg2_horiz_f16(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_horiz_bf16: ; CHECK: // %bb.0: -; 
CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0h.h[w12, 0:1], { z0.h, z1.h } ; CHECK-NEXT: mov za1h.h[w12, 6:7], { z0.h, z1.h } ; CHECK-NEXT: ret @@ -70,9 +62,7 @@ define void @za_write_vg2_horiz_bf16(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_horiz_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0h.s[w12, 0:1], { z0.s, z1.s } ; CHECK-NEXT: mov za3h.s[w12, 2:3], { z0.s, z1.s } ; CHECK-NEXT: ret @@ -85,9 +75,7 @@ define void @za_write_vg2_horiz_s(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_horiz_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0h.s[w12, 0:1], { z0.s, z1.s } ; CHECK-NEXT: mov za3h.s[w12, 2:3], { z0.s, z1.s } ; CHECK-NEXT: ret @@ -100,9 +88,7 @@ define void @za_write_vg2_horiz_f32(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_horiz_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0h.d[w12, 0:1], { z0.d, z1.d } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.write.hor.vg2.nxv2i64(i32 0, i32 %slice, %zn1, %zn2) @@ -112,9 +98,7 @@ define void @za_write_vg2_horiz_d(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_horiz_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0h.d[w12, 0:1], { z0.d, z1.d } ; CHECK-NEXT: ret call void 
@llvm.aarch64.sme.write.hor.vg2.nxv2f64(i32 0, i32 %slice, %zn1, %zn2) @@ -126,9 +110,7 @@ define void @za_write_vg2_horiz_f64(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_vert_b: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0v.b[w12, 0:1], { z0.b, z1.b } ; CHECK-NEXT: mov za0v.b[w12, 14:15], { z0.b, z1.b } ; CHECK-NEXT: ret @@ -141,9 +123,7 @@ define void @za_write_vg2_vert_b(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_vert_h: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0v.h[w12, 0:1], { z0.h, z1.h } ; CHECK-NEXT: mov za1v.h[w12, 6:7], { z0.h, z1.h } ; CHECK-NEXT: ret @@ -156,9 +136,7 @@ define void @za_write_vg2_vert_h(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_vert_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0v.h[w12, 0:1], { z0.h, z1.h } ; CHECK-NEXT: mov za1v.h[w12, 6:7], { z0.h, z1.h } ; CHECK-NEXT: ret @@ -171,9 +149,7 @@ define void @za_write_vg2_vert_f16(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_vert_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0v.h[w12, 0:1], { z0.h, z1.h } ; CHECK-NEXT: mov za1v.h[w12, 6:7], { z0.h, z1.h } ; CHECK-NEXT: ret @@ -186,9 +162,7 @@ define void @za_write_vg2_vert_bf16(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_vert_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov 
w12, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0v.s[w12, 0:1], { z0.s, z1.s } ; CHECK-NEXT: mov za3v.s[w12, 2:3], { z0.s, z1.s } ; CHECK-NEXT: ret @@ -201,9 +175,7 @@ define void @za_write_vg2_vert_s(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_vert_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0v.s[w12, 0:1], { z0.s, z1.s } ; CHECK-NEXT: mov za3v.s[w12, 2:3], { z0.s, z1.s } ; CHECK-NEXT: ret @@ -216,9 +188,7 @@ define void @za_write_vg2_vert_f32(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_vert_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0v.d[w12, 0:1], { z0.d, z1.d } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.write.ver.vg2.nxv2i64(i32 0, i32 %slice, %zn1, %zn2) @@ -228,9 +198,7 @@ define void @za_write_vg2_vert_d(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_vert_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0v.d[w12, 0:1], { z0.d, z1.d } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.write.ver.vg2.nxv2f64(i32 0, i32 %slice, %zn1, %zn2) @@ -246,11 +214,7 @@ define void @za_write_vg2_vert_f64(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_horiz_b: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed 
$z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0h.b[w12, 0:3], { z0.b - z3.b } ; CHECK-NEXT: mov za0h.b[w12, 12:15], { z0.b - z3.b } ; CHECK-NEXT: ret @@ -263,11 +227,7 @@ define void @za_write_vg4_horiz_b(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_horiz_h: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0h.h[w12, 0:3], { z0.h - z3.h } ; CHECK-NEXT: mov za1h.h[w12, 4:7], { z0.h - z3.h } ; CHECK-NEXT: ret @@ -280,11 +240,7 @@ define void @za_write_vg4_horiz_h(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_horiz_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0h.h[w12, 0:3], { z0.h - z3.h } ; CHECK-NEXT: mov za1h.h[w12, 4:7], { z0.h - z3.h } ; CHECK-NEXT: ret @@ -297,11 +253,7 @@ define void @za_write_vg4_horiz_f16(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_horiz_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0h.h[w12, 0:3], { z0.h - z3.h } ; CHECK-NEXT: mov za1h.h[w12, 4:7], { 
z0.h - z3.h } ; CHECK-NEXT: ret @@ -314,11 +266,7 @@ define void @za_write_vg4_horiz_bf16(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_horiz_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0h.s[w12, 0:3], { z0.s - z3.s } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.write.hor.vg4.nxv4i32(i32 0, i32 %slice, %zn1, %zn2, %zn3, %zn4) @@ -328,11 +276,7 @@ define void @za_write_vg4_horiz_s(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_horiz_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0h.s[w12, 0:3], { z0.s - z3.s } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.write.hor.vg4.nxv4f32(i32 0, i32 %slice, %zn1, %zn2, %zn3, %zn4) @@ -342,11 +286,7 @@ define void @za_write_vg4_horiz_f32(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_horiz_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0h.d[w12, 0:3], { z0.d - z3.d } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.write.hor.vg4.nxv2i64(i32 0, i32 
%slice, %zn1, %zn2, %zn3, %zn4) @@ -356,11 +296,7 @@ define void @za_write_vg4_horiz_d(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_horiz_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0h.d[w12, 0:3], { z0.d - z3.d } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.write.hor.vg4.nxv2f64(i32 0, i32 %slice, %zn1, %zn2, %zn3, %zn4) @@ -372,11 +308,7 @@ define void @za_write_vg4_horiz_f64(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_vert_b: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0v.b[w12, 0:3], { z0.b - z3.b } ; CHECK-NEXT: mov za0v.b[w12, 12:15], { z0.b - z3.b } ; CHECK-NEXT: ret @@ -389,11 +321,7 @@ define void @za_write_vg4_vert_b(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_vert_h: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0v.h[w12, 0:3], { z0.h - z3.h } ; CHECK-NEXT: mov za1v.h[w12, 4:7], { z0.h - z3.h } ; CHECK-NEXT: ret @@ -406,11 +334,7 @@ define void 
@za_write_vg4_vert_h(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_vert_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0v.h[w12, 0:3], { z0.h - z3.h } ; CHECK-NEXT: mov za1v.h[w12, 4:7], { z0.h - z3.h } ; CHECK-NEXT: ret @@ -423,11 +347,7 @@ define void @za_write_vg4_vert_f16(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_vert_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0v.h[w12, 0:3], { z0.h - z3.h } ; CHECK-NEXT: mov za1v.h[w12, 4:7], { z0.h - z3.h } ; CHECK-NEXT: ret @@ -440,11 +360,7 @@ define void @za_write_vg4_vert_bf16(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_vert_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0v.s[w12, 0:3], { z0.s - z3.s } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.write.ver.vg4.nxv4i32(i32 0, i32 %slice, %zn1, %zn2, %zn3, %zn4) @@ -454,11 +370,7 @@ define void @za_write_vg4_vert_s(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: 
za_write_vg4_vert_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0v.s[w12, 0:3], { z0.s - z3.s } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.write.ver.vg4.nxv4f32(i32 0, i32 %slice, %zn1, %zn2, %zn3, %zn4) @@ -468,11 +380,7 @@ define void @za_write_vg4_vert_f32(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_vert_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0v.d[w12, 0:3], { z0.d - z3.d } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.write.ver.vg4.nxv2i64(i32 0, i32 %slice, %zn1, %zn2, %zn3, %zn4) @@ -482,11 +390,7 @@ define void @za_write_vg4_vert_d(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_vert_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0v.d[w12, 0:3], { z0.d - z3.d } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.write.ver.vg4.nxv2f64(i32 0, i32 %slice, %zn1, %zn2, %zn3, %zn4) @@ -500,9 +404,7 @@ define void @za_write_vg4_vert_f64(i32 %slice, %zn1, %za1, %za2) { ; CHECK-LABEL: za_write_vg1x2_b: 
; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -515,9 +417,7 @@ define void @za_write_vg1x2_b(i32 %slice, %za1, %za1, %za2) { ; CHECK-LABEL: za_write_vg1x2_h: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -530,9 +430,7 @@ define void @za_write_vg1x2_h(i32 %slice, %za1, %za1, %za2) { ; CHECK-LABEL: za_write_vg1x2_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -545,9 +443,7 @@ define void @za_write_vg1x2_f16(i32 %slice, %za1, %za1, %za2) { ; CHECK-LABEL: za_write_vg1x2_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -560,9 +456,7 @@ define void @za_write_vg1x2_bf16(i32 %slice, %za1, %za1, %za2) { ; CHECK-LABEL: za_write_vg1x2_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -575,9 +469,7 @@ 
define void @za_write_vg1x2_s(i32 %slice, %za1, %za1, %za2) { ; CHECK-LABEL: za_write_vg1x2_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -590,9 +482,7 @@ define void @za_write_vg1x2_f32(i32 %slice, %za1, %za1, %za2) { ; CHECK-LABEL: za_write_vg1x2_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -605,9 +495,7 @@ define void @za_write_vg1x2_d(i32 %slice, %za1, %za1, %za2) { ; CHECK-LABEL: za_write_vg1x2_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -624,11 +512,7 @@ define void @za_write_vg1x2_f64(i32 %slice, %za1, %za1, %za2, %za3, %za4) { ; CHECK-LABEL: za_write_vg1x4_b: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret @@ -641,11 +525,7 @@ define void @za_write_vg1x4_b(i32 %slice, %za1, %za1, %za2, %za3, %za4) { ; CHECK-LABEL: za_write_vg1x4_h: ; CHECK: // 
%bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret @@ -658,11 +538,7 @@ define void @za_write_vg1x4_h(i32 %slice, %za1, %za1, %za2, %za3, %za4) { ; CHECK-LABEL: za_write_vg1x4_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret @@ -675,11 +551,7 @@ define void @za_write_vg1x4_f16(i32 %slice, %za1, %za1, %za2, %za3, %za4) { ; CHECK-LABEL: za_write_vg1x4_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret @@ -692,11 +564,7 @@ define void @za_write_vg1x4_bf16(i32 %slice, %za1, %za1, %za2, %za3, %za4) { ; CHECK-LABEL: za_write_vg1x4_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def 
$z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret @@ -709,11 +577,7 @@ define void @za_write_vg1x4_s(i32 %slice, %za1, %za1, %za2, %za3, %za4) { ; CHECK-LABEL: za_write_vg1x4_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret @@ -726,11 +590,7 @@ define void @za_write_vg1x4_f32(i32 %slice, %za1, %za1, %za2, %za3, %za4) { ; CHECK-LABEL: za_write_vg1x4_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret @@ -743,11 +603,7 @@ define void @za_write_vg1x4_d(i32 %slice, %za1, %za1, %za2, %za3, %za4) { ; CHECK-LABEL: za_write_vg1x4_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; 
CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-int-dots.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-int-dots.ll index e154a4df86efe1..3ce77cd8e03216 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-int-dots.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-int-dots.ll @@ -26,18 +26,18 @@ define void @udot_multi_za32_u16_vg1x2(i32 %slice, %unused, < define void @udot_multi_za32_u16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: udot_multi_za32_u16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h } -; CHECK-NEXT: udot za.s[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: udot za.s[w8, 7, vgx4], { z4.h - z7.h }, { z24.h - z27.h } ; CHECK-NEXT: ret %zn4, %zn5, %zn6, %zn7) #0 { call void @llvm.aarch64.sme.udot.za32.vg1x4.nxv8i16(i32 %slice, %zn0, %zn1, %zn2, %zn3, @@ -68,18 +68,18 @@ define void @udot_multi_za32_u8_vg1x2(i32 %slice, %unused, %unused, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: udot_multi_za32_u8_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov 
z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z28.b - z31.b }, { z24.b - z27.b } -; CHECK-NEXT: udot za.s[w8, 7, vgx4], { z28.b - z31.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: udot za.s[w8, 7, vgx4], { z4.b - z7.b }, { z24.b - z27.b } ; CHECK-NEXT: ret %zn4, %zn5, %zn6, %zn7) #0 { call void @llvm.aarch64.sme.udot.za32.vg1x4.nxv16i8(i32 %slice, %zn0, %zn1, %zn2, %zn3, @@ -110,18 +110,18 @@ define void @udot_multi_za64_u16_vg1x2(i32 %slice, %unused, < define void @udot_multi_za64_u16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: udot_multi_za64_u16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: udot za.d[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h } -; CHECK-NEXT: udot za.d[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: udot za.d[w8, 0, vgx4], { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: udot za.d[w8, 7, vgx4], { z4.h - z7.h }, { z24.h - z27.h } ; CHECK-NEXT: ret %zn4, %zn5, %zn6, %zn7) #1 { call void @llvm.aarch64.sme.udot.za64.vg1x4.nxv8i16(i32 %slice, %zn0, %zn1, %zn2, %zn3, @@ -152,18 +152,18 @@ define void @usdot_multi_za32_u8_vg1x2(i32 %slice, %unused, < define void 
@usdot_multi_za32_u8_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: usdot_multi_za32_u8_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z28.b - z31.b }, { z24.b - z27.b } -; CHECK-NEXT: usdot za.s[w8, 7, vgx4], { z28.b - z31.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: usdot za.s[w8, 7, vgx4], { z4.b - z7.b }, { z24.b - z27.b } ; CHECK-NEXT: ret %zn4, %zn5, %zn6, %zn7) #0 { call void @llvm.aarch64.sme.usdot.za32.vg1x4.nxv16i8(i32 %slice, %zn0, %zn1, %zn2, %zn3, @@ -197,18 +197,18 @@ define void @sdot_multi_za32_u16_vg1x2(i32 %slice, %unused, < define void @sdot_multi_za32_u16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: sdot_multi_za32_u16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h } -; CHECK-NEXT: sdot za.s[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z4.h - z7.h }, { z24.h - z27.h } +; 
CHECK-NEXT: sdot za.s[w8, 7, vgx4], { z4.h - z7.h }, { z24.h - z27.h } ; CHECK-NEXT: ret %zn4, %zn5, %zn6, %zn7) #0 { call void @llvm.aarch64.sme.sdot.za32.vg1x4.nxv8i16(i32 %slice, %zn0, %zn1, %zn2, %zn3, @@ -239,18 +239,18 @@ define void @sdot_multi_za32_u8_vg1x2(i32 %slice, %unused, %unused, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: sdot_multi_za32_u8_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z28.b - z31.b }, { z24.b - z27.b } -; CHECK-NEXT: sdot za.s[w8, 7, vgx4], { z28.b - z31.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: sdot za.s[w8, 7, vgx4], { z4.b - z7.b }, { z24.b - z27.b } ; CHECK-NEXT: ret %zn4, %zn5, %zn6, %zn7) #0 { call void @llvm.aarch64.sme.sdot.za32.vg1x4.nxv16i8(i32 %slice, %zn0, %zn1, %zn2, %zn3, @@ -281,18 +281,18 @@ define void @sdot_multi_za64_u16_vg1x2(i32 %slice, %unused, < define void @sdot_multi_za64_u16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: sdot_multi_za64_u16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: sdot za.d[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h } 
-; CHECK-NEXT: sdot za.d[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: sdot za.d[w8, 0, vgx4], { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: sdot za.d[w8, 7, vgx4], { z4.h - z7.h }, { z24.h - z27.h } ; CHECK-NEXT: ret %zn4, %zn5, %zn6, %zn7) #1 { call void @llvm.aarch64.sme.sdot.za64.vg1x4.nxv8i16(i32 %slice, %zn0, %zn1, %zn2, %zn3, @@ -309,9 +309,7 @@ define void @sdot_multi_za64_u16_vg1x4(i32 %slice, %unused, < define void @udot_single_za32_u16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: udot_single_za32_u16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: udot za.s[w8, 0, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: udot za.s[w8, 7, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: ret @@ -324,11 +322,7 @@ define void @udot_single_za32_u16_vg1x2(i32 %slice, %unused, define void @udot_single_za32_u16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, %zn4) #0 { ; CHECK-LABEL: udot_single_za32_u16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: udot za.s[w8, 7, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: ret @@ -341,9 +335,7 @@ define void @udot_single_za32_u16_vg1x4(i32 %slice, %unused, define void @udot_single_za32_u8_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: udot_single_za32_u8_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; 
CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: udot za.s[w8, 0, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: udot za.s[w8, 7, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: ret @@ -356,11 +348,7 @@ define void @udot_single_za32_u8_vg1x2(i32 %slice, %unused, < define void @udot_single_za32_u8_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, %zn4) #0 { ; CHECK-LABEL: udot_single_za32_u8_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: udot za.s[w8, 7, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: ret @@ -373,9 +361,7 @@ define void @udot_single_za32_u8_vg1x4(i32 %slice, %unused, < define void @udot_single_za64_u16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #1 { ; CHECK-LABEL: udot_single_za64_u16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: udot za.d[w8, 0, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: udot za.d[w8, 7, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: ret @@ -388,11 +374,7 @@ define void @udot_single_za64_u16_vg1x2(i32 %slice, %unused, define void @udot_single_za64_u16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, %zn4) #1 { ; CHECK-LABEL: udot_single_za64_u16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // 
kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: udot za.d[w8, 0, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: udot za.d[w8, 7, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: ret @@ -405,9 +387,7 @@ define void @udot_single_za64_u16_vg1x4(i32 %slice, %unused, define void @usdot_single_za32_u8_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: usdot_single_za32_u8_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: usdot za.s[w8, 0, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: usdot za.s[w8, 7, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: ret @@ -420,11 +400,7 @@ define void @usdot_single_za32_u8_vg1x2(i32 %slice, %unused, define void @usdot_single_za32_u8_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, %zn4) #0 { ; CHECK-LABEL: usdot_single_za32_u8_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: usdot za.s[w8, 7, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: ret @@ -440,9 +416,7 @@ define void @usdot_single_za32_u8_vg1x4(i32 %slice, %unused, define void @sdot_single_za32_u16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: sdot_single_za32_u16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: sdot za.s[w8, 0, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: sdot za.s[w8, 7, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: ret @@ -455,11 +429,7 @@ 
define void @sdot_single_za32_u16_vg1x2(i32 %slice, %unused, define void @sdot_single_za32_u16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, %zn4) #0 { ; CHECK-LABEL: sdot_single_za32_u16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: sdot za.s[w8, 7, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: ret @@ -472,9 +442,7 @@ define void @sdot_single_za32_u16_vg1x4(i32 %slice, %unused, define void @sdot_single_za32_u8_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: sdot_single_za32_u8_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: sdot za.s[w8, 0, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: sdot za.s[w8, 7, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: ret @@ -487,11 +455,7 @@ define void @sdot_single_za32_u8_vg1x2(i32 %slice, %unused, < define void @sdot_single_za32_u8_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, %zn4) #0 { ; CHECK-LABEL: sdot_single_za32_u8_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: sdot za.s[w8, 7, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: ret @@ -504,9 +468,7 @@ define void 
@sdot_single_za32_u8_vg1x4(i32 %slice, %unused, < define void @sdot_single_za64_u16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #1 { ; CHECK-LABEL: sdot_single_za64_u16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: sdot za.d[w8, 0, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: sdot za.d[w8, 7, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: ret @@ -519,11 +481,7 @@ define void @sdot_single_za64_u16_vg1x2(i32 %slice, %unused, define void @sdot_single_za64_u16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, %zn4) #1 { ; CHECK-LABEL: sdot_single_za64_u16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: sdot za.d[w8, 0, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: sdot za.d[w8, 7, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: ret @@ -536,9 +494,7 @@ define void @sdot_single_za64_u16_vg1x4(i32 %slice, %unused, define void @sudot_single_za32_u8_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: sudot_single_za32_u8_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: sudot za.s[w8, 0, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: sudot za.s[w8, 7, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: ret @@ -551,11 +507,7 @@ define void @sudot_single_za32_u8_vg1x2(i32 %slice, %unused, define void @sudot_single_za32_u8_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, %zn4) #0 { ; CHECK-LABEL: sudot_single_za32_u8_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: // 
kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: sudot za.s[w8, 0, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: sudot za.s[w8, 7, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: ret @@ -571,8 +523,8 @@ define void @udot_lane_za32_u16_vg1x2(i32 %slice, %unused, %unused, %zn0, %zn1, %zn2, %zn3, %zn4) #0 { ; CHECK-LABEL: udot_lane_za32_u16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z0.h - z3.h }, z4.h[3] ; CHECK-NEXT: udot za.s[w8, 7, vgx4], { z0.h - z3.h }, z4.h[3] ; CHECK-NEXT: ret @@ -605,8 +553,8 @@ define void @udot_lane_za32_u8_vg1x2(i32 %slice, %unused, %unused, %unused, , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) + %2 = extractvalue { , } %1, 0 + %3 = extractvalue { , } %1, 1 + %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride + %4 = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) + %5 = extractvalue { , } %4, 0 + %6 = extractvalue { , } %4, 1 + tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x2.nxv16i8(i32 0, %2, %5, undef, i32 0) + tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x2.nxv16i8(i32 0, %3, %6, undef, i32 0) + ret void +} + +define void @udot_form_4x_tuple(ptr %ptr, i64 %stride) #0 { +; CHECK-LABEL: udot_form_4x_tuple: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str d14, [sp, #-48]! 
// 8-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: ptrue pn8.b +; CHECK-NEXT: lsl x9, x1, #1 +; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: mov w8, wzr +; CHECK-NEXT: ld1b { z17.b, z21.b, z25.b, z29.b }, pn8/z, [x0] +; CHECK-NEXT: ld1b { z16.b, z20.b, z24.b, z28.b }, pn8/z, [x0, x1] +; CHECK-NEXT: ld1b { z2.b, z6.b, z10.b, z14.b }, pn8/z, [x0, x9] +; CHECK-NEXT: add x9, x9, x1 +; CHECK-NEXT: mov z0.d, z17.d +; CHECK-NEXT: mov z1.d, z16.d +; CHECK-NEXT: ld1b { z16.b - z19.b }, pn8/z, [x0, x9] +; CHECK-NEXT: mov z4.d, z21.d +; CHECK-NEXT: mov z5.d, z20.d +; CHECK-NEXT: mov z8.d, z25.d +; CHECK-NEXT: mov z9.d, z24.d +; CHECK-NEXT: mov z3.d, z16.d +; CHECK-NEXT: mov z7.d, z17.d +; CHECK-NEXT: mov z11.d, z18.d +; CHECK-NEXT: mov z16.d, z29.d +; CHECK-NEXT: mov z17.d, z28.d +; CHECK-NEXT: mov z18.d, z14.d +; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] +; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z4.b - z7.b }, z0.b[0] +; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z8.b - z11.b }, z0.b[0] +; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0] +; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr d14, [sp], #48 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() + %1 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) + %2 = extractvalue { , , , } %1, 0 + %3 = extractvalue { , , , } %1, 1 + %4 = extractvalue { , , , } %1, 2 + %5 = extractvalue { , , , } %1, 3 + %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride + %6 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) + %7 = extractvalue { , , , } %6, 0 + %8 = extractvalue { , , , } %6, 1 + %9 = extractvalue { , , , } %6, 2 + %10 = extractvalue { , , , } %6, 3 + 
%mul3 = shl i64 %stride, 1 + %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3 + %11 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx4) + %12 = extractvalue { , , , } %11, 0 + %13 = extractvalue { , , , } %11, 1 + %14 = extractvalue { , , , } %11, 2 + %15 = extractvalue { , , , } %11, 3 + %mul5 = mul i64 %stride, 3 + %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5 + %16 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx6) + %17 = extractvalue { , , , } %16, 0 + %18 = extractvalue { , , , } %16, 1 + %19 = extractvalue { , , , } %16, 2 + %20 = extractvalue { , , , } %16, 3 + tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x4.nxv16i8(i32 0, %2, %7, %12, %17, undef, i32 0) + tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x4.nxv16i8(i32 0, %3, %8, %13, %18, undef, i32 0) + tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x4.nxv16i8(i32 0, %4, %9, %14, %19, undef, i32 0) + tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x4.nxv16i8(i32 0, %5, %10, %15, %20, undef, i32 0) + ret void +} + define void @udot_lane_za64_u16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #1 { ; CHECK-LABEL: udot_lane_za64_u16_vg1x2: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: udot za.d[w8, 0, vgx2], { z4.h, z5.h }, z3.h[1] ; CHECK-NEXT: udot za.d[w8, 7, vgx2], { z4.h, z5.h }, z3.h[1] ; CHECK-NEXT: ret @@ -654,8 +697,8 @@ define void @udot_lane_za64_u16_vg1x4(i32 %slice, %unused, %unused, %unused, %unused, , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) + %2 = extractvalue { , } %1, 0 + %3 = extractvalue { , } %1, 1 + %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride + %4 = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) + %5 = extractvalue { , } %4, 0 + %6 = extractvalue { 
, } %4, 1 + tail call void @llvm.aarch64.sme.usdot.lane.za32.vg1x2.nxv16i8(i32 0, %2, %5, undef, i32 0) + tail call void @llvm.aarch64.sme.usdot.lane.za32.vg1x2.nxv16i8(i32 0, %3, %6, undef, i32 0) + ret void +} + +define void @usdot_form_4x_tuple(ptr %ptr, i64 %stride) #0 { +; CHECK-LABEL: usdot_form_4x_tuple: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str d14, [sp, #-48]! // 8-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: ptrue pn8.b +; CHECK-NEXT: lsl x9, x1, #1 +; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: mov w8, wzr +; CHECK-NEXT: ld1b { z17.b, z21.b, z25.b, z29.b }, pn8/z, [x0] +; CHECK-NEXT: ld1b { z16.b, z20.b, z24.b, z28.b }, pn8/z, [x0, x1] +; CHECK-NEXT: ld1b { z2.b, z6.b, z10.b, z14.b }, pn8/z, [x0, x9] +; CHECK-NEXT: add x9, x9, x1 +; CHECK-NEXT: mov z0.d, z17.d +; CHECK-NEXT: mov z1.d, z16.d +; CHECK-NEXT: ld1b { z16.b - z19.b }, pn8/z, [x0, x9] +; CHECK-NEXT: mov z4.d, z21.d +; CHECK-NEXT: mov z5.d, z20.d +; CHECK-NEXT: mov z8.d, z25.d +; CHECK-NEXT: mov z9.d, z24.d +; CHECK-NEXT: mov z3.d, z16.d +; CHECK-NEXT: mov z7.d, z17.d +; CHECK-NEXT: mov z11.d, z18.d +; CHECK-NEXT: mov z16.d, z29.d +; CHECK-NEXT: mov z17.d, z28.d +; CHECK-NEXT: mov z18.d, z14.d +; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] +; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z4.b - z7.b }, z0.b[0] +; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z8.b - z11.b }, z0.b[0] +; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0] +; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr d14, [sp], #48 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() + %1 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) + %2 = extractvalue { , , , } %1, 0 + %3 = extractvalue { , , , } %1, 1 + %4 = 
extractvalue { , , , } %1, 2 + %5 = extractvalue { , , , } %1, 3 + %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride + %6 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) + %7 = extractvalue { , , , } %6, 0 + %8 = extractvalue { , , , } %6, 1 + %9 = extractvalue { , , , } %6, 2 + %10 = extractvalue { , , , } %6, 3 + %mul3 = shl i64 %stride, 1 + %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3 + %11 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx4) + %12 = extractvalue { , , , } %11, 0 + %13 = extractvalue { , , , } %11, 1 + %14 = extractvalue { , , , } %11, 2 + %15 = extractvalue { , , , } %11, 3 + %mul5 = mul i64 %stride, 3 + %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5 + %16 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx6) + %17 = extractvalue { , , , } %16, 0 + %18 = extractvalue { , , , } %16, 1 + %19 = extractvalue { , , , } %16, 2 + %20 = extractvalue { , , , } %16, 3 + tail call void @llvm.aarch64.sme.usdot.lane.za32.vg1x4.nxv16i8(i32 0, %2, %7, %12, %17, undef, i32 0) + tail call void @llvm.aarch64.sme.usdot.lane.za32.vg1x4.nxv16i8(i32 0, %3, %8, %13, %18, undef, i32 0) + tail call void @llvm.aarch64.sme.usdot.lane.za32.vg1x4.nxv16i8(i32 0, %4, %9, %14, %19, undef, i32 0) + tail call void @llvm.aarch64.sme.usdot.lane.za32.vg1x4.nxv16i8(i32 0, %5, %10, %15, %20, undef, i32 0) + ret void +} ; == Multi, indexed (signed) == @@ -710,8 +847,8 @@ define void @sdot_lane_za32_u16_vg1x2(i32 %slice, %unused, %unused, %unused, %unused, %unused, , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) + %2 = extractvalue { , } %1, 0 + %3 = extractvalue { , } %1, 1 + %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride + %4 = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) + %5 = extractvalue 
{ , } %4, 0 + %6 = extractvalue { , } %4, 1 + tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x2.nxv16i8(i32 0, %2, %5, undef, i32 0) + tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x2.nxv16i8(i32 0, %3, %6, undef, i32 0) + ret void +} + +define void @sdot_form_4x_tuple(ptr %ptr, i64 %stride) #0 { +; CHECK-LABEL: sdot_form_4x_tuple: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str d14, [sp, #-48]! // 8-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: ptrue pn8.b +; CHECK-NEXT: lsl x9, x1, #1 +; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: mov w8, wzr +; CHECK-NEXT: ld1b { z17.b, z21.b, z25.b, z29.b }, pn8/z, [x0] +; CHECK-NEXT: ld1b { z16.b, z20.b, z24.b, z28.b }, pn8/z, [x0, x1] +; CHECK-NEXT: ld1b { z2.b, z6.b, z10.b, z14.b }, pn8/z, [x0, x9] +; CHECK-NEXT: add x9, x9, x1 +; CHECK-NEXT: mov z0.d, z17.d +; CHECK-NEXT: mov z1.d, z16.d +; CHECK-NEXT: ld1b { z16.b - z19.b }, pn8/z, [x0, x9] +; CHECK-NEXT: mov z4.d, z21.d +; CHECK-NEXT: mov z5.d, z20.d +; CHECK-NEXT: mov z8.d, z25.d +; CHECK-NEXT: mov z9.d, z24.d +; CHECK-NEXT: mov z3.d, z16.d +; CHECK-NEXT: mov z7.d, z17.d +; CHECK-NEXT: mov z11.d, z18.d +; CHECK-NEXT: mov z16.d, z29.d +; CHECK-NEXT: mov z17.d, z28.d +; CHECK-NEXT: mov z18.d, z14.d +; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] +; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z4.b - z7.b }, z0.b[0] +; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z8.b - z11.b }, z0.b[0] +; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0] +; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr d14, [sp], #48 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() + %1 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) + %2 = extractvalue { , , , } %1, 0 + %3 = extractvalue 
{ , , , } %1, 1 + %4 = extractvalue { , , , } %1, 2 + %5 = extractvalue { , , , } %1, 3 + %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride + %6 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) + %7 = extractvalue { , , , } %6, 0 + %8 = extractvalue { , , , } %6, 1 + %9 = extractvalue { , , , } %6, 2 + %10 = extractvalue { , , , } %6, 3 + %mul3 = shl i64 %stride, 1 + %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3 + %11 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx4) + %12 = extractvalue { , , , } %11, 0 + %13 = extractvalue { , , , } %11, 1 + %14 = extractvalue { , , , } %11, 2 + %15 = extractvalue { , , , } %11, 3 + %mul5 = mul i64 %stride, 3 + %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5 + %16 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx6) + %17 = extractvalue { , , , } %16, 0 + %18 = extractvalue { , , , } %16, 1 + %19 = extractvalue { , , , } %16, 2 + %20 = extractvalue { , , , } %16, 3 + tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x4.nxv16i8(i32 0, %2, %7, %12, %17, undef, i32 0) + tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x4.nxv16i8(i32 0, %3, %8, %13, %18, undef, i32 0) + tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x4.nxv16i8(i32 0, %4, %9, %14, %19, undef, i32 0) + tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x4.nxv16i8(i32 0, %5, %10, %15, %20, undef, i32 0) + ret void +} + define void @sdot_lane_za64_u16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #1 { ; CHECK-LABEL: sdot_lane_za64_u16_vg1x2: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: sdot za.d[w8, 0, vgx2], { z4.h, z5.h }, z3.h[1] ; CHECK-NEXT: sdot za.d[w8, 7, vgx2], { z4.h, z5.h }, z3.h[1] ; CHECK-NEXT: ret @@ -793,8 +1025,8 @@ define void @sdot_lane_za64_u16_vg1x4(i32 
%slice, %unused, %unused, %unused, %unused, , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) + %2 = extractvalue { , } %1, 0 + %3 = extractvalue { , } %1, 1 + %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride + %4 = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) + %5 = extractvalue { , } %4, 0 + %6 = extractvalue { , } %4, 1 + tail call void @llvm.aarch64.sme.sudot.lane.za32.vg1x2.nxv16i8(i32 0, %2, %5, undef, i32 0) + tail call void @llvm.aarch64.sme.sudot.lane.za32.vg1x2.nxv16i8(i32 0, %3, %6, undef, i32 0) + ret void +} + +define void @sudot_form_4x_tuple(ptr %ptr, i64 %stride) #0 { +; CHECK-LABEL: sudot_form_4x_tuple: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str d14, [sp, #-48]! // 8-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: ptrue pn8.b +; CHECK-NEXT: lsl x9, x1, #1 +; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: mov w8, wzr +; CHECK-NEXT: ld1b { z17.b, z21.b, z25.b, z29.b }, pn8/z, [x0] +; CHECK-NEXT: ld1b { z16.b, z20.b, z24.b, z28.b }, pn8/z, [x0, x1] +; CHECK-NEXT: ld1b { z2.b, z6.b, z10.b, z14.b }, pn8/z, [x0, x9] +; CHECK-NEXT: add x9, x9, x1 +; CHECK-NEXT: mov z0.d, z17.d +; CHECK-NEXT: mov z1.d, z16.d +; CHECK-NEXT: ld1b { z16.b - z19.b }, pn8/z, [x0, x9] +; CHECK-NEXT: mov z4.d, z21.d +; CHECK-NEXT: mov z5.d, z20.d +; CHECK-NEXT: mov z8.d, z25.d +; CHECK-NEXT: mov z9.d, z24.d +; CHECK-NEXT: mov z3.d, z16.d +; CHECK-NEXT: mov z7.d, z17.d +; CHECK-NEXT: mov z11.d, z18.d +; CHECK-NEXT: mov z16.d, z29.d +; CHECK-NEXT: mov z17.d, z28.d +; CHECK-NEXT: mov z18.d, z14.d +; CHECK-NEXT: sudot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] +; CHECK-NEXT: sudot za.s[w8, 0, vgx4], { z4.b - z7.b }, z0.b[0] +; CHECK-NEXT: sudot za.s[w8, 0, vgx4], { z8.b - z11.b }, z0.b[0] +; CHECK-NEXT: sudot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0] +; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded 
Reload +; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr d14, [sp], #48 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() + %1 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) + %2 = extractvalue { , , , } %1, 0 + %3 = extractvalue { , , , } %1, 1 + %4 = extractvalue { , , , } %1, 2 + %5 = extractvalue { , , , } %1, 3 + %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride + %6 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) + %7 = extractvalue { , , , } %6, 0 + %8 = extractvalue { , , , } %6, 1 + %9 = extractvalue { , , , } %6, 2 + %10 = extractvalue { , , , } %6, 3 + %mul3 = shl i64 %stride, 1 + %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3 + %11 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx4) + %12 = extractvalue { , , , } %11, 0 + %13 = extractvalue { , , , } %11, 1 + %14 = extractvalue { , , , } %11, 2 + %15 = extractvalue { , , , } %11, 3 + %mul5 = mul i64 %stride, 3 + %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5 + %16 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx6) + %17 = extractvalue { , , , } %16, 0 + %18 = extractvalue { , , , } %16, 1 + %19 = extractvalue { , , , } %16, 2 + %20 = extractvalue { , , , } %16, 3 + tail call void @llvm.aarch64.sme.sudot.lane.za32.vg1x4.nxv16i8(i32 0, %2, %7, %12, %17, undef, i32 0) + tail call void @llvm.aarch64.sme.sudot.lane.za32.vg1x4.nxv16i8(i32 0, %3, %8, %13, %18, undef, i32 0) + tail call void @llvm.aarch64.sme.sudot.lane.za32.vg1x4.nxv16i8(i32 0, %4, %9, %14, %19, undef, i32 0) + tail call void @llvm.aarch64.sme.sudot.lane.za32.vg1x4.nxv16i8(i32 0, %5, %10, %15, %20, undef, i32 0) + ret void +} -attributes #0 = { nounwind "target-features"="+sme2" } -attributes 
#1 = { nounwind "target-features"="+sme2,+sme-i16i64" } +attributes #0 = { nounwind "target-features"="+sme2" "aarch64_pstate_sm_enabled" } +attributes #1 = { nounwind "target-features"="+sme2,+sme-i16i64" "aarch64_pstate_sm_enabled" } ; == Multi, multi (unsigned) diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll index 79db677853cb58..3616e074d408e1 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll @@ -114,8 +114,6 @@ define { , } @multi_vec_max_single_x2_u64(< define { , } @multi_vec_max_single_x2_bf16( %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_max_single_x2_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfmax { z0.h, z1.h }, { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.fmax.single.x2.nxv8bf16( %zdn1, %zdn2, %zm) @@ -316,10 +314,6 @@ define { , , , , , , } @multi_vec_max_single_x4_bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { ; CHECK-LABEL: multi_vec_max_single_x4_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfmax { z0.h - z3.h }, { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmax.single.x4.nxv8bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) @@ -511,10 +505,6 @@ define { , } @multi_vec_max_multi_x2_u64(, } @multi_vec_max_x2_bf16( %zdn1, %zdn2, %zm1, %zm2) { ; CHECK-LABEL: multi_vec_max_x2_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; 
CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 ; CHECK-NEXT: bfmax { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.fmax.x2.nxv8bf16( %zdn1, %zdn2, %zm1, %zm2) @@ -573,20 +563,20 @@ define { , } @multi_vec_max_multi_x2_ define { , , , } @multi_vec_max_multi_x4_s8( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_max_multi_x4_s8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smax { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1b { z27.b }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: smax { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -598,20 +588,20 @@ define { , , , , , , } @multi_vec_max_multi_x4_s16( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_max_multi_x4_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smax { z24.h - z27.h }, { 
z24.h - z27.h }, { z28.h - z31.h } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: smax { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -623,20 +613,20 @@ define { , , , , , , } @multi_vec_max_multi_x4_s32( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_max_multi_x4_s32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smax { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: smax { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -648,20 +638,20 @@ define { , , , , , , } @multi_vec_max_multi_x4_s64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_max_multi_x4_s64: ; 
CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smax { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: smax { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -675,20 +665,20 @@ define { , , , , , , } @multi_vec_max_multi_x4_u8( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_max_multi_x4_u8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: umax { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1b { z27.b }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: umax { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b } +; 
CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -700,20 +690,20 @@ define { , , , , , , } @multi_vec_max_multi_x4_u16( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_max_multi_x4_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: umax { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: umax { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -725,20 +715,20 @@ define { , , , , , , } @multi_vec_max_multi_x4_u32( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_max_multi_x4_u32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: umax { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov 
z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: umax { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -750,20 +740,20 @@ define { , , , , , , } @multi_vec_max_multi_x4_u64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_max_multi_x4_u64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: umax { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: umax { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -777,14 +767,6 @@ define { , , , , , , } @multi_vec_max_x4_bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_max_x4_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z7 killed $z7 
killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 ; CHECK-NEXT: bfmax { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmax.x4.nxv8bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) @@ -796,20 +778,20 @@ define { , , , , , , } @multi_vec_max_multi_x4_f16( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_max_multi_x4_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmax { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fmax { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -821,20 +803,20 @@ define { , , , , , , } @multi_vec_max_multi_x4_f32( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; 
CHECK-LABEL: multi_vec_max_multi_x4_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmax { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fmax { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -846,20 +828,20 @@ define { , , , , , , } @multi_vec_max_multi_x4_f64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_max_multi_x4_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmax { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fmax { z4.d - 
z7.d }, { z4.d - z7.d }, { z24.d - z27.d } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -873,8 +855,6 @@ define { , , , , } @multi_vec_maxnm_single_x2_bf16( %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_maxnm_single_x2_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfmaxnm { z0.h, z1.h }, { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8bf16( %zdn1, %zdn2, %zm) @@ -927,10 +907,6 @@ define { , } @multi_vec_maxnm_single define { , , , } @multi_vec_maxnm_single_x4_bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { ; CHECK-LABEL: multi_vec_maxnm_single_x4_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfmaxnm { z0.h - z3.h }, { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv8bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) @@ -998,10 +974,6 @@ define { , , , , } @multi_vec_maxnm_x2_bf16( %zdn1, %zdn2, %zm1, %zm2) { ; CHECK-LABEL: multi_vec_maxnm_x2_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 ; CHECK-NEXT: bfmaxnm { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv8bf16( %zdn1, %zdn2, %zm1, %zm2) @@ -1060,14 
+1032,6 @@ define { , } @multi_vec_maxnm_x2_f64( define { , , , } @multi_vec_maxnm_x4_bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_maxnm_x4_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 ; CHECK-NEXT: bfmaxnm { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv8bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) @@ -1079,20 +1043,20 @@ define { , , , , , , } @multi_vec_maxnm_x4_f16( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_maxnm_x4_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmaxnm { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: 
fmaxnm { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv8f16( %zdn1, %zdn2, %zdn3, %zdn4, @@ -1103,20 +1067,20 @@ define { , , , , , , } @multi_vec_maxnm_x4_f32( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_maxnm_x4_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmaxnm { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fmaxnm { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv4f32( %zdn1, %zdn2, %zdn3, %zdn4, @@ -1127,20 +1091,20 @@ define { , , , , , , } @multi_vec_maxnm_x4_f64( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_maxnm_x4_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; 
CHECK-NEXT: fmaxnm { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fmaxnm { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv2f64( %zdn1, %zdn2, %zdn3, %zdn4, diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll index e5c36d42fb1351..58a0989f25d82c 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll @@ -114,8 +114,6 @@ define { , } @multi_vec_min_single_x2_u64(< define { , } @multi_vec_min_single_x2_bf16( %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_min_single_x2_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfmin { z0.h, z1.h }, { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.fmin.single.x2.nxv8bf16( %zdn1, %zdn2, %zm) @@ -316,10 +314,6 @@ define { , , , , , , } @multi_vec_min_single_x4_bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { ; CHECK-LABEL: multi_vec_min_single_x4_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; 
CHECK-NEXT: bfmin { z0.h - z3.h }, { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv8bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) @@ -511,10 +505,6 @@ define { , } @multi_vec_min_multi_x2_u64(, } @multi_vec_min_x2_bf16( %zdn1, %zdn2, %zm1, %zm2) { ; CHECK-LABEL: multi_vec_min_x2_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 ; CHECK-NEXT: bfmin { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.fmin.x2.nxv8bf16( %zdn1, %zdn2, %zm1, %zm2) @@ -573,20 +563,20 @@ define { , } @multi_vec_min_multi_x2_ define { , , , } @multi_vec_min_multi_x4_s8( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_min_multi_x4_s8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smin { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1b { z27.b }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: smin { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -598,20 
+588,20 @@ define { , , , , , , } @multi_vec_min_multi_x4_s16( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_min_multi_x4_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: smin { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -623,20 +613,20 @@ define { , , , , , , } @multi_vec_min_multi_x4_s32( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_min_multi_x4_s32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1w { z27.s }, 
p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: smin { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -648,20 +638,20 @@ define { , , , , , , } @multi_vec_min_multi_x4_s64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_min_multi_x4_s64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smin { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: smin { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -675,20 +665,20 @@ define { , , , , , , } @multi_vec_min_multi_x4_u8( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_min_multi_x4_u8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: umin { z24.b - z27.b }, { 
z24.b - z27.b }, { z28.b - z31.b } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1b { z27.b }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: umin { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -700,20 +690,20 @@ define { , , , , , , } @multi_vec_min_multi_x4_u16( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_min_multi_x4_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: umin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: umin { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -725,20 +715,20 @@ define { , , , , , , } @multi_vec_min_multi_x4_u32( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_min_multi_x4_u32: ; 
CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: umin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: umin { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -750,20 +740,20 @@ define { , , , , , , } @multi_vec_min_multi_x4_u64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_min_multi_x4_u64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: umin { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: umin { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d } 
+; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -778,14 +768,6 @@ define { , , , , , , } @multi_vec_min_x4_bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_min_x4_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 ; CHECK-NEXT: bfmin { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmin.x4.nxv8bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) @@ -797,20 +779,20 @@ define { , , , , , , } @multi_vec_min_multi_x4_f16( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_min_multi_x4_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: 
mov z6.d, z3.d +; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fmin { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -822,20 +804,20 @@ define { , , , , , , } @multi_vec_min_multi_x4_f32( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_min_multi_x4_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fmin { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -847,20 +829,20 @@ define { , , , , , , } @multi_vec_min_multi_x4_f64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_min_multi_x4_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, 
z1.d -; CHECK-NEXT: fmin { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fmin { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -874,8 +856,6 @@ define { , , , , } @multi_vec_minnm_single_x2_bf16( %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_minnm_single_x2_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfminnm { z0.h, z1.h }, { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv8bf16( %zdn1, %zdn2, %zm) @@ -928,10 +908,6 @@ define { , } @multi_vec_minnm_single define { , , , } @multi_vec_minnm_single_x4_bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { ; CHECK-LABEL: multi_vec_minnm_single_x4_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfminnm { z0.h - z3.h }, { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv8bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) @@ -999,10 +975,6 @@ define { , , , , } @multi_vec_minnm_x2_bf16( %zdn1, %zdn2, %zm1, %zm2) { ; CHECK-LABEL: 
multi_vec_minnm_x2_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 ; CHECK-NEXT: bfminnm { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.fminnm.x2.nxv8bf16( %zdn1, %zdn2, %zm1, %zm2) @@ -1061,14 +1033,6 @@ define { , } @multi_vec_minnm_x2_f64( define { , , , } @multi_vec_minnm_x4_bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_minnm_x4_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 ; CHECK-NEXT: bfminnm { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv8bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) @@ -1080,20 +1044,20 @@ define { , , , , , , } @multi_vec_minnm_x4_f16( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_minnm_x4_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] -; 
CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fminnm { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fminnm { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv8f16( %zdn1, %zdn2, %zdn3, %zdn4, @@ -1104,20 +1068,20 @@ define { , , , , , , } @multi_vec_minnm_x4_f32( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_minnm_x4_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fminnm { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fminnm { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv4f32( %zdn1, 
%zdn2, %zdn3, %zdn4, @@ -1128,20 +1092,20 @@ define { , , , , , , } @multi_vec_minnm_x4_f64( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_minnm_x4_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fminnm { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: fminnm { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv2f64( %zdn1, %zdn2, %zdn3, %zdn4, diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll index 346afc611eb756..e5e3da05edced8 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll @@ -38,9 +38,7 @@ define void @multi_vector_mul_add_single_long_vg4x1_s16(i32 %slice, %dummy, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x2_s8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: smlall za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: smlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, 
z3.b ; CHECK-NEXT: ret @@ -53,9 +51,7 @@ define void @multi_vector_mul_add_single_long_vg4x2_s8(i32 %slice, %dummy, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x2_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: smlall za.d[w8, 0:3, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: smlall za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: ret @@ -70,11 +66,7 @@ define void @multi_vector_mul_add_single_long_vg4x2_s16(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x4_s8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: smlall za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: smlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: ret @@ -87,11 +79,7 @@ define void @multi_vector_mul_add_single_long_vg4x4_s8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x4_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: smlall za.d[w8, 0:3, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: smlall za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: ret @@ -142,18 +130,18 @@ define void @multi_vector_mul_add_multi_long_vg4x2_s16(i32 %slice, 
%dummy, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) { ; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_s8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: smlall za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b } -; CHECK-NEXT: smlall za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: smlall za.s[w8, 0:3, vgx4], { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: smlall za.s[w8, 4:7, vgx4], { z4.b - z7.b }, { z24.b - z27.b } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.smla.za32.vg4x4.nxv16i8(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) %slice.4 = add i32 %slice, 4 @@ -164,18 +152,18 @@ define void @multi_vector_mul_add_multi_long_vg4x4_s8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) { ; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: smlall za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h } -; CHECK-NEXT: smlall za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: smlall za.d[w8, 0:3, 
vgx4], { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: smlall za.d[w8, 4:7, vgx4], { z4.h - z7.h }, { z24.h - z27.h } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.smla.za64.vg4x4.nxv8i16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) %slice.4 = add i32 %slice, 4 @@ -217,8 +205,8 @@ define void @multi_vector_mul_add_lane_long_vg4x2_s8(i32 %slice, %dummy, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x2_u8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: umlall za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: umlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: ret @@ -329,9 +315,7 @@ define void @multi_vector_mul_add_single_long_vg4x2_u8(i32 %slice, %dummy, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x2_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: umlall za.d[w8, 0:3, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: umlall za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: ret @@ -346,11 +330,7 @@ define void @multi_vector_mul_add_single_long_vg4x2_u16(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x4_u8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: umlall za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: umlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: ret @@ -363,11 +343,7 @@ define void 
@multi_vector_mul_add_single_long_vg4x4_u8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x4_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: umlall za.d[w8, 0:3, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: umlall za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: ret @@ -418,18 +394,18 @@ define void @multi_vector_mul_add_multi_long_vg4x2_u16(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) { ; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_u8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: umlall za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b } -; CHECK-NEXT: umlall za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: umlall za.s[w8, 0:3, vgx4], { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: umlall za.s[w8, 4:7, vgx4], { z4.b - z7.b }, { z24.b - z27.b } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.umla.za32.vg4x4.nxv16i8(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) %slice.4 = add i32 %slice, 4 @@ -440,18 +416,18 @@ define void @multi_vector_mul_add_multi_long_vg4x4_u8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) { ; 
CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: umlall za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h } -; CHECK-NEXT: umlall za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: umlall za.d[w8, 0:3, vgx4], { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: umlall za.d[w8, 4:7, vgx4], { z4.h - z7.h }, { z24.h - z27.h } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.umla.za64.vg4x4.nxv8i16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) %slice.4 = add i32 %slice, 4 @@ -493,8 +469,8 @@ define void @multi_vector_mul_add_lane_long_vg4x2_u8(i32 %slice, %dummy, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x2_s8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: smlsll za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: smlsll za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: ret @@ -605,9 +579,7 @@ define void @multi_vector_mul_sub_single_long_vg4x2_s8(i32 %slice, %dummy, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x2_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: smlsll za.d[w8, 0:3, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: smlsll za.d[w8, 4:7, vgx2], { z1.h, z2.h 
}, z3.h ; CHECK-NEXT: ret @@ -622,11 +594,7 @@ define void @multi_vector_mul_sub_single_long_vg4x2_s16(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x4_s8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: smlsll za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: smlsll za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: ret @@ -639,11 +607,7 @@ define void @multi_vector_mul_sub_single_long_vg4x4_s8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x4_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: smlsll za.d[w8, 0:3, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: smlsll za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: ret @@ -694,18 +658,18 @@ define void @multi_vector_mul_sub_multi_long_vg4x2_s16(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) { ; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_s8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1b { z27.b }, p0/z, 
[x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: smlsll za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b } -; CHECK-NEXT: smlsll za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: smlsll za.s[w8, 0:3, vgx4], { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: smlsll za.s[w8, 4:7, vgx4], { z4.b - z7.b }, { z24.b - z27.b } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.smls.za32.vg4x4.nxv16i8(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) %slice.4 = add i32 %slice, 4 @@ -716,18 +680,18 @@ define void @multi_vector_mul_sub_multi_long_vg4x4_s8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) { ; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: smlsll za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h } -; CHECK-NEXT: smlsll za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: smlsll za.d[w8, 0:3, vgx4], { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: smlsll za.d[w8, 4:7, vgx4], { z4.h - z7.h }, { z24.h - z27.h } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.smls.za64.vg4x4.nxv8i16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) %slice.4 = add i32 %slice, 4 @@ -769,8 +733,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x2_s8(i32 %slice, %dummy, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x2_u8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def 
$z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: umlsll za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: umlsll za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: ret @@ -881,9 +843,7 @@ define void @multi_vector_mul_sub_single_long_vg4x2_u8(i32 %slice, %dummy, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x2_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: umlsll za.d[w8, 0:3, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: umlsll za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: ret @@ -898,11 +858,7 @@ define void @multi_vector_mul_sub_single_long_vg4x2_u16(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x4_u8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: umlsll za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: umlsll za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: ret @@ -915,11 +871,7 @@ define void @multi_vector_mul_sub_single_long_vg4x4_u8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x4_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed 
$z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: umlsll za.d[w8, 0:3, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: umlsll za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: ret @@ -970,18 +922,18 @@ define void @multi_vector_mul_sub_multi_long_vg4x2_u16(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) { ; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_u8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: umlsll za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b } -; CHECK-NEXT: umlsll za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: umlsll za.s[w8, 0:3, vgx4], { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: umlsll za.s[w8, 4:7, vgx4], { z4.b - z7.b }, { z24.b - z27.b } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.umls.za32.vg4x4.nxv16i8(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) %slice.4 = add i32 %slice, 4 @@ -992,18 +944,18 @@ define void @multi_vector_mul_sub_multi_long_vg4x4_u8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) { ; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: umlsll 
za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h } -; CHECK-NEXT: umlsll za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: umlsll za.d[w8, 0:3, vgx4], { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: umlsll za.d[w8, 4:7, vgx4], { z4.h - z7.h }, { z24.h - z27.h } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.umls.za64.vg4x4.nxv8i16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) %slice.4 = add i32 %slice, 4 @@ -1045,8 +997,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x2_u8(i32 %slice, %dummy, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_mul_add_single_signed_long_vg4x2_s8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: sumlall za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: sumlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: ret @@ -1133,11 +1083,7 @@ define void @multi_vector_mul_add_single_signed_long_vg4x2_s8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_mul_add_single_signed_long_vg4x4_s8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: sumlall za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: sumlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: ret @@ -1168,8 +1114,8 @@ define void @multi_vector_mul_add_lane_signed_long_vg4x2_s8(i32 %slice, %dummy, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_mul_add_single_unsigned_long_vg4x2_s8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 
killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: usmlall za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: usmlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: ret @@ -1237,11 +1181,7 @@ define void @multi_vector_mul_add_single_unsigned_long_vg4x2_s8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_mul_add_single_unsigned_long_vg4x4_s8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: usmlall za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: usmlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: ret @@ -1275,18 +1215,18 @@ define void @multi_vector_mul_add_multi_unsigned_long_vg4x2_u8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) { ; CHECK-LABEL: multi_vector_mul_add_multi_unsigned_long_vg4x4_u8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1] -; CHECK-NEXT: mov z28.d, z1.d -; CHECK-NEXT: usmlall za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b } -; CHECK-NEXT: usmlall za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: usmlall za.s[w8, 0:3, vgx4], { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: usmlall za.s[w8, 4:7, 
vgx4], { z4.b - z7.b }, { z24.b - z27.b } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.usmla.za32.vg4x4.nxv16i8(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) %slice.4 = add i32 %slice, 4 @@ -1315,8 +1255,8 @@ define void @multi_vector_mul_add_lane_unsigned_long_vg4x2_s8(i32 %slice, %z define void @multi_vector_add_single_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_single_vg2x2_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: bfmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret @@ -135,9 +133,7 @@ define void @multi_vector_add_single_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_single_vg2x2_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: fmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret @@ -150,9 +146,7 @@ define void @multi_vector_add_single_vg2x2_f16(i32 %slice, % define void @multi_vector_add_single_vg2x2_s16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_single_vg2x2_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: smlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: smlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret @@ -165,9 +159,7 @@ define void @multi_vector_add_single_vg2x2_s16(i32 %slice, %z define void @multi_vector_add_single_vg2x2_u16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_single_vg2x2_u16: ; CHECK: // %bb.0: -; 
CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: umlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: umlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret @@ -184,9 +176,7 @@ define void @multi_vector_add_single_vg2x2_u16(i32 %slice, %z define void @multi_vector_sub_single_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_single_vg2x2_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: bfmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret @@ -199,9 +189,7 @@ define void @multi_vector_sub_single_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_single_vg2x2_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: fmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret @@ -214,9 +202,7 @@ define void @multi_vector_sub_single_vg2x2_f16(i32 %slice, % define void @multi_vector_sub_single_vg2x2_s16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_single_vg2x2_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: smlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: smlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret @@ -229,9 +215,7 @@ define void @multi_vector_sub_single_vg2x2_s16(i32 %slice, %z define void @multi_vector_sub_single_vg2x2_u16(i32 %slice, %zn0, %zn1, %zm) { ; 
CHECK-LABEL: multi_vector_sub_single_vg2x2_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: umlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: umlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret @@ -248,11 +232,7 @@ define void @multi_vector_sub_single_vg2x2_u16(i32 %slice, %z define void @multi_vector_add_single_vg2x4_bf16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_add_single_vg2x4_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: bfmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret @@ -269,11 +249,8 @@ define void @multi_vector_add_single_vg2x4_bf16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_add_single_vg2x4_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: fmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: fmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret @@ -290,11 +267,7 @@ define void @multi_vector_add_single_vg2x4_f16(i32 %slice, % define void @multi_vector_add_single_vg2x4_s16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_add_single_vg2x4_s16: ; CHECK: // %bb.0: -; 
CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: smlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: smlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret @@ -311,11 +284,7 @@ define void @multi_vector_add_single_vg2x4_s16(i32 %slice, %z define void @multi_vector_add_single_vg2x4_u16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_add_single_vg2x4_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: umlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: umlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret @@ -336,11 +305,7 @@ define void @multi_vector_add_single_vg2x4_u16(i32 %slice, %z define void @multi_vector_sub_single_vg2x4_bf16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_sub_single_vg2x4_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: bfmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret @@ -357,11 +322,7 @@ define void 
@multi_vector_sub_single_vg2x4_bf16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_sub_single_vg2x4_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: fmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret @@ -378,11 +339,7 @@ define void @multi_vector_sub_single_vg2x4_f16(i32 %slice, % define void @multi_vector_sub_single_vg2x4_s16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_sub_single_vg2x4_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: smlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: smlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret @@ -399,11 +356,7 @@ define void @multi_vector_sub_single_vg2x4_s16(i32 %slice, %z define void @multi_vector_sub_single_vg2x4_u16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_sub_single_vg2x4_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: umlsl za.s[w8, 0:1, vgx4], { 
z0.h - z3.h }, z4.h ; CHECK-NEXT: umlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret @@ -424,11 +377,7 @@ define void @multi_vector_sub_single_vg2x4_u16(i32 %slice, %z define void @multi_vector_add_multi_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm0, %zm1) { ; CHECK-LABEL: multi_vector_add_multi_vg2x2_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: bfmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret @@ -443,11 +392,7 @@ define void @multi_vector_add_multi_vg2x2_bf16(i32 %slice, define void @multi_vector_add_multi_vg2x2_f16(i32 %slice, %zn0, %zn1, %zm0, %zm1) { ; CHECK-LABEL: multi_vector_add_multi_vg2x2_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: fmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret @@ -462,11 +407,7 @@ define void @multi_vector_add_multi_vg2x2_f16(i32 %slice, %z define void @multi_vector_add_multi_vg2x2_s16(i32 %slice, %zn0, %zn1, %zm0, %zm1) { ; CHECK-LABEL: multi_vector_add_multi_vg2x2_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 
killed $z0_z1 def $z0_z1 ; CHECK-NEXT: smlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: smlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret @@ -481,11 +422,7 @@ define void @multi_vector_add_multi_vg2x2_s16(i32 %slice, %zn define void @multi_vector_add_multi_vg2x2_u16(i32 %slice, %zn0, %zn1, %zm0, %zm1) { ; CHECK-LABEL: multi_vector_add_multi_vg2x2_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: umlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: umlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret @@ -504,11 +441,7 @@ define void @multi_vector_add_multi_vg2x2_u16(i32 %slice, %zn define void @multi_vector_sub_multi_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm0, %zm1) { ; CHECK-LABEL: multi_vector_sub_multi_vg2x2_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: bfmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret @@ -523,11 +456,7 @@ define void @multi_vector_sub_multi_vg2x2_bf16(i32 %slice, define void @multi_vector_sub_multi_vg2x2_f16(i32 %slice, %zn0, %zn1, %zm0, %zm1) { ; CHECK-LABEL: multi_vector_sub_multi_vg2x2_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def 
$z2 killed $z2 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: fmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret @@ -542,11 +471,7 @@ define void @multi_vector_sub_multi_vg2x2_f16(i32 %slice, %z define void @multi_vector_sub_multi_vg2x2_s16(i32 %slice, %zn0, %zn1, %zm0, %zm1) { ; CHECK-LABEL: multi_vector_sub_multi_vg2x2_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: smlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: smlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret @@ -561,11 +486,7 @@ define void @multi_vector_sub_multi_vg2x2_s16(i32 %slice, %zn define void @multi_vector_sub_multi_vg2x2_u16(i32 %slice, %zn0, %zn1, %zm0, %zm1) { ; CHECK-LABEL: multi_vector_sub_multi_vg2x2_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: umlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: umlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret @@ -584,15 +505,7 @@ define void @multi_vector_sub_multi_vg2x2_u16(i32 %slice, %zn define void @multi_vector_add_multi_vg2x4_bf16(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_add_multi_vg2x4_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def 
$z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: bfmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret @@ -610,15 +523,7 @@ define void @multi_vector_add_multi_vg2x4_bf16(i32 %slice, define void @multi_vector_add_multi_vg2x4_f16(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_add_multi_vg2x4_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: fmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret @@ -636,15 +541,7 @@ define void @multi_vector_add_multi_vg2x4_f16(i32 %slice, %z define void @multi_vector_add_multi_vg2x4_s16(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_add_multi_vg2x4_s16: 
; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: smlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: smlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret @@ -662,15 +559,7 @@ define void @multi_vector_add_multi_vg2x4_s16(i32 %slice, %zn define void @multi_vector_add_multi_vg2x4_u16(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_add_multi_vg2x4_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: umlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: umlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret @@ -692,15 +581,7 @@ define void @multi_vector_add_multi_vg2x4_u16(i32 %slice, %zn 
define void @multi_vector_sub_multi_vg2x4_bf16(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_sub_multi_vg2x4_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: bfmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret @@ -718,15 +599,7 @@ define void @multi_vector_sub_multi_vg2x4_bf16(i32 %slice, define void @multi_vector_sub_multi_vg2x4_f16(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_sub_multi_vg2x4_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: fmlsl za.s[w8, 6:7, vgx4], { 
z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret @@ -744,15 +617,7 @@ define void @multi_vector_sub_multi_vg2x4_f16(i32 %slice, %z define void @multi_vector_sub_multi_vg2x4_s16(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_sub_multi_vg2x4_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: smlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: smlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret @@ -770,15 +635,7 @@ define void @multi_vector_sub_multi_vg2x4_s16(i32 %slice, %zn define void @multi_vector_sub_multi_vg2x4_u16(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_sub_multi_vg2x4_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def 
$z0_z1_z2_z3 ; CHECK-NEXT: umlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: umlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret @@ -912,9 +769,7 @@ define void @multi_vector_sub_lane_vg2x1_u16(i32 %slice, %zn, define void @multi_vector_add_lane_vg2x2_f16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_lane_vg2x2_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0] ; CHECK-NEXT: fmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7] ; CHECK-NEXT: ret @@ -929,9 +784,7 @@ define void @multi_vector_add_lane_vg2x2_f16(i32 %slice, %zn define void @multi_vector_add_lane_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_lane_vg2x2_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0] ; CHECK-NEXT: bfmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7] ; CHECK-NEXT: ret @@ -946,9 +799,7 @@ define void @multi_vector_add_lane_vg2x2_bf16(i32 %slice, define void @multi_vector_add_lane_vg2x2_s16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_lane_vg2x2_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: smlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0] ; CHECK-NEXT: smlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7] ; CHECK-NEXT: ret @@ -963,9 +814,7 @@ define void @multi_vector_add_lane_vg2x2_s16(i32 %slice, %zn0 define void @multi_vector_add_lane_vg2x2_u16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_lane_vg2x2_u16: ; CHECK: // 
%bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: umlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0] ; CHECK-NEXT: umlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7] ; CHECK-NEXT: ret @@ -984,9 +833,7 @@ define void @multi_vector_add_lane_vg2x2_u16(i32 %slice, %zn0 define void @multi_vector_sub_lane_vg2x2_f16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_lane_vg2x2_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0] ; CHECK-NEXT: fmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7] ; CHECK-NEXT: ret @@ -1001,9 +848,7 @@ define void @multi_vector_sub_lane_vg2x2_f16(i32 %slice, %zn define void @multi_vector_sub_lane_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_lane_vg2x2_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0] ; CHECK-NEXT: bfmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7] ; CHECK-NEXT: ret @@ -1018,9 +863,7 @@ define void @multi_vector_sub_lane_vg2x2_bf16(i32 %slice, define void @multi_vector_sub_lane_vg2x2_s16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_lane_vg2x2_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: smlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0] ; CHECK-NEXT: smlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7] ; CHECK-NEXT: ret @@ -1035,9 +878,7 @@ define void @multi_vector_sub_lane_vg2x2_s16(i32 %slice, 
%zn0 define void @multi_vector_sub_lane_vg2x2_u16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_lane_vg2x2_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: umlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0] ; CHECK-NEXT: umlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7] ; CHECK-NEXT: ret @@ -1056,11 +897,7 @@ define void @multi_vector_sub_lane_vg2x2_u16(i32 %slice, %zn0 define void @multi_vector_add_lane_vg2x4_f16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_add_lane_vg2x4_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0] ; CHECK-NEXT: fmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7] ; CHECK-NEXT: ret @@ -1077,11 +914,7 @@ define void @multi_vector_add_lane_vg2x4_f16(i32 %slice, %zn define void @multi_vector_add_lane_vg2x4_bf16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_add_lane_vg2x4_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0] ; CHECK-NEXT: bfmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7] ; CHECK-NEXT: ret @@ -1098,11 +931,7 @@ define void @multi_vector_add_lane_vg2x4_bf16(i32 
%slice, define void @multi_vector_add_lane_vg2x4_s16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_add_lane_vg2x4_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: smlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0] ; CHECK-NEXT: smlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7] ; CHECK-NEXT: ret @@ -1119,11 +948,7 @@ define void @multi_vector_add_lane_vg2x4_s16(i32 %slice, %zn0 define void @multi_vector_add_lane_vg2x4_u16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_add_lane_vg2x4_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: umlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0] ; CHECK-NEXT: umlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7] ; CHECK-NEXT: ret @@ -1144,11 +969,7 @@ define void @multi_vector_add_lane_vg2x4_u16(i32 %slice, %zn0 define void @multi_vector_sub_lane_vg2x4_f16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_sub_lane_vg2x4_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmlsl 
za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0] ; CHECK-NEXT: fmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7] ; CHECK-NEXT: ret @@ -1165,11 +986,7 @@ define void @multi_vector_sub_lane_vg2x4_f16(i32 %slice, %zn define void @multi_vector_sub_lane_vg2x4_bf16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_sub_lane_vg2x4_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0] ; CHECK-NEXT: bfmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7] ; CHECK-NEXT: ret @@ -1186,11 +1003,7 @@ define void @multi_vector_sub_lane_vg2x4_bf16(i32 %slice, define void @multi_vector_sub_lane_vg2x4_s16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_sub_lane_vg2x4_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: smlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0] ; CHECK-NEXT: smlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7] ; CHECK-NEXT: ret @@ -1207,11 +1020,7 @@ define void @multi_vector_sub_lane_vg2x4_s16(i32 %slice, %zn0 define void @multi_vector_sub_lane_vg2x4_u16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_sub_lane_vg2x4_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed 
$z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: umlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0] ; CHECK-NEXT: umlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll index 12a940ff03e29a..b95a774e899c89 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll @@ -324,20 +324,20 @@ define { , } @multi_vec_rounding_shl_x2_s64 define { , , , } @multi_vec_rounding_shl_x4_s8( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_rounding_shl_x4_s8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: srshl { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1b { z27.b }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: srshl { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.srshl.x4.nxv16i8( %zdn1, %zdn2, %zdn3, %zdn4, @@ -348,20 +348,20 @@ define { , , , , , , } @multi_vec_rounding_shl_x4_s16( %dummy, %zdn1, %zdn2, %zdn3, 
%zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_rounding_shl_x4_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: srshl { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: srshl { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.srshl.x4.nxv8i16( %zdn1, %zdn2, %zdn3, %zdn4, @@ -372,20 +372,20 @@ define { , , , , , , } @multi_vec_rounding_shl_x4_s32( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_rounding_shl_x4_s32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: srshl { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1w { 
z27.s }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: srshl { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.srshl.x4.nxv4i32( %zdn1, %zdn2, %zdn3, %zdn4, @@ -396,20 +396,20 @@ define { , , , , , , } @multi_vec_rounding_shl_x4_s64( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_rounding_shl_x4_s64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: srshl { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: srshl { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.srshl.x4.nxv2i64( %zdn1, %zdn2, %zdn3, %zdn4, @@ -484,20 +484,20 @@ define { , } @multi_vec_rounding_uhl_x2_u64 define { , , , } @multi_vec_rounding_shl_x4_u8( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_rounding_shl_x4_u8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov 
z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: urshl { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1b { z27.b }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: urshl { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.urshl.x4.nxv16i8( %zdn1, %zdn2, %zdn3, %zdn4, @@ -508,20 +508,20 @@ define { , , , , , , } @multi_vec_rounding_shl_x4_u16( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_rounding_shl_x4_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: urshl { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: urshl { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov 
z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.urshl.x4.nxv8i16( %zdn1, %zdn2, %zdn3, %zdn4, @@ -532,20 +532,20 @@ define { , , , , , , } @multi_vec_rounding_shl_x4_u32( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_rounding_shl_x4_u32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: urshl { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: urshl { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.urshl.x4.nxv4i32( %zdn1, %zdn2, %zdn3, %zdn4, @@ -556,20 +556,20 @@ define { , , , , , , } @multi_vec_rounding_shl_x4_u64( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_rounding_shl_x4_u64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: urshl { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: 
mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: urshl { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.urshl.x4.nxv2i64( %zdn1, %zdn2, %zdn3, %zdn4, diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll index f41791e626f5f5..07a5f7993a1cba 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll @@ -8,7 +8,6 @@ define @test_tileslice_no_add(i32 %idx) #0 { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov { z0.d, z1.d }, za.d[w8, 0, vgx2] -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 ; CHECK-NEXT: ret entry: %read = call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 %idx) @@ -21,7 +20,6 @@ define @test_tileslice_add_nonconstant(i32 %idx1, i32 %idx2) ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: add w8, w0, w1 ; CHECK-NEXT: mov { z0.d, z1.d }, za.d[w8, 0, vgx2] -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 ; CHECK-NEXT: ret entry: %add = add i32 %idx1, %idx2 diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll index e71afe213d8a59..68ae92bc68f4ba 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll @@ -196,20 +196,20 @@ define { , } @multi_vec_sat_double_mulh_mul define { , , , } @multi_vec_sat_double_mulh_multi_x4_s8( %unused, 
%zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: sqdmulh { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1b { z27.b }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: sqdmulh { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -221,20 +221,20 @@ define { , , , , , , } @multi_vec_sat_double_mulh_multi_x4_s16( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: sqdmulh { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0] +; CHECK-NEXT: 
mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: sqdmulh { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -246,20 +246,20 @@ define { , , , , , , } @multi_vec_sat_double_mulh_multi_x4_s32( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: sqdmulh { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: sqdmulh { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -271,20 +271,20 @@ define { , , , , , , } @multi_vec_sat_double_mulh_multi_x4_s64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] -; CHECK-NEXT: mov z24.d, z1.d -; 
CHECK-NEXT: sqdmulh { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d +; CHECK-NEXT: mov z26.d, z7.d +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0] +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: sqdmulh { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d } +; CHECK-NEXT: mov z0.d, z4.d +; CHECK-NEXT: mov z1.d, z5.d +; CHECK-NEXT: mov z2.d, z6.d +; CHECK-NEXT: mov z3.d, z7.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll index da8c679d5a39a8..46409a0a80b780 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll @@ -8,9 +8,7 @@ define void @multi_vector_sub_write_single_za_vg1x2_i32(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_write_single_za_vg1x2_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sub za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s ; CHECK-NEXT: sub za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s ; CHECK-NEXT: ret @@ -27,9 +25,7 @@ define void @multi_vector_sub_write_single_za_vg1x2_i32(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_write_single_za_vg1x2_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sub za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d ; CHECK-NEXT: sub za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d ; CHECK-NEXT: ret @@ -50,11 +46,7 @@ define void 
@multi_vector_sub_write_single_za_vg1x2_i64(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_sub_write_single_za_vg1x4_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: sub za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s ; CHECK-NEXT: sub za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s ; CHECK-NEXT: ret @@ -75,11 +67,7 @@ define void @multi_vector_sub_write_single_za_vg1x4_i32(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_sub_write_za_vg1x2_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sub za.s[w8, 0, vgx2], { z0.s, z1.s }, { z2.s, z3.s } ; CHECK-NEXT: sub za.s[w8, 7, vgx2], { z0.s, z1.s }, { z2.s, z3.s } ; CHECK-NEXT: ret @@ -128,11 +112,7 @@ define void @multi_vector_sub_write_za_vg1x2_i32(i32 %slice, define void @multi_vector_sub_write_za_vg1x2_i64(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_sub_write_za_vg1x2_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sub za.d[w8, 0, vgx2], { z0.d, z1.d }, { z2.d, z3.d } ; CHECK-NEXT: sub za.d[w8, 7, vgx2], { z0.d, z1.d }, { z2.d, z3.d } ; CHECK-NEXT: ret @@ -155,15 +135,7 @@ define void @multi_vector_sub_write_za_vg1x2_i64(i32 
%slice, define void @multi_vector_sub_write_za_vg1x4_i32(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_sub_write_za_vg1x4_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: sub za.s[w8, 0, vgx4], { z0.s - z3.s }, { z4.s - z7.s } ; CHECK-NEXT: sub za.s[w8, 7, vgx4], { z0.s - z3.s }, { z4.s - z7.s } ; CHECK-NEXT: ret @@ -187,15 +159,7 @@ define void @multi_vector_sub_write_za_vg1x4_i32(i32 %slice, define void @multi_vector_sub_write_za_vg1x4_i64(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_sub_write_za_vg1x4_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: sub za.d[w8, 0, vgx4], { z0.d - z3.d }, { z4.d - z7.d } ; CHECK-NEXT: sub za.d[w8, 7, vgx4], { z0.d - z3.d }, { z4.d - 
z7.d } ; CHECK-NEXT: ret @@ -225,9 +189,7 @@ define void @multi_vector_sub_write_za_vg1x4_i64(i32 %slice, define void @multi_vector_sub_za_vg1x2_i32(i32 %slice, %zn0, %zn1) { ; CHECK-LABEL: multi_vector_sub_za_vg1x2_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sub za.s[w8, 0, vgx2], { z0.s, z1.s } ; CHECK-NEXT: sub za.s[w8, 7, vgx2], { z0.s, z1.s } ; CHECK-NEXT: ret @@ -240,9 +202,7 @@ define void @multi_vector_sub_za_vg1x2_i32(i32 %slice, %zn0, define void @multi_vector_sub_za_vg1x2_i64(i32 %slice, %zn0, %zn1) { ; CHECK-LABEL: multi_vector_sub_za_vg1x2_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sub za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: sub za.d[w8, 7, vgx2], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -255,9 +215,7 @@ define void @multi_vector_sub_za_vg1x2_i64(i32 %slice, %zn0, define void @multi_vector_sub_za_vg1x2_f32(i32 %slice, %zn0, %zn1) { ; CHECK-LABEL: multi_vector_sub_za_vg1x2_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fsub za.s[w8, 0, vgx2], { z0.s, z1.s } ; CHECK-NEXT: fsub za.s[w8, 7, vgx2], { z0.s, z1.s } ; CHECK-NEXT: ret @@ -272,9 +230,7 @@ define void @multi_vector_sub_za_vg1x2_f32(i32 %slice, %zn0 define void @multi_vector_sub_za_vg1x2_f64(i32 %slice, %zn0, %zn1) { ; CHECK-LABEL: multi_vector_sub_za_vg1x2_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fsub za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: fsub za.d[w8, 7, vgx2], { z0.d, z1.d 
} ; CHECK-NEXT: ret @@ -291,11 +247,7 @@ define void @multi_vector_sub_za_vg1x2_f64(i32 %slice, %zn define void @multi_vector_sub_za_vg1x4_i32(i32 %slice, ; CHECK-LABEL: multi_vector_sub_za_vg1x4_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: sub za.s[w8, 0, vgx4], { z0.s - z3.s } ; CHECK-NEXT: sub za.s[w8, 7, vgx4], { z0.s - z3.s } ; CHECK-NEXT: ret @@ -314,11 +266,7 @@ define void @multi_vector_sub_za_vg1x4_i32(i32 %slice, define void @multi_vector_sub_za_vg1x4_i64(i32 %slice, ; CHECK-LABEL: multi_vector_sub_za_vg1x4_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: sub za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: sub za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret @@ -337,11 +285,7 @@ define void @multi_vector_sub_za_vg1x4_i64(i32 %slice, define void @multi_vector_sub_za_vg1x4_f32(i32 %slice, ; CHECK-LABEL: multi_vector_sub_za_vg1x4_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fsub za.s[w8, 0, vgx4], { z0.s - z3.s } ; CHECK-NEXT: fsub za.s[w8, 7, vgx4], { z0.s - 
z3.s } ; CHECK-NEXT: ret @@ -360,11 +304,7 @@ define void @multi_vector_sub_za_vg1x4_f32(i32 %slice, define void @multi_vector_sub_za_vg1x4_f64(i32 %slice, ; CHECK-LABEL: multi_vector_sub_za_vg1x4_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fsub za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: fsub za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-vdot.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-vdot.ll index b698b60007eb9f..f552c9e604bdd5 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-vdot.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-vdot.ll @@ -1,15 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2,+sme-i16i64 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -verify-machineinstrs < %s | FileCheck %s +target triple="aarch64-linux-gnu" ; == FVDOT == -define void @test_fvdot_lane_za32_vg1x2_nxv8f16(i32 %slice, %zn1, %zn2, %zm) { +define void @test_fvdot_lane_za32_vg1x2_nxv8f16(i32 %slice, %zn1, %zn2, %zm) #0 { ; CHECK-LABEL: test_fvdot_lane_za32_vg1x2_nxv8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fvdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z2.h[3] ; CHECK-NEXT: fvdot za.s[w8, 7, vgx2], { z0.h, z1.h }, z2.h[3] ; CHECK-NEXT: ret @@ -22,12 +21,10 @@ define void @test_fvdot_lane_za32_vg1x2_nxv8f16(i32 %slice, ; == BFVDOT == -define void @test_fvdot_lane_za32_vg1x2_nxv8bf16(i32 %slice, %zn1, %zn2, %zm) { +define void 
@test_fvdot_lane_za32_vg1x2_nxv8bf16(i32 %slice, %zn1, %zn2, %zm) #0 { ; CHECK-LABEL: test_fvdot_lane_za32_vg1x2_nxv8bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfvdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z2.h[3] ; CHECK-NEXT: bfvdot za.s[w8, 7, vgx2], { z0.h, z1.h }, z2.h[3] ; CHECK-NEXT: ret @@ -40,12 +37,10 @@ define void @test_fvdot_lane_za32_vg1x2_nxv8bf16(i32 %slice, %zn1, %zn2, %zm) { +define void @test_svdot_lane_za32_vg1x2_nxv8i16(i32 %slice, %zn1, %zn2, %zm) #0 { ; CHECK-LABEL: test_svdot_lane_za32_vg1x2_nxv8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: svdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z2.h[3] ; CHECK-NEXT: svdot za.s[w8, 7, vgx2], { z0.h, z1.h }, z2.h[3] ; CHECK-NEXT: ret @@ -55,14 +50,10 @@ define void @test_svdot_lane_za32_vg1x2_nxv8i16(i32 %slice, % ret void } -define void @test_svdot_lane_za32_vg1x4_nxv16i8(i32 %slice, %zn1, %zn2, %zn3, %zn4, %zm) { +define void @test_svdot_lane_za32_vg1x4_nxv16i8(i32 %slice, %zn1, %zn2, %zn3, %zn4, %zm) #0 { ; CHECK-LABEL: test_svdot_lane_za32_vg1x4_nxv16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: svdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z4.b[3] ; CHECK-NEXT: svdot za.s[w8, 7, vgx4], { z0.b - z3.b }, z4.b[3] ; CHECK-NEXT: ret @@ -72,14 +63,10 @@ define void @test_svdot_lane_za32_vg1x4_nxv16i8(i32 %slice, % ret void } -define void @test_svdot_lane_za64_vg1x4_nxv8i16(i32 
%slice, %zn1, %zn2, %zn3, %zn4, %zm) { +define void @test_svdot_lane_za64_vg1x4_nxv8i16(i32 %slice, %zn1, %zn2, %zn3, %zn4, %zm) #1 { ; CHECK-LABEL: test_svdot_lane_za64_vg1x4_nxv8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: svdot za.d[w8, 0, vgx4], { z0.h - z3.h }, z4.h[1] ; CHECK-NEXT: svdot za.d[w8, 7, vgx4], { z0.h - z3.h }, z4.h[1] ; CHECK-NEXT: ret @@ -89,15 +76,108 @@ define void @test_svdot_lane_za64_vg1x4_nxv8i16(i32 %slice, % ret void } +define void @svdot_form_2x_tuple(ptr %ptr, i64 %stride) #0 { +; CHECK-LABEL: svdot_form_2x_tuple: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue pn8.b +; CHECK-NEXT: add x9, x0, x1 +; CHECK-NEXT: mov w8, wzr +; CHECK-NEXT: ld1h { z16.h, z24.h }, pn8/z, [x0] +; CHECK-NEXT: ld1h { z0.h, z1.h }, pn8/z, [x9] +; CHECK-NEXT: mov z2.d, z16.d +; CHECK-NEXT: mov z3.d, z0.d +; CHECK-NEXT: svdot za.s[w8, 0, vgx2], { z2.h, z3.h }, z0.h[0] +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: svdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0] +; CHECK-NEXT: ret +entry: + %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() + %1 = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") %0, ptr %ptr) + %2 = extractvalue { , } %1, 0 + %3 = extractvalue { , } %1, 1 + %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride + %4 = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") %0, ptr %arrayidx2) + %5 = extractvalue { , } %4, 0 + %6 = extractvalue { , } %4, 1 + tail call void @llvm.aarch64.sme.svdot.lane.za32.vg1x2.nxv8i16(i32 0, %2, %5, undef, i32 0) + tail call void @llvm.aarch64.sme.svdot.lane.za32.vg1x2.nxv8i16(i32 0, %3, %6, undef, i32 
0) + ret void +} + +define void @svdot_form_4x_tuple(ptr %ptr, i64 %stride) #0 { +; CHECK-LABEL: svdot_form_4x_tuple: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str d14, [sp, #-48]! // 8-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: ptrue pn8.b +; CHECK-NEXT: lsl x9, x1, #1 +; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: mov w8, wzr +; CHECK-NEXT: ld1b { z17.b, z21.b, z25.b, z29.b }, pn8/z, [x0] +; CHECK-NEXT: ld1b { z16.b, z20.b, z24.b, z28.b }, pn8/z, [x0, x1] +; CHECK-NEXT: ld1b { z2.b, z6.b, z10.b, z14.b }, pn8/z, [x0, x9] +; CHECK-NEXT: add x9, x9, x1 +; CHECK-NEXT: mov z0.d, z17.d +; CHECK-NEXT: mov z1.d, z16.d +; CHECK-NEXT: ld1b { z16.b - z19.b }, pn8/z, [x0, x9] +; CHECK-NEXT: mov z4.d, z21.d +; CHECK-NEXT: mov z5.d, z20.d +; CHECK-NEXT: mov z8.d, z25.d +; CHECK-NEXT: mov z9.d, z24.d +; CHECK-NEXT: mov z3.d, z16.d +; CHECK-NEXT: mov z7.d, z17.d +; CHECK-NEXT: mov z11.d, z18.d +; CHECK-NEXT: mov z16.d, z29.d +; CHECK-NEXT: mov z17.d, z28.d +; CHECK-NEXT: mov z18.d, z14.d +; CHECK-NEXT: svdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] +; CHECK-NEXT: svdot za.s[w8, 0, vgx4], { z4.b - z7.b }, z0.b[0] +; CHECK-NEXT: svdot za.s[w8, 0, vgx4], { z8.b - z11.b }, z0.b[0] +; CHECK-NEXT: svdot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0] +; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr d14, [sp], #48 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() + %1 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) + %2 = extractvalue { , , , } %1, 0 + %3 = extractvalue { , , , } %1, 1 + %4 = extractvalue { , , , } %1, 2 + %5 = extractvalue { , , , } %1, 3 + %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride + %6 = tail call { , , , } 
@llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) + %7 = extractvalue { , , , } %6, 0 + %8 = extractvalue { , , , } %6, 1 + %9 = extractvalue { , , , } %6, 2 + %10 = extractvalue { , , , } %6, 3 + %mul3 = shl i64 %stride, 1 + %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3 + %11 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx4) + %12 = extractvalue { , , , } %11, 0 + %13 = extractvalue { , , , } %11, 1 + %14 = extractvalue { , , , } %11, 2 + %15 = extractvalue { , , , } %11, 3 + %mul5 = mul i64 %stride, 3 + %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5 + %16 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx6) + %17 = extractvalue { , , , } %16, 0 + %18 = extractvalue { , , , } %16, 1 + %19 = extractvalue { , , , } %16, 2 + %20 = extractvalue { , , , } %16, 3 + tail call void @llvm.aarch64.sme.svdot.lane.za32.vg1x4.nxv16i8(i32 0, %2, %7, %12, %17, undef, i32 0) + tail call void @llvm.aarch64.sme.svdot.lane.za32.vg1x4.nxv16i8(i32 0, %3, %8, %13, %18, undef, i32 0) + tail call void @llvm.aarch64.sme.svdot.lane.za32.vg1x4.nxv16i8(i32 0, %4, %9, %14, %19, undef, i32 0) + tail call void @llvm.aarch64.sme.svdot.lane.za32.vg1x4.nxv16i8(i32 0, %5, %10, %15, %20, undef, i32 0) + ret void +} ; == UVDOT == -define void @test_uvdot_lane_za32_vg1x2_nxv8i16(i32 %slice, %zn1, %zn2, %zm) { +define void @test_uvdot_lane_za32_vg1x2_nxv8i16(i32 %slice, %zn1, %zn2, %zm) #0 { ; CHECK-LABEL: test_uvdot_lane_za32_vg1x2_nxv8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: uvdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z2.h[3] ; CHECK-NEXT: uvdot za.s[w8, 7, vgx2], { z0.h, z1.h }, z2.h[3] ; CHECK-NEXT: ret @@ -107,14 +187,10 @@ define void @test_uvdot_lane_za32_vg1x2_nxv8i16(i32 %slice, % 
ret void } -define void @test_uvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, %zn1, %zn2, %zn3, %zn4, %zm) { +define void @test_uvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, %zn1, %zn2, %zn3, %zn4, %zm) #0 { ; CHECK-LABEL: test_uvdot_lane_za32_vg1x4_nxv16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: uvdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z4.b[3] ; CHECK-NEXT: uvdot za.s[w8, 7, vgx4], { z0.b - z3.b }, z4.b[3] ; CHECK-NEXT: ret @@ -124,14 +200,10 @@ define void @test_uvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, % ret void } -define void @test_uvdot_lane_za64_vg1x4_nxv8i16(i32 %slice, %zn1, %zn2, %zn3, %zn4, %zm) { +define void @test_uvdot_lane_za64_vg1x4_nxv8i16(i32 %slice, %zn1, %zn2, %zn3, %zn4, %zm) #1 { ; CHECK-LABEL: test_uvdot_lane_za64_vg1x4_nxv8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: uvdot za.d[w8, 0, vgx4], { z0.h - z3.h }, z4.h[1] ; CHECK-NEXT: uvdot za.d[w8, 7, vgx4], { z0.h - z3.h }, z4.h[1] ; CHECK-NEXT: ret @@ -141,17 +213,108 @@ define void @test_uvdot_lane_za64_vg1x4_nxv8i16(i32 %slice, % ret void } +define void @uvdot_form_2x_tuple(ptr %ptr, i64 %stride) #0 { +; CHECK-LABEL: uvdot_form_2x_tuple: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue pn8.b +; CHECK-NEXT: add x9, x0, x1 +; CHECK-NEXT: mov w8, wzr +; CHECK-NEXT: ld1h { z16.h, z24.h }, pn8/z, [x0] +; CHECK-NEXT: ld1h { z0.h, z1.h }, 
pn8/z, [x9] +; CHECK-NEXT: mov z2.d, z16.d +; CHECK-NEXT: mov z3.d, z0.d +; CHECK-NEXT: uvdot za.s[w8, 0, vgx2], { z2.h, z3.h }, z0.h[0] +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: uvdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0] +; CHECK-NEXT: ret +entry: + %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() + %1 = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") %0, ptr %ptr) + %2 = extractvalue { , } %1, 0 + %3 = extractvalue { , } %1, 1 + %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride + %4 = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") %0, ptr %arrayidx2) + %5 = extractvalue { , } %4, 0 + %6 = extractvalue { , } %4, 1 + tail call void @llvm.aarch64.sme.uvdot.lane.za32.vg1x2.nxv8i16(i32 0, %2, %5, undef, i32 0) + tail call void @llvm.aarch64.sme.uvdot.lane.za32.vg1x2.nxv8i16(i32 0, %3, %6, undef, i32 0) + ret void +} + +define void @uvdot_form_4x_tuple(ptr %ptr, i64 %stride) #0 { +; CHECK-LABEL: uvdot_form_4x_tuple: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str d14, [sp, #-48]! 
// 8-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: ptrue pn8.b +; CHECK-NEXT: lsl x9, x1, #1 +; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: mov w8, wzr +; CHECK-NEXT: ld1b { z17.b, z21.b, z25.b, z29.b }, pn8/z, [x0] +; CHECK-NEXT: ld1b { z16.b, z20.b, z24.b, z28.b }, pn8/z, [x0, x1] +; CHECK-NEXT: ld1b { z2.b, z6.b, z10.b, z14.b }, pn8/z, [x0, x9] +; CHECK-NEXT: add x9, x9, x1 +; CHECK-NEXT: mov z0.d, z17.d +; CHECK-NEXT: mov z1.d, z16.d +; CHECK-NEXT: ld1b { z16.b - z19.b }, pn8/z, [x0, x9] +; CHECK-NEXT: mov z4.d, z21.d +; CHECK-NEXT: mov z5.d, z20.d +; CHECK-NEXT: mov z8.d, z25.d +; CHECK-NEXT: mov z9.d, z24.d +; CHECK-NEXT: mov z3.d, z16.d +; CHECK-NEXT: mov z7.d, z17.d +; CHECK-NEXT: mov z11.d, z18.d +; CHECK-NEXT: mov z16.d, z29.d +; CHECK-NEXT: mov z17.d, z28.d +; CHECK-NEXT: mov z18.d, z14.d +; CHECK-NEXT: uvdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] +; CHECK-NEXT: uvdot za.s[w8, 0, vgx4], { z4.b - z7.b }, z0.b[0] +; CHECK-NEXT: uvdot za.s[w8, 0, vgx4], { z8.b - z11.b }, z0.b[0] +; CHECK-NEXT: uvdot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0] +; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr d14, [sp], #48 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() + %1 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) + %2 = extractvalue { , , , } %1, 0 + %3 = extractvalue { , , , } %1, 1 + %4 = extractvalue { , , , } %1, 2 + %5 = extractvalue { , , , } %1, 3 + %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride + %6 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) + %7 = extractvalue { , , , } %6, 0 + %8 = extractvalue { , , , } %6, 1 + %9 = extractvalue { , , , } %6, 2 + %10 = extractvalue { , , , } %6, 
3 + %mul3 = shl i64 %stride, 1 + %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3 + %11 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx4) + %12 = extractvalue { , , , } %11, 0 + %13 = extractvalue { , , , } %11, 1 + %14 = extractvalue { , , , } %11, 2 + %15 = extractvalue { , , , } %11, 3 + %mul5 = mul i64 %stride, 3 + %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5 + %16 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx6) + %17 = extractvalue { , , , } %16, 0 + %18 = extractvalue { , , , } %16, 1 + %19 = extractvalue { , , , } %16, 2 + %20 = extractvalue { , , , } %16, 3 + tail call void @llvm.aarch64.sme.uvdot.lane.za32.vg1x4.nxv16i8(i32 0, %2, %7, %12, %17, undef, i32 0) + tail call void @llvm.aarch64.sme.uvdot.lane.za32.vg1x4.nxv16i8(i32 0, %3, %8, %13, %18, undef, i32 0) + tail call void @llvm.aarch64.sme.uvdot.lane.za32.vg1x4.nxv16i8(i32 0, %4, %9, %14, %19, undef, i32 0) + tail call void @llvm.aarch64.sme.uvdot.lane.za32.vg1x4.nxv16i8(i32 0, %5, %10, %15, %20, undef, i32 0) + ret void +} ; == SUVDOT == -define void @test_suvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, %zn1, %zn2, %zn3, %zn4, %zm) { +define void @test_suvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, %zn1, %zn2, %zn3, %zn4, %zm) #0 { ; CHECK-LABEL: test_suvdot_lane_za32_vg1x4_nxv16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: suvdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z4.b[3] ; CHECK-NEXT: suvdot za.s[w8, 7, vgx4], { z0.b - z3.b }, z4.b[3] ; CHECK-NEXT: ret @@ -161,17 +324,80 @@ define void @test_suvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, ret void 
} +define void @suvdot_form_4x_tuple(ptr %ptr, i64 %stride) #0 { +; CHECK-LABEL: suvdot_form_4x_tuple: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str d14, [sp, #-48]! // 8-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: ptrue pn8.b +; CHECK-NEXT: lsl x9, x1, #1 +; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: mov w8, wzr +; CHECK-NEXT: ld1b { z17.b, z21.b, z25.b, z29.b }, pn8/z, [x0] +; CHECK-NEXT: ld1b { z16.b, z20.b, z24.b, z28.b }, pn8/z, [x0, x1] +; CHECK-NEXT: ld1b { z2.b, z6.b, z10.b, z14.b }, pn8/z, [x0, x9] +; CHECK-NEXT: add x9, x9, x1 +; CHECK-NEXT: mov z0.d, z17.d +; CHECK-NEXT: mov z1.d, z16.d +; CHECK-NEXT: ld1b { z16.b - z19.b }, pn8/z, [x0, x9] +; CHECK-NEXT: mov z4.d, z21.d +; CHECK-NEXT: mov z5.d, z20.d +; CHECK-NEXT: mov z8.d, z25.d +; CHECK-NEXT: mov z9.d, z24.d +; CHECK-NEXT: mov z3.d, z16.d +; CHECK-NEXT: mov z7.d, z17.d +; CHECK-NEXT: mov z11.d, z18.d +; CHECK-NEXT: mov z16.d, z29.d +; CHECK-NEXT: mov z17.d, z28.d +; CHECK-NEXT: mov z18.d, z14.d +; CHECK-NEXT: suvdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] +; CHECK-NEXT: suvdot za.s[w8, 0, vgx4], { z4.b - z7.b }, z0.b[0] +; CHECK-NEXT: suvdot za.s[w8, 0, vgx4], { z8.b - z11.b }, z0.b[0] +; CHECK-NEXT: suvdot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0] +; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr d14, [sp], #48 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() + %1 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) + %2 = extractvalue { , , , } %1, 0 + %3 = extractvalue { , , , } %1, 1 + %4 = extractvalue { , , , } %1, 2 + %5 = extractvalue { , , , } %1, 3 + %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride + %6 = tail call { , , , } 
@llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) + %7 = extractvalue { , , , } %6, 0 + %8 = extractvalue { , , , } %6, 1 + %9 = extractvalue { , , , } %6, 2 + %10 = extractvalue { , , , } %6, 3 + %mul3 = shl i64 %stride, 1 + %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3 + %11 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx4) + %12 = extractvalue { , , , } %11, 0 + %13 = extractvalue { , , , } %11, 1 + %14 = extractvalue { , , , } %11, 2 + %15 = extractvalue { , , , } %11, 3 + %mul5 = mul i64 %stride, 3 + %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5 + %16 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx6) + %17 = extractvalue { , , , } %16, 0 + %18 = extractvalue { , , , } %16, 1 + %19 = extractvalue { , , , } %16, 2 + %20 = extractvalue { , , , } %16, 3 + tail call void @llvm.aarch64.sme.suvdot.lane.za32.vg1x4.nxv16i8(i32 0, %2, %7, %12, %17, undef, i32 0) + tail call void @llvm.aarch64.sme.suvdot.lane.za32.vg1x4.nxv16i8(i32 0, %3, %8, %13, %18, undef, i32 0) + tail call void @llvm.aarch64.sme.suvdot.lane.za32.vg1x4.nxv16i8(i32 0, %4, %9, %14, %19, undef, i32 0) + tail call void @llvm.aarch64.sme.suvdot.lane.za32.vg1x4.nxv16i8(i32 0, %5, %10, %15, %20, undef, i32 0) + ret void +} ; == USVDOT == -define void @test_usvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, %zn1, %zn2, %zn3, %zn4, %zm) { +define void @test_usvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, %zn1, %zn2, %zn3, %zn4, %zm) #0 { ; CHECK-LABEL: test_usvdot_lane_za32_vg1x4_nxv16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; 
CHECK-NEXT: usvdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z4.b[3] ; CHECK-NEXT: usvdot za.s[w8, 7, vgx4], { z0.b - z3.b }, z4.b[3] ; CHECK-NEXT: ret @@ -181,6 +407,76 @@ define void @test_usvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, ret void } +define void @usvdot_form_4x_tuple(ptr %ptr, i64 %stride) #0 { +; CHECK-LABEL: usvdot_form_4x_tuple: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str d14, [sp, #-48]! // 8-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: ptrue pn8.b +; CHECK-NEXT: lsl x9, x1, #1 +; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: mov w8, wzr +; CHECK-NEXT: ld1b { z17.b, z21.b, z25.b, z29.b }, pn8/z, [x0] +; CHECK-NEXT: ld1b { z16.b, z20.b, z24.b, z28.b }, pn8/z, [x0, x1] +; CHECK-NEXT: ld1b { z2.b, z6.b, z10.b, z14.b }, pn8/z, [x0, x9] +; CHECK-NEXT: add x9, x9, x1 +; CHECK-NEXT: mov z0.d, z17.d +; CHECK-NEXT: mov z1.d, z16.d +; CHECK-NEXT: ld1b { z16.b - z19.b }, pn8/z, [x0, x9] +; CHECK-NEXT: mov z4.d, z21.d +; CHECK-NEXT: mov z5.d, z20.d +; CHECK-NEXT: mov z8.d, z25.d +; CHECK-NEXT: mov z9.d, z24.d +; CHECK-NEXT: mov z3.d, z16.d +; CHECK-NEXT: mov z7.d, z17.d +; CHECK-NEXT: mov z11.d, z18.d +; CHECK-NEXT: mov z16.d, z29.d +; CHECK-NEXT: mov z17.d, z28.d +; CHECK-NEXT: mov z18.d, z14.d +; CHECK-NEXT: usvdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] +; CHECK-NEXT: usvdot za.s[w8, 0, vgx4], { z4.b - z7.b }, z0.b[0] +; CHECK-NEXT: usvdot za.s[w8, 0, vgx4], { z8.b - z11.b }, z0.b[0] +; CHECK-NEXT: usvdot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0] +; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr d14, [sp], #48 // 8-byte Folded Reload +; CHECK-NEXT: ret +entry: + %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() + %1 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) + %2 = extractvalue { , , , } %1, 0 + %3 = 
extractvalue { , , , } %1, 1 + %4 = extractvalue { , , , } %1, 2 + %5 = extractvalue { , , , } %1, 3 + %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride + %6 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) + %7 = extractvalue { , , , } %6, 0 + %8 = extractvalue { , , , } %6, 1 + %9 = extractvalue { , , , } %6, 2 + %10 = extractvalue { , , , } %6, 3 + %mul3 = shl i64 %stride, 1 + %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3 + %11 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx4) + %12 = extractvalue { , , , } %11, 0 + %13 = extractvalue { , , , } %11, 1 + %14 = extractvalue { , , , } %11, 2 + %15 = extractvalue { , , , } %11, 3 + %mul5 = mul i64 %stride, 3 + %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5 + %16 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx6) + %17 = extractvalue { , , , } %16, 0 + %18 = extractvalue { , , , } %16, 1 + %19 = extractvalue { , , , } %16, 2 + %20 = extractvalue { , , , } %16, 3 + tail call void @llvm.aarch64.sme.usvdot.lane.za32.vg1x4.nxv16i8(i32 0, %2, %7, %12, %17, undef, i32 0) + tail call void @llvm.aarch64.sme.usvdot.lane.za32.vg1x4.nxv16i8(i32 0, %3, %8, %13, %18, undef, i32 0) + tail call void @llvm.aarch64.sme.usvdot.lane.za32.vg1x4.nxv16i8(i32 0, %4, %9, %14, %19, undef, i32 0) + tail call void @llvm.aarch64.sme.usvdot.lane.za32.vg1x4.nxv16i8(i32 0, %5, %10, %15, %20, undef, i32 0) + ret void +} + +attributes #0 = { nounwind "target-features"="+sme2" "aarch64_pstate_sm_enabled" } +attributes #1 = { nounwind "target-features"="+sme2,+sme-i16i64" "aarch64_pstate_sm_enabled" } ; == FVDOT == declare void @llvm.aarch64.sme.fvdot.lane.za32.vg1x2.nxv8f16(i32, , , , i32) diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll index fb169491b0c909..6895d1854e87d8 
100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll @@ -13,7 +13,7 @@ define void @hang_when_merging_stores_after_legalisation(ptr %a, <2 x i32> %b) v ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: mov z0.s, s0 ; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: ext z1.b, z1.b, z1.b, #16 +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 ; CHECK-NEXT: st2 { v0.4s, v1.4s }, [x0] ; CHECK-NEXT: ret %splat = shufflevector <2 x i32> %b, <2 x i32> undef, <8 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll index 8882fc9290386d..0ecf1b1a988343 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll @@ -15,8 +15,6 @@ define void @st2b_i8_valid_imm( %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2b_i8_valid_imm: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, #2, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 2, i64 0 @@ -30,9 +28,7 @@ define void @st2b_i8_valid_imm( %v0, %v1, < define void @st2b_i8_invalid_imm_not_multiple_of_2( %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2b_i8_invalid_imm_not_multiple_of_2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: rdvl x8, #3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 3, i64 0 @@ -46,9 +42,7 @@ define void @st2b_i8_invalid_imm_not_multiple_of_2( %v0, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2b_i8_invalid_imm_out_of_lower_bound: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 
killed $z0_z1 def $z0_z1 ; CHECK-NEXT: rdvl x8, #-18 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 -18, i64 0 @@ -62,9 +56,7 @@ define void @st2b_i8_invalid_imm_out_of_lower_bound( %v0, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2b_i8_invalid_imm_out_of_upper_bound: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: rdvl x8, #16 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 16, i64 0 @@ -78,8 +70,6 @@ define void @st2b_i8_invalid_imm_out_of_upper_bound( %v0, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2b_i8_valid_imm_lower_bound: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, #-16, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 -16, i64 0 @@ -93,8 +83,6 @@ define void @st2b_i8_valid_imm_lower_bound( %v0, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2b_i8_valid_imm_upper_bound: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, #14, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 14, i64 0 @@ -112,8 +100,6 @@ define void @st2b_i8_valid_imm_upper_bound( %v0, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2h_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2h { z0.h, z1.h }, p0, [x0, #2, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 2, i64 0 @@ -127,8 +113,6 @@ define void @st2h_i16( %v0, 
%v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2h_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2h { z0.h, z1.h }, p0, [x0, #2, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 2, i64 0 @@ -146,8 +130,6 @@ define void @st2h_f16( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2w_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2w { z0.s, z1.s }, p0, [x0, #4, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 4, i64 0 @@ -161,8 +143,6 @@ define void @st2w_i32( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2w_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2w { z0.s, z1.s }, p0, [x0, #6, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 6, i64 0 @@ -180,8 +160,6 @@ define void @st2w_f32( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2d_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2d { z0.d, z1.d }, p0, [x0, #8, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 8, i64 0 @@ -195,8 +173,6 @@ define void @st2d_i64( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2d_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2d { z0.d, z1.d }, p0, [x0, #10, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 10, i64 0 @@ -214,9 +190,6 @@ define void @st2d_f64( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: 
st3b_i8_valid_imm: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, #3, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 3, i64 0 @@ -231,10 +204,7 @@ define void @st3b_i8_valid_imm( %v0, %v1, < define void @st3b_i8_invalid_imm_not_multiple_of_3_01( %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3b_i8_invalid_imm_not_multiple_of_3_01: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: rdvl x8, #4 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 4, i64 0 @@ -249,10 +219,7 @@ define void @st3b_i8_invalid_imm_not_multiple_of_3_01( %v0, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3b_i8_invalid_imm_not_multiple_of_3_02: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: rdvl x8, #5 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 5, i64 0 @@ -267,10 +234,7 @@ define void @st3b_i8_invalid_imm_not_multiple_of_3_02( %v0, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3b_i8_invalid_imm_out_of_lower_bound: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: rdvl x8, #-27 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3b { z0.b - 
z2.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 -27, i64 0 @@ -285,10 +249,7 @@ define void @st3b_i8_invalid_imm_out_of_lower_bound( %v0, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3b_i8_invalid_imm_out_of_upper_bound: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: rdvl x8, #24 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 24, i64 0 @@ -303,9 +264,6 @@ define void @st3b_i8_invalid_imm_out_of_upper_bound( %v0, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3b_i8_valid_imm_lower_bound: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, #-24, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 -24, i64 0 @@ -320,9 +278,6 @@ define void @st3b_i8_valid_imm_lower_bound( %v0, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3b_i8_valid_imm_upper_bound: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, #21, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 21, i64 0 @@ -341,9 +296,6 @@ define void @st3b_i8_valid_imm_upper_bound( %v0, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3h_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // 
kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0, #6, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 6, i64 0 @@ -358,9 +310,6 @@ define void @st3h_i16( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3h_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0, #9, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 9, i64 0 @@ -379,9 +328,6 @@ define void @st3h_f16( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3w_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0, #12, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 12, i64 0 @@ -396,9 +342,6 @@ define void @st3w_i32( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3w_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0, #15, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 15, i64 0 @@ -417,9 +360,6 @@ define void @st3w_f32( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3d_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3d { z0.d - 
z2.d }, p0, [x0, #18, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 18, i64 0 @@ -434,9 +374,6 @@ define void @st3d_i64( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3d_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0, #-3, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 -3, i64 0 @@ -455,10 +392,6 @@ define void @st3d_f64( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4b_i8_valid_imm: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, #4, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 4, i64 0 @@ -474,11 +407,7 @@ define void @st4b_i8_valid_imm( %v0, %v1, < define void @st4b_i8_invalid_imm_not_multiple_of_4_01( %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_01: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: rdvl x8, #5 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 5, i64 0 @@ -494,11 +423,7 @@ define void @st4b_i8_invalid_imm_not_multiple_of_4_01( %v0, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: 
st4b_i8_invalid_imm_not_multiple_of_4_02: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: rdvl x8, #6 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 6, i64 0 @@ -514,11 +439,7 @@ define void @st4b_i8_invalid_imm_not_multiple_of_4_02( %v0, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_03: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: rdvl x8, #7 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 7, i64 0 @@ -536,12 +457,8 @@ define void @st4b_i8_invalid_imm_out_of_lower_bound( %v0, %v0, %v0, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4b_i8_valid_imm_lower_bound: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, #-32, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 -32, i64 0 @@ -607,10 +516,6 @@ define void @st4b_i8_valid_imm_lower_bound( %v0, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4b_i8_valid_imm_upper_bound: ; CHECK: // %bb.0: -; 
CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, #28, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 28, i64 0 @@ -630,10 +535,6 @@ define void @st4b_i8_valid_imm_upper_bound( %v0, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4h_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0, #8, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 8, i64 0 @@ -649,10 +550,6 @@ define void @st4h_i16( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4h_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0, #12, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 12, i64 0 @@ -672,10 +569,6 @@ define void @st4h_f16( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4w_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 
-; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0, #16, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 16, i64 0 @@ -691,10 +584,6 @@ define void @st4w_i32( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4w_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0, #20, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 20, i64 0 @@ -714,10 +603,6 @@ define void @st4w_f32( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4d_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0, #24, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 24, i64 0 @@ -733,10 +618,6 @@ define void @st4d_i64( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4d_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0, #28, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 28, i64 0 diff --git 
a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll index d6ee787a23f877..d7b7e59548003b 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll @@ -9,8 +9,6 @@ define void @st2b_i8( %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2b_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x1] ; CHECK-NEXT: ret %1 = getelementptr i8, ptr %addr, i64 %offset @@ -28,8 +26,6 @@ define void @st2b_i8( %v0, %v1, %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2h_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2h { z0.h, z1.h }, p0, [x0, x1, lsl #1] ; CHECK-NEXT: ret %1 = getelementptr i16, ptr %addr, i64 %offset @@ -43,8 +39,6 @@ define void @st2h_i16( %v0, %v1, %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2h_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2h { z0.h, z1.h }, p0, [x0, x1, lsl #1] ; CHECK-NEXT: ret %1 = getelementptr half, ptr %addr, i64 %offset @@ -62,8 +56,6 @@ define void @st2h_f16( %v0, %v1, %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2w_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2w { z0.s, z1.s }, p0, [x0, x1, lsl #2] ; CHECK-NEXT: ret %1 = getelementptr i32, ptr %addr, i64 %offset @@ -77,8 +69,6 @@ define void @st2w_i32( %v0, %v1, %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2w_f32: 
; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2w { z0.s, z1.s }, p0, [x0, x1, lsl #2] ; CHECK-NEXT: ret %1 = getelementptr float, ptr %addr, i64 %offset @@ -96,8 +86,6 @@ define void @st2w_f32( %v0, %v1, %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2d_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2d { z0.d, z1.d }, p0, [x0, x1, lsl #3] ; CHECK-NEXT: ret %1 = getelementptr i64, ptr %addr, i64 %offset @@ -111,8 +99,6 @@ define void @st2d_i64( %v0, %v1, %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2d_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2d { z0.d, z1.d }, p0, [x0, x1, lsl #3] ; CHECK-NEXT: ret %1 = getelementptr double, ptr %addr, i64 %offset @@ -130,9 +116,6 @@ define void @st2d_f64( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3b_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, x1] ; CHECK-NEXT: ret %1 = getelementptr i8, ptr %addr, i64 %offset @@ -151,9 +134,6 @@ define void @st3b_i8( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3h_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0, x1, lsl #1] ; CHECK-NEXT: ret %1 = 
getelementptr i16, ptr %addr, i64 %offset @@ -168,9 +148,6 @@ define void @st3h_i16( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3h_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0, x1, lsl #1] ; CHECK-NEXT: ret %1 = getelementptr half, ptr %addr, i64 %offset @@ -189,9 +166,6 @@ define void @st3h_f16( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3w_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0, x1, lsl #2] ; CHECK-NEXT: ret %1 = getelementptr i32, ptr %addr, i64 %offset @@ -206,9 +180,6 @@ define void @st3w_i32( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3w_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0, x1, lsl #2] ; CHECK-NEXT: ret %1 = getelementptr float, ptr %addr, i64 %offset @@ -227,9 +198,6 @@ define void @st3w_f32( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3d_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0, x1, lsl #3] ; CHECK-NEXT: ret %1 = getelementptr i64, ptr %addr, i64 
%offset @@ -244,9 +212,6 @@ define void @st3d_i64( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3d_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0, x1, lsl #3] ; CHECK-NEXT: ret %1 = getelementptr double, ptr %addr, i64 %offset @@ -265,10 +230,6 @@ define void @st3d_f64( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4b_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, x1] ; CHECK-NEXT: ret %1 = getelementptr i8, ptr %addr, i64 %offset @@ -288,10 +249,6 @@ define void @st4b_i8( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4h_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0, x1, lsl #1] ; CHECK-NEXT: ret %1 = getelementptr i16, ptr %addr, i64 %offset @@ -307,10 +264,6 @@ define void @st4h_i16( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4h_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; 
CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0, x1, lsl #1] ; CHECK-NEXT: ret %1 = getelementptr half, ptr %addr, i64 %offset @@ -330,10 +283,6 @@ define void @st4h_f16( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4w_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0, x1, lsl #2] ; CHECK-NEXT: ret %1 = getelementptr i32, ptr %addr, i64 %offset @@ -349,10 +298,6 @@ define void @st4w_i32( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4w_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0, x1, lsl #2] ; CHECK-NEXT: ret %1 = getelementptr float, ptr %addr, i64 %offset @@ -372,10 +317,6 @@ define void @st4w_f32( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4d_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0, x1, lsl #3] ; 
CHECK-NEXT: ret %1 = getelementptr i64, ptr %addr, i64 %offset @@ -391,10 +332,6 @@ define void @st4d_i64( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4d_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0, x1, lsl #3] ; CHECK-NEXT: ret %1 = getelementptr double, ptr %addr, i64 %offset diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll index d07fd8785121b3..e03d4379d0ee23 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll @@ -9,8 +9,6 @@ define void @st2b_i8( %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2b_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st2.nxv16i8( %v0, @@ -27,8 +25,6 @@ define void @st2b_i8( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2h_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2h { z0.h, z1.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st2.nxv8i16( %v0, @@ -41,8 +37,6 @@ define void @st2h_i16( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2h_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2h { z0.h, z1.h }, p0, [x0] ; CHECK-NEXT: ret call void 
@llvm.aarch64.sve.st2.nxv8f16( %v0, @@ -55,8 +49,6 @@ define void @st2h_f16( %v0, %v1, %v0, %v1, %pred, ptr %addr) #0 { ; CHECK-LABEL: st2h_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2h { z0.h, z1.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st2.nxv8bf16( %v0, @@ -73,8 +65,6 @@ define void @st2h_bf16( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2w_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2w { z0.s, z1.s }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st2.nxv4i32( %v0, @@ -87,8 +77,6 @@ define void @st2w_i32( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2w_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2w { z0.s, z1.s }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st2.nxv4f32( %v0, @@ -105,8 +93,6 @@ define void @st2w_f32( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2d_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2d { z0.d, z1.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st2.nxv2i64( %v0, @@ -119,8 +105,6 @@ define void @st2d_i64( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2d_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2d { z0.d, z1.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st2.nxv2f64( %v0, @@ -133,8 +117,6 @@ define void @st2d_f64( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2d_ptr: ; 
CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2d { z0.d, z1.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st2.nxv2p0( %v0, @@ -151,9 +133,6 @@ define void @st2d_ptr( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3b_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv16i8( %v0, @@ -171,9 +150,6 @@ define void @st3b_i8( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3h_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv8i16( %v0, @@ -187,9 +163,6 @@ define void @st3h_i16( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3h_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv8f16( %v0, @@ -203,9 +176,6 @@ define void @st3h_f16( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) #0 { ; CHECK-LABEL: st3h_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def 
$z0_z1_z2 ; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv8bf16( %v0, @@ -223,9 +193,6 @@ define void @st3h_bf16( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3w_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv4i32( %v0, @@ -239,9 +206,6 @@ define void @st3w_i32( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3w_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv4f32( %v0, @@ -259,9 +223,6 @@ define void @st3w_f32( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3d_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv2i64( %v0, @@ -275,9 +236,6 @@ define void @st3d_i64( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3d_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv2f64( %v0, @@ -291,9 +249,6 @@ define 
void @st3d_f64( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3d_ptr: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv2p0( %v0, @@ -311,10 +266,6 @@ define void @st3d_ptr( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4b_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv16i8( %v0, @@ -333,10 +284,6 @@ define void @st4b_i8( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4h_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv8i16( %v0, @@ -351,10 +298,6 @@ define void @st4h_i16( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4h_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed 
$z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv8f16( %v0, @@ -369,10 +312,6 @@ define void @st4h_f16( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) #0 { ; CHECK-LABEL: st4h_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv8bf16( %v0, @@ -391,10 +330,6 @@ define void @st4h_bf16( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4w_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv4i32( %v0, @@ -409,10 +344,6 @@ define void @st4w_i32( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4w_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv4f32( %v0, @@ -431,10 +362,6 @@ define void @st4w_f32( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4d_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: 
// kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv2i64( %v0, @@ -449,10 +376,6 @@ define void @st4d_i64( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4d_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv2f64( %v0, @@ -467,10 +390,6 @@ define void @st4d_f64( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4d_ptr: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv2p0( %v0, diff --git a/llvm/test/CodeGen/AArch64/sve-merging-stores.ll b/llvm/test/CodeGen/AArch64/sve-merging-stores.ll index 47758893ce7117..f6330c613de842 100644 --- a/llvm/test/CodeGen/AArch64/sve-merging-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-merging-stores.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s 
%complex = type { { double, double } } @@ -10,11 +11,13 @@ declare double @llvm.aarch64.sve.faddv.nxv2f64(, %pred, ptr %inptr) { ; CHECK-LABEL: foo1: -; CHECK: ld2d { z0.d, z1.d }, p0/z, [x1] -; CHECK-NEXT: faddv d2, p0, z0.d -; CHECK-NEXT: faddv d0, p0, z1.d -; CHECK-NEXT: mov v2.d[1], v0.d[0] -; CHECK-NEXT: str q2, [x0] +; CHECK: // %bb.0: +; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [x1] +; CHECK-NEXT: faddv d0, p0, z0.d +; CHECK-NEXT: faddv d1, p0, z1.d +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: ret %imagp = getelementptr inbounds %complex, ptr %outval, i64 0, i32 0, i32 1 %1 = call { , } @llvm.aarch64.sve.ld2.sret.nxv2f64( %pred, ptr nonnull %inptr) %2 = extractvalue { , } %1, 0 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll index 66d544d0acbf56..4e52258e8b5df7 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll @@ -21,9 +21,9 @@ define void @alloc_v4i8(ptr %st_ptr) nounwind { ; CHECK-NEXT: ld2b { z0.b, z1.b }, p0/z, [x20] ; CHECK-NEXT: ptrue p0.s, vl2 ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: mov z2.b, z0.b[1] +; CHECK-NEXT: mov z1.b, z0.b[1] ; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: fmov w9, s2 +; CHECK-NEXT: fmov w9, s1 ; CHECK-NEXT: stp w8, w9, [sp, #8] ; CHECK-NEXT: ldr d0, [sp, #8] ; CHECK-NEXT: st1b { z0.s }, p0, [x19] @@ -198,9 +198,9 @@ define void @alloc_v8f64(ptr %st_ptr) nounwind { ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: mov x8, #4 // =0x4 ; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [x20] -; CHECK-NEXT: ld2d { z2.d, z3.d }, p0/z, [x20, x8, lsl #3] +; CHECK-NEXT: ld2d { z1.d, z2.d }, p0/z, [x20, x8, lsl #3] ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload -; CHECK-NEXT: stp q0, q2, [x19] +; CHECK-NEXT: stp q0, q1, [x19] ; CHECK-NEXT: ldp x20, x19, [sp, #80] // 
16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll index b66e6d90135730..dd27097d8bdf75 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll @@ -38,8 +38,6 @@ define void @interleave_store_without_splat(ptr %a, <4 x i32> %v1, <4 x i32> %v2 ; CHECK-LABEL: interleave_store_without_splat: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2w { z0.s, z1.s }, p0, [x0] ; CHECK-NEXT: ret ; @@ -75,13 +73,12 @@ define void @interleave_store_legalization(ptr %a, <8 x i32> %v1, <8 x i32> %v2) ; CHECK-LABEL: interleave_store_legalization: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: // kill: def $q3 killed $q3 def $z2_z3 -; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: mov x8, #8 // =0x8 ; CHECK-NEXT: mov z4.d, z0.d -; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: mov x8, #8 // =0x8 +; CHECK-NEXT: mov z2.d, z3.d +; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: st2w { z4.s, z5.s }, p0, [x0] -; CHECK-NEXT: st2w { z2.s, z3.s }, p0, [x0, x8, lsl #2] +; CHECK-NEXT: st2w { z1.s, z2.s }, p0, [x0, x8, lsl #2] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: interleave_store_legalization: diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll index 9fd1eb616c28c9..b200eb3f23bf2a 100644 --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll @@ -8,9 +8,8 @@ define @tbl2_b( %a, %unused, %b, %c) { ; CHECK-LABEL: tbl2_b: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 def $z1_z2 -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: tbl z0.b, 
{ z1.b, z2.b }, z3.b +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: tbl z0.b, { z0.b, z1.b }, z3.b ; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.tbl2.nxv16i8( %a, %b, @@ -21,9 +20,8 @@ define @tbl2_b( %a, %unu define @tbl2_h( %a, %unused, %b, %c) { ; CHECK-LABEL: tbl2_h: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 def $z1_z2 -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: tbl z0.h, { z1.h, z2.h }, z3.h +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: tbl z0.h, { z0.h, z1.h }, z3.h ; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.tbl2.nxv8i16( %a, %b, @@ -34,9 +32,8 @@ define @tbl2_h( %a, %unu define @tbl2_s( %a, %unused, %b, %c) { ; CHECK-LABEL: tbl2_s: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 def $z1_z2 -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: tbl z0.s, { z1.s, z2.s }, z3.s +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: tbl z0.s, { z0.s, z1.s }, z3.s ; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.tbl2.nxv4i32( %a, %b, @@ -47,9 +44,8 @@ define @tbl2_s( %a, %unu define @tbl2_d( %a, %unused, %b, %c) { ; CHECK-LABEL: tbl2_d: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 def $z1_z2 -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: tbl z0.d, { z1.d, z2.d }, z3.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: tbl z0.d, { z0.d, z1.d }, z3.d ; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.tbl2.nxv2i64( %a, %b, @@ -60,9 +56,8 @@ define @tbl2_d( %a, %unu define @tbl2_fh( %a, %unused, %b, %c) { ; CHECK-LABEL: tbl2_fh: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 def $z1_z2 -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: tbl z0.h, { z1.h, z2.h }, z3.h +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: tbl z0.h, { z0.h, z1.h }, z3.h ; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.tbl2.nxv8f16( %a, %b, @@ -73,9 +68,8 @@ define @tbl2_fh( %a, define @tbl2_bf16( %a, %unused, %b, %c) #0 { ; CHECK-LABEL: tbl2_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 def $z1_z2 -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: 
tbl z0.h, { z1.h, z2.h }, z3.h +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: tbl z0.h, { z0.h, z1.h }, z3.h ; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.tbl2.nxv8bf16( %a, %b, @@ -86,9 +80,8 @@ define @tbl2_bf16( %a, @tbl2_fs( %a, %unused, %b, %c) { ; CHECK-LABEL: tbl2_fs: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 def $z1_z2 -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: tbl z0.s, { z1.s, z2.s }, z3.s +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: tbl z0.s, { z0.s, z1.s }, z3.s ; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.tbl2.nxv4f32( %a, %b, @@ -99,9 +92,8 @@ define @tbl2_fs( %a, @tbl2_fd( %a, %unused, %b, %c) { ; CHECK-LABEL: tbl2_fd: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 def $z1_z2 -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: tbl z0.d, { z1.d, z2.d }, z3.d +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: tbl z0.d, { z0.d, z1.d }, z3.d ; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.tbl2.nxv2f64( %a, %b, diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll index 7934f831a7e62f..5eeca5fec16f1b 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll @@ -15,8 +15,6 @@ declare @llvm.aarch64.sve.fclamp.nxv8bf16(, } @test_bfclamp_single_x2_f16( %a, %b, %c, %d){ ; CHECK-LABEL: test_bfclamp_single_x2_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfclamp { z0.h, z1.h }, z2.h, z3.h ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.bfclamp.single.x2.nxv8bf16( %a, %b, %c, %d) @@ -26,10 +24,6 @@ define { , } @test_bfclamp_single_x2_ define { , , , } @test_bfclamp_single_x4_f16( %a, %b, %c, %d, %e, %f){ ; CHECK-LABEL: test_bfclamp_single_x4_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // 
kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfclamp { z0.h - z3.h }, z4.h, z5.h ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.bfclamp.single.x4.nxv8bf16( %a, %b, %c, %d, %e, %f) diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll index df6b34a3280a7a..90a4927cfa5e99 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll @@ -33,8 +33,6 @@ define @test_fclamp_f64( %a, , } @test_fclamp_single_x2_f16( %a, %b, %c, %d) #1 { ; CHECK-LABEL: test_fclamp_single_x2_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fclamp { z0.h, z1.h }, z2.h, z3.h ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv8f16( %a, %b, %c, %d) @@ -44,8 +42,6 @@ define { , } @test_fclamp_single_x2_f16(< define { , } @test_fclamp_single_x2_f32( %a, %b, %c, %d) #1 { ; CHECK-LABEL: test_fclamp_single_x2_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fclamp { z0.s, z1.s }, z2.s, z3.s ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv4f32( %a, %b, %c, %d) @@ -55,8 +51,6 @@ define { , } @test_fclamp_single_x2_f32 define { , } @test_fclamp_single_x2_f64( %a, %b, %c, %d) #1 { ; CHECK-LABEL: test_fclamp_single_x2_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fclamp { z0.d, z1.d }, z2.d, z3.d ; CHECK-NEXT: ret %res = call { , } 
@llvm.aarch64.sve.fclamp.single.x2.nxv2f64( %a, %b, %c, %d) @@ -67,10 +61,6 @@ define { , } @test_fclamp_single_x2_f define { , , , } @test_fclamp_single_x4_f16( %a, %b, %c, %d, %e, %f) #1 { ; CHECK-LABEL: test_fclamp_single_x4_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fclamp { z0.h - z3.h }, z4.h, z5.h ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv8f16( %a, %b, %c, %d, %e, %f) @@ -80,10 +70,6 @@ define { , , , , , , } @test_fclamp_single_x4_f32( %a, %b, %c, %d, %e, %f) #1 { ; CHECK-LABEL: test_fclamp_single_x4_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fclamp { z0.s - z3.s }, z4.s, z5.s ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv4f32( %a, %b, %c, %d, %e, %f) @@ -93,10 +79,6 @@ define { , , , , , , } @test_fclamp_single_x4_f64( %a, %b, %c, %d, %e, %f) #1 { ; CHECK-LABEL: test_fclamp_single_x4_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fclamp { z0.d - z3.d }, z4.d, z5.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv2f64( %a, %b, %c, %d, %e, 
%f) diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll index 8fe0694808c8e9..57e1a1e100db0b 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll @@ -7,8 +7,6 @@ define void @st2q_ss_i8( %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2q_ss_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -21,8 +19,6 @@ define void @st2q_ss_i8( %v0, %v1, %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2q_ss_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -36,8 +32,6 @@ define void @st2q_ss_i16( %v0, %v1, %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2q_ss_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -51,8 +45,6 @@ define void @st2q_ss_i32( %v0, %v1, %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2q_ss_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -66,8 +58,6 @@ define void @st2q_ss_i64( %v0, %v1, %v0, %v1, %pred, ptr 
%addr, i64 %offset) { ; CHECK-LABEL: st2q_ss_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -81,8 +71,6 @@ define void @st2q_ss_f16( %v0, %v1, %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2q_ss_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -96,8 +84,6 @@ define void @st2q_ss_f32( %v0, %v1, %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2q_ss_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -111,8 +97,6 @@ define void @st2q_ss_f64( %v0, %v1, < define void @st2q_ss_bf16( %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2q_ss_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -127,8 +111,6 @@ define void @st2q_ss_bf16( %v0, %v1, define void @st2q_si_i8_off16( %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2q_si_i8_off16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #-16, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 -16 @@ -142,8 
+124,6 @@ define void @st2q_si_i8_off16( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2q_si_i8_off14: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 14 @@ -157,8 +137,6 @@ define void @st2q_si_i8_off14( %v0, %v1, %v0, %v1, %pred, ptr %base) { ; CHECK-LABEL: st2q_si_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl] ; CHECK-NEXT: ret %gep = getelementptr , ptr %base, i64 14 @@ -172,8 +150,6 @@ define void @st2q_si_i16( %v0, %v1, %v0, %v1, %pred, ptr %base) { ; CHECK-LABEL: st2q_si_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl] ; CHECK-NEXT: ret %gep = getelementptr , ptr %base, i64 14 @@ -187,8 +163,6 @@ define void @st2q_si_i32( %v0, %v1, %v0, %v1, %pred, ptr %base) { ; CHECK-LABEL: st2q_si_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl] ; CHECK-NEXT: ret %gep = getelementptr , ptr %base, i64 14 @@ -202,8 +176,6 @@ define void @st2q_si_i64( %v0, %v1, %v0, %v1, %pred, ptr %base) { ; CHECK-LABEL: st2q_si_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl] ; CHECK-NEXT: ret %gep = getelementptr , ptr %base, i64 14 @@ -217,8 +189,6 @@ define void @st2q_si_f16( %v0, %v1, %v0, %v1, 
%pred, ptr %base) { ; CHECK-LABEL: st2q_si_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl] ; CHECK-NEXT: ret %gep = getelementptr , ptr %base, i64 14 @@ -232,8 +202,6 @@ define void @st2q_si_f32( %v0, %v1, %v0, %v1, %pred, ptr %base) { ; CHECK-LABEL: st2q_si_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl] ; CHECK-NEXT: ret %gep= getelementptr , ptr %base, i64 14 @@ -247,8 +215,6 @@ define void @st2q_si_f64( %v0, %v1, < define void @st2q_si_bf16( %v0, %v1, %pred, ptr %base) { ; CHECK-LABEL: st2q_si_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl] ; CHECK-NEXT: ret %gep = getelementptr , ptr %base, i64 14 @@ -266,9 +232,6 @@ define void @st2q_si_bf16( %v0, %v1, define void @st3q_ss_i8( %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3q_ss_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -283,9 +246,6 @@ define void @st3q_ss_i8( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3q_ss_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 
def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -300,9 +260,6 @@ define void @st3q_ss_i16( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3q_ss_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -317,9 +274,6 @@ define void @st3q_ss_i32( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3q_ss_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -334,9 +288,6 @@ define void @st3q_ss_i64( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3q_ss_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -351,9 +302,6 @@ define void @st3q_ss_f16( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3q_ss_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; 
CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -368,9 +316,6 @@ define void @st3q_ss_f32( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3q_ss_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -385,9 +330,6 @@ define void @st3q_ss_f64( %v0, %v1, < define void @st3q_ss_bf16( %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3q_ss_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -402,9 +344,6 @@ define void @st3q_ss_bf16( %v0, %v1, define void @st3q_si_i8_off24( %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3q_si_i8_off24: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #-24, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 -24 @@ -419,9 +358,6 @@ define void @st3q_si_i8_off24( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3q_si_i8_off21: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 
killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 21 @@ -436,9 +372,6 @@ define void @st3q_si_i8_off21( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3q_si_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 21 @@ -453,9 +386,6 @@ define void @st3q_si_i16( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3q_si_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 21 @@ -470,9 +400,6 @@ define void @st3q_si_i32( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3q_si_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 21 @@ -487,9 +414,6 @@ define void @st3q_si_i64( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3q_si_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, 
mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 21 @@ -504,9 +428,6 @@ define void @st3q_si_f16( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3q_si_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 21 @@ -521,9 +442,6 @@ define void @st3q_si_f32( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3q_si_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 21 @@ -538,9 +456,6 @@ define void @st3q_si_f64( %v0, %v1, < define void @st3q_si_bf16( %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3q_si_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 21 @@ -558,10 +473,6 @@ define void @st3q_si_bf16( %v0, %v1, define void @st4q_ss_i8( %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4q_ss_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 
killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -577,10 +488,6 @@ define void @st4q_ss_i8( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4q_ss_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -596,10 +503,6 @@ define void @st4q_ss_i16( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4q_ss_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -615,10 +518,6 @@ define void @st4q_ss_i32( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4q_ss_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -634,10 +533,6 @@ define void 
@st4q_ss_i64( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4q_ss_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -653,10 +548,6 @@ define void @st4q_ss_f16( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4q_ss_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -672,10 +563,6 @@ define void @st4q_ss_f32( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4q_ss_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -691,10 +578,6 @@ define void @st4q_ss_f64( %v0, %v1, < define void @st4q_ss_bf16( %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4q_ss_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 
killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -710,10 +593,6 @@ define void @st4q_ss_bf16( %v0, %v1, define void @st4q_si_i8_off32( %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4q_si_i8_off32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #-32, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 -32 @@ -729,10 +608,6 @@ define void @st4q_si_i8_off32( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4q_si_i8_off28: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 28 @@ -748,10 +623,6 @@ define void @st4q_si_i8_off28( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4q_si_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def 
$z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 28 @@ -767,10 +638,6 @@ define void @st4q_si_i16( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4q_si_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl] ; CHECK-NEXT: ret %base1 = getelementptr , ptr %addr, i64 28 @@ -786,10 +653,6 @@ define void @st4q_si_i32( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4q_si_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 28 @@ -805,10 +668,6 @@ define void @st4q_si_i64( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4q_si_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 28 @@ -824,10 +683,6 @@ define void 
@st4q_si_f16( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4q_si_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 28 @@ -843,10 +698,6 @@ define void @st4q_si_f32( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4q_si_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 28 @@ -862,10 +713,6 @@ define void @st4q_si_f64( %v0, %v1, < define void @st4q_si_bf16( %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4q_si_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 28 diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll index 912d5d853aa8d5..26316caad2bbc2 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll +++ 
b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll @@ -42,8 +42,6 @@ define @test_sclamp_i64( %a, , } @test_sclamp_single_x2_i8( %a, %b, %c, %d) #1 { ; CHECK-LABEL: test_sclamp_single_x2_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sclamp { z0.b, z1.b }, z2.b, z3.b ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv16i8( %a, %b, %c, %d) @@ -53,8 +51,6 @@ define { , } @test_sclamp_single_x2_i8(, } @test_sclamp_single_x2_i16( %a, %b, %c, %d) #1 { ; CHECK-LABEL: test_sclamp_single_x2_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sclamp { z0.h, z1.h }, z2.h, z3.h ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv8i16( %a, %b, %c, %d) @@ -64,8 +60,6 @@ define { , } @test_sclamp_single_x2_i16(, } @test_sclamp_single_x2_i32( %a, %b, %c, %d) #1 { ; CHECK-LABEL: test_sclamp_single_x2_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sclamp { z0.s, z1.s }, z2.s, z3.s ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv4i32( %a, %b, %c, %d) @@ -75,8 +69,6 @@ define { , } @test_sclamp_single_x2_i32(, } @test_sclamp_single_x2_i64( %a, %b, %c, %d) #1 { ; CHECK-LABEL: test_sclamp_single_x2_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sclamp { z0.d, z1.d }, z2.d, z3.d ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv2i64( %a, %b, %c, %d) @@ -86,10 +78,6 @@ define { , } @test_sclamp_single_x2_i64(, , , } @test_sclamp_single_x4_i8( %a, %b, %c, %d, %e, %f) #1 { ; CHECK-LABEL: 
test_sclamp_single_x4_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: sclamp { z0.b - z3.b }, z4.b, z5.b ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv16i8( %a, %b, %c, %d, %e, %f) @@ -99,10 +87,6 @@ define { , , , , , , } @test_sclamp_single_x4_i16( %a, %b, %c, %d, %e, %f) #1 { ; CHECK-LABEL: test_sclamp_single_x4_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: sclamp { z0.h - z3.h }, z4.h, z5.h ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv8i16( %a, %b, %c, %d, %e, %f) @@ -112,10 +96,6 @@ define { , , , , , , } @test_sclamp_single_x4_i32( %a, %b, %c, %d, %e, %f) #1 { ; CHECK-LABEL: test_sclamp_single_x4_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: sclamp { z0.s - z3.s }, z4.s, z5.s ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv4i32( %a, %b, %c, %d, %e, %f) @@ -125,10 +105,6 @@ define { , , , , , , } @test_sclamp_single_x4_i64( %a, %b, %c, %d, %e, %f) #1 { ; CHECK-LABEL: test_sclamp_single_x4_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed 
$z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: sclamp { z0.d - z3.d }, z4.d, z5.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv2i64( %a, %b, %c, %d, %e, %f) diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll index 3a21eaead5f72e..d64f06aaef8858 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll @@ -8,18 +8,18 @@ define { , , , , , , , , , , , , , , , , , , , , , , , , %unused, %zn0, %unused, %zn0, %unused, %zn0, %unused, %zn0, %unused, %zn0, %unused, %zn0, ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b -; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: st1h { z2.h, z3.h }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -118,9 +118,9 @@ define void @st1_x2_f32( %unused, %zn0, < ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b -; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: st1w { z2.s, z3.s }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -136,9 +136,9 @@ define void @st1_x2_f64( %unused, %zn0, ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b -; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: st1d { z2.d, z3.d }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -154,11 +154,11 @@ define void @st1_x4_i8( %unused, %zn0, %unused, %zn0, %unused, %zn0, %unused, %zn0, %unused, %zn0, %unused, %zn0, ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: st1h { z4.h - z7.h }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -274,11 +274,11 @@ define void @st1_x4_f32( %unused, %zn0, < ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: st1w { z4.s - z7.s }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -294,11 +294,11 @@ define void @st1_x4_f64( %unused, %zn0, ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: st1d { z4.d - z7.d }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -316,9 +316,9 @@ define void @stnt1_x2_i8( %unused, %zn0, %unused, %zn0, < ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b -; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1h { z2.h, z3.h }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -352,9 +352,9 @@ define void @stnt1_x2_i32( %unused, %zn0, < ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b -; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1w { z2.s, z3.s }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -370,9 +370,9 @@ define void @stnt1_x2_i64( %unused, %zn0, < ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b -; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1d { z2.d, z3.d }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -388,9 +388,9 @@ define void @stnt1_x2_f16( %unused, %zn0, ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b -; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1h { z2.h, z3.h }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -406,9 +406,9 @@ define void @stnt1_x2_bf16( %unused, %zn ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b -; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1h { z2.h, z3.h }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -424,9 +424,9 @@ define void @stnt1_x2_f32( %unused, %zn0, ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b -; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1w { z2.s, z3.s }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -442,9 +442,9 @@ define void @stnt1_x2_f64( %unused, %zn0 ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b -; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1d { z2.d, z3.d }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -460,11 +460,11 @@ define void @stnt1_x4_i8( %unused, %zn0, %unused, %zn0, < ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: stnt1h { z4.h - z7.h }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -500,11 +500,11 @@ define void @stnt1_x4_i32( %unused, %zn0, < ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: stnt1w { z4.s - z7.s }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -520,11 +520,11 @@ define void @stnt1_x4_i64( %unused, %zn0, < ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: stnt1d { z4.d - z7.d }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -540,11 +540,11 @@ define void @stnt1_x4_f16( %unused, %zn0, ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: stnt1h { z4.h - z7.h }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -560,11 +560,11 @@ define void @stnt1_x4_bf16( %unused, %zn ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: stnt1h { z4.h - z7.h }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -580,11 +580,11 @@ define void @stnt1_x4_f32( %unused, %zn0, ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: stnt1w { z4.s - z7.s }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -600,11 +600,11 @@ define void @stnt1_x4_f64( %unused, %zn0 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: stnt1d { z4.d - z7.d }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll index de1695162c98eb..ca0bad16fe0e9c 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll @@ -42,8 +42,6 @@ define @test_uclamp_i64( %a, , } @test_uclamp_single_x2_i8( %a, %b, %c, %d) #1 { ; CHECK-LABEL: test_uclamp_single_x2_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: uclamp { z0.b, z1.b }, z2.b, z3.b ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv16i8( %a, %b, %c, %d) @@ -53,8 +51,6 @@ define { , } @test_uclamp_single_x2_i8(, } @test_uclamp_single_x2_i16( %a, %b, %c, %d) #1 { ; CHECK-LABEL: test_uclamp_single_x2_i16: ; CHECK: // %bb.0: -; 
CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: uclamp { z0.h, z1.h }, z2.h, z3.h ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv8i16( %a, %b, %c, %d) @@ -64,8 +60,6 @@ define { , } @test_uclamp_single_x2_i16(, } @test_uclamp_single_x2_i32( %a, %b, %c, %d) #1 { ; CHECK-LABEL: test_uclamp_single_x2_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: uclamp { z0.s, z1.s }, z2.s, z3.s ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv4i32( %a, %b, %c, %d) @@ -75,8 +69,6 @@ define { , } @test_uclamp_single_x2_i32(, } @test_uclamp_single_x2_i64( %a, %b, %c, %d) #1 { ; CHECK-LABEL: test_uclamp_single_x2_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: uclamp { z0.d, z1.d }, z2.d, z3.d ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv2i64( %a, %b, %c, %d) @@ -86,10 +78,6 @@ define { , } @test_uclamp_single_x2_i64(, , , } @test_uclamp_single_x4_i8( %a, %b, %c, %d, %e, %f) #1 { ; CHECK-LABEL: test_uclamp_single_x4_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: uclamp { z0.b - z3.b }, z4.b, z5.b ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv16i8( %a, %b, %c, %d, %e, %f) @@ -99,10 +87,6 @@ define { , , , , , , } @test_uclamp_single_x4_i16( %a, %b, %c, %d, %e, %f) #1 { ; CHECK-LABEL: test_uclamp_single_x4_i16: ; CHECK: // 
%bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: uclamp { z0.h - z3.h }, z4.h, z5.h ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv8i16( %a, %b, %c, %d, %e, %f) @@ -112,10 +96,6 @@ define { , , , , , , } @test_uclamp_single_x4_i32( %a, %b, %c, %d, %e, %f) #1 { ; CHECK-LABEL: test_uclamp_single_x4_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: uclamp { z0.s - z3.s }, z4.s, z5.s ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv4i32( %a, %b, %c, %d, %e, %f) @@ -125,10 +105,6 @@ define { , , , , , , } @test_uclamp_single_x4_i64( %a, %b, %c, %d, %e, %f) #1 { ; CHECK-LABEL: test_uclamp_single_x4_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: uclamp { z0.d - z3.d }, z4.d, z5.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv2i64( %a, %b, %c, %d, %e, %f) diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpx4.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpx4.ll index fe3ddbf747acec..741afc3a49a69e 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpx4.ll +++ 
b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpx4.ll @@ -97,11 +97,11 @@ define { , , , , , , } @uzp_x4_f64( %unused, %zn1, %zn2, %zn3, %zn4) nounwind { ; CHECK-LABEL: uzp_x4_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z27.d, z5.d -; CHECK-NEXT: mov z26.d, z4.d -; CHECK-NEXT: mov z25.d, z3.d -; CHECK-NEXT: mov z24.d, z2.d -; CHECK-NEXT: uzp { z0.d - z3.d }, { z24.d - z27.d } +; CHECK-NEXT: mov z7.d, z5.d +; CHECK-NEXT: mov z6.d, z4.d +; CHECK-NEXT: mov z5.d, z3.d +; CHECK-NEXT: mov z4.d, z2.d +; CHECK-NEXT: uzp { z0.d - z3.d }, { z4.d - z7.d } ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2f64( %zn1, %zn2, %zn3, %zn4) ret { , , , } %res @@ -204,11 +204,11 @@ define { , , , , , , } @zipq_x4_f64( %unused, %zn1, %zn2, %zn3, %zn4) nounwind { ; CHECK-LABEL: zipq_x4_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z27.d, z5.d -; CHECK-NEXT: mov z26.d, z4.d -; CHECK-NEXT: mov z25.d, z3.d -; CHECK-NEXT: mov z24.d, z2.d -; CHECK-NEXT: uzp { z0.q - z3.q }, { z24.q - z27.q } +; CHECK-NEXT: mov z7.d, z5.d +; CHECK-NEXT: mov z6.d, z4.d +; CHECK-NEXT: mov z5.d, z3.d +; CHECK-NEXT: mov z4.d, z2.d +; CHECK-NEXT: uzp { z0.q - z3.q }, { z4.q - z7.q } ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2f64( %zn1, %zn2, %zn3, %zn4) ret { , , , } %res diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll index ab70f57b488742..638849605a2cb8 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll @@ -7,7 +7,6 @@ define @whilege_x2_nxv16i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilege_x2_nxv16i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilege { p0.b, p1.b }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilege.x2.nxv16i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -18,7 +17,6 @@ define @whilege_x2_nxv8i1(i64 %m, i64 %n) nounwind { ; 
CHECK-LABEL: whilege_x2_nxv8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilege { p0.h, p1.h }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilege.x2.nxv8i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -29,7 +27,6 @@ define @whilege_x2_nxv4i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilege_x2_nxv4i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilege { p0.s, p1.s }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilege.x2.nxv4i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -40,7 +37,6 @@ define @whilege_x2_nxv2i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilege_x2_nxv2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilege { p0.d, p1.d }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilege.x2.nxv2i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -54,7 +50,6 @@ define @whilegt_x2_nxv16i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilegt_x2_nxv16i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilegt { p0.b, p1.b }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilegt.x2.nxv16i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -65,7 +60,6 @@ define @whilegt_x2_nxv8i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilegt_x2_nxv8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilegt { p0.h, p1.h }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilegt.x2.nxv8i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -76,7 +70,6 @@ define @whilegt_x2_nxv4i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilegt_x2_nxv4i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilegt { p0.s, p1.s }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilegt.x2.nxv4i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 
@@ -87,7 +80,6 @@ define @whilegt_x2_nxv2i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilegt_x2_nxv2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilegt { p0.d, p1.d }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilegt.x2.nxv2i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -101,7 +93,6 @@ define @whilehi_x2_nxv16i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilehi_x2_nxv16i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilehi { p0.b, p1.b }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilehi.x2.nxv16i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -112,7 +103,6 @@ define @whilehi_x2_nxv8i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilehi_x2_nxv8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilehi { p0.h, p1.h }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilehi.x2.nxv8i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -123,7 +113,6 @@ define @whilehi_x2_nxv4i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilehi_x2_nxv4i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilehi { p0.s, p1.s }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilehi.x2.nxv4i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -134,7 +123,6 @@ define @whilehi_x2_nxv2i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilehi_x2_nxv2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilehi { p0.d, p1.d }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilehi.x2.nxv2i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -148,7 +136,6 @@ define @whilehs_x2_nxv16i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilehs_x2_nxv16i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilehs { p0.b, p1.b }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } 
@llvm.aarch64.sve.whilehs.x2.nxv16i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -159,7 +146,6 @@ define @whilehs_x2_nxv8i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilehs_x2_nxv8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilehs { p0.h, p1.h }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilehs.x2.nxv8i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -170,7 +156,6 @@ define @whilehs_x2_nxv4i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilehs_x2_nxv4i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilehs { p0.s, p1.s }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilehs.x2.nxv4i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -181,7 +166,6 @@ define @whilehs_x2_nxv2i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilehs_x2_nxv2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilehs { p0.d, p1.d }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilehs.x2.nxv2i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -195,7 +179,6 @@ define @whilele_x2_nxv16i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilele_x2_nxv16i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilele { p0.b, p1.b }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilele.x2.nxv16i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -206,7 +189,6 @@ define @whilele_x2_nxv8i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilele_x2_nxv8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilele { p0.h, p1.h }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilele.x2.nxv8i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -217,7 +199,6 @@ define @whilele_x2_nxv4i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilele_x2_nxv4i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilele { p0.s, p1.s }, x0, x1 -; 
CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilele.x2.nxv4i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -228,7 +209,6 @@ define @whilele_x2_nxv2i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilele_x2_nxv2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilele { p0.d, p1.d }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilele.x2.nxv2i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -242,7 +222,6 @@ define @whilelo_x2_nxv16i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilelo_x2_nxv16i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilelo { p0.b, p1.b }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilelo.x2.nxv16i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -253,7 +232,6 @@ define @whilelo_x2_nxv8i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilelo_x2_nxv8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilelo { p0.h, p1.h }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilelo.x2.nxv8i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -264,7 +242,6 @@ define @whilelo_x2_nxv4i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilelo_x2_nxv4i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilelo { p0.s, p1.s }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilelo.x2.nxv4i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -275,7 +252,6 @@ define @whilelo_x2_nxv2i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilelo_x2_nxv2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilelo { p0.d, p1.d }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilelo.x2.nxv2i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -289,7 +265,6 @@ define @whilels_x2_nxv16i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: 
whilels_x2_nxv16i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilels { p0.b, p1.b }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilels.x2.nxv16i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -300,7 +275,6 @@ define @whilels_x2_nxv8i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilels_x2_nxv8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilels { p0.h, p1.h }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilels.x2.nxv8i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -311,7 +285,6 @@ define @whilels_x2_nxv4i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilels_x2_nxv4i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilels { p0.s, p1.s }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilels.x2.nxv4i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -322,7 +295,6 @@ define @whilels_x2_nxv2i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilels_x2_nxv2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilels { p0.d, p1.d }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilels.x2.nxv2i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -336,7 +308,6 @@ define @whilelt_x2_nxv16i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilelt_x2_nxv16i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilelt { p0.b, p1.b }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilelt.x2.nxv16i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -347,7 +318,6 @@ define @whilelt_x2_nxv8i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilelt_x2_nxv8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilelt { p0.h, p1.h }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilelt.x2.nxv8i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 
@@ -358,7 +328,6 @@ define @whilelt_x2_nxv4i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilelt_x2_nxv4i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilelt { p0.s, p1.s }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilelt.x2.nxv4i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -369,7 +338,6 @@ define @whilelt_x2_nxv2i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilelt_x2_nxv2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilelt { p0.d, p1.d }, x0, x1 -; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilelt.x2.nxv2i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 diff --git a/llvm/test/CodeGen/AArch64/swift-error-unreachable-use.ll b/llvm/test/CodeGen/AArch64/swift-error-unreachable-use.ll index d3abc27a53dadc..77415381709d18 100644 --- a/llvm/test/CodeGen/AArch64/swift-error-unreachable-use.ll +++ b/llvm/test/CodeGen/AArch64/swift-error-unreachable-use.ll @@ -9,6 +9,7 @@ define void @"func"(ptr swifterror %0) #0 { ; CHECK-NEXT: b {{\.?}}LBB0_2 ; CHECK-NEXT: {{\.?}}LBB0_1:{{.*}}%thirtythree ; CHECK-NEXT: {{.*}}=>This Inner Loop Header: Depth=1 +; CHECK-NEXT: {{.*}}implicit-def: $x0 ; CHECK-NEXT: b {{\.?}}LBB0_1 ; CHECK-NEXT: {{\.?}}LBB0_2:{{.*}}%thirtyeight ; CHECK-NEXT: b {{\.?}}LBB0_3 diff --git a/llvm/test/CodeGen/AArch64/tbl-loops.ll b/llvm/test/CodeGen/AArch64/tbl-loops.ll index 0ad99008655184..dd5ce449bb1d2a 100644 --- a/llvm/test/CodeGen/AArch64/tbl-loops.ll +++ b/llvm/test/CodeGen/AArch64/tbl-loops.ll @@ -203,16 +203,17 @@ define void @loop2(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-NEXT: fcmgt v3.4s, v1.4s, v0.4s ; CHECK-NEXT: fcmgt v4.4s, v2.4s, v0.4s ; CHECK-NEXT: fcmlt v5.4s, v1.4s, #0.0 -; CHECK-NEXT: bsl v3.16b, v0.16b, v1.16b -; CHECK-NEXT: bsl v4.16b, v0.16b, v2.16b -; CHECK-NEXT: fcmlt v1.4s, v2.4s, #0.0 -; CHECK-NEXT: bic v2.16b, v3.16b, v5.16b -; CHECK-NEXT: bic v1.16b, v4.16b, v1.16b -; 
CHECK-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-NEXT: bit v1.16b, v0.16b, v3.16b +; CHECK-NEXT: mov v3.16b, v4.16b +; CHECK-NEXT: bsl v3.16b, v0.16b, v2.16b +; CHECK-NEXT: fcmlt v2.4s, v2.4s, #0.0 +; CHECK-NEXT: bic v1.16b, v1.16b, v5.16b +; CHECK-NEXT: bic v2.16b, v3.16b, v2.16b ; CHECK-NEXT: fcvtzs v1.4s, v1.4s -; CHECK-NEXT: xtn v2.4h, v2.4s +; CHECK-NEXT: fcvtzs v2.4s, v2.4s ; CHECK-NEXT: xtn v1.4h, v1.4s -; CHECK-NEXT: trn1 v1.8b, v2.8b, v1.8b +; CHECK-NEXT: xtn v2.4h, v2.4s +; CHECK-NEXT: trn1 v1.8b, v1.8b, v2.8b ; CHECK-NEXT: str d1, [x0], #8 ; CHECK-NEXT: b.ne .LBB1_9 ; CHECK-NEXT: // %bb.10: // %middle.block @@ -352,21 +353,22 @@ define void @loop3(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-NEXT: fcmgt v6.4s, v3.4s, v0.4s ; CHECK-NEXT: fcmgt v7.4s, v4.4s, v0.4s ; CHECK-NEXT: fcmlt v16.4s, v2.4s, #0.0 -; CHECK-NEXT: fcmlt v17.4s, v3.4s, #0.0 -; CHECK-NEXT: bsl v5.16b, v0.16b, v2.16b -; CHECK-NEXT: bsl v6.16b, v0.16b, v3.16b -; CHECK-NEXT: bsl v7.16b, v0.16b, v4.16b -; CHECK-NEXT: fcmlt v2.4s, v4.4s, #0.0 -; CHECK-NEXT: bic v3.16b, v5.16b, v16.16b -; CHECK-NEXT: bic v4.16b, v6.16b, v17.16b -; CHECK-NEXT: bic v2.16b, v7.16b, v2.16b +; CHECK-NEXT: bit v2.16b, v0.16b, v5.16b +; CHECK-NEXT: fcmlt v5.4s, v3.4s, #0.0 +; CHECK-NEXT: bit v3.16b, v0.16b, v6.16b +; CHECK-NEXT: mov v6.16b, v7.16b +; CHECK-NEXT: bsl v6.16b, v0.16b, v4.16b +; CHECK-NEXT: fcmlt v4.4s, v4.4s, #0.0 +; CHECK-NEXT: bic v2.16b, v2.16b, v16.16b +; CHECK-NEXT: bic v3.16b, v3.16b, v5.16b +; CHECK-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-NEXT: bic v4.16b, v6.16b, v4.16b ; CHECK-NEXT: fcvtzs v3.4s, v3.4s ; CHECK-NEXT: fcvtzs v4.4s, v4.4s -; CHECK-NEXT: fcvtzs v2.4s, v2.4s -; CHECK-NEXT: xtn v5.4h, v3.4s -; CHECK-NEXT: xtn v6.4h, v4.4s -; CHECK-NEXT: xtn v7.4h, v2.4s -; CHECK-NEXT: tbl v2.16b, { v5.16b, v6.16b, v7.16b }, v1.16b +; CHECK-NEXT: xtn v2.4h, v2.4s +; CHECK-NEXT: xtn v3.4h, v3.4s +; CHECK-NEXT: xtn v4.4h, v4.4s +; CHECK-NEXT: tbl v2.16b, { v2.16b, v3.16b, v4.16b }, v1.16b ; 
CHECK-NEXT: st1 { v2.s }[2], [x13] ; CHECK-NEXT: str d2, [x0], #12 ; CHECK-NEXT: b.ne .LBB2_4 @@ -605,26 +607,27 @@ define void @loop4(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-NEXT: fcmgt v16.4s, v4.4s, v0.4s ; CHECK-NEXT: fcmgt v17.4s, v5.4s, v0.4s ; CHECK-NEXT: fcmlt v18.4s, v2.4s, #0.0 -; CHECK-NEXT: fcmlt v19.4s, v3.4s, #0.0 -; CHECK-NEXT: fcmlt v20.4s, v4.4s, #0.0 -; CHECK-NEXT: bsl v6.16b, v0.16b, v2.16b -; CHECK-NEXT: bsl v7.16b, v0.16b, v3.16b -; CHECK-NEXT: bsl v16.16b, v0.16b, v4.16b -; CHECK-NEXT: bsl v17.16b, v0.16b, v5.16b -; CHECK-NEXT: fcmlt v2.4s, v5.4s, #0.0 -; CHECK-NEXT: bic v3.16b, v6.16b, v18.16b -; CHECK-NEXT: bic v4.16b, v7.16b, v19.16b -; CHECK-NEXT: bic v5.16b, v16.16b, v20.16b -; CHECK-NEXT: bic v2.16b, v17.16b, v2.16b +; CHECK-NEXT: bit v2.16b, v0.16b, v6.16b +; CHECK-NEXT: fcmlt v6.4s, v3.4s, #0.0 +; CHECK-NEXT: bit v3.16b, v0.16b, v7.16b +; CHECK-NEXT: fcmlt v7.4s, v4.4s, #0.0 +; CHECK-NEXT: bit v4.16b, v0.16b, v16.16b +; CHECK-NEXT: mov v16.16b, v17.16b +; CHECK-NEXT: bsl v16.16b, v0.16b, v5.16b +; CHECK-NEXT: fcmlt v5.4s, v5.4s, #0.0 +; CHECK-NEXT: bic v2.16b, v2.16b, v18.16b +; CHECK-NEXT: bic v3.16b, v3.16b, v6.16b +; CHECK-NEXT: bic v4.16b, v4.16b, v7.16b +; CHECK-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-NEXT: bic v5.16b, v16.16b, v5.16b ; CHECK-NEXT: fcvtzs v3.4s, v3.4s ; CHECK-NEXT: fcvtzs v4.4s, v4.4s ; CHECK-NEXT: fcvtzs v5.4s, v5.4s -; CHECK-NEXT: fcvtzs v2.4s, v2.4s -; CHECK-NEXT: xtn v16.4h, v3.4s -; CHECK-NEXT: xtn v17.4h, v4.4s -; CHECK-NEXT: xtn v18.4h, v5.4s -; CHECK-NEXT: xtn v19.4h, v2.4s -; CHECK-NEXT: tbl v2.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b +; CHECK-NEXT: xtn v2.4h, v2.4s +; CHECK-NEXT: xtn v3.4h, v3.4s +; CHECK-NEXT: xtn v4.4h, v4.4s +; CHECK-NEXT: xtn v5.4h, v5.4s +; CHECK-NEXT: tbl v2.16b, { v2.16b, v3.16b, v4.16b, v5.16b }, v1.16b ; CHECK-NEXT: str q2, [x0], #16 ; CHECK-NEXT: b.ne .LBB3_9 ; CHECK-NEXT: // %bb.10: // %middle.block diff --git 
a/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll b/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll index c4a58ba12dc6be..575a4b2e6e0fbe 100644 --- a/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll +++ b/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll @@ -74,8 +74,8 @@ define void @trunc_v16i32_to_v16i8_in_loop(ptr %A, ptr %dst) { ; CHECK-BE-NEXT: add x10, x9, #16 ; CHECK-BE-NEXT: ld1 { v1.16b }, [x9] ; CHECK-BE-NEXT: add x11, x9, #32 -; CHECK-BE-NEXT: ld1 { v2.16b }, [x10] ; CHECK-BE-NEXT: add x9, x9, #48 +; CHECK-BE-NEXT: ld1 { v2.16b }, [x10] ; CHECK-BE-NEXT: ld1 { v3.16b }, [x11] ; CHECK-BE-NEXT: ld1 { v4.16b }, [x9] ; CHECK-BE-NEXT: add x9, x1, x8, lsl #4 @@ -363,21 +363,21 @@ define void @trunc_v16i64_to_v16i8_in_loop(ptr %A, ptr %dst) { ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-BE-NEXT: add x9, x0, x8, lsl #7 ; CHECK-BE-NEXT: add x13, x9, #64 +; CHECK-BE-NEXT: add x10, x9, #112 +; CHECK-BE-NEXT: add x11, x9, #96 ; CHECK-BE-NEXT: add x12, x9, #80 ; CHECK-BE-NEXT: add x14, x9, #16 ; CHECK-BE-NEXT: ld1 { v1.16b }, [x9] ; CHECK-BE-NEXT: ld1 { v16.16b }, [x13] -; CHECK-BE-NEXT: add x11, x9, #96 ; CHECK-BE-NEXT: add x13, x9, #32 +; CHECK-BE-NEXT: add x9, x9, #48 ; CHECK-BE-NEXT: ld1 { v2.16b }, [x14] ; CHECK-BE-NEXT: ld1 { v17.16b }, [x12] -; CHECK-BE-NEXT: add x10, x9, #112 -; CHECK-BE-NEXT: add x9, x9, #48 ; CHECK-BE-NEXT: ld1 { v3.16b }, [x13] ; CHECK-BE-NEXT: ld1 { v18.16b }, [x11] ; CHECK-BE-NEXT: ld1 { v4.16b }, [x9] -; CHECK-BE-NEXT: add x9, x1, x8, lsl #4 ; CHECK-BE-NEXT: ld1 { v19.16b }, [x10] +; CHECK-BE-NEXT: add x9, x1, x8, lsl #4 ; CHECK-BE-NEXT: add x8, x8, #1 ; CHECK-BE-NEXT: cmp x8, #1000 ; CHECK-BE-NEXT: tbl v1.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b @@ -510,8 +510,8 @@ define void @trunc_v8i64_to_v8i8_in_loop(ptr %A, ptr %dst) { ; CHECK-BE-NEXT: add x10, x9, #16 ; CHECK-BE-NEXT: ld1 { v1.16b }, [x9] ; CHECK-BE-NEXT: add x11, x9, #32 -; CHECK-BE-NEXT: ld1 { v2.16b }, [x10] ; CHECK-BE-NEXT: add x9, x9, #48 +; CHECK-BE-NEXT: ld1 { v2.16b }, 
[x10] ; CHECK-BE-NEXT: ld1 { v3.16b }, [x11] ; CHECK-BE-NEXT: ld1 { v4.16b }, [x9] ; CHECK-BE-NEXT: add x9, x1, x8, lsl #3 diff --git a/llvm/test/CodeGen/AArch64/vldn_shuffle.ll b/llvm/test/CodeGen/AArch64/vldn_shuffle.ll index 3685e9cf85bd6e..e453d618325225 100644 --- a/llvm/test/CodeGen/AArch64/vldn_shuffle.ll +++ b/llvm/test/CodeGen/AArch64/vldn_shuffle.ll @@ -10,9 +10,9 @@ define void @vld2(ptr nocapture readonly %pSrc, ptr noalias nocapture %pDst, i32 ; CHECK-NEXT: .LBB0_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ld2 { v0.4s, v1.4s }, [x0], #32 -; CHECK-NEXT: fmul v2.4s, v0.4s, v0.4s -; CHECK-NEXT: fmla v2.4s, v1.4s, v1.4s -; CHECK-NEXT: str q2, [x1, x8] +; CHECK-NEXT: fmul v0.4s, v0.4s, v0.4s +; CHECK-NEXT: fmla v0.4s, v1.4s, v1.4s +; CHECK-NEXT: str q0, [x1, x8] ; CHECK-NEXT: add x8, x8, #16 ; CHECK-NEXT: cmp x8, #1, lsl #12 // =4096 ; CHECK-NEXT: b.ne .LBB0_1 @@ -50,10 +50,10 @@ define void @vld3(ptr nocapture readonly %pSrc, ptr noalias nocapture %pDst, i32 ; CHECK-NEXT: .LBB1_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ld3 { v0.4s, v1.4s, v2.4s }, [x0], #48 -; CHECK-NEXT: fmul v3.4s, v0.4s, v0.4s -; CHECK-NEXT: fmla v3.4s, v1.4s, v1.4s -; CHECK-NEXT: fmla v3.4s, v2.4s, v2.4s -; CHECK-NEXT: str q3, [x1, x8] +; CHECK-NEXT: fmul v0.4s, v0.4s, v0.4s +; CHECK-NEXT: fmla v0.4s, v1.4s, v1.4s +; CHECK-NEXT: fmla v0.4s, v2.4s, v2.4s +; CHECK-NEXT: str q0, [x1, x8] ; CHECK-NEXT: add x8, x8, #16 ; CHECK-NEXT: cmp x8, #1, lsl #12 // =4096 ; CHECK-NEXT: b.ne .LBB1_1 @@ -97,11 +97,11 @@ define void @vld4(ptr nocapture readonly %pSrc, ptr noalias nocapture %pDst, i32 ; CHECK-NEXT: add x9, x1, x8 ; CHECK-NEXT: add x8, x8, #32 ; CHECK-NEXT: cmp x8, #2, lsl #12 // =8192 -; CHECK-NEXT: fmul v4.4s, v0.4s, v0.4s -; CHECK-NEXT: fmla v4.4s, v1.4s, v1.4s -; CHECK-NEXT: fmul v5.4s, v2.4s, v2.4s -; CHECK-NEXT: fmla v5.4s, v3.4s, v3.4s -; CHECK-NEXT: st2 { v4.4s, v5.4s }, [x9] +; CHECK-NEXT: fmul 
v0.4s, v0.4s, v0.4s +; CHECK-NEXT: fmla v0.4s, v1.4s, v1.4s +; CHECK-NEXT: fmul v1.4s, v2.4s, v2.4s +; CHECK-NEXT: fmla v1.4s, v3.4s, v3.4s +; CHECK-NEXT: st2 { v0.4s, v1.4s }, [x9] ; CHECK-NEXT: b.ne .LBB2_1 ; CHECK-NEXT: // %bb.2: // %while.end ; CHECK-NEXT: ret @@ -149,9 +149,9 @@ define void @twosrc(ptr nocapture readonly %pSrc, ptr nocapture readonly %pSrc2, ; CHECK-NEXT: ld2 { v0.4s, v1.4s }, [x9] ; CHECK-NEXT: cmp x8, #2, lsl #12 // =8192 ; CHECK-NEXT: ld2 { v2.4s, v3.4s }, [x10] -; CHECK-NEXT: fmul v4.4s, v2.4s, v0.4s -; CHECK-NEXT: fmla v4.4s, v1.4s, v3.4s -; CHECK-NEXT: str q4, [x2], #16 +; CHECK-NEXT: fmul v0.4s, v2.4s, v0.4s +; CHECK-NEXT: fmla v0.4s, v1.4s, v3.4s +; CHECK-NEXT: str q0, [x2], #16 ; CHECK-NEXT: b.ne .LBB3_1 ; CHECK-NEXT: // %bb.2: // %while.end ; CHECK-NEXT: ret @@ -190,9 +190,9 @@ define void @vld2_multiuse(ptr nocapture readonly %pSrc, ptr noalias nocapture % ; CHECK-NEXT: .LBB4_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ld2 { v0.4s, v1.4s }, [x0], #32 -; CHECK-NEXT: fmul v2.4s, v0.4s, v0.4s -; CHECK-NEXT: fmla v2.4s, v1.4s, v1.4s -; CHECK-NEXT: str q2, [x1, x8] +; CHECK-NEXT: fmul v0.4s, v0.4s, v0.4s +; CHECK-NEXT: fmla v0.4s, v1.4s, v1.4s +; CHECK-NEXT: str q0, [x1, x8] ; CHECK-NEXT: add x8, x8, #16 ; CHECK-NEXT: cmp x8, #1, lsl #12 // =4096 ; CHECK-NEXT: b.ne .LBB4_1 @@ -229,10 +229,10 @@ define void @vld3_multiuse(ptr nocapture readonly %pSrc, ptr noalias nocapture % ; CHECK-NEXT: .LBB5_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ld3 { v0.4s, v1.4s, v2.4s }, [x0], #48 -; CHECK-NEXT: fmul v3.4s, v0.4s, v0.4s -; CHECK-NEXT: fmla v3.4s, v1.4s, v1.4s -; CHECK-NEXT: fmla v3.4s, v2.4s, v2.4s -; CHECK-NEXT: str q3, [x1, x8] +; CHECK-NEXT: fmul v0.4s, v0.4s, v0.4s +; CHECK-NEXT: fmla v0.4s, v1.4s, v1.4s +; CHECK-NEXT: fmla v0.4s, v2.4s, v2.4s +; CHECK-NEXT: str q0, [x1, x8] ; CHECK-NEXT: add x8, x8, #16 ; CHECK-NEXT: cmp x8, #1, lsl #12 // =4096 ; CHECK-NEXT: 
b.ne .LBB5_1 @@ -274,11 +274,11 @@ define void @vld4_multiuse(ptr nocapture readonly %pSrc, ptr noalias nocapture % ; CHECK-NEXT: add x9, x1, x8 ; CHECK-NEXT: add x8, x8, #32 ; CHECK-NEXT: cmp x8, #2, lsl #12 // =8192 -; CHECK-NEXT: fmul v4.4s, v0.4s, v0.4s -; CHECK-NEXT: fmla v4.4s, v1.4s, v1.4s -; CHECK-NEXT: fmul v5.4s, v2.4s, v2.4s -; CHECK-NEXT: fmla v5.4s, v3.4s, v3.4s -; CHECK-NEXT: st2 { v4.4s, v5.4s }, [x9] +; CHECK-NEXT: fmul v0.4s, v0.4s, v0.4s +; CHECK-NEXT: fmla v0.4s, v1.4s, v1.4s +; CHECK-NEXT: fmul v1.4s, v2.4s, v2.4s +; CHECK-NEXT: fmla v1.4s, v3.4s, v3.4s +; CHECK-NEXT: st2 { v0.4s, v1.4s }, [x9] ; CHECK-NEXT: b.ne .LBB6_1 ; CHECK-NEXT: // %bb.2: // %while.end ; CHECK-NEXT: ret @@ -369,16 +369,16 @@ define void @transpose_s16_8x8_simpler2(ptr nocapture noundef %a) { ; CHECK: .Lfunc_begin8: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: ldp q0, q2, [x0] -; CHECK-NEXT: ldp q3, q4, [x0, #64] -; CHECK-NEXT: ldp q5, q6, [x0, #32] -; CHECK-NEXT: ldp q7, q16, [x0, #96] -; CHECK-NEXT: mov v0.h[5], v2.h[4] -; CHECK-NEXT: zip1 v2.8h, v3.8h, v4.8h -; CHECK-NEXT: zip1 v3.8h, v5.8h, v6.8h -; CHECK-NEXT: mov v7.h[5], v16.h[4] -; CHECK-NEXT: mov v0.s[1], v2.s[0] -; CHECK-NEXT: uzp1 v1.4s, v3.4s, v7.4s +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ldp q2, q3, [x0, #64] +; CHECK-NEXT: ldp q4, q5, [x0, #32] +; CHECK-NEXT: ldp q6, q7, [x0, #96] +; CHECK-NEXT: mov v0.h[5], v1.h[4] +; CHECK-NEXT: zip1 v1.8h, v2.8h, v3.8h +; CHECK-NEXT: zip1 v2.8h, v4.8h, v5.8h +; CHECK-NEXT: mov v6.h[5], v7.h[4] +; CHECK-NEXT: mov v0.s[1], v1.s[0] +; CHECK-NEXT: uzp1 v1.4s, v2.4s, v6.4s ; CHECK-NEXT: zip2 v2.4s, v0.4s, v1.4s ; CHECK-NEXT: st2 { v0.2s, v1.2s }, [x0] ; CHECK-NEXT: str q2, [x0, #64] @@ -424,23 +424,23 @@ define void @transpose_s16_8x8(ptr nocapture noundef %0, ptr nocapture noundef % ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: ldr q2, [x2] ; CHECK-NEXT: ldr q3, [x4] ; CHECK-NEXT: ldr q4, 
[x5] -; CHECK-NEXT: ldr q2, [x2] ; CHECK-NEXT: ldr q5, [x3] ; CHECK-NEXT: trn1 v16.8h, v0.8h, v1.8h ; CHECK-NEXT: trn2 v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ldr q6, [x6] ; CHECK-NEXT: ldr q7, [x7] ; CHECK-NEXT: trn1 v17.8h, v3.8h, v4.8h -; CHECK-NEXT: trn2 v1.8h, v3.8h, v4.8h ; CHECK-NEXT: trn1 v18.8h, v2.8h, v5.8h +; CHECK-NEXT: trn2 v1.8h, v3.8h, v4.8h ; CHECK-NEXT: trn2 v2.8h, v2.8h, v5.8h ; CHECK-NEXT: trn1 v19.8h, v6.8h, v7.8h ; CHECK-NEXT: trn2 v3.8h, v6.8h, v7.8h ; CHECK-NEXT: trn1 v4.4s, v16.4s, v17.4s -; CHECK-NEXT: trn1 v6.4s, v0.4s, v1.4s ; CHECK-NEXT: trn2 v16.4s, v16.4s, v17.4s +; CHECK-NEXT: trn1 v6.4s, v0.4s, v1.4s ; CHECK-NEXT: trn2 v0.4s, v0.4s, v1.4s ; CHECK-NEXT: trn1 v5.4s, v18.4s, v19.4s ; CHECK-NEXT: trn1 v7.4s, v2.4s, v3.4s @@ -668,11 +668,11 @@ define void @store_factor3(ptr %ptr, <4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2 ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ext v3.16b, v0.16b, v1.16b, #12 -; CHECK-NEXT: ext v6.16b, v1.16b, v2.16b, #12 +; CHECK-NEXT: ext v4.16b, v1.16b, v2.16b, #12 ; CHECK-NEXT: zip2 v3.4s, v0.4s, v3.4s +; CHECK-NEXT: zip2 v4.4s, v1.4s, v4.4s ; CHECK-NEXT: mov v3.s[0], v0.s[0] ; CHECK-NEXT: ext v0.16b, v2.16b, v0.16b, #12 -; CHECK-NEXT: zip2 v4.4s, v1.4s, v6.4s ; CHECK-NEXT: mov v4.s[0], v1.s[0] ; CHECK-NEXT: zip2 v5.4s, v2.4s, v0.4s ; CHECK-NEXT: mov v5.s[0], v2.s[0] From cf4c3d98430de7e2a19ba42714db78810d04d4e8 Mon Sep 17 00:00:00 2001 From: Vikram Hegde <115221833+vikramRH@users.noreply.github.com> Date: Fri, 14 Jun 2024 17:45:28 +0530 Subject: [PATCH 085/155] [AMDGPU] Extend llvm.amdgcn.set.inactive intrinsic to support Reg32/Reg64 types (#94457) Missed this while handling other patches. Any comments/concerns ? 
--- llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 2 +- llvm/lib/Target/AMDGPU/SIInstructions.td | 18 +- llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 2 +- .../GlobalISel/llvm.amdgcn.set.inactive.ll | 320 +++++++++++++++++ .../AMDGPU/llvm.amdgcn.set.inactive.ll | 322 ++++++++++++++++++ 5 files changed, 656 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td index 4db98e1b31d9ce..e6b69b39911a9f 100644 --- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td @@ -2252,7 +2252,7 @@ def int_amdgcn_strict_wqm : Intrinsic<[llvm_any_ty], // program ever uses WQM, then the instruction and the first source will be // computed in WQM. def int_amdgcn_set_inactive : - Intrinsic<[llvm_anyint_ty], + Intrinsic<[llvm_any_ty], [LLVMMatchType<0>, // value to be copied LLVMMatchType<0>], // value for the inactive lanes to take [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>; diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index 89c96791bfd1c3..ba31027da92e8a 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -237,16 +237,22 @@ def FPTRUNC_DOWNWARD_PSEUDO : VPseudoInstSI <(outs VGPR_32:$vdst), // restoring it after we're done. 
let Defs = [SCC], isConvergent = 1 in { def V_SET_INACTIVE_B32 : VPseudoInstSI <(outs VGPR_32:$vdst), - (ins VSrc_b32: $src, VSrc_b32:$inactive), - [(set i32:$vdst, (int_amdgcn_set_inactive i32:$src, i32:$inactive))]> { -} + (ins VSrc_b32: $src, VSrc_b32:$inactive), []>; def V_SET_INACTIVE_B64 : VPseudoInstSI <(outs VReg_64:$vdst), - (ins VSrc_b64: $src, VSrc_b64:$inactive), - [(set i64:$vdst, (int_amdgcn_set_inactive i64:$src, i64:$inactive))]> { -} + (ins VSrc_b64: $src, VSrc_b64:$inactive), []>; } // End Defs = [SCC] +foreach vt = Reg32Types.types in { +def : GCNPat <(vt (int_amdgcn_set_inactive vt:$src, vt:$inactive)), + (V_SET_INACTIVE_B32 VSrc_b32:$src, VSrc_b32:$inactive)>; +} + +foreach vt = Reg64Types.types in { +def : GCNPat <(vt (int_amdgcn_set_inactive vt:$src, vt:$inactive)), + (V_SET_INACTIVE_B64 VSrc_b64:$src, VSrc_b64:$inactive)>; +} + def : GCNPat<(i32 (int_amdgcn_set_inactive_chain_arg i32:$src, i32:$inactive)), (V_SET_INACTIVE_B32 VGPR_32:$src, VGPR_32:$inactive)>; diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index caac7126068ef3..3666976cf82f89 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -586,7 +586,7 @@ class RegisterTypes reg_types> { def Reg16Types : RegisterTypes<[i16, f16, bf16]>; def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, v2bf16, p2, p3, p5, p6]>; -def Reg64Types : RegisterTypes<[i64, f64, v2i32, v2f32, p0]>; +def Reg64Types : RegisterTypes<[i64, f64, v2i32, v2f32, v4i16, v4f16, v4bf16, p0]>; let HasVGPR = 1 in { // VOP3 and VINTERP can access 256 lo and 256 hi registers. 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll index cbee039df7fd0b..0c60be9d94591a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll @@ -93,6 +93,326 @@ define amdgpu_kernel void @set_inactive_scc(ptr addrspace(1) %out, i32 %in, <4 x ret void } +define amdgpu_kernel void @set_inactive_f32(ptr addrspace(1) %out, float %in) { +; GCN-LABEL: set_inactive_f32: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dword s3, s[0:1], 0x2c +; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GCN-NEXT: v_mov_b32_e32 v1, 0x40400000 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s3 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v0, v1 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GCN-NEXT: s_endpgm + %tmp = call float @llvm.amdgcn.set.inactive.f32(float %in, float 3.0) #0 + store float %tmp, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @set_inactive_f64(ptr addrspace(1) %out, double %in) { +; GCN-LABEL: set_inactive_f64: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCN-NEXT: s_mov_b32 s4, 0xcccccccd +; GCN-NEXT: s_mov_b32 s5, 0x4010cccc +; GCN-NEXT: v_mov_b32_e32 v2, s4 +; GCN-NEXT: v_mov_b32_e32 v3, s5 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s2 +; GCN-NEXT: v_mov_b32_e32 v1, s3 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v0, v2 +; GCN-NEXT: v_mov_b32_e32 v1, v3 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GCN-NEXT: s_endpgm + %tmp = call double @llvm.amdgcn.set.inactive.f64(double %in, double 4.2) #0 + store double %tmp, ptr addrspace(1) %out + ret void +} + +define 
amdgpu_kernel void @set_inactive_v2i16(ptr addrspace(1) %out, <2 x i16> %in) { +; GCN-LABEL: set_inactive_v2i16: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dword s3, s[0:1], 0x2c +; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GCN-NEXT: v_mov_b32_e32 v1, 0x10001 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s3 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v0, v1 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GCN-NEXT: s_endpgm + %tmp = call <2 x i16> @llvm.amdgcn.set.inactive.v2i16(<2 x i16> %in, <2 x i16> ) #0 + store <2 x i16> %tmp, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @set_inactive_v2f16(ptr addrspace(1) %out, <2 x half> %in) { +; GCN-LABEL: set_inactive_v2f16: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dword s3, s[0:1], 0x2c +; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GCN-NEXT: v_mov_b32_e32 v1, 0x3c003c00 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s3 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v0, v1 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GCN-NEXT: s_endpgm + %tmp = call <2 x half> @llvm.amdgcn.set.inactive.v2f16(<2 x half> %in, <2 x half> ) #0 + store <2 x half> %tmp, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @set_inactive_v2i32(ptr addrspace(1) %out, <2 x i32> %in) { +; GCN-LABEL: set_inactive_v2i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCN-NEXT: s_mov_b32 s4, 1 +; GCN-NEXT: s_mov_b32 s5, s4 +; GCN-NEXT: v_mov_b32_e32 v2, s4 +; GCN-NEXT: v_mov_b32_e32 v3, s5 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s2 +; GCN-NEXT: v_mov_b32_e32 v1, s3 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v0, v2 +; GCN-NEXT: v_mov_b32_e32 v1, v3 +; GCN-NEXT: s_not_b64 exec, exec 
+; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GCN-NEXT: s_endpgm + %tmp = call <2 x i32> @llvm.amdgcn.set.inactive.v2i32(<2 x i32> %in, <2 x i32> ) #0 + store <2 x i32> %tmp, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @set_inactive_v2f32(ptr addrspace(1) %out, <2 x float> %in) { +; GCN-LABEL: set_inactive_v2f32: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCN-NEXT: s_mov_b32 s4, 1.0 +; GCN-NEXT: s_mov_b32 s5, s4 +; GCN-NEXT: v_mov_b32_e32 v2, s4 +; GCN-NEXT: v_mov_b32_e32 v3, s5 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s2 +; GCN-NEXT: v_mov_b32_e32 v1, s3 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v0, v2 +; GCN-NEXT: v_mov_b32_e32 v1, v3 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GCN-NEXT: s_endpgm + %tmp = call <2 x float> @llvm.amdgcn.set.inactive.v2f32(<2 x float> %in, <2 x float> ) #0 + store <2 x float> %tmp, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @set_inactive_v2bf16(ptr addrspace(1) %out, <2 x bfloat> %in) { +; GCN-LABEL: set_inactive_v2bf16: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dword s3, s[0:1], 0x2c +; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GCN-NEXT: v_mov_b32_e32 v1, 0x3f803f80 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s3 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v0, v1 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GCN-NEXT: s_endpgm + %tmp = call <2 x bfloat> @llvm.amdgcn.set.inactive.v2bf16(<2 x bfloat> %in, <2 x bfloat> ) #0 + store <2 x bfloat> %tmp, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @set_inactive_v4i16(ptr addrspace(1) %out, <4 x i16> %in) { +; GCN-LABEL: 
set_inactive_v4i16: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCN-NEXT: s_mov_b32 s4, 0x10001 +; GCN-NEXT: s_mov_b32 s5, s4 +; GCN-NEXT: v_mov_b32_e32 v2, s4 +; GCN-NEXT: v_mov_b32_e32 v3, s5 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s2 +; GCN-NEXT: v_mov_b32_e32 v1, s3 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v0, v2 +; GCN-NEXT: v_mov_b32_e32 v1, v3 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GCN-NEXT: s_endpgm + %tmp = call <4 x i16> @llvm.amdgcn.set.inactive.v4i16(<4 x i16> %in, <4 x i16> ) #0 + store <4 x i16> %tmp, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @set_inactive_v4f16(ptr addrspace(1) %out, <4 x half> %in) { +; GCN-LABEL: set_inactive_v4f16: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCN-NEXT: s_mov_b32 s4, 0x3c003c00 +; GCN-NEXT: s_mov_b32 s5, s4 +; GCN-NEXT: v_mov_b32_e32 v2, s4 +; GCN-NEXT: v_mov_b32_e32 v3, s5 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s2 +; GCN-NEXT: v_mov_b32_e32 v1, s3 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v0, v2 +; GCN-NEXT: v_mov_b32_e32 v1, v3 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GCN-NEXT: s_endpgm + %tmp = call <4 x half> @llvm.amdgcn.set.inactive.v4f16(<4 x half> %in, <4 x half> ) #0 + store <4 x half> %tmp, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @set_inactive_v4bf16(ptr addrspace(1) %out, <4 x bfloat> %in) { +; GCN-LABEL: set_inactive_v4bf16: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCN-NEXT: s_mov_b32 s4, 0x3f803f80 +; GCN-NEXT: s_mov_b32 s5, s4 +; GCN-NEXT: v_mov_b32_e32 v2, s4 +; GCN-NEXT: v_mov_b32_e32 v3, s5 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, 
s2 +; GCN-NEXT: v_mov_b32_e32 v1, s3 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v0, v2 +; GCN-NEXT: v_mov_b32_e32 v1, v3 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GCN-NEXT: s_endpgm + %tmp = call <4 x bfloat> @llvm.amdgcn.set.inactive.v4bf16(<4 x bfloat> %in, <4 x bfloat> ) #0 + store <4 x bfloat> %tmp, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @set_inactive_p0(ptr addrspace(1) %out, ptr %in) { +; GCN-LABEL: set_inactive_p0: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s2 +; GCN-NEXT: v_mov_b32_e32 v1, s3 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v0, 0 +; GCN-NEXT: v_mov_b32_e32 v1, 0 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GCN-NEXT: s_endpgm + %tmp = call ptr @llvm.amdgcn.set.inactive.p0(ptr %in, ptr null) #0 + store ptr %tmp, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @set_inactive_p2(ptr addrspace(1) %out, ptr addrspace(2) %in) { +; GCN-LABEL: set_inactive_p2: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dword s3, s[0:1], 0x2c +; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s3 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v0, 0 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GCN-NEXT: s_endpgm + %tmp = call ptr addrspace(2) @llvm.amdgcn.set.inactive.p2(ptr addrspace(2) %in, ptr addrspace(2) null) #0 + store ptr addrspace(2) %tmp, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @set_inactive_p3(ptr addrspace(1) %out, ptr addrspace(3) %in) { +; GCN-LABEL: set_inactive_p3: +; GCN: 
; %bb.0: +; GCN-NEXT: s_load_dword s3, s[0:1], 0x2c +; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s3 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v0, 0 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GCN-NEXT: s_endpgm + %tmp = call ptr addrspace(3) @llvm.amdgcn.set.inactive.p3(ptr addrspace(3) %in, ptr addrspace(3) null) #0 + store ptr addrspace(3) %tmp, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @set_inactive_p5(ptr addrspace(1) %out, ptr addrspace(5) %in) { +; GCN-LABEL: set_inactive_p5: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dword s3, s[0:1], 0x2c +; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s3 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v0, 0 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GCN-NEXT: s_endpgm + %tmp = call ptr addrspace(5) @llvm.amdgcn.set.inactive.p5(ptr addrspace(5) %in, ptr addrspace(5) null) #0 + store ptr addrspace(5) %tmp, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @set_inactive_p6(ptr addrspace(1) %out, ptr addrspace(6) %in) { +; GCN-LABEL: set_inactive_p6: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dword s3, s[0:1], 0x2c +; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s3 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v0, 0 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GCN-NEXT: s_endpgm + %tmp = call ptr addrspace(6) @llvm.amdgcn.set.inactive.p6(ptr addrspace(6) %in, ptr addrspace(6) null) #0 + store ptr addrspace(6) %tmp, ptr addrspace(1) %out + 
ret void +} + declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #0 declare i64 @llvm.amdgcn.set.inactive.i64(i64, i64) #0 declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll index 8302af7450ed9d..fc33206845a713 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll @@ -124,6 +124,328 @@ define amdgpu_kernel void @set_inactive_scc(ptr addrspace(1) %out, i32 %in, <4 x ret void } +define amdgpu_kernel void @set_inactive_f32(ptr addrspace(1) %out, float %in) { +; GCN-LABEL: set_inactive_f32: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dword s4, s[0:1], 0x2c +; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_mov_b32 s5, 0x40400000 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v0, s5 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GCN-NEXT: s_endpgm + %tmp = call float @llvm.amdgcn.set.inactive.f32(float %in, float 3.0) #0 + store float %tmp, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @set_inactive_f64(ptr addrspace(1) %out, double %in) { +; GCN-LABEL: set_inactive_f64: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_mov_b32 s4, s0 +; GCN-NEXT: s_mov_b32 s5, s1 +; GCN-NEXT: s_mov_b32 s0, 0xcccccccd +; GCN-NEXT: v_mov_b32_e32 v0, s2 +; GCN-NEXT: s_mov_b32 s1, 0x4010cccc +; GCN-NEXT: v_mov_b32_e32 v1, s3 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: v_mov_b32_e32 v1, s1 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; GCN-NEXT: s_endpgm + %tmp = call 
double @llvm.amdgcn.set.inactive.f64(double %in, double 4.2) #0 + store double %tmp, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @set_inactive_v2i16(ptr addrspace(1) %out, <2 x i16> %in) { +; GCN-LABEL: set_inactive_v2i16: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dword s4, s[0:1], 0x2c +; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_mov_b32 s5, 0x10001 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v0, s5 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GCN-NEXT: s_endpgm + %tmp = call <2 x i16> @llvm.amdgcn.set.inactive.v2i16(<2 x i16> %in, <2 x i16> ) #0 + store <2 x i16> %tmp, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @set_inactive_v2f16(ptr addrspace(1) %out, <2 x half> %in) { +; GCN-LABEL: set_inactive_v2f16: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dword s4, s[0:1], 0x2c +; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_mov_b32 s5, 0x3c003c00 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v0, s5 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GCN-NEXT: s_endpgm + %tmp = call <2 x half> @llvm.amdgcn.set.inactive.v2f16(<2 x half> %in, <2 x half> ) #0 + store <2 x half> %tmp, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @set_inactive_v2i32(ptr addrspace(1) %out, <2 x i32> %in) { +; GCN-LABEL: set_inactive_v2i32: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCN-NEXT: s_mov_b32 s8, 1 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 +; GCN-NEXT: s_mov_b32 s9, s8 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s2 +; GCN-NEXT: s_mov_b32 s4, s0 +; GCN-NEXT: 
s_mov_b32 s5, s1 +; GCN-NEXT: v_mov_b32_e32 v1, s3 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v0, s8 +; GCN-NEXT: v_mov_b32_e32 v1, s9 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; GCN-NEXT: s_endpgm + %tmp = call <2 x i32> @llvm.amdgcn.set.inactive.v2i32(<2 x i32> %in, <2 x i32> ) #0 + store <2 x i32> %tmp, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @set_inactive_v2f32(ptr addrspace(1) %out, <2 x float> %in) { +; GCN-LABEL: set_inactive_v2f32: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCN-NEXT: s_mov_b32 s8, 1.0 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 +; GCN-NEXT: s_mov_b32 s9, s8 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s2 +; GCN-NEXT: s_mov_b32 s4, s0 +; GCN-NEXT: s_mov_b32 s5, s1 +; GCN-NEXT: v_mov_b32_e32 v1, s3 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v0, s8 +; GCN-NEXT: v_mov_b32_e32 v1, s9 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; GCN-NEXT: s_endpgm + %tmp = call <2 x float> @llvm.amdgcn.set.inactive.v2f32(<2 x float> %in, <2 x float> ) #0 + store <2 x float> %tmp, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @set_inactive_v2bf16(ptr addrspace(1) %out, <2 x bfloat> %in) { +; GCN-LABEL: set_inactive_v2bf16: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dword s4, s[0:1], 0x2c +; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_mov_b32 s5, 0x3f803f80 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v0, s5 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GCN-NEXT: s_endpgm + %tmp = call <2 x bfloat> @llvm.amdgcn.set.inactive.v2bf16(<2 x bfloat> %in, <2 x bfloat> ) #0 + store <2 x bfloat> %tmp, ptr addrspace(1) %out + ret void +} 
+ +define amdgpu_kernel void @set_inactive_v4i16(ptr addrspace(1) %out, <4 x i16> %in) { +; GCN-LABEL: set_inactive_v4i16: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCN-NEXT: s_mov_b32 s8, 0x10001 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 +; GCN-NEXT: s_mov_b32 s9, s8 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s2 +; GCN-NEXT: s_mov_b32 s4, s0 +; GCN-NEXT: s_mov_b32 s5, s1 +; GCN-NEXT: v_mov_b32_e32 v1, s3 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v0, s8 +; GCN-NEXT: v_mov_b32_e32 v1, s9 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; GCN-NEXT: s_endpgm + %tmp = call <4 x i16> @llvm.amdgcn.set.inactive.v4i16(<4 x i16> %in, <4 x i16> ) #0 + store <4 x i16> %tmp, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @set_inactive_v4f16(ptr addrspace(1) %out, <4 x half> %in) { +; GCN-LABEL: set_inactive_v4f16: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCN-NEXT: s_mov_b32 s8, 0x3c003c00 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 +; GCN-NEXT: s_mov_b32 s9, s8 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s2 +; GCN-NEXT: s_mov_b32 s4, s0 +; GCN-NEXT: s_mov_b32 s5, s1 +; GCN-NEXT: v_mov_b32_e32 v1, s3 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v0, s8 +; GCN-NEXT: v_mov_b32_e32 v1, s9 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; GCN-NEXT: s_endpgm + %tmp = call <4 x half> @llvm.amdgcn.set.inactive.v4f16(<4 x half> %in, <4 x half> ) #0 + store <4 x half> %tmp, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @set_inactive_v4bf16(ptr addrspace(1) %out, <4 x bfloat> %in) { +; GCN-LABEL: set_inactive_v4bf16: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCN-NEXT: s_mov_b32 s8, 0x3f803f80 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 +; GCN-NEXT: 
s_mov_b32 s9, s8 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s2 +; GCN-NEXT: s_mov_b32 s4, s0 +; GCN-NEXT: s_mov_b32 s5, s1 +; GCN-NEXT: v_mov_b32_e32 v1, s3 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v0, s8 +; GCN-NEXT: v_mov_b32_e32 v1, s9 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; GCN-NEXT: s_endpgm + %tmp = call <4 x bfloat> @llvm.amdgcn.set.inactive.v4bf16(<4 x bfloat> %in, <4 x bfloat> ) #0 + store <4 x bfloat> %tmp, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @set_inactive_p0(ptr addrspace(1) %out, ptr %in) { +; GCN-LABEL: set_inactive_p0: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s2 +; GCN-NEXT: s_mov_b32 s4, s0 +; GCN-NEXT: s_mov_b32 s5, s1 +; GCN-NEXT: v_mov_b32_e32 v1, s3 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v0, 0 +; GCN-NEXT: v_mov_b32_e32 v1, 0 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; GCN-NEXT: s_endpgm + %tmp = call ptr @llvm.amdgcn.set.inactive.p0(ptr %in, ptr null) #0 + store ptr %tmp, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @set_inactive_p2(ptr addrspace(1) %out, ptr addrspace(2) %in) { +; GCN-LABEL: set_inactive_p2: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dword s4, s[0:1], 0x2c +; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v0, 0 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GCN-NEXT: s_endpgm + %tmp = call ptr addrspace(2) @llvm.amdgcn.set.inactive.p2(ptr addrspace(2) %in, ptr addrspace(2) null) #0 + store ptr addrspace(2) %tmp, ptr addrspace(1) %out + ret 
void +} + +define amdgpu_kernel void @set_inactive_p3(ptr addrspace(1) %out, ptr addrspace(3) %in) { +; GCN-LABEL: set_inactive_p3: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dword s4, s[0:1], 0x2c +; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v0, 0 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GCN-NEXT: s_endpgm + %tmp = call ptr addrspace(3) @llvm.amdgcn.set.inactive.p3(ptr addrspace(3) %in, ptr addrspace(3) null) #0 + store ptr addrspace(3) %tmp, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @set_inactive_p5(ptr addrspace(1) %out, ptr addrspace(5) %in) { +; GCN-LABEL: set_inactive_p5: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dword s4, s[0:1], 0x2c +; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v0, 0 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GCN-NEXT: s_endpgm + %tmp = call ptr addrspace(5) @llvm.amdgcn.set.inactive.p5(ptr addrspace(5) %in, ptr addrspace(5) null) #0 + store ptr addrspace(5) %tmp, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @set_inactive_p6(ptr addrspace(1) %out, ptr addrspace(6) %in) { +; GCN-LABEL: set_inactive_p6: +; GCN: ; %bb.0: +; GCN-NEXT: s_load_dword s4, s[0:1], 0x2c +; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, -1 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s4 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: v_mov_b32_e32 v0, 0 +; GCN-NEXT: s_not_b64 exec, exec +; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GCN-NEXT: s_endpgm + %tmp = call ptr 
addrspace(6) @llvm.amdgcn.set.inactive.p6(ptr addrspace(6) %in, ptr addrspace(6) null) #0 + store ptr addrspace(6) %tmp, ptr addrspace(1) %out + ret void +} + declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #0 declare i64 @llvm.amdgcn.set.inactive.i64(i64, i64) #0 declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32) From 86dc75862398ec48ad411103770613fba9add9f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amaury=20S=C3=A9chet?= Date: Wed, 5 Jun 2024 13:04:22 +0000 Subject: [PATCH 086/155] [llvm-c] Move LLVMAttributeIndex to a more appropriate place. NFC. --- llvm/include/llvm-c/Core.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h index d4a10e92776052..7de99c3a6038df 100644 --- a/llvm/include/llvm-c/Core.h +++ b/llvm/include/llvm-c/Core.h @@ -471,6 +471,8 @@ enum { LLVMAttributeFunctionIndex = -1, }; +typedef unsigned LLVMAttributeIndex; + /** * Tail call kind for LLVMSetTailCallKind and LLVMGetTailCallKind.
* @@ -485,8 +487,6 @@ typedef enum { LLVMTailCallKindNoTail = 3, } LLVMTailCallKind; -typedef unsigned LLVMAttributeIndex; - enum { LLVMFastMathAllowReassoc = (1 << 0), LLVMFastMathNoNaNs = (1 << 1), From e910f61fb1810020ab68fdf6479bde03e702e013 Mon Sep 17 00:00:00 2001 From: agozillon Date: Fri, 14 Jun 2024 14:34:59 +0200 Subject: [PATCH 087/155] [Flang][OpenMP] Fix typo in getBaseObject causing crashes in certain scenarios (#95472) This typo would unfortunately cause code like the following to ICE, where common block symbols/names are used in a map clause: subroutine sb() implicit none integer:: b, c common /var/ b, c !$omp target map(tofrom: /var/) b = 1 c = 2 !$omp end target end subroutine --- flang/lib/Lower/OpenMP/Clauses.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp index 1e3fea7c4cdc75..6bae62cd858606 100644 --- a/flang/lib/Lower/OpenMP/Clauses.cpp +++ b/flang/lib/Lower/OpenMP/Clauses.cpp @@ -153,7 +153,7 @@ Object makeObject(const parser::OmpObject &object, std::optional getBaseObject(const Object &object, semantics::SemanticsContext &semaCtx) { // If it's just the symbol, then there is no base. - if (!object.id()) + if (!object.ref()) return std::nullopt; auto maybeRef = evaluate::ExtractDataRef(*object.ref()); From 8ab3f8ae0d39048e4bc1198514049813c6765fb6 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 14 Jun 2024 14:47:41 +0200 Subject: [PATCH 088/155] [llvm-exegesis] Fix typos in cmake file Fix typos introduced in #95421.
--- llvm/tools/llvm-exegesis/CMakeLists.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/tools/llvm-exegesis/CMakeLists.txt b/llvm/tools/llvm-exegesis/CMakeLists.txt index c3c4058cf65255..49cb7e176c6d34 100644 --- a/llvm/tools/llvm-exegesis/CMakeLists.txt +++ b/llvm/tools/llvm-exegesis/CMakeLists.txt @@ -11,11 +11,11 @@ set(LLVM_LINK_COMPONENTS foreach(t ${LLVM_EXEGESIS_TARGETS}) string(STRIP ${t} t) - list(APPEND LLVM_LINK_COMPONTENTS "LLVM${t}AsmParser") - list(APPEND LLVM_LINK_COMPONTENTS "LLVM${t}CodeGen") - list(APPEND LLVM_LINK_COMPONTENTS "LLVM${t}Desc") - list(APPEND LLVM_LINK_COMPONTENTS "LLVM${t}Disassembler") - list(APPEND LLVM_LINK_COMPONTENTS "LLVM${t}Info") + list(APPEND LLVM_LINK_COMPONENTS "${t}AsmParser") + list(APPEND LLVM_LINK_COMPONENTS "${t}CodeGen") + list(APPEND LLVM_LINK_COMPONENTS "${t}Desc") + list(APPEND LLVM_LINK_COMPONENTS "${t}Disassembler") + list(APPEND LLVM_LINK_COMPONENTS "${t}Info") endforeach() add_llvm_tool(llvm-exegesis From c6b6e18c4d25305ab98b6eab752de99ea4e15344 Mon Sep 17 00:00:00 2001 From: David Truby Date: Fri, 14 Jun 2024 14:10:41 +0100 Subject: [PATCH 089/155] [flang] Implement !DIR$ VECTOR ALWAYS (#93830) This patch implements support for the VECTOR ALWAYS directive, which forces vectorization to occur when possible regardless of a decision by the cost model. This is done by adding an attribute to the branch into the loop in LLVM to indicate that the loop should always be vectorized. This patch only implements this directive on plain structured do loops without labels. Support for unstructured loops and array expressions is planned for future patches.
--- flang/docs/Directives.md | 59 +++++++++ flang/include/flang/Lower/PFTBuilder.h | 2 + .../include/flang/Optimizer/Dialect/FIROps.h | 1 + .../include/flang/Optimizer/Dialect/FIROps.td | 3 +- flang/include/flang/Parser/dump-parse-tree.h | 3 +- flang/include/flang/Parser/parse-tree.h | 3 +- flang/lib/Lower/Bridge.cpp | 49 ++++++- .../Transforms/ControlFlowConverter.cpp | 6 +- flang/lib/Parser/Fortran-parsers.cpp | 3 + flang/lib/Parser/unparse.cpp | 3 + flang/lib/Semantics/CMakeLists.txt | 1 + .../lib/Semantics/canonicalize-directives.cpp | 124 ++++++++++++++++++ flang/lib/Semantics/canonicalize-directives.h | 22 ++++ flang/lib/Semantics/resolve-names.cpp | 3 + flang/lib/Semantics/semantics.cpp | 2 + flang/test/Fir/vector-always-cfg.fir | 32 +++++ flang/test/Fir/vector-always.fir | 21 +++ flang/test/Integration/vector-always.f90 | 14 ++ flang/test/Lower/vector-always.f90 | 26 ++++ flang/test/Parser/compiler-directives.f90 | 9 +- flang/test/Semantics/loop-directives.f90 | 19 +++ 21 files changed, 396 insertions(+), 9 deletions(-) create mode 100644 flang/lib/Semantics/canonicalize-directives.cpp create mode 100644 flang/lib/Semantics/canonicalize-directives.h create mode 100644 flang/test/Fir/vector-always-cfg.fir create mode 100644 flang/test/Fir/vector-always.fir create mode 100644 flang/test/Integration/vector-always.f90 create mode 100644 flang/test/Lower/vector-always.f90 create mode 100644 flang/test/Semantics/loop-directives.f90 diff --git a/flang/docs/Directives.md b/flang/docs/Directives.md index fe08b4f855f23c..f356f762b13a2d 100644 --- a/flang/docs/Directives.md +++ b/flang/docs/Directives.md @@ -36,3 +36,62 @@ A list of non-standard directives supported by Flang and is limited to 256. [This directive is currently recognised by the parser, but not handled by the other parts of the compiler]. +* `!dir$ vector always` forces vectorization on the following loop regardless + of cost model decisions. The loop must still be vectorizable. 
+ [This directive currently only works on plain do loops without labels]. + +# Directive Details + +## Introduction +Directives are commonly used in Fortran programs to specify additional actions +to be performed by the compiler. The directives are always specified with the +`!dir$` or `cdir$` prefix. + +## Loop Directives +Some directives are associated with the following construct, for example loop +directives. Directives on loops are used to specify additional transformation to +be performed by the compiler like enabling vectorisation, unrolling, interchange +etc. + +Currently loop directives are not accepted in the presence of OpenMP or OpenACC +constructs on the loop. This should be implemented as it is used in some +applications. + +### Array Expressions +It is to be decided whether loop directives should also be able to be associated +with array expressions. + +## Semantics +Directives that are associated with constructs must appear in the same section +as the construct they are associated with, for example loop directives must +appear in the executable section as the loops appear there. To facilitate this +the parse tree is corrected to move such directives that appear in the +specification part into the execution part. + +When a directive that must be associated with a construct appears, a search +forward from that directive to the next non-directive construct is performed to +check that that construct matches the expected construct for the directive. +Skipping other intermediate directives allows multiple directives to appear on +the same construct. + +## Lowering +Evaluation is extended with a new field called dirs for representing directives +associated with that Evaluation. When lowering loop directives, the associated +Do Loop's evaluation is found and the directive is added to it. This information +is used only during the lowering of the loop. 
+ +### Representation in LLVM +The `llvm.loop` metadata is used in LLVM to provide information to the optimizer +about the loop. For example, the `llvm.loop.vectorize.enable` metadata informs +the optimizer that a loop can be vectorized without considering its cost-model. +This attribute is added to the loop condition branch. + +### Representation in MLIR +The MLIR LLVM dialect models this by an attribute called LoopAnnotation +Attribute. The attribute can be added to the latch of the loop in the cf +dialect and is then carried through lowering to the LLVM dialect. + +## Testing +Since directives must maintain a flow from source to LLVM IR, an integration +test is provided that tests the `vector always` directive, as well as individual +lit tests for each of the parsing, semantics and lowering stages. diff --git a/flang/include/flang/Lower/PFTBuilder.h b/flang/include/flang/Lower/PFTBuilder.h index 83200eb6351a80..c2b600c6b5d9b4 100644 --- a/flang/include/flang/Lower/PFTBuilder.h +++ b/flang/include/flang/Lower/PFTBuilder.h @@ -350,6 +350,8 @@ struct Evaluation : EvaluationVariant { parser::CharBlock position{}; std::optional label{}; std::unique_ptr evaluationList; // nested evaluations + // associated compiler directives + llvm::SmallVector dirs; Evaluation *parentConstruct{nullptr}; // set for nodes below the top level Evaluation *lexicalSuccessor{nullptr}; // set for leaf nodes, some directives Evaluation *controlSuccessor{nullptr}; // set for some leaf nodes diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.h b/flang/include/flang/Optimizer/Dialect/FIROps.h index 9f07364ddb6279..a21f8bbe176852 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.h +++ b/flang/include/flang/Optimizer/Dialect/FIROps.h @@ -16,6 +16,7 @@ #include "flang/Optimizer/Dialect/FortranVariableInterface.h" #include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/LLVMIR/LLVMAttrs.h" #include 
"mlir/Interfaces/LoopLikeInterface.h" #include "mlir/Interfaces/SideEffectInterfaces.h" diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index e7da3af5485cca..baf095263479b5 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -2160,7 +2160,8 @@ def fir_DoLoopOp : region_Op<"do_loop", [AttrSizedOperandSegments, Variadic:$initArgs, OptionalAttr:$unordered, OptionalAttr:$finalValue, - OptionalAttr:$reduceAttrs + OptionalAttr:$reduceAttrs, + OptionalAttr:$loopAnnotation ); let results = (outs Variadic:$results); let regions = (region SizedRegion<1>:$region); diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index 4232e85a6e5952..37c3370b48a085 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -201,11 +201,12 @@ class ParseTreeDumper { NODE(parser, CommonStmt) NODE(CommonStmt, Block) NODE(parser, CompilerDirective) + NODE(CompilerDirective, AssumeAligned) NODE(CompilerDirective, IgnoreTKR) NODE(CompilerDirective, LoopCount) - NODE(CompilerDirective, AssumeAligned) NODE(CompilerDirective, NameValue) NODE(CompilerDirective, Unrecognized) + NODE(CompilerDirective, VectorAlways) NODE(parser, ComplexLiteralConstant) NODE(parser, ComplexPart) NODE(parser, ComponentArraySpec) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index f0b9b682030c61..548fcc81984b2a 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -3337,6 +3337,7 @@ struct CompilerDirective { TUPLE_CLASS_BOILERPLATE(AssumeAligned); std::tuple, uint64_t> t; }; + EMPTY_CLASS(VectorAlways); struct NameValue { TUPLE_CLASS_BOILERPLATE(NameValue); std::tuple> t; @@ -3344,7 +3345,7 @@ struct CompilerDirective { EMPTY_CLASS(Unrecognized); CharBlock source; std::variant, LoopCount, 
std::list, - std::list, Unrecognized> + VectorAlways, std::list, Unrecognized> u; }; diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 4dd0b7eb2a05f4..c73d43210a260a 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -1935,7 +1935,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { // Increment loop begin code. (Infinite/while code was already generated.) if (!infiniteLoop && !whileCondition) - genFIRIncrementLoopBegin(incrementLoopNestInfo); + genFIRIncrementLoopBegin(incrementLoopNestInfo, doStmtEval.dirs); // Loop body code. auto iter = eval.getNestedEvaluations().begin(); @@ -1980,8 +1980,20 @@ class FirConverter : public Fortran::lower::AbstractConverter { return builder->createIntegerConstant(loc, controlType, 1); // step } + void addLoopAnnotationAttr(IncrementLoopInfo &info) { + mlir::BoolAttr f = mlir::BoolAttr::get(builder->getContext(), false); + mlir::LLVM::LoopVectorizeAttr va = mlir::LLVM::LoopVectorizeAttr::get( + builder->getContext(), /*disable=*/f, {}, {}, {}, {}, {}, {}); + mlir::LLVM::LoopAnnotationAttr la = mlir::LLVM::LoopAnnotationAttr::get( + builder->getContext(), {}, /*vectorize=*/va, {}, {}, {}, {}, {}, {}, {}, + {}, {}, {}, {}, {}, {}); + info.doLoop.setLoopAnnotationAttr(la); + } + /// Generate FIR to begin a structured or unstructured increment loop nest. 
- void genFIRIncrementLoopBegin(IncrementLoopNestInfo &incrementLoopNestInfo) { + void genFIRIncrementLoopBegin( + IncrementLoopNestInfo &incrementLoopNestInfo, + llvm::SmallVectorImpl &dirs) { assert(!incrementLoopNestInfo.empty() && "empty loop nest"); mlir::Location loc = toLocation(); for (IncrementLoopInfo &info : incrementLoopNestInfo) { @@ -2046,6 +2058,15 @@ class FirConverter : public Fortran::lower::AbstractConverter { } if (info.hasLocalitySpecs()) handleLocalitySpecs(info); + + for (const auto *dir : dirs) { + std::visit( + Fortran::common::visitors{ + [&](const Fortran::parser::CompilerDirective::VectorAlways + &d) { addLoopAnnotationAttr(info); }, + [&](const auto &) {}}, + dir->u); + } continue; } @@ -2579,8 +2600,28 @@ class FirConverter : public Fortran::lower::AbstractConverter { } } - void genFIR(const Fortran::parser::CompilerDirective &) { - // TODO + void attachDirectiveToLoop(const Fortran::parser::CompilerDirective &dir, + Fortran::lower::pft::Evaluation *e) { + while (e->isDirective()) + e = e->lexicalSuccessor; + + if (e->isA()) + e->dirs.push_back(&dir); + else + fir::emitFatalError(toLocation(), + "loop directive must appear before a loop"); + } + + void genFIR(const Fortran::parser::CompilerDirective &dir) { + Fortran::lower::pft::Evaluation &eval = getEval(); + + std::visit( + Fortran::common::visitors{ + [&](const Fortran::parser::CompilerDirective::VectorAlways &) { + attachDirectiveToLoop(dir, &eval); + }, + [&](const auto &) {}}, + dir.u); } void genFIR(const Fortran::parser::OpenACCConstruct &acc) { diff --git a/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp b/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp index a233e7fbdcd1e3..1af5a68e852973 100644 --- a/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp +++ b/flang/lib/Optimizer/Transforms/ControlFlowConverter.cpp @@ -132,10 +132,14 @@ class CfgLoopConv : public mlir::OpRewritePattern { auto comparison = rewriter.create( loc, arith::CmpIPredicate::sgt, 
itersLeft, zero); - rewriter.create( + auto cond = rewriter.create( loc, comparison, firstBlock, llvm::ArrayRef(), endBlock, llvm::ArrayRef()); + // Copy loop annotations from the do loop to the loop entry condition. + if (auto ann = loop.getLoopAnnotation()) + cond->setAttr("loop_annotation", *ann); + // The result of the loop operation is the values of the condition block // arguments except the induction variable on the last iteration. auto args = loop.getFinalValue() diff --git a/flang/lib/Parser/Fortran-parsers.cpp b/flang/lib/Parser/Fortran-parsers.cpp index 13f15c84e579e6..746d04ad649d16 100644 --- a/flang/lib/Parser/Fortran-parsers.cpp +++ b/flang/lib/Parser/Fortran-parsers.cpp @@ -1277,10 +1277,13 @@ constexpr auto loopCount{ constexpr auto assumeAligned{"ASSUME_ALIGNED" >> optionalList(construct( indirect(designator), ":"_tok >> digitString64))}; +constexpr auto vectorAlways{ + "VECTOR ALWAYS" >> construct()}; TYPE_PARSER(beginDirective >> "DIR$ "_tok >> sourced((construct(ignore_tkr) || construct(loopCount) || construct(assumeAligned) || + construct(vectorAlways) || construct( many(construct( name, maybe(("="_tok || ":"_tok) >> digitString64))))) / diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index 13ca2309ad502c..036f306c02cbfa 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -1834,6 +1834,9 @@ class UnparseVisitor { Word("!DIR$ ASSUME_ALIGNED "); Walk(" ", assumeAligned, ", "); }, + [&](const CompilerDirective::VectorAlways &valways) { + Word("!DIR$ VECTOR ALWAYS"); + }, [&](const std::list &names) { Walk("!DIR$ ", names, " "); }, diff --git a/flang/lib/Semantics/CMakeLists.txt b/flang/lib/Semantics/CMakeLists.txt index 809206565fc1cf..41406ecf50e004 100644 --- a/flang/lib/Semantics/CMakeLists.txt +++ b/flang/lib/Semantics/CMakeLists.txt @@ -2,6 +2,7 @@ add_flang_library(FortranSemantics assignment.cpp attr.cpp canonicalize-acc.cpp + canonicalize-directives.cpp canonicalize-do.cpp 
canonicalize-omp.cpp check-acc-structure.cpp diff --git a/flang/lib/Semantics/canonicalize-directives.cpp b/flang/lib/Semantics/canonicalize-directives.cpp new file mode 100644 index 00000000000000..4bf36754eb10bd --- /dev/null +++ b/flang/lib/Semantics/canonicalize-directives.cpp @@ -0,0 +1,124 @@ +//===-- lib/Semantics/canonicalize-directives.cpp -------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "canonicalize-directives.h" +#include "flang/Parser/parse-tree-visitor.h" + +namespace Fortran::semantics { + +using namespace parser::literals; + +// Check that directives are associated with the correct constructs. +// Directives that need to be associated with other constructs in the execution +// part are moved to the execution part so they can be checked there. +class CanonicalizationOfDirectives { +public: + CanonicalizationOfDirectives(parser::Messages &messages) + : messages_{messages} {} + + template bool Pre(T &) { return true; } + template void Post(T &) {} + + // Move directives that must appear in the Execution part out of the + // Specification part. + void Post(parser::SpecificationPart &spec); + bool Pre(parser::ExecutionPart &x); + + // Ensure that directives associated with constructs appear accompanying the + // construct. + void Post(parser::Block &block); + +private: + // Ensure that loop directives appear immediately before a loop. + void CheckLoopDirective(parser::CompilerDirective &dir, parser::Block &block, + std::list::iterator it); + + parser::Messages &messages_; + + // Directives to be moved to the Execution part from the Specification part. 
+ std::list> + directivesToConvert_; +}; + +bool CanonicalizeDirectives( + parser::Messages &messages, parser::Program &program) { + CanonicalizationOfDirectives dirs{messages}; + Walk(program, dirs); + return !messages.AnyFatalError(); +} + +static bool IsExecutionDirective(const parser::CompilerDirective &dir) { + return std::holds_alternative(dir.u); +} + +void CanonicalizationOfDirectives::Post(parser::SpecificationPart &spec) { + auto &list{ + std::get>>( + spec.t)}; + for (auto it{list.begin()}; it != list.end();) { + if (IsExecutionDirective(it->value())) { + directivesToConvert_.emplace_back(std::move(*it)); + it = list.erase(it); + } else { + ++it; + } + } +} + +bool CanonicalizationOfDirectives::Pre(parser::ExecutionPart &x) { + auto origFirst{x.v.begin()}; + for (auto &dir : directivesToConvert_) { + x.v.insert(origFirst, + parser::ExecutionPartConstruct{ + parser::ExecutableConstruct{std::move(dir)}}); + } + + directivesToConvert_.clear(); + return true; +} + +template T *GetConstructIf(parser::ExecutionPartConstruct &x) { + if (auto *y{std::get_if(&x.u)}) { + if (auto *z{std::get_if>(&y->u)}) { + return &z->value(); + } + } + return nullptr; +} + +void CanonicalizationOfDirectives::CheckLoopDirective( + parser::CompilerDirective &dir, parser::Block &block, + std::list::iterator it) { + + // Skip over this and other compiler directives + while (GetConstructIf(*it)) { + ++it; + } + + if (it == block.end() || !GetConstructIf(*it)) { + std::string s{parser::ToUpperCaseLetters(dir.source.ToString())}; + s.pop_back(); // Remove trailing newline from source string + messages_.Say( + dir.source, "A DO loop must follow the %s directive"_err_en_US, s); + } +} + +void CanonicalizationOfDirectives::Post(parser::Block &block) { + for (auto it{block.begin()}; it != block.end(); ++it) { + if (auto *dir{GetConstructIf(*it)}) { + std::visit( + common::visitors{[&](parser::CompilerDirective::VectorAlways &) { + CheckLoopDirective(*dir, block, it); + }, + [&](auto &) 
{}}, + dir->u); + } + } +} + +} // namespace Fortran::semantics diff --git a/flang/lib/Semantics/canonicalize-directives.h b/flang/lib/Semantics/canonicalize-directives.h new file mode 100644 index 00000000000000..89f8a0e3fce4ae --- /dev/null +++ b/flang/lib/Semantics/canonicalize-directives.h @@ -0,0 +1,22 @@ +//===-- lib/Semantics/canonicalize-directives.h -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef FORTRAN_SEMANTICS_CANONICALIZE_DIRECTIVES_H_ +#define FORTRAN_SEMANTICS_CANONICALIZE_DIRECTIVES_H_ + +namespace Fortran::parser { +struct Program; +class Messages; +} // namespace Fortran::parser + +namespace Fortran::semantics { +bool CanonicalizeDirectives( + parser::Messages &messages, parser::Program &program); +} + +#endif // FORTRAN_SEMANTICS_CANONICALIZE_DIRECTIVES_H_ diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index 17ff12568b06d8..d4fe668b92ec93 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -8899,6 +8899,9 @@ void ResolveNamesVisitor::Post(const parser::AssignedGotoStmt &x) { } void ResolveNamesVisitor::Post(const parser::CompilerDirective &x) { + if (std::holds_alternative(x.u)) { + return; + } if (const auto *tkr{ std::get_if>(&x.u)}) { if (currScope().IsTopLevel() || diff --git a/flang/lib/Semantics/semantics.cpp b/flang/lib/Semantics/semantics.cpp index d51cc62d804e84..1bb0679b751102 100644 --- a/flang/lib/Semantics/semantics.cpp +++ b/flang/lib/Semantics/semantics.cpp @@ -9,6 +9,7 @@ #include "flang/Semantics/semantics.h" #include "assignment.h" #include "canonicalize-acc.h" +#include "canonicalize-directives.h" #include "canonicalize-do.h" #include 
"canonicalize-omp.h" #include "check-acc-structure.h" @@ -599,6 +600,7 @@ bool Semantics::Perform() { CanonicalizeAcc(context_.messages(), program_) && CanonicalizeOmp(context_.messages(), program_) && CanonicalizeCUDA(program_) && + CanonicalizeDirectives(context_.messages(), program_) && PerformStatementSemantics(context_, program_) && ModFileWriter{context_}.WriteAll(); } diff --git a/flang/test/Fir/vector-always-cfg.fir b/flang/test/Fir/vector-always-cfg.fir new file mode 100644 index 00000000000000..45c2ea056a707c --- /dev/null +++ b/flang/test/Fir/vector-always-cfg.fir @@ -0,0 +1,32 @@ +// RUN: fir-opt --fir-to-llvm-ir %s | FileCheck %s + +#access_group = #llvm.access_group> +// CHECK: #[[ACCESS:.*]] = #llvm.access_group> +#loop_vectorize = #llvm.loop_vectorize +// CHECK: #[[VECTORIZE:.*]] = #llvm.loop_vectorize +#loop_annotation = #llvm.loop_annotation +// CHECK: #[[ANNOTATION:.*]] = #llvm.loop_annotation + +func.func @_QPvector_always() -> i32 { + %c1 = arith.constant 1 : index + %c10_i32 = arith.constant 10 : i32 + %c1_i32 = arith.constant 1 : i32 + %c10 = arith.constant 10 : index + %0 = arith.subi %c10, %c1 : index + %1 = arith.addi %0, %c1 : index + %2 = arith.divsi %1, %c1 : index + cf.br ^bb1(%c1, %c1_i32, %2 : index, i32, index) +^bb1(%3: index, %4: i32, %5: index): // 2 preds: ^bb0, ^bb2 + %c0 = arith.constant 0 : index + %6 = arith.cmpi sgt, %5, %c0 : index + cf.cond_br %6, ^bb2, ^bb3 {loop_annotation = #loop_annotation} +// CHECK: llvm.cond_br %{{.*}}, ^{{.*}}, ^{{.*}} {loop_annotation = #[[ANNOTATION]]} +^bb2: // pred: ^bb1 + %7 = arith.addi %3, %c1 : index + %c1_0 = arith.constant 1 : index + %8 = arith.subi %5, %c1_0 : index + cf.br ^bb1(%7, %c1_i32, %8 : index, i32, index) +^bb3: // pred: ^bb1 + return %4 : i32 +} + diff --git a/flang/test/Fir/vector-always.fir b/flang/test/Fir/vector-always.fir new file mode 100644 index 00000000000000..00eb0e7a756ee6 --- /dev/null +++ b/flang/test/Fir/vector-always.fir @@ -0,0 +1,21 @@ +// RUN: fir-opt 
--cfg-conversion %s | FileCheck %s + +#access_group = #llvm.access_group> +// CHECK: #[[ACCESS:.*]] = #llvm.access_group> +#loop_vectorize = #llvm.loop_vectorize +// CHECK: #[[VECTORIZE:.*]] = #llvm.loop_vectorize +#loop_annotation = #llvm.loop_annotation +// CHECK: #[[ANNOTATION:.*]] = #llvm.loop_annotation + +// CHECK-LABEL: @_QPvector_always +func.func @_QPvector_always() -> i32 { + %c1 = arith.constant 1 : index + %c10_i32 = arith.constant 10 : i32 + %c1_i32 = arith.constant 1 : i32 + %c10 = arith.constant 10 : index +// CHECK: cf.cond_br %{{.*}}, ^{{.*}}, ^{{.*}} {loop_annotation = #[[ANNOTATION]]} + %8:2 = fir.do_loop %arg0 = %c1 to %c10 step %c1 iter_args(%arg1 = %c1_i32) -> (index, i32) attributes {loopAnnotation = #loop_annotation} { + fir.result %c1, %c1_i32 : index, i32 + } + return %8#1 : i32 + } diff --git a/flang/test/Integration/vector-always.f90 b/flang/test/Integration/vector-always.f90 new file mode 100644 index 00000000000000..7216698f901c1f --- /dev/null +++ b/flang/test/Integration/vector-always.f90 @@ -0,0 +1,14 @@ +! RUN: %flang_fc1 -emit-llvm -o - %s | FileCheck %s + +! CHECK-LABEL: vector_always +subroutine vector_always + integer :: a(10) + !dir$ vector always + ! CHECK: br i1 {{.*}}, label {{.*}}, label {{.*}}, !llvm.loop ![[ANNOTATION:.*]] + do i=1,10 + a(i)=i + end do +end subroutine vector_always + +! CHECK: ![[ANNOTATION]] = distinct !{![[ANNOTATION]], ![[VECTORIZE:.*]]} +! CHECK: ![[VECTORIZE]] = !{!"llvm.loop.vectorize.enable", i1 true} diff --git a/flang/test/Lower/vector-always.f90 b/flang/test/Lower/vector-always.f90 new file mode 100644 index 00000000000000..1822fc33dfdb81 --- /dev/null +++ b/flang/test/Lower/vector-always.f90 @@ -0,0 +1,26 @@ +! RUN: %flang_fc1 -emit-hlfir -o - %s | FileCheck %s + +! CHECK: #loop_vectorize = #llvm.loop_vectorize +! CHECK: #loop_annotation = #llvm.loop_annotation + +! 
CHECK-LABEL: vector_always +subroutine vector_always + integer :: a(10) + !dir$ vector always + !CHECK: fir.do_loop {{.*}} attributes {loopAnnotation = #loop_annotation} + do i=1,10 + a(i)=i + end do +end subroutine vector_always + + +! CHECK-LABEL: intermediate_directive +subroutine intermediate_directive + integer :: a(10) + !dir$ vector always + !dir$ unknown + !CHECK: fir.do_loop {{.*}} attributes {loopAnnotation = #loop_annotation} + do i=1,10 + a(i)=i + end do +end subroutine intermediate_directive diff --git a/flang/test/Parser/compiler-directives.f90 b/flang/test/Parser/compiler-directives.f90 index d4c99ae12f14ea..246eaf985251c6 100644 --- a/flang/test/Parser/compiler-directives.f90 +++ b/flang/test/Parser/compiler-directives.f90 @@ -1,4 +1,4 @@ -! RUN: %flang_fc1 -fdebug-unparse %s 2>&1 +! RUN: %flang_fc1 -fdebug-unparse %s 2>&1 | FileCheck %s ! Test that compiler directives can appear in various places. @@ -28,3 +28,10 @@ module m !dir$ align : 1024 :: d end type stuff end + +subroutine vector_always + !dir$ vector always + ! CHECK: !DIR$ VECTOR ALWAYS + do i=1,10 + enddo +end subroutine diff --git a/flang/test/Semantics/loop-directives.f90 b/flang/test/Semantics/loop-directives.f90 new file mode 100644 index 00000000000000..e2807c1f9d0e22 --- /dev/null +++ b/flang/test/Semantics/loop-directives.f90 @@ -0,0 +1,19 @@ +! RUN: %python %S/test_errors.py %s %flang + +subroutine empty + ! ERROR: A DO loop must follow the VECTOR ALWAYS directive + !dir$ vector always +end subroutine empty + +subroutine non_do + ! ERROR: A DO loop must follow the VECTOR ALWAYS directive + !dir$ vector always + a = 1 +end subroutine non_do + +subroutine execution_part + do i=1,10 + ! 
ERROR: A DO loop must follow the VECTOR ALWAYS directive + !dir$ vector always + end do +end subroutine execution_part From c81d5b11cf7caf82749638752d819a061fdf4d9e Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 14 Jun 2024 11:32:56 +0100 Subject: [PATCH 090/155] [X86] Add scalar test coverage for ISD::AVG nodes on 32 and 64-bit targets --- llvm/test/CodeGen/X86/avgceils-scalar.ll | 267 ++++++++++++++++++++++ llvm/test/CodeGen/X86/avgceilu-scalar.ll | 267 ++++++++++++++++++++++ llvm/test/CodeGen/X86/avgfloors-scalar.ll | 263 +++++++++++++++++++++ llvm/test/CodeGen/X86/avgflooru-scalar.ll | 263 +++++++++++++++++++++ 4 files changed, 1060 insertions(+) create mode 100644 llvm/test/CodeGen/X86/avgceils-scalar.ll create mode 100644 llvm/test/CodeGen/X86/avgceilu-scalar.ll create mode 100644 llvm/test/CodeGen/X86/avgfloors-scalar.ll create mode 100644 llvm/test/CodeGen/X86/avgflooru-scalar.ll diff --git a/llvm/test/CodeGen/X86/avgceils-scalar.ll b/llvm/test/CodeGen/X86/avgceils-scalar.ll new file mode 100644 index 00000000000000..86de35d36f0769 --- /dev/null +++ b/llvm/test/CodeGen/X86/avgceils-scalar.ll @@ -0,0 +1,267 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=X86 +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=X64 + +; +; fixed avg(x,y) = sub(or(x,y),ashr(xor(x,y),1)) +; +; ext avg(x,y) = trunc(ashr(add(sext(x),sext(y),1),1)) +; + +define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind { +; X86-LABEL: test_fixed_i8: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, %eax +; X86-NEXT: orb %cl, %al +; X86-NEXT: xorb %cl, %dl +; X86-NEXT: sarb %dl +; X86-NEXT: subb %dl, %al +; X86-NEXT: retl +; +; X64-LABEL: test_fixed_i8: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: orb %sil, %al +; X64-NEXT: xorb %sil, %dil +; X64-NEXT: sarb %dil +; X64-NEXT: subb %dil, 
%al +; X64-NEXT: retq + %or = or i8 %a0, %a1 + %xor = xor i8 %a0, %a1 + %shift = ashr i8 %xor, 1 + %res = sub i8 %or, %shift + ret i8 %res +} + +define i8 @test_ext_i8(i8 %a0, i8 %a1) nounwind { +; X86-LABEL: test_ext_i8: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, %eax +; X86-NEXT: orb %cl, %al +; X86-NEXT: xorb %cl, %dl +; X86-NEXT: sarb %dl +; X86-NEXT: subb %dl, %al +; X86-NEXT: retl +; +; X64-LABEL: test_ext_i8: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: orb %sil, %al +; X64-NEXT: xorb %sil, %dil +; X64-NEXT: sarb %dil +; X64-NEXT: subb %dil, %al +; X64-NEXT: retq + %x0 = sext i8 %a0 to i16 + %x1 = sext i8 %a1 to i16 + %sum = add i16 %x0, %x1 + %sum1 = add i16 %sum, 1 + %shift = ashr i16 %sum1, 1 + %res = trunc i16 %shift to i8 + ret i8 %res +} + +define i16 @test_fixed_i16(i16 %a0, i16 %a1) nounwind { +; X86-LABEL: test_fixed_i16: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: xorl %ecx, %edx +; X86-NEXT: movswl %dx, %ecx +; X86-NEXT: sarl %ecx +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; X64-LABEL: test_fixed_i16: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: orl %esi, %eax +; X64-NEXT: xorl %esi, %edi +; X64-NEXT: movswl %di, %ecx +; X64-NEXT: sarl %ecx +; X64-NEXT: subl %ecx, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq + %or = or i16 %a0, %a1 + %xor = xor i16 %a0, %a1 + %shift = ashr i16 %xor, 1 + %res = sub i16 %or, %shift + ret i16 %res +} + +define i16 @test_ext_i16(i16 %a0, i16 %a1) nounwind { +; X86-LABEL: test_ext_i16: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: xorl %ecx, %edx +; X86-NEXT: movswl 
%dx, %ecx +; X86-NEXT: sarl %ecx +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; X64-LABEL: test_ext_i16: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: orl %esi, %eax +; X64-NEXT: xorl %esi, %edi +; X64-NEXT: movswl %di, %ecx +; X64-NEXT: sarl %ecx +; X64-NEXT: subl %ecx, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq + %x0 = sext i16 %a0 to i32 + %x1 = sext i16 %a1 to i32 + %sum = add i32 %x0, %x1 + %sum1 = add i32 %sum, 1 + %shift = ashr i32 %sum1, 1 + %res = trunc i32 %shift to i16 + ret i16 %res +} + +define i32 @test_fixed_i32(i32 %a0, i32 %a1) nounwind { +; X86-LABEL: test_fixed_i32: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: xorl %ecx, %edx +; X86-NEXT: sarl %edx +; X86-NEXT: subl %edx, %eax +; X86-NEXT: retl +; +; X64-LABEL: test_fixed_i32: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: orl %esi, %eax +; X64-NEXT: xorl %esi, %edi +; X64-NEXT: sarl %edi +; X64-NEXT: subl %edi, %eax +; X64-NEXT: retq + %or = or i32 %a0, %a1 + %xor = xor i32 %a1, %a0 + %shift = ashr i32 %xor, 1 + %res = sub i32 %or, %shift + ret i32 %res +} + +define i32 @test_ext_i32(i32 %a0, i32 %a1) nounwind { +; X86-LABEL: test_ext_i32: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: xorl %ecx, %edx +; X86-NEXT: sarl %edx +; X86-NEXT: subl %edx, %eax +; X86-NEXT: retl +; +; X64-LABEL: test_ext_i32: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: orl %esi, %eax +; X64-NEXT: xorl %esi, %edi +; X64-NEXT: sarl %edi +; X64-NEXT: subl %edi, %eax +; X64-NEXT: retq + %x0 = sext i32 %a0 to i64 + %x1 = sext i32 %a1 to i64 + %sum = add i64 %x0, %x1 + %sum1 = add i64 %sum, 1 + %shift = ashr i64 %sum1, 1 + %res = trunc i64 %shift to i32 + 
ret i32 %res +} + +define i64 @test_fixed_i64(i64 %a0, i64 %a1) nounwind { +; X86-LABEL: test_fixed_i64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl %ecx, %edi +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: xorl %esi, %ebx +; X86-NEXT: shrdl $1, %ebx, %edi +; X86-NEXT: orl %esi, %edx +; X86-NEXT: sarl %ebx +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: subl %edi, %eax +; X86-NEXT: sbbl %ebx, %edx +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: retl +; +; X64-LABEL: test_fixed_i64: +; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: orq %rsi, %rax +; X64-NEXT: xorq %rsi, %rdi +; X64-NEXT: sarq %rdi +; X64-NEXT: subq %rdi, %rax +; X64-NEXT: retq + %or = or i64 %a0, %a1 + %xor = xor i64 %a1, %a0 + %shift = ashr i64 %xor, 1 + %res = sub i64 %or, %shift + ret i64 %res +} + +define i64 @test_ext_i64(i64 %a0, i64 %a1) nounwind { +; X86-LABEL: test_ext_i64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl %ecx, %edi +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: xorl %esi, %ebx +; X86-NEXT: shrdl $1, %ebx, %edi +; X86-NEXT: orl %esi, %edx +; X86-NEXT: sarl %ebx +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: subl %edi, %eax +; X86-NEXT: sbbl %ebx, %edx +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: retl +; +; X64-LABEL: test_ext_i64: +; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: orq %rsi, %rax +; X64-NEXT: xorq %rsi, %rdi +; X64-NEXT: sarq %rdi +; X64-NEXT: subq %rdi, %rax +; X64-NEXT: retq + %x0 = sext i64 %a0 
to i128 + %x1 = sext i64 %a1 to i128 + %sum = add i128 %x0, %x1 + %sum1 = add i128 %sum, 1 + %shift = ashr i128 %sum1, 1 + %res = trunc i128 %shift to i64 + ret i64 %res +} diff --git a/llvm/test/CodeGen/X86/avgceilu-scalar.ll b/llvm/test/CodeGen/X86/avgceilu-scalar.ll new file mode 100644 index 00000000000000..014c984528141b --- /dev/null +++ b/llvm/test/CodeGen/X86/avgceilu-scalar.ll @@ -0,0 +1,267 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=X86 +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=X64 + +; +; fixed avg(x,y) = sub(or(x,y),lshr(xor(x,y),1)) +; +; ext avg(x,y) = trunc(lshr(add(zext(x),zext(y),1),1)) +; + +define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind { +; X86-LABEL: test_fixed_i8: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, %eax +; X86-NEXT: orb %cl, %al +; X86-NEXT: xorb %cl, %dl +; X86-NEXT: shrb %dl +; X86-NEXT: subb %dl, %al +; X86-NEXT: retl +; +; X64-LABEL: test_fixed_i8: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: orb %sil, %al +; X64-NEXT: xorb %sil, %dil +; X64-NEXT: shrb %dil +; X64-NEXT: subb %dil, %al +; X64-NEXT: retq + %or = or i8 %a0, %a1 + %xor = xor i8 %a0, %a1 + %shift = lshr i8 %xor, 1 + %res = sub i8 %or, %shift + ret i8 %res +} + +define i8 @test_ext_i8(i8 %a0, i8 %a1) nounwind { +; X86-LABEL: test_ext_i8: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, %eax +; X86-NEXT: orb %cl, %al +; X86-NEXT: xorb %cl, %dl +; X86-NEXT: shrb %dl +; X86-NEXT: subb %dl, %al +; X86-NEXT: retl +; +; X64-LABEL: test_ext_i8: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: orb %sil, %al +; X64-NEXT: xorb %sil, %dil +; X64-NEXT: shrb %dil +; X64-NEXT: subb %dil, %al +; X64-NEXT: retq + %x0 = zext i8 %a0 to i16 + %x1 = zext i8 %a1 to i16 + %sum = 
add i16 %x0, %x1 + %sum1 = add i16 %sum, 1 + %shift = lshr i16 %sum1, 1 + %res = trunc i16 %shift to i8 + ret i8 %res +} + +define i16 @test_fixed_i16(i16 %a0, i16 %a1) nounwind { +; X86-LABEL: test_fixed_i16: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: xorl %ecx, %edx +; X86-NEXT: movzwl %dx, %ecx +; X86-NEXT: shrl %ecx +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; X64-LABEL: test_fixed_i16: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: orl %esi, %eax +; X64-NEXT: xorl %esi, %edi +; X64-NEXT: movzwl %di, %ecx +; X64-NEXT: shrl %ecx +; X64-NEXT: subl %ecx, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq + %or = or i16 %a0, %a1 + %xor = xor i16 %a0, %a1 + %shift = lshr i16 %xor, 1 + %res = sub i16 %or, %shift + ret i16 %res +} + +define i16 @test_ext_i16(i16 %a0, i16 %a1) nounwind { +; X86-LABEL: test_ext_i16: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: xorl %ecx, %edx +; X86-NEXT: movzwl %dx, %ecx +; X86-NEXT: shrl %ecx +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; X64-LABEL: test_ext_i16: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: orl %esi, %eax +; X64-NEXT: xorl %esi, %edi +; X64-NEXT: movzwl %di, %ecx +; X64-NEXT: shrl %ecx +; X64-NEXT: subl %ecx, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq + %x0 = zext i16 %a0 to i32 + %x1 = zext i16 %a1 to i32 + %sum = add i32 %x0, %x1 + %sum1 = add i32 %sum, 1 + %shift = lshr i32 %sum1, 1 + %res = trunc i32 %shift to i16 + ret i16 %res +} + +define i32 @test_fixed_i32(i32 %a0, i32 %a1) nounwind { +; X86-LABEL: test_fixed_i32: +; X86: # %bb.0: +; X86-NEXT: movl 
{{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: xorl %ecx, %edx +; X86-NEXT: shrl %edx +; X86-NEXT: subl %edx, %eax +; X86-NEXT: retl +; +; X64-LABEL: test_fixed_i32: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: orl %esi, %eax +; X64-NEXT: xorl %esi, %edi +; X64-NEXT: shrl %edi +; X64-NEXT: subl %edi, %eax +; X64-NEXT: retq + %or = or i32 %a0, %a1 + %xor = xor i32 %a1, %a0 + %shift = lshr i32 %xor, 1 + %res = sub i32 %or, %shift + ret i32 %res +} + +define i32 @test_ext_i32(i32 %a0, i32 %a1) nounwind { +; X86-LABEL: test_ext_i32: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %edx, %eax +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: xorl %ecx, %edx +; X86-NEXT: shrl %edx +; X86-NEXT: subl %edx, %eax +; X86-NEXT: retl +; +; X64-LABEL: test_ext_i32: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: orl %esi, %eax +; X64-NEXT: xorl %esi, %edi +; X64-NEXT: shrl %edi +; X64-NEXT: subl %edi, %eax +; X64-NEXT: retq + %x0 = zext i32 %a0 to i64 + %x1 = zext i32 %a1 to i64 + %sum = add i64 %x0, %x1 + %sum1 = add i64 %sum, 1 + %shift = lshr i64 %sum1, 1 + %res = trunc i64 %shift to i32 + ret i32 %res +} + +define i64 @test_fixed_i64(i64 %a0, i64 %a1) nounwind { +; X86-LABEL: test_fixed_i64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl %ecx, %edi +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: xorl %esi, %ebx +; X86-NEXT: shrdl $1, %ebx, %edi +; X86-NEXT: orl %esi, %edx +; X86-NEXT: shrl %ebx +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: subl %edi, %eax +; X86-NEXT: sbbl %ebx, %edx +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: retl 
+; +; X64-LABEL: test_fixed_i64: +; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: orq %rsi, %rax +; X64-NEXT: xorq %rsi, %rdi +; X64-NEXT: shrq %rdi +; X64-NEXT: subq %rdi, %rax +; X64-NEXT: retq + %or = or i64 %a0, %a1 + %xor = xor i64 %a1, %a0 + %shift = lshr i64 %xor, 1 + %res = sub i64 %or, %shift + ret i64 %res +} + +define i64 @test_ext_i64(i64 %a0, i64 %a1) nounwind { +; X86-LABEL: test_ext_i64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl %eax, %edi +; X86-NEXT: xorl %ecx, %edi +; X86-NEXT: movl %edx, %ebx +; X86-NEXT: xorl %esi, %ebx +; X86-NEXT: shrdl $1, %ebx, %edi +; X86-NEXT: orl %esi, %edx +; X86-NEXT: shrl %ebx +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: subl %edi, %eax +; X86-NEXT: sbbl %ebx, %edx +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: retl +; +; X64-LABEL: test_ext_i64: +; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: orq %rsi, %rax +; X64-NEXT: xorq %rsi, %rdi +; X64-NEXT: shrq %rdi +; X64-NEXT: subq %rdi, %rax +; X64-NEXT: retq + %x0 = zext i64 %a0 to i128 + %x1 = zext i64 %a1 to i128 + %sum = add i128 %x0, %x1 + %sum1 = add i128 %sum, 1 + %shift = lshr i128 %sum1, 1 + %res = trunc i128 %shift to i64 + ret i64 %res +} diff --git a/llvm/test/CodeGen/X86/avgfloors-scalar.ll b/llvm/test/CodeGen/X86/avgfloors-scalar.ll new file mode 100644 index 00000000000000..4c591d40790401 --- /dev/null +++ b/llvm/test/CodeGen/X86/avgfloors-scalar.ll @@ -0,0 +1,263 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=X86 +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=X64 + +; +; fixed avg(x,y) = add(and(x,y),ashr(xor(x,y),1)) +; +; ext avg(x,y) = 
trunc(ashr(add(sext(x),sext(y)),1)) +; + +define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind { +; X86-LABEL: test_fixed_i8: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: andb %cl, %dl +; X86-NEXT: xorb %cl, %al +; X86-NEXT: sarb %al +; X86-NEXT: addb %dl, %al +; X86-NEXT: retl +; +; X64-LABEL: test_fixed_i8: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: andb %sil, %al +; X64-NEXT: xorb %sil, %dil +; X64-NEXT: sarb %dil +; X64-NEXT: addb %dil, %al +; X64-NEXT: retq + %and = and i8 %a0, %a1 + %xor = xor i8 %a0, %a1 + %shift = ashr i8 %xor, 1 + %res = add i8 %and, %shift + ret i8 %res +} + +define i8 @test_ext_i8(i8 %a0, i8 %a1) nounwind { +; X86-LABEL: test_ext_i8: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: andb %cl, %dl +; X86-NEXT: xorb %cl, %al +; X86-NEXT: sarb %al +; X86-NEXT: addb %dl, %al +; X86-NEXT: retl +; +; X64-LABEL: test_ext_i8: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: andb %sil, %al +; X64-NEXT: xorb %sil, %dil +; X64-NEXT: sarb %dil +; X64-NEXT: addb %dil, %al +; X64-NEXT: retq + %x0 = sext i8 %a0 to i16 + %x1 = sext i8 %a1 to i16 + %sum = add i16 %x0, %x1 + %shift = ashr i16 %sum, 1 + %res = trunc i16 %shift to i8 + ret i8 %res +} + +define i16 @test_fixed_i16(i16 %a0, i16 %a1) nounwind { +; X86-LABEL: test_fixed_i16: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %edx +; X86-NEXT: andl %eax, %edx +; X86-NEXT: xorl %eax, %ecx +; X86-NEXT: movswl %cx, %eax +; X86-NEXT: sarl %eax +; X86-NEXT: addl %edx, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; X64-LABEL: test_fixed_i16: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: andl %esi, %ecx +; X64-NEXT: xorl %esi, %edi +; X64-NEXT: movswl %di, %eax +; 
X64-NEXT: sarl %eax +; X64-NEXT: addl %ecx, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq + %and = and i16 %a0, %a1 + %xor = xor i16 %a0, %a1 + %shift = ashr i16 %xor, 1 + %res = add i16 %and, %shift + ret i16 %res +} + +define i16 @test_ext_i16(i16 %a0, i16 %a1) nounwind { +; X86-LABEL: test_ext_i16: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %edx +; X86-NEXT: andl %eax, %edx +; X86-NEXT: xorl %eax, %ecx +; X86-NEXT: movswl %cx, %eax +; X86-NEXT: sarl %eax +; X86-NEXT: addl %edx, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; X64-LABEL: test_ext_i16: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: andl %esi, %ecx +; X64-NEXT: xorl %esi, %edi +; X64-NEXT: movswl %di, %eax +; X64-NEXT: sarl %eax +; X64-NEXT: addl %ecx, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq + %x0 = sext i16 %a0 to i32 + %x1 = sext i16 %a1 to i32 + %sum = add i32 %x0, %x1 + %shift = ashr i32 %sum, 1 + %res = trunc i32 %shift to i16 + ret i16 %res +} + +define i32 @test_fixed_i32(i32 %a0, i32 %a1) nounwind { +; X86-LABEL: test_fixed_i32: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: andl %ecx, %edx +; X86-NEXT: xorl %ecx, %eax +; X86-NEXT: sarl %eax +; X86-NEXT: addl %edx, %eax +; X86-NEXT: retl +; +; X64-LABEL: test_fixed_i32: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl %esi, %eax +; X64-NEXT: xorl %esi, %edi +; X64-NEXT: sarl %edi +; X64-NEXT: addl %edi, %eax +; X64-NEXT: retq + %and = and i32 %a0, %a1 + %xor = xor i32 %a1, %a0 + %shift = ashr i32 %xor, 1 + %res = add i32 %and, %shift + ret i32 %res +} + +define i32 @test_ext_i32(i32 %a0, i32 %a1) nounwind { +; X86-LABEL: test_ext_i32: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: 
movl %eax, %edx +; X86-NEXT: andl %ecx, %edx +; X86-NEXT: xorl %ecx, %eax +; X86-NEXT: sarl %eax +; X86-NEXT: addl %edx, %eax +; X86-NEXT: retl +; +; X64-LABEL: test_ext_i32: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl %esi, %eax +; X64-NEXT: xorl %esi, %edi +; X64-NEXT: sarl %edi +; X64-NEXT: addl %edi, %eax +; X64-NEXT: retq + %x0 = sext i32 %a0 to i64 + %x1 = sext i32 %a1 to i64 + %sum = add i64 %x0, %x1 + %shift = ashr i64 %sum, 1 + %res = trunc i64 %shift to i32 + ret i32 %res +} + +define i64 @test_fixed_i64(i64 %a0, i64 %a1) nounwind { +; X86-LABEL: test_fixed_i64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: xorl %esi, %ebx +; X86-NEXT: movl %ecx, %edx +; X86-NEXT: xorl %edi, %edx +; X86-NEXT: shrdl $1, %edx, %ebx +; X86-NEXT: andl %edi, %ecx +; X86-NEXT: sarl %edx +; X86-NEXT: andl %esi, %eax +; X86-NEXT: addl %ebx, %eax +; X86-NEXT: adcl %ecx, %edx +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: retl +; +; X64-LABEL: test_fixed_i64: +; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: andq %rsi, %rax +; X64-NEXT: xorq %rsi, %rdi +; X64-NEXT: sarq %rdi +; X64-NEXT: addq %rdi, %rax +; X64-NEXT: retq + %and = and i64 %a0, %a1 + %xor = xor i64 %a1, %a0 + %shift = ashr i64 %xor, 1 + %res = add i64 %and, %shift + ret i64 %res +} + +define i64 @test_ext_i64(i64 %a0, i64 %a1) nounwind { +; X86-LABEL: test_ext_i64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: xorl %esi, %ebx +; X86-NEXT: movl %ecx, %edx +; 
X86-NEXT: xorl %edi, %edx +; X86-NEXT: shrdl $1, %edx, %ebx +; X86-NEXT: andl %edi, %ecx +; X86-NEXT: sarl %edx +; X86-NEXT: andl %esi, %eax +; X86-NEXT: addl %ebx, %eax +; X86-NEXT: adcl %ecx, %edx +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: retl +; +; X64-LABEL: test_ext_i64: +; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: andq %rsi, %rax +; X64-NEXT: xorq %rsi, %rdi +; X64-NEXT: sarq %rdi +; X64-NEXT: addq %rdi, %rax +; X64-NEXT: retq + %x0 = sext i64 %a0 to i128 + %x1 = sext i64 %a1 to i128 + %sum = add i128 %x0, %x1 + %shift = ashr i128 %sum, 1 + %res = trunc i128 %shift to i64 + ret i64 %res +} diff --git a/llvm/test/CodeGen/X86/avgflooru-scalar.ll b/llvm/test/CodeGen/X86/avgflooru-scalar.ll new file mode 100644 index 00000000000000..592e5e15b936ad --- /dev/null +++ b/llvm/test/CodeGen/X86/avgflooru-scalar.ll @@ -0,0 +1,263 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=X86 +; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=X64 + +; +; fixed avg(x,y) = add(and(x,y),lshr(xor(x,y),1)) +; +; ext avg(x,y) = trunc(lshr(add(zext(x),zext(y)),1)) +; + +define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind { +; X86-LABEL: test_fixed_i8: +; X86: # %bb.0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: andb %cl, %dl +; X86-NEXT: xorb %cl, %al +; X86-NEXT: shrb %al +; X86-NEXT: addb %dl, %al +; X86-NEXT: retl +; +; X64-LABEL: test_fixed_i8: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: andb %sil, %al +; X64-NEXT: xorb %sil, %dil +; X64-NEXT: shrb %dil +; X64-NEXT: addb %dil, %al +; X64-NEXT: retq + %and = and i8 %a0, %a1 + %xor = xor i8 %a0, %a1 + %shift = lshr i8 %xor, 1 + %res = add i8 %and, %shift + ret i8 %res +} + +define i8 @test_ext_i8(i8 %a0, i8 %a1) nounwind { +; X86-LABEL: test_ext_i8: +; X86: # %bb.0: +; 
X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: andb %cl, %dl +; X86-NEXT: xorb %cl, %al +; X86-NEXT: shrb %al +; X86-NEXT: addb %dl, %al +; X86-NEXT: retl +; +; X64-LABEL: test_ext_i8: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: andb %sil, %al +; X64-NEXT: xorb %sil, %dil +; X64-NEXT: shrb %dil +; X64-NEXT: addb %dil, %al +; X64-NEXT: retq + %x0 = zext i8 %a0 to i16 + %x1 = zext i8 %a1 to i16 + %sum = add i16 %x0, %x1 + %shift = lshr i16 %sum, 1 + %res = trunc i16 %shift to i8 + ret i8 %res +} + +define i16 @test_fixed_i16(i16 %a0, i16 %a1) nounwind { +; X86-LABEL: test_fixed_i16: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %edx +; X86-NEXT: andl %eax, %edx +; X86-NEXT: xorl %eax, %ecx +; X86-NEXT: movzwl %cx, %eax +; X86-NEXT: shrl %eax +; X86-NEXT: addl %edx, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; X64-LABEL: test_fixed_i16: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: andl %esi, %ecx +; X64-NEXT: xorl %esi, %edi +; X64-NEXT: movzwl %di, %eax +; X64-NEXT: shrl %eax +; X64-NEXT: addl %ecx, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq + %and = and i16 %a0, %a1 + %xor = xor i16 %a0, %a1 + %shift = lshr i16 %xor, 1 + %res = add i16 %and, %shift + ret i16 %res +} + +define i16 @test_ext_i16(i16 %a0, i16 %a1) nounwind { +; X86-LABEL: test_ext_i16: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %edx +; X86-NEXT: andl %eax, %edx +; X86-NEXT: xorl %eax, %ecx +; X86-NEXT: movzwl %cx, %eax +; X86-NEXT: shrl %eax +; X86-NEXT: addl %edx, %eax +; X86-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NEXT: retl +; +; X64-LABEL: test_ext_i16: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: andl %esi, %ecx +; X64-NEXT: xorl %esi, 
%edi +; X64-NEXT: movzwl %di, %eax +; X64-NEXT: shrl %eax +; X64-NEXT: addl %ecx, %eax +; X64-NEXT: # kill: def $ax killed $ax killed $eax +; X64-NEXT: retq + %x0 = zext i16 %a0 to i32 + %x1 = zext i16 %a1 to i32 + %sum = add i32 %x0, %x1 + %shift = lshr i32 %sum, 1 + %res = trunc i32 %shift to i16 + ret i16 %res +} + +define i32 @test_fixed_i32(i32 %a0, i32 %a1) nounwind { +; X86-LABEL: test_fixed_i32: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: andl %ecx, %edx +; X86-NEXT: xorl %ecx, %eax +; X86-NEXT: shrl %eax +; X86-NEXT: addl %edx, %eax +; X86-NEXT: retl +; +; X64-LABEL: test_fixed_i32: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl %esi, %eax +; X64-NEXT: xorl %esi, %edi +; X64-NEXT: shrl %edi +; X64-NEXT: addl %edi, %eax +; X64-NEXT: retq + %and = and i32 %a0, %a1 + %xor = xor i32 %a1, %a0 + %shift = lshr i32 %xor, 1 + %res = add i32 %and, %shift + ret i32 %res +} + +define i32 @test_ext_i32(i32 %a0, i32 %a1) nounwind { +; X86-LABEL: test_ext_i32: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %edx +; X86-NEXT: andl %ecx, %edx +; X86-NEXT: xorl %ecx, %eax +; X86-NEXT: shrl %eax +; X86-NEXT: addl %edx, %eax +; X86-NEXT: retl +; +; X64-LABEL: test_ext_i32: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: andl %esi, %eax +; X64-NEXT: xorl %esi, %edi +; X64-NEXT: shrl %edi +; X64-NEXT: addl %edi, %eax +; X64-NEXT: retq + %x0 = zext i32 %a0 to i64 + %x1 = zext i32 %a1 to i64 + %sum = add i64 %x0, %x1 + %shift = lshr i64 %sum, 1 + %res = trunc i64 %shift to i32 + ret i32 %res +} + +define i64 @test_fixed_i64(i64 %a0, i64 %a1) nounwind { +; X86-LABEL: test_fixed_i64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl 
{{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: xorl %esi, %ebx +; X86-NEXT: movl %ecx, %edx +; X86-NEXT: xorl %edi, %edx +; X86-NEXT: shrdl $1, %edx, %ebx +; X86-NEXT: andl %edi, %ecx +; X86-NEXT: shrl %edx +; X86-NEXT: andl %esi, %eax +; X86-NEXT: addl %ebx, %eax +; X86-NEXT: adcl %ecx, %edx +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: retl +; +; X64-LABEL: test_fixed_i64: +; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: andq %rsi, %rax +; X64-NEXT: xorq %rsi, %rdi +; X64-NEXT: shrq %rdi +; X64-NEXT: addq %rdi, %rax +; X64-NEXT: retq + %and = and i64 %a0, %a1 + %xor = xor i64 %a1, %a0 + %shift = lshr i64 %xor, 1 + %res = add i64 %and, %shift + ret i64 %res +} + +define i64 @test_ext_i64(i64 %a0, i64 %a1) nounwind { +; X86-LABEL: test_ext_i64: +; X86: # %bb.0: +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: xorl %esi, %ebx +; X86-NEXT: movl %ecx, %edx +; X86-NEXT: xorl %edi, %edx +; X86-NEXT: shrdl $1, %edx, %ebx +; X86-NEXT: andl %edi, %ecx +; X86-NEXT: shrl %edx +; X86-NEXT: andl %esi, %eax +; X86-NEXT: addl %ebx, %eax +; X86-NEXT: adcl %ecx, %edx +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: retl +; +; X64-LABEL: test_ext_i64: +; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: andq %rsi, %rax +; X64-NEXT: xorq %rsi, %rdi +; X64-NEXT: shrq %rdi +; X64-NEXT: addq %rdi, %rax +; X64-NEXT: retq + %x0 = zext i64 %a0 to i128 + %x1 = zext i64 %a1 to i128 + %sum = add i128 %x0, %x1 + %shift = lshr i128 %sum, 1 + %res = trunc i128 %shift to i64 + ret i64 %res +} From 7e3e9d43086d21f9996a52f0d4f24e0edeb34991 Mon Sep 17 00:00:00 2001 From: Joe Nash Date: Fri, 14 Jun 2024 09:33:03 -0400 Subject: [PATCH 
091/155] [AMDGPU] Change getLdStRegisterOperand to !cond for better diagnostic (#95475) If you would hit the unexpected case in these !if trees, you'd get an error message like "error: Not a known RegisterClass! def VReg_1..." This can happen when changing code quite indirectly related to these class definitions. We can use !cond here, which has a builtin facility to throw an error if no case in the !cond statement is hit. NFC. --- llvm/lib/Target/AMDGPU/BUFInstructions.td | 10 ++++------ llvm/lib/Target/AMDGPU/SIInstrInfo.td | 12 +++++------- 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td index 50e62788c5eacb..43e5434ea27007 100644 --- a/llvm/lib/Target/AMDGPU/BUFInstructions.td +++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td @@ -399,12 +399,10 @@ class MUBUF_Invalidate : class getLdStVDataRegisterOperand { RegisterOperand tfeVDataOp = - !if(!eq(RC.Size, 32), AVLdSt_64, - !if(!eq(RC.Size, 64), AVLdSt_96, - !if(!eq(RC.Size, 96), AVLdSt_128, - !if(!eq(RC.Size, 128), AVLdSt_160, - RegisterOperand // Invalid register. 
- )))); + !cond(!eq(RC.Size, 32) : AVLdSt_64, + !eq(RC.Size, 64) : AVLdSt_96, + !eq(RC.Size, 96) : AVLdSt_128, + !eq(RC.Size, 128) : AVLdSt_160); RegisterOperand ret = !if(isTFE, tfeVDataOp, getLdStRegisterOperand.ret); } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 3921b1469e15ee..6682763210411c 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2242,13 +2242,11 @@ class getHasExt { RegisterOperand ret = - !if(!eq(RC.Size, 32), AVLdSt_32, - !if(!eq(RC.Size, 64), AVLdSt_64, - !if(!eq(RC.Size, 96), AVLdSt_96, - !if(!eq(RC.Size, 128), AVLdSt_128, - !if(!eq(RC.Size, 160), AVLdSt_160, - RegisterOperand // invalid register - ))))); + !cond(!eq(RC.Size, 32) : AVLdSt_32, + !eq(RC.Size, 64) : AVLdSt_64, + !eq(RC.Size, 96) : AVLdSt_96, + !eq(RC.Size, 128) : AVLdSt_128, + !eq(RC.Size, 160) : AVLdSt_160); } class getHasVOP3DPP Date: Fri, 14 Jun 2024 06:38:48 -0700 Subject: [PATCH 092/155] [ProfileData] Migrate to getValueArrayForSite (#95493) This patch migrates uses of getValueForSite to getValueArrayForSite. Each hunk is self-contained, meaning that each one can be applied independently of the others. In the unit test, there are cases where the array length check is performed a lot earlier than the array content check. For now, I'm leaving the length checks where they are. I'll consider moving them when I migrate uses of getNumValueDataForSite to getValueArrayForSite in a follow-up patch. 
--- llvm/lib/ProfileData/InstrProf.cpp | 22 ++---- llvm/lib/ProfileData/InstrProfWriter.cpp | 13 ++-- llvm/unittests/ProfileData/InstrProfTest.cpp | 77 +++++++++++--------- 3 files changed, 57 insertions(+), 55 deletions(-) diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index 184e2c86d6584f..a0662a5976bbef 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -731,10 +731,8 @@ void InstrProfRecord::accumulateCounts(CountSumOrPercent &Sum) const { uint64_t KindSum = 0; uint32_t NumValueSites = getNumValueSites(VK); for (size_t I = 0; I < NumValueSites; ++I) { - uint32_t NV = getNumValueDataForSite(VK, I); - std::unique_ptr VD = getValueForSite(VK, I); - for (uint32_t V = 0; V < NV; V++) - KindSum += VD[V].Count; + for (const auto &V : getValueArrayForSite(VK, I)) + KindSum += V.Count; } Sum.ValueCounts[VK] += KindSum; } @@ -1089,13 +1087,14 @@ uint32_t getNumValueDataInstrProf(const void *Record, uint32_t VKind) { uint32_t getNumValueDataForSiteInstrProf(const void *R, uint32_t VK, uint32_t S) { - return reinterpret_cast(R) - ->getNumValueDataForSite(VK, S); + const auto *IPR = reinterpret_cast(R); + return IPR->getValueArrayForSite(VK, S).size(); } void getValueForSiteInstrProf(const void *R, InstrProfValueData *Dst, uint32_t K, uint32_t S) { - reinterpret_cast(R)->getValueForSite(Dst, K, S); + const auto *IPR = reinterpret_cast(R); + llvm::copy(IPR->getValueArrayForSite(K, S), Dst); } ValueProfData *allocValueProfDataInstrProf(size_t TotalSizeInBytes) { @@ -1274,14 +1273,9 @@ void annotateValueSite(Module &M, Instruction &Inst, const InstrProfRecord &InstrProfR, InstrProfValueKind ValueKind, uint32_t SiteIdx, uint32_t MaxMDCount) { - uint32_t NV = InstrProfR.getNumValueDataForSite(ValueKind, SiteIdx); - if (!NV) + auto VDs = InstrProfR.getValueArrayForSite(ValueKind, SiteIdx); + if (VDs.empty()) return; - - std::unique_ptr VD = - InstrProfR.getValueForSite(ValueKind, SiteIdx); - - ArrayRef 
VDs(VD.get(), NV); uint64_t Sum = 0; for (const InstrProfValueData &V : VDs) Sum = SaturatingAdd(Sum, V.Count); diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index 1a9add109a360a..7cf4704a79faae 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -1088,15 +1088,14 @@ void InstrProfWriter::writeRecordInText(StringRef Name, uint64_t Hash, OS << "# ValueKind = " << ValueProfKindStr[VK] << ":\n" << VK << "\n"; OS << "# NumValueSites:\n" << NS << "\n"; for (uint32_t S = 0; S < NS; S++) { - uint32_t ND = Func.getNumValueDataForSite(VK, S); - OS << ND << "\n"; - std::unique_ptr VD = Func.getValueForSite(VK, S); - for (uint32_t I = 0; I < ND; I++) { + auto VD = Func.getValueArrayForSite(VK, S); + OS << VD.size() << "\n"; + for (const auto &V : VD) { if (VK == IPVK_IndirectCallTarget || VK == IPVK_VTableTarget) - OS << Symtab.getFuncOrVarNameIfDefined(VD[I].Value) << ":" - << VD[I].Count << "\n"; + OS << Symtab.getFuncOrVarNameIfDefined(V.Value) << ":" << V.Count + << "\n"; else - OS << VD[I].Value << ":" << VD[I].Count << "\n"; + OS << V.Value << ":" << V.Count << "\n"; } } } diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp index 8a04281efeb50d..dae55422909342 100644 --- a/llvm/unittests/ProfileData/InstrProfTest.cpp +++ b/llvm/unittests/ProfileData/InstrProfTest.cpp @@ -1392,9 +1392,9 @@ TEST(ValueProfileReadWriteTest, value_prof_data_read_write) { return VD1.Count > VD2.Count; }; - std::unique_ptr VD_0( - Record.getValueForSite(IPVK_IndirectCallTarget, 0)); - llvm::sort(&VD_0[0], &VD_0[5], Cmp); + SmallVector VD_0( + Record.getValueArrayForSite(IPVK_IndirectCallTarget, 0)); + llvm::sort(VD_0, Cmp); EXPECT_STREQ((const char *)VD_0[0].Value, "callee2"); EXPECT_EQ(1000U, VD_0[0].Count); EXPECT_STREQ((const char *)VD_0[1].Value, "callee3"); @@ -1406,9 +1406,9 @@ TEST(ValueProfileReadWriteTest, 
value_prof_data_read_write) { EXPECT_STREQ((const char *)VD_0[4].Value, "callee5"); EXPECT_EQ(100U, VD_0[4].Count); - std::unique_ptr VD_1( - Record.getValueForSite(IPVK_IndirectCallTarget, 1)); - llvm::sort(&VD_1[0], &VD_1[4], Cmp); + SmallVector VD_1( + Record.getValueArrayForSite(IPVK_IndirectCallTarget, 1)); + llvm::sort(VD_1, Cmp); EXPECT_STREQ((const char *)VD_1[0].Value, "callee2"); EXPECT_EQ(VD_1[0].Count, 2500U); EXPECT_STREQ((const char *)VD_1[1].Value, "callee1"); @@ -1418,9 +1418,9 @@ TEST(ValueProfileReadWriteTest, value_prof_data_read_write) { EXPECT_STREQ((const char *)VD_1[3].Value, "callee5"); EXPECT_EQ(VD_1[3].Count, 800U); - std::unique_ptr VD_2( - Record.getValueForSite(IPVK_IndirectCallTarget, 2)); - llvm::sort(&VD_2[0], &VD_2[3], Cmp); + SmallVector VD_2( + Record.getValueArrayForSite(IPVK_IndirectCallTarget, 2)); + llvm::sort(VD_2, Cmp); EXPECT_STREQ((const char *)VD_2[0].Value, "callee4"); EXPECT_EQ(VD_2[0].Count, 5500U); EXPECT_STREQ((const char *)VD_2[1].Value, "callee3"); @@ -1428,9 +1428,9 @@ TEST(ValueProfileReadWriteTest, value_prof_data_read_write) { EXPECT_STREQ((const char *)VD_2[2].Value, "callee6"); EXPECT_EQ(VD_2[2].Count, 800U); - std::unique_ptr VD_3( - Record.getValueForSite(IPVK_IndirectCallTarget, 3)); - llvm::sort(&VD_3[0], &VD_3[2], Cmp); + SmallVector VD_3( + Record.getValueArrayForSite(IPVK_IndirectCallTarget, 3)); + llvm::sort(VD_3, Cmp); EXPECT_STREQ((const char *)VD_3[0].Value, "callee3"); EXPECT_EQ(VD_3[0].Count, 2000U); EXPECT_STREQ((const char *)VD_3[1].Value, "callee2"); @@ -1442,8 +1442,9 @@ TEST(ValueProfileReadWriteTest, value_prof_data_read_write) { ASSERT_EQ(Record.getNumValueDataForSite(IPVK_VTableTarget, 2), 3U); ASSERT_EQ(Record.getNumValueDataForSite(IPVK_VTableTarget, 3), 2U); - auto VD0(Record.getValueForSite(IPVK_VTableTarget, 0)); - llvm::sort(&VD0[0], &VD0[5], Cmp); + SmallVector VD0( + Record.getValueArrayForSite(IPVK_VTableTarget, 0)); + llvm::sort(VD0, Cmp); EXPECT_EQ(VD0[0].Value, 
getCalleeAddress(vtable2)); EXPECT_EQ(VD0[0].Count, 1000U); EXPECT_EQ(VD0[1].Value, getCalleeAddress(vtable3)); @@ -1455,8 +1456,9 @@ TEST(ValueProfileReadWriteTest, value_prof_data_read_write) { EXPECT_EQ(VD0[4].Value, getCalleeAddress(vtable5)); EXPECT_EQ(VD0[4].Count, 100U); - auto VD1(Record.getValueForSite(IPVK_VTableTarget, 1)); - llvm::sort(&VD1[0], &VD1[4], Cmp); + SmallVector VD1( + Record.getValueArrayForSite(IPVK_VTableTarget, 1)); + llvm::sort(VD1, Cmp); EXPECT_EQ(VD1[0].Value, getCalleeAddress(vtable2)); EXPECT_EQ(VD1[0].Count, 2500U); EXPECT_EQ(VD1[1].Value, getCalleeAddress(vtable1)); @@ -1466,8 +1468,9 @@ TEST(ValueProfileReadWriteTest, value_prof_data_read_write) { EXPECT_EQ(VD1[3].Value, getCalleeAddress(vtable5)); EXPECT_EQ(VD1[3].Count, 800U); - auto VD2(Record.getValueForSite(IPVK_VTableTarget, 2)); - llvm::sort(&VD2[0], &VD2[3], Cmp); + SmallVector VD2( + Record.getValueArrayForSite(IPVK_VTableTarget, 2)); + llvm::sort(VD2, Cmp); EXPECT_EQ(VD2[0].Value, getCalleeAddress(vtable4)); EXPECT_EQ(VD2[0].Count, 5500U); EXPECT_EQ(VD2[1].Value, getCalleeAddress(vtable3)); @@ -1475,8 +1478,9 @@ TEST(ValueProfileReadWriteTest, value_prof_data_read_write) { EXPECT_EQ(VD2[2].Value, getCalleeAddress(vtable6)); EXPECT_EQ(VD2[2].Count, 800U); - auto VD3(Record.getValueForSite(IPVK_VTableTarget, 3)); - llvm::sort(&VD3[0], &VD3[2], Cmp); + SmallVector VD3( + Record.getValueArrayForSite(IPVK_VTableTarget, 3)); + llvm::sort(VD3, Cmp); EXPECT_EQ(VD3[0].Value, getCalleeAddress(vtable3)); EXPECT_EQ(VD3[0].Count, 2000U); EXPECT_EQ(VD3[1].Value, getCalleeAddress(vtable2)); @@ -1537,8 +1541,9 @@ TEST(ValueProfileReadWriteTest, symtab_mapping) { auto Cmp = [](const InstrProfValueData &VD1, const InstrProfValueData &VD2) { return VD1.Count > VD2.Count; }; - auto VD_0(Record.getValueForSite(IPVK_IndirectCallTarget, 0)); - llvm::sort(&VD_0[0], &VD_0[5], Cmp); + SmallVector VD_0( + Record.getValueArrayForSite(IPVK_IndirectCallTarget, 0)); + llvm::sort(VD_0, Cmp); 
ASSERT_EQ(VD_0[0].Value, 0x2000ULL); ASSERT_EQ(VD_0[0].Count, 1000U); ASSERT_EQ(VD_0[1].Value, 0x3000ULL); @@ -1554,9 +1559,10 @@ TEST(ValueProfileReadWriteTest, symtab_mapping) { { // The first vtable site. - auto VD(Record.getValueForSite(IPVK_VTableTarget, 0)); - ASSERT_EQ(Record.getNumValueDataForSite(IPVK_VTableTarget, 0), 5U); - llvm::sort(&VD[0], &VD[5], Cmp); + SmallVector VD( + Record.getValueArrayForSite(IPVK_VTableTarget, 0)); + ASSERT_THAT(VD, SizeIs(5)); + llvm::sort(VD, Cmp); EXPECT_EQ(VD[0].Count, 1000U); EXPECT_EQ(VD[0].Value, MD5Hash("vtable2")); EXPECT_EQ(VD[1].Count, 500U); @@ -1573,9 +1579,10 @@ TEST(ValueProfileReadWriteTest, symtab_mapping) { { // The second vtable site. - auto VD(Record.getValueForSite(IPVK_VTableTarget, 1)); - ASSERT_EQ(Record.getNumValueDataForSite(IPVK_VTableTarget, 1), 4U); - llvm::sort(&VD[0], &VD[4], Cmp); + SmallVector VD( + Record.getValueArrayForSite(IPVK_VTableTarget, 1)); + ASSERT_THAT(VD, SizeIs(4)); + llvm::sort(VD, Cmp); EXPECT_EQ(VD[0].Value, MD5Hash("vtable2")); EXPECT_EQ(VD[0].Count, 2500U); EXPECT_EQ(VD[1].Value, MD5Hash("vtable1")); @@ -1590,9 +1597,10 @@ TEST(ValueProfileReadWriteTest, symtab_mapping) { { // The third vtable site. - auto VD(Record.getValueForSite(IPVK_VTableTarget, 2)); - ASSERT_EQ(Record.getNumValueDataForSite(IPVK_VTableTarget, 2), 3U); - llvm::sort(&VD[0], &VD[3], Cmp); + SmallVector VD( + Record.getValueArrayForSite(IPVK_VTableTarget, 2)); + ASSERT_THAT(VD, SizeIs(3)); + llvm::sort(VD, Cmp); EXPECT_EQ(VD[0].Count, 5500U); EXPECT_EQ(VD[0].Value, MD5Hash("vtable4")); EXPECT_EQ(VD[1].Count, 1000U); @@ -1604,9 +1612,10 @@ TEST(ValueProfileReadWriteTest, symtab_mapping) { { // The fourth vtable site. 
- auto VD(Record.getValueForSite(IPVK_VTableTarget, 3)); - ASSERT_EQ(Record.getNumValueDataForSite(IPVK_VTableTarget, 3), 2U); - llvm::sort(&VD[0], &VD[2], Cmp); + SmallVector VD( + Record.getValueArrayForSite(IPVK_VTableTarget, 3)); + ASSERT_THAT(VD, SizeIs(2)); + llvm::sort(VD, Cmp); EXPECT_EQ(VD[0].Count, 2000U); EXPECT_EQ(VD[0].Value, MD5Hash("vtable3")); EXPECT_EQ(VD[1].Count, 1800U); From d5297b72aa32ad3a69563a1fcc61294282f0b379 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?kadir=20=C3=A7etinkaya?= Date: Fri, 14 Jun 2024 15:45:04 +0200 Subject: [PATCH 093/155] [include-cleaner] Pass WorkingDir to suggestPathToFileForDiagnostics (#95114) Addresses https://github.com/llvm/llvm-project/issues/81215. --- .../include-cleaner/lib/IncludeSpeller.cpp | 11 +++++++++-- .../unittests/IncludeSpellerTest.cpp | 18 ++++++++++++++++++ clang/include/clang/Testing/TestAST.h | 4 ++++ clang/lib/Testing/TestAST.cpp | 3 +++ 4 files changed, 34 insertions(+), 2 deletions(-) diff --git a/clang-tools-extra/include-cleaner/lib/IncludeSpeller.cpp b/clang-tools-extra/include-cleaner/lib/IncludeSpeller.cpp index 2073f0a1d3d878..8332eb685d652f 100644 --- a/clang-tools-extra/include-cleaner/lib/IncludeSpeller.cpp +++ b/clang-tools-extra/include-cleaner/lib/IncludeSpeller.cpp @@ -9,6 +9,7 @@ #include "clang-include-cleaner/IncludeSpeller.h" #include "clang-include-cleaner/Types.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Registry.h" #include @@ -30,8 +31,14 @@ class DefaultIncludeSpeller : public IncludeSpeller { return Input.H.verbatim().str(); case Header::Physical: bool IsAngled = false; + std::string WorkingDir; + if (auto WD = Input.HS.getFileMgr() + .getVirtualFileSystem() + .getCurrentWorkingDirectory()) + WorkingDir = *WD; std::string FinalSpelling = Input.HS.suggestPathToFileForDiagnostics( - Input.H.physical(), Input.Main->tryGetRealPathName(), &IsAngled); + Input.H.resolvedPath(), WorkingDir, 
Input.Main->tryGetRealPathName(), + &IsAngled); return IsAngled ? "<" + FinalSpelling + ">" : "\"" + FinalSpelling + "\""; } llvm_unreachable("Unknown clang::include_cleaner::Header::Kind enum"); @@ -60,4 +67,4 @@ std::string spellHeader(const IncludeSpeller::Input &Input) { return Spelling; } -} // namespace clang::include_cleaner \ No newline at end of file +} // namespace clang::include_cleaner diff --git a/clang-tools-extra/include-cleaner/unittests/IncludeSpellerTest.cpp b/clang-tools-extra/include-cleaner/unittests/IncludeSpellerTest.cpp index a548868071a122..8f6ad09c46cc4a 100644 --- a/clang-tools-extra/include-cleaner/unittests/IncludeSpellerTest.cpp +++ b/clang-tools-extra/include-cleaner/unittests/IncludeSpellerTest.cpp @@ -89,6 +89,24 @@ TEST(IncludeSpeller, CanOverrideSystemHeaders) { HS, MainFile})); } +TEST(IncludeSpeller, RelativeIncludeSearchPath) { + TestInputs Inputs; + + Inputs.WorkingDir = "/root/inner"; + Inputs.ExtraArgs.push_back("-I.."); + Inputs.ExtraFiles["/root/foo.h"] = ""; + TestAST AST{Inputs}; + + auto &FM = AST.fileManager(); + auto &HS = AST.preprocessor().getHeaderSearchInfo(); + const auto *MainFile = AST.sourceManager().getFileEntryForID( + AST.sourceManager().getMainFileID()); + + EXPECT_EQ("\"foo.h\"", + spellHeader( + {Header{*FM.getOptionalFileRef("/root/foo.h")}, HS, MainFile})); +} + IncludeSpellingStrategy::Add Speller("dummy", "Dummy Include Speller"); diff --git a/clang/include/clang/Testing/TestAST.h b/clang/include/clang/Testing/TestAST.h index 845e31f65438b3..8878bfbe169845 100644 --- a/clang/include/clang/Testing/TestAST.h +++ b/clang/include/clang/Testing/TestAST.h @@ -49,6 +49,10 @@ struct TestInputs { /// Keys are plain filenames ("foo.h"), values are file content. llvm::StringMap ExtraFiles = {}; + /// Root of execution, all relative paths in Args/Files are resolved against + /// this. + std::string WorkingDir; + /// Filename to use for translation unit. A default will be used when empty. 
std::string FileName; diff --git a/clang/lib/Testing/TestAST.cpp b/clang/lib/Testing/TestAST.cpp index 3a50c2d9b5d05e..fe8b93851613dd 100644 --- a/clang/lib/Testing/TestAST.cpp +++ b/clang/lib/Testing/TestAST.cpp @@ -13,6 +13,7 @@ #include "clang/Frontend/TextDiagnostic.h" #include "clang/Testing/CommandLineArgs.h" #include "llvm/ADT/ScopeExit.h" +#include "llvm/Support/Error.h" #include "llvm/Support/VirtualFileSystem.h" #include "gtest/gtest.h" @@ -106,6 +107,8 @@ TestAST::TestAST(const TestInputs &In) { // Set up a VFS with only the virtual file visible. auto VFS = llvm::makeIntrusiveRefCnt(); + if (auto Err = VFS->setCurrentWorkingDirectory(In.WorkingDir)) + ADD_FAILURE() << "Failed to setWD: " << Err.message(); VFS->addFile(Filename, /*ModificationTime=*/0, llvm::MemoryBuffer::getMemBufferCopy(In.Code, Filename)); for (const auto &Extra : In.ExtraFiles) From 08fae467e4c742e91c8fdff8519718cf2c7c9b0e Mon Sep 17 00:00:00 2001 From: c8ef Date: Fri, 14 Jun 2024 21:53:29 +0800 Subject: [PATCH 094/155] [DAG] fold `avgs(sext(x), sext(y))` -> `sext(avgs(x, y))` (#95365) Follow up of #95134. Context: https://github.com/llvm/llvm-project/pull/95134#issuecomment-2162825594. 
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 15 ++++ .../AArch64/aarch64-known-bits-hadd.ll | 6 +- llvm/test/CodeGen/AArch64/avg.ll | 78 +++++++++++++++++++ 3 files changed, 95 insertions(+), 4 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 78970bc4fe4ab7..80b8d482514720 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -5237,6 +5237,7 @@ SDValue DAGCombiner::visitAVG(SDNode *N) { DAG.getShiftAmountConstant(1, VT, DL)); // fold avgu(zext(x), zext(y)) -> zext(avgu(x, y)) + // fold avgs(sext(x), sext(y)) -> sext(avgs(x, y)) if (sd_match( N, m_BinOp(ISD::AVGFLOORU, m_ZExt(m_Value(X)), m_ZExt(m_Value(Y)))) && X.getValueType() == Y.getValueType() && @@ -5251,6 +5252,20 @@ SDValue DAGCombiner::visitAVG(SDNode *N) { SDValue AvgCeilU = DAG.getNode(ISD::AVGCEILU, DL, X.getValueType(), X, Y); return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, AvgCeilU); } + if (sd_match( + N, m_BinOp(ISD::AVGFLOORS, m_SExt(m_Value(X)), m_SExt(m_Value(Y)))) && + X.getValueType() == Y.getValueType() && + hasOperation(ISD::AVGFLOORS, X.getValueType())) { + SDValue AvgFloorS = DAG.getNode(ISD::AVGFLOORS, DL, X.getValueType(), X, Y); + return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, AvgFloorS); + } + if (sd_match( + N, m_BinOp(ISD::AVGCEILS, m_SExt(m_Value(X)), m_SExt(m_Value(Y)))) && + X.getValueType() == Y.getValueType() && + hasOperation(ISD::AVGCEILS, X.getValueType())) { + SDValue AvgCeilS = DAG.getNode(ISD::AVGCEILS, DL, X.getValueType(), X, Y); + return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, AvgCeilS); + } // Fold avgflooru(x,y) -> avgceilu(x,y-1) iff y != 0 // Fold avgflooru(x,y) -> avgceilu(x-1,y) iff x != 0 diff --git a/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll b/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll index b2cf089d8145fa..0506e1ed9710b2 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll +++ 
b/llvm/test/CodeGen/AArch64/aarch64-known-bits-hadd.ll @@ -95,9 +95,8 @@ define <8 x i16> @urhadd_sext(<8 x i8> %a0, <8 x i8> %a1) { define <8 x i16> @hadds_sext(<8 x i8> %a0, <8 x i8> %a1) { ; CHECK-LABEL: hadds_sext: ; CHECK: // %bb.0: +; CHECK-NEXT: shadd v0.8b, v0.8b, v1.8b ; CHECK-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h ; CHECK-NEXT: bic v0.8h, #254, lsl #8 ; CHECK-NEXT: ret %x0 = sext <8 x i8> %a0 to <8 x i16> @@ -110,9 +109,8 @@ define <8 x i16> @hadds_sext(<8 x i8> %a0, <8 x i8> %a1) { define <8 x i16> @shaddu_sext(<8 x i8> %a0, <8 x i8> %a1) { ; CHECK-LABEL: shaddu_sext: ; CHECK: // %bb.0: +; CHECK-NEXT: srhadd v0.8b, v0.8b, v1.8b ; CHECK-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h ; CHECK-NEXT: bic v0.8h, #254, lsl #8 ; CHECK-NEXT: ret %x0 = sext <8 x i8> %a0 to <8 x i16> diff --git a/llvm/test/CodeGen/AArch64/avg.ll b/llvm/test/CodeGen/AArch64/avg.ll index dc877085559879..cabc0d346b806f 100644 --- a/llvm/test/CodeGen/AArch64/avg.ll +++ b/llvm/test/CodeGen/AArch64/avg.ll @@ -68,3 +68,81 @@ define <16 x i16> @zext_avgceilu_mismatch(<16 x i4> %a0, <16 x i8> %a1) { %avg = sub <16 x i16> %or, %shift ret <16 x i16> %avg } + +define <16 x i16> @sext_avgfloors(<16 x i8> %a0, <16 x i8> %a1) { +; CHECK-LABEL: sext_avgfloors: +; CHECK: // %bb.0: +; CHECK-NEXT: shadd v0.16b, v0.16b, v1.16b +; CHECK-NEXT: sshll2 v1.8h, v0.16b, #0 +; CHECK-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-NEXT: ret + %x0 = sext <16 x i8> %a0 to <16 x i16> + %x1 = sext <16 x i8> %a1 to <16 x i16> + %and = and <16 x i16> %x0, %x1 + %xor = xor <16 x i16> %x0, %x1 + %shift = ashr <16 x i16> %xor, + %avg = add <16 x i16> %and, %shift + ret <16 x i16> %avg +} + +define <16 x i16> @sext_avgfloors_mismatch(<16 x i8> %a0, <16 x i4> %a1) { +; CHECK-LABEL: sext_avgfloors_mismatch: +; CHECK: // %bb.0: +; CHECK-NEXT: ushll2 v2.8h, v1.16b, #0 +; CHECK-NEXT: ushll v1.8h, v1.8b, #0 
+; CHECK-NEXT: sshll v3.8h, v0.8b, #0 +; CHECK-NEXT: sshll2 v0.8h, v0.16b, #0 +; CHECK-NEXT: shl v1.8h, v1.8h, #12 +; CHECK-NEXT: shl v2.8h, v2.8h, #12 +; CHECK-NEXT: sshr v4.8h, v1.8h, #12 +; CHECK-NEXT: sshr v1.8h, v2.8h, #12 +; CHECK-NEXT: shadd v1.8h, v0.8h, v1.8h +; CHECK-NEXT: shadd v0.8h, v3.8h, v4.8h +; CHECK-NEXT: ret + %x0 = sext <16 x i8> %a0 to <16 x i16> + %x1 = sext <16 x i4> %a1 to <16 x i16> + %and = and <16 x i16> %x0, %x1 + %xor = xor <16 x i16> %x0, %x1 + %shift = ashr <16 x i16> %xor, + %avg = add <16 x i16> %and, %shift + ret <16 x i16> %avg +} + +define <16 x i16> @sext_avgceils(<16 x i8> %a0, <16 x i8> %a1) { +; CHECK-LABEL: sext_avgceils: +; CHECK: // %bb.0: +; CHECK-NEXT: srhadd v0.16b, v0.16b, v1.16b +; CHECK-NEXT: sshll2 v1.8h, v0.16b, #0 +; CHECK-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-NEXT: ret + %x0 = sext <16 x i8> %a0 to <16 x i16> + %x1 = sext <16 x i8> %a1 to <16 x i16> + %or = or <16 x i16> %x0, %x1 + %xor = xor <16 x i16> %x0, %x1 + %shift = ashr <16 x i16> %xor, + %avg = sub <16 x i16> %or, %shift + ret <16 x i16> %avg +} + +define <16 x i16> @sext_avgceils_mismatch(<16 x i4> %a0, <16 x i8> %a1) { +; CHECK-LABEL: sext_avgceils_mismatch: +; CHECK: // %bb.0: +; CHECK-NEXT: ushll v2.8h, v0.8b, #0 +; CHECK-NEXT: ushll2 v0.8h, v0.16b, #0 +; CHECK-NEXT: sshll v3.8h, v1.8b, #0 +; CHECK-NEXT: sshll2 v1.8h, v1.16b, #0 +; CHECK-NEXT: shl v2.8h, v2.8h, #12 +; CHECK-NEXT: shl v0.8h, v0.8h, #12 +; CHECK-NEXT: sshr v2.8h, v2.8h, #12 +; CHECK-NEXT: sshr v0.8h, v0.8h, #12 +; CHECK-NEXT: srhadd v1.8h, v0.8h, v1.8h +; CHECK-NEXT: srhadd v0.8h, v2.8h, v3.8h +; CHECK-NEXT: ret + %x0 = sext <16 x i4> %a0 to <16 x i16> + %x1 = sext <16 x i8> %a1 to <16 x i16> + %or = or <16 x i16> %x0, %x1 + %xor = xor <16 x i16> %x0, %x1 + %shift = ashr <16 x i16> %xor, + %avg = sub <16 x i16> %or, %shift + ret <16 x i16> %avg +} From db3a47c810639388c80ed173dda3623dac00ce0a Mon Sep 17 00:00:00 2001 From: beetrees Date: Fri, 14 Jun 2024 15:05:57 +0100 Subject: [PATCH 
095/155] Fix silent truncation of inline ASM `srcloc` cookie when going through a `DiagnosticInfoSrcMgr` (#84559) The size of the inline ASM `srcloc` cookie was changed from 32 bits to 64 bits in [D105491](https://reviews.llvm.org/D105491). However, that commit only updated the size of the cookie in `DiagnosticInfoInlineAsm`, meaning that inline ASM diagnostics that are instead represented with a `DiagnosticInfoSrcMgr` have their cookies truncated to 32 bits. This PR replaces the remaining uses of `unsigned` to represent the cookie with `uint64_t`, allowing the cookie to make it all the way to the diagnostic handler without being truncated. --- clang/lib/CodeGen/CGCall.cpp | 2 +- clang/test/CodeGen/attr-error.c | 2 +- clang/test/CodeGen/attr-warning.c | 2 +- llvm/docs/LangRef.rst | 2 +- llvm/include/llvm/IR/DiagnosticInfo.h | 12 ++++++------ llvm/lib/CodeGen/MachineModuleInfo.cpp | 6 +++--- llvm/lib/IR/DiagnosticInfo.cpp | 2 +- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 65d82285b907b5..c11ebe9a3e9c04 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5760,7 +5760,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // Add metadata if calling an __attribute__((error(""))) or warning fn. 
if (TargetDecl && TargetDecl->hasAttr()) { llvm::ConstantInt *Line = - llvm::ConstantInt::get(Int32Ty, Loc.getRawEncoding()); + llvm::ConstantInt::get(Int64Ty, Loc.getRawEncoding()); llvm::ConstantAsMetadata *MD = llvm::ConstantAsMetadata::get(Line); llvm::MDTuple *MDT = llvm::MDNode::get(getLLVMContext(), {MD}); CI->setMetadata("srcloc", MDT); diff --git a/clang/test/CodeGen/attr-error.c b/clang/test/CodeGen/attr-error.c index a1b63ab9fa9e57..ce0e00c3c465b0 100644 --- a/clang/test/CodeGen/attr-error.c +++ b/clang/test/CodeGen/attr-error.c @@ -8,4 +8,4 @@ void bar(void) { // CHECK: call void @foo(), !srcloc [[SRCLOC:![0-9]+]] // CHECK: declare{{.*}} void @foo() [[ATTR:#[0-9]+]] // CHECK: attributes [[ATTR]] = {{{.*}}"dontcall-error"="oh no" -// CHECK: [[SRCLOC]] = !{i32 {{[0-9]+}}} +// CHECK: [[SRCLOC]] = !{i64 {{[0-9]+}}} diff --git a/clang/test/CodeGen/attr-warning.c b/clang/test/CodeGen/attr-warning.c index 5c89066aff75a7..034ab7869f7697 100644 --- a/clang/test/CodeGen/attr-warning.c +++ b/clang/test/CodeGen/attr-warning.c @@ -8,4 +8,4 @@ void bar(void) { // CHECK: call void @foo(), !srcloc [[SRCLOC:![0-9]+]] // CHECK: declare{{.*}} void @foo() [[ATTR:#[0-9]+]] // CHECK: attributes [[ATTR]] = {{{.*}}"dontcall-warn"="oh no" -// CHECK: [[SRCLOC]] = !{i32 {{[0-9]+}}} +// CHECK: [[SRCLOC]] = !{i64 {{[0-9]+}}} diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 10d53bea149ef6..6935ccdfc91961 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -5658,7 +5658,7 @@ it. For example: call void asm sideeffect "something bad", ""(), !srcloc !42 ... - !42 = !{ i32 1234567 } + !42 = !{ i64 1234567 } It is up to the front-end to make sense of the magic numbers it places in the IR. 
If the MDNode contains multiple constants, the code generator diff --git a/llvm/include/llvm/IR/DiagnosticInfo.h b/llvm/include/llvm/IR/DiagnosticInfo.h index 628445fe9fb2cc..b35923efdad5d2 100644 --- a/llvm/include/llvm/IR/DiagnosticInfo.h +++ b/llvm/include/llvm/IR/DiagnosticInfo.h @@ -1076,11 +1076,11 @@ class DiagnosticInfoSrcMgr : public DiagnosticInfo { // For inlineasm !srcloc translation. bool InlineAsmDiag; - unsigned LocCookie; + uint64_t LocCookie; public: DiagnosticInfoSrcMgr(const SMDiagnostic &Diagnostic, StringRef ModName, - bool InlineAsmDiag = true, unsigned LocCookie = 0) + bool InlineAsmDiag = true, uint64_t LocCookie = 0) : DiagnosticInfo(DK_SrcMgr, getDiagnosticSeverity(Diagnostic.getKind())), Diagnostic(Diagnostic), ModName(ModName), InlineAsmDiag(InlineAsmDiag), LocCookie(LocCookie) {} @@ -1088,7 +1088,7 @@ class DiagnosticInfoSrcMgr : public DiagnosticInfo { StringRef getModuleName() const { return ModName; } bool isInlineAsmDiag() const { return InlineAsmDiag; } const SMDiagnostic &getSMDiag() const { return Diagnostic; } - unsigned getLocCookie() const { return LocCookie; } + uint64_t getLocCookie() const { return LocCookie; } void print(DiagnosticPrinter &DP) const override; static bool classof(const DiagnosticInfo *DI) { @@ -1101,16 +1101,16 @@ void diagnoseDontCall(const CallInst &CI); class DiagnosticInfoDontCall : public DiagnosticInfo { StringRef CalleeName; StringRef Note; - unsigned LocCookie; + uint64_t LocCookie; public: DiagnosticInfoDontCall(StringRef CalleeName, StringRef Note, - DiagnosticSeverity DS, unsigned LocCookie) + DiagnosticSeverity DS, uint64_t LocCookie) : DiagnosticInfo(DK_DontCall, DS), CalleeName(CalleeName), Note(Note), LocCookie(LocCookie) {} StringRef getFunctionName() const { return CalleeName; } StringRef getNote() const { return Note; } - unsigned getLocCookie() const { return LocCookie; } + uint64_t getLocCookie() const { return LocCookie; } void print(DiagnosticPrinter &DP) const override; static bool 
classof(const DiagnosticInfo *DI) { return DI->getKind() == DK_DontCall; diff --git a/llvm/lib/CodeGen/MachineModuleInfo.cpp b/llvm/lib/CodeGen/MachineModuleInfo.cpp index 1dba591d02b6c2..b950f4fdbcf796 100644 --- a/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -185,7 +185,7 @@ INITIALIZE_PASS(MachineModuleInfoWrapperPass, "machinemoduleinfo", "Machine Module Information", false, false) char MachineModuleInfoWrapperPass::ID = 0; -static unsigned getLocCookie(const SMDiagnostic &SMD, const SourceMgr &SrcMgr, +static uint64_t getLocCookie(const SMDiagnostic &SMD, const SourceMgr &SrcMgr, std::vector &LocInfos) { // Look up a LocInfo for the buffer this diagnostic is coming from. unsigned BufNum = SrcMgr.FindBufferContainingLoc(SMD.getLoc()); @@ -195,7 +195,7 @@ static unsigned getLocCookie(const SMDiagnostic &SMD, const SourceMgr &SrcMgr, // If the inline asm had metadata associated with it, pull out a location // cookie corresponding to which line the error occurred on. - unsigned LocCookie = 0; + uint64_t LocCookie = 0; if (LocInfo) { unsigned ErrorLine = SMD.getLineNo() - 1; if (ErrorLine >= LocInfo->getNumOperands()) @@ -218,7 +218,7 @@ bool MachineModuleInfoWrapperPass::doInitialization(Module &M) { [&Ctx, &M](const SMDiagnostic &SMD, bool IsInlineAsm, const SourceMgr &SrcMgr, std::vector &LocInfos) { - unsigned LocCookie = 0; + uint64_t LocCookie = 0; if (IsInlineAsm) LocCookie = getLocCookie(SMD, SrcMgr, LocInfos); Ctx.diagnose( diff --git a/llvm/lib/IR/DiagnosticInfo.cpp b/llvm/lib/IR/DiagnosticInfo.cpp index 31971b179fb4be..108bf689005957 100644 --- a/llvm/lib/IR/DiagnosticInfo.cpp +++ b/llvm/lib/IR/DiagnosticInfo.cpp @@ -432,7 +432,7 @@ void llvm::diagnoseDontCall(const CallInst &CI) { auto Sev = i == 0 ? 
DS_Error : DS_Warning; if (F->hasFnAttribute(AttrName)) { - unsigned LocCookie = 0; + uint64_t LocCookie = 0; auto A = F->getFnAttribute(AttrName); if (MDNode *MD = CI.getMetadata("srcloc")) LocCookie = From 6b4760acc73394f841fb66bfd04c501826f5c7f7 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Fri, 14 Jun 2024 15:07:04 +0100 Subject: [PATCH 096/155] [AMDGPU] Make use of composeSubRegIndices. NFCI. (#95548) Simplify SIInstrInfo::buildExtractSubReg by building one COPY with a composed subreg index instead of two COPYs. --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 0edcdb337b5aff..30c27b6439fc09 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -5651,24 +5651,9 @@ unsigned SIInstrInfo::buildExtractSubReg( DebugLoc DL = MI->getDebugLoc(); Register SubReg = MRI.createVirtualRegister(SubRC); - if (SuperReg.getSubReg() == AMDGPU::NoSubRegister) { - BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg) - .addReg(SuperReg.getReg(), 0, SubIdx); - return SubReg; - } - - // Just in case the super register is itself a sub-register, copy it to a new - // value so we don't need to worry about merging its subreg index with the - // SubIdx passed to this function. The register coalescer should be able to - // eliminate this extra copy. 
- Register NewSuperReg = MRI.createVirtualRegister(SuperRC); - - BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), NewSuperReg) - .addReg(SuperReg.getReg(), 0, SuperReg.getSubReg()); - + unsigned NewSubIdx = RI.composeSubRegIndices(SuperReg.getSubReg(), SubIdx); BuildMI(*MBB, MI, DL, get(TargetOpcode::COPY), SubReg) - .addReg(NewSuperReg, 0, SubIdx); - + .addReg(SuperReg.getReg(), 0, NewSubIdx); return SubReg; } From 094572701dce4aaf36f4521d6cf750420d39f206 Mon Sep 17 00:00:00 2001 From: Stephen Tozer Date: Fri, 14 Jun 2024 15:07:27 +0100 Subject: [PATCH 097/155] [RemoveDIs] Print IR with debug records by default (#91724) This patch makes the final major change of the RemoveDIs project, changing the default IR output from debug intrinsics to debug records. This is expected to break a large number of tests: every single one that tests for uses or declarations of debug intrinsics and does not explicitly disable writing records. If this patch has broken your downstream tests (or upstream tests on a configuration I wasn't able to run): 1. If you need to immediately unblock a build, pass `--write-experimental-debuginfo=false` to LLVM's option processing for all failing tests (remember to use `-mllvm` for clang/flang to forward arguments to LLVM). 2. For most test failures, the changes are trivial and mechanical, enough that they can be done by script; see the migration guide for a guide on how to do this: https://llvm.org/docs/RemoveDIsDebugInfo.html#test-updates 3. If any tests fail for reasons other than FileCheck check lines that need updating, such as assertion failures, that is most likely a real bug with this patch and should be reported as such. 
For more information, see the recent PSA: https://discourse.llvm.org/t/psa-ir-output-changing-from-debug-intrinsics-to-debug-records/79578 --- .../test/CodeGen/2010-07-08-DeclDebugLineNo.c | 4 +- .../assignment-tracking.cpp | 32 +- .../assignment-tracking/memcpy-fragment.cpp | 6 +- .../assignment-tracking/nested-scope.cpp | 2 +- clang/test/CodeGen/attr-nodebug.c | 2 +- clang/test/CodeGen/debug-info-block-decl.c | 2 +- clang/test/CodeGen/debug-info-block-expr.c | 10 +- clang/test/CodeGen/debug-info-block-vars.c | 8 +- clang/test/CodeGen/debug-info-matrix-types.c | 4 +- clang/test/CodeGen/debug-info-vla.c | 4 +- clang/test/CodeGen/debug-label-inline.c | 2 +- clang/test/CodeGen/debug-label.c | 2 +- clang/test/CodeGen/instrument-objc-method.m | 1 - .../CodeGenCUDA/debug-info-address-class.cu | 4 +- .../debug-info-inheriting-constructor.cpp | 6 +- clang/test/CodeGenCXX/debug-info-nrvo.cpp | 8 +- .../debug-info-range-for-var-names.cpp | 18 +- ...debug-info-structured-binding-bitfield.cpp | 52 +- .../debug-info-structured-binding.cpp | 10 +- clang/test/CodeGenCXX/debug-info.cpp | 2 +- clang/test/CodeGenCXX/linetable-eh.cpp | 6 +- .../test/CodeGenCXX/trivial_abi_debuginfo.cpp | 6 +- clang/test/CodeGenObjC/2010-02-09-DbgSelf.m | 2 +- clang/test/CodeGenObjC/debug-info-blocks.m | 6 +- .../CodeGenObjC/debug-info-nested-blocks.m | 2 +- clang/test/CodeGenObjC/objc-fixed-enum.m | 8 +- clang/test/CodeGenObjCXX/property-objects.mm | 4 +- .../amdgpu-debug-info-variable-expression.cl | 26 +- .../debug-info-kernel-variables.cpp | 16 +- .../test/OpenMP/debug-info-complex-byval.cpp | 20 +- clang/test/OpenMP/debug-info-openmp-array.cpp | 52 +- clang/test/OpenMP/debug_private.c | 8 +- clang/test/OpenMP/debug_task_shared.c | 8 +- .../test/OpenMP/debug_threadprivate_copyin.c | 10 +- .../OpenMP/irbuilder_nested_parallel_for.c | 1662 ++++++++--------- clang/test/OpenMP/nested_loop_codegen.cpp | 358 ++-- clang/test/OpenMP/parallel_codegen.cpp | 270 +-- 
.../OpenMP/target_parallel_debug_codegen.cpp | 670 +++---- .../target_parallel_for_debug_codegen.cpp | 912 ++++----- ...target_parallel_generic_loop_codegen-3.cpp | 912 ++++----- .../taskgroup_task_reduction_codegen.cpp | 2 +- clang/test/OpenMP/threadprivate_codegen.cpp | 768 ++++---- llvm/lib/IR/IRPrintingPasses.cpp | 2 +- ...-02-05-FunctionLocalMetadataBecomesNull.ll | 2 +- llvm/test/Assembler/debug-label-bitcode.ll | 2 +- llvm/test/Bitcode/DIExpression-aggresult.ll | 2 +- .../Bitcode/constexpr-to-instr-metadata-2.ll | 4 +- .../Bitcode/constexpr-to-instr-metadata.ll | 2 +- llvm/test/Bitcode/dbg-label-record-bc.ll | 2 +- llvm/test/Bitcode/upgrade-dbg-addr.ll | 2 +- llvm/test/Bitcode/upgrade-dbg-value.ll | 7 +- .../AArch64/dbg-declare-swift-async.ll | 2 +- .../stack-tagging-dbg-assign-tag-offset.ll | 4 +- .../stack-tagging-dbg-declare-tag-offset.ll | 8 +- ...tack-tagging-dbg-value-tag-offset-nopad.ll | 6 +- .../stack-tagging-dbg-value-tag-offset.ll | 6 +- .../test/CodeGen/AArch64/stack-tagging-dbg.ll | 4 +- .../AMDGPU/amdgpu-simplify-libcall-sincos.ll | 22 +- ...fer-fat-pointers-unoptimized-debug-data.ll | 18 +- .../CodeGen/AMDGPU/si-annotate-dbg-info.ll | 10 +- .../preserve-static-offset/load-arr-pai.ll | 2 +- .../preserve-static-offset/load-ptr-pai.ll | 2 +- .../preserve-static-offset/load-struct-pai.ll | 2 +- .../preserve-static-offset/load-union-pai.ll | 2 +- .../BPF/preserve-static-offset/store-pai.ll | 2 +- .../MIRDebugify/locations-and-values.mir | 4 +- .../dont-strip-real-debug-info.mir | 4 +- .../CodeGen/X86/fast-isel-dbg-value-alloca.ll | 2 +- llvm/test/CodeGen/X86/pr38763.ll | 6 +- llvm/test/CodeGen/X86/select-optimize.ll | 56 +- llvm/test/DebugInfo/AArch64/ir-outliner.ll | 24 +- .../select-optimize-trailing-dbg-records.ll | 2 +- .../ARM/hardware-loop-phi-insertion.ll | 2 +- .../test/DebugInfo/ARM/lowerbdgdeclare_vla.ll | 4 +- llvm/test/DebugInfo/ARM/salvage-debug-info.ll | 2 +- llvm/test/DebugInfo/ARM/sroa-complex.ll | 12 +- 
.../assignment-tracking/adce/no-delete.ll | 4 +- .../codegenprepare/sunk-addr.ll | 4 +- .../declare-to-assign/hwasan.ll | 2 +- .../declare-to-assign/long-double-x87.ll | 2 +- .../declare-to-assign/nullptr-declare.ll | 4 +- .../declare-to-assign/scalable-vector.ll | 2 +- .../declare-to-assign/structured-bindings.ll | 10 +- .../declare-to-assign/var-not-alloca-sized.ll | 10 +- .../declare-to-assign/vla.ll | 2 +- .../dse/dse-after-memcpyopt-merge.ll | 10 +- .../assignment-tracking/dse/shorten-offset.ll | 12 +- .../assignment-tracking/dse/shorten.ll | 8 +- .../Generic/assignment-tracking/inline/id.ll | 4 +- .../inline/inline-stores.ll | 12 +- .../inline/shared-alloca.ll | 6 +- .../inline/use-before-def.ll | 2 +- .../do-not-remove-redundant-dbg.ll | 6 +- .../assignment-tracking/instcombine/memset.ll | 2 +- .../instcombine/remove-redundant-dbg.ll | 2 +- .../instcombine/sink-store.ll | 4 +- .../assignment-tracking/instcombine/sink.ll | 2 +- .../instcombine/store-new-type.ll | 2 +- .../instcombine/storemerge.ll | 12 +- .../Generic/assignment-tracking/licm/merge.ll | 4 +- .../assignment-tracking/licm/multi-exit.ll | 2 +- .../loop-deletion/dead-loop.ll | 2 +- .../loop-vectorize/remove-redundant-dbg.ll | 2 +- .../assignment-tracking/mem2reg/phi.ll | 16 +- .../mem2reg/single-block-alloca.ll | 8 +- .../mem2reg/single-store-alloca.ll | 4 +- .../mem2reg/store-to-part-of-alloca.ll | 2 +- .../memcpyopt/merge-stores.ll | 8 +- .../mldst-motion/diamond.ll | 4 +- .../Generic/assignment-tracking/optnone.ll | 4 +- .../parse-and-verify/roundtrip.ll | 10 +- .../remove-redundant-fwd-scan-linked.ll | 10 +- .../assignment-tracking/remove-redundant.ll | 16 +- .../assignment-tracking/salvage-value.ll | 10 +- .../simplifycfg/empty-block.ll | 6 +- .../simplifycfg/speculated-store.ll | 2 +- .../slp-vectorizer/merge-scalars.ll | 8 +- .../sroa/after-inlining.ll | 2 +- .../sroa/alloca-single-slice.ll | 2 +- .../assignment-tracking/sroa/arglist.ll | 4 +- .../assignment-tracking/sroa/complex.ll | 4 +- 
.../assignment-tracking/sroa/fail-fragment.ll | 12 +- .../assignment-tracking/sroa/frag-2.ll | 4 +- .../Generic/assignment-tracking/sroa/frag.ll | 4 +- .../Generic/assignment-tracking/sroa/id.ll | 4 +- .../assignment-tracking/sroa/memcpy.ll | 10 +- .../sroa/memmove-to-from-same-alloca.ll | 4 +- .../sroa/remove-redundant-dbg.ll | 2 +- .../assignment-tracking/sroa/rewrite.ll | 12 +- .../sroa/split-pre-fragmented-store-2.ll | 8 +- .../sroa/split-pre-fragmented-store.ll | 8 +- .../Generic/assignment-tracking/sroa/store.ll | 12 +- .../sroa/unspecified-var-size.ll | 2 +- .../assignment-tracking/sroa/user-memcpy.ll | 16 +- .../sroa/var-sized-fragment.ll | 4 +- .../Generic/assignment-tracking/sroa/vec-1.ll | 4 +- .../Generic/assignment-tracking/sroa/vec-2.ll | 6 +- .../assignment-tracking/track-assignments.ll | 40 +- .../Generic/dbg-value-lower-linenos.ll | 20 +- .../DebugInfo/Generic/debug_value_list.ll | 8 +- llvm/test/DebugInfo/Generic/empty-metadata.ll | 2 +- .../Generic/inline-alloca-ordering.ll | 3 +- .../DebugInfo/Generic/inline-dbg-values.ll | 21 +- ...nstcombine-replaced-select-with-operand.ll | 2 +- .../Generic/ipsccp-remap-assign-id.ll | 4 +- .../Generic/loop-deletion-inline-var.ll | 4 +- .../Generic/mem2reg-promote-alloca-1.ll | 4 +- .../Generic/mem2reg-promote-alloca-2.ll | 4 +- .../Generic/mem2reg-promote-alloca-3.ll | 4 +- llvm/test/DebugInfo/Generic/pr40628.ll | 4 +- llvm/test/DebugInfo/Generic/sroa-larger.ll | 2 +- llvm/test/DebugInfo/Generic/sroa-samesize.ll | 2 +- .../test/DebugInfo/Generic/volatile-alloca.ll | 6 +- .../DebugInfo/X86/LLVM_implicit_pointer.ll | 6 +- llvm/test/DebugInfo/X86/array2.ll | 2 +- .../DebugInfo/X86/codegenprep-addrsink.ll | 14 +- llvm/test/DebugInfo/X86/codegenprep-value.ll | 4 +- .../DebugInfo/X86/codegenprepare-rollback.ll | 8 +- .../X86/dbg-value-dropped-instcombine.ll | 4 +- .../X86/dead-store-elimination-marks-undef.ll | 2 +- llvm/test/DebugInfo/X86/formal_parameter.ll | 6 +- .../X86/instcombine-demanded-bits-salvage.ll | 2 
+- .../X86/instcombine-fold-cast-into-phi.ll | 2 +- .../DebugInfo/X86/instcombine-instrinsics.ll | 4 +- .../DebugInfo/X86/licm-undef-dbg-value.ll | 2 +- llvm/test/DebugInfo/X86/mem2reg_fp80.ll | 4 +- .../test/DebugInfo/X86/sroa-after-inlining.ll | 2 +- llvm/test/DebugInfo/X86/sroasplit-1.ll | 4 +- llvm/test/DebugInfo/X86/sroasplit-2.ll | 8 +- llvm/test/DebugInfo/X86/sroasplit-3.ll | 2 +- llvm/test/DebugInfo/X86/sroasplit-4.ll | 8 +- .../DebugInfo/X86/sroasplit-dbg-declare.ll | 8 +- .../assignment-tracking/X86/hotcoldsplit.ll | 2 +- llvm/test/DebugInfo/duplicate_dbgvalue.ll | 2 +- .../instcombine-sink-latest-assignment.ll | 2 +- .../test/DebugInfo/salvage-cast-debug-info.ll | 6 +- .../DebugInfo/salvage-duplicate-values.ll | 8 +- llvm/test/DebugInfo/salvage-gep.ll | 8 +- llvm/test/DebugInfo/salvage-icmp.ll | 8 +- .../test/DebugInfo/salvage-limit-expr-size.ll | 8 +- llvm/test/DebugInfo/salvage-nonconst-binop.ll | 4 +- .../AddressSanitizer/debug_info.ll | 4 +- .../AddressSanitizer/local_stack_base.ll | 2 +- .../HWAddressSanitizer/RISCV/alloca.ll | 170 +- .../alloca-uninteresting.ll | 2 +- .../HWAddressSanitizer/alloca.ll | 22 +- .../dbg-assign-tag-offset.ll | 4 +- .../dbg-declare-tag-offset.ll | 8 +- .../dbg-value-tag-offset-nopad.ll | 6 +- .../dbg-value-tag-offset.ll | 6 +- llvm/test/Linker/DbgDeclare.ll | 10 +- llvm/test/Linker/debug-info-use-before-def.ll | 4 +- .../Transforms/ADCE/adce-salvage-dbg-value.ll | 4 +- .../Transforms/ADCE/debug-info-intrinsic.ll | 2 +- .../AArch64/combine_ignore_debug.ll | 2 +- .../pr33641_remove_arg_dbgvalue.ll | 2 +- .../pr33641_remove_arg_dbgvalue.ll | 6 +- llvm/test/Transforms/BDCE/basic.ll | 2 +- llvm/test/Transforms/BDCE/dbg-multipleuses.ll | 2 +- llvm/test/Transforms/BDCE/pr26587.ll | 2 +- llvm/test/Transforms/BDCE/pr41925.ll | 14 +- .../CallSiteSplitting/callsite-split-debug.ll | 28 +- .../callsite-split-preserve-debug.ll | 13 +- .../CodeExtractor/LoopExtractor_alloca.ll | 2 +- .../CodeGenPrepare/X86/catchpad-phi-cast.ll | 4 +- 
.../CodeGenPrepare/X86/cttz-ctlz.ll | 4 +- .../Transforms/CodeGenPrepare/X86/select.ll | 136 +- .../debug-info-on-skipped-selects.ll | 3 +- .../CodeGenPrepare/sink-shift-and-trunc.ll | 2 +- .../Transforms/Coroutines/coro-debug-O2.ll | 2 +- .../Coroutines/coro-debug-coro-frame.ll | 4 +- ...coro-debug-dbg.values-not_used_in_frame.ll | 4 +- .../Coroutines/coro-debug-dbg.values.ll | 28 +- .../Coroutines/coro-debug-frame-variable.ll | 12 +- .../coro-debug-spill-dbg.declare.ll | 6 +- llvm/test/Transforms/Coroutines/coro-debug.ll | 16 +- .../Transforms/Coroutines/swift-async-dbg.ll | 16 +- llvm/test/Transforms/DCE/basic.ll | 12 +- llvm/test/Transforms/DCE/dbg-value-removal.ll | 30 +- .../DeadArgElim/2010-04-30-DbgInfo.ll | 23 +- .../DeadArgElim/dbginfo-preserve-dbgloc.ll | 4 +- .../dbginfo-update-dbgval-local.ll | 4 +- .../DeadArgElim/dbginfo-update-dbgval.ll | 2 +- .../DeadStoreElimination/debuginfo.ll | 4 +- .../Transforms/EarlyCSE/debug-info-undef.ll | 2 +- .../test/Transforms/EarlyCSE/debuginfo-dce.ll | 2 +- .../Transforms/GVN/load-through-select-dbg.ll | 2 +- .../GlobalOpt/deadglobal-diarglist-use.ll | 2 +- .../GlobalOpt/localize-constexpr-debuginfo.ll | 2 +- .../shrink-global-to-bool-check-debug.ll | 2 +- .../HotColdSplit/split-out-dbg-label.ll | 6 +- .../HotColdSplit/transfer-debug-info.ll | 18 +- .../test/Transforms/IROutliner/legal-debug.ll | 24 +- .../IndVarSimplify/X86/indvar-debug-value.ll | 4 +- .../IndVarSimplify/X86/indvar-debug-value2.ll | 4 +- .../IndVarSimplify/X86/scev-phi-debug-info.ll | 2 +- .../Transforms/Inline/alloca-dbgdeclare.ll | 2 +- .../Transforms/Inline/inline_dbg_declare.ll | 2 +- .../local-as-metadata-undominated-use.ll | 2 +- .../InstCombine/alloca-cast-debuginfo.ll | 2 +- llvm/test/Transforms/InstCombine/assume.ll | 2 +- .../Transforms/InstCombine/cast-mul-select.ll | 74 +- .../cast-set-preserve-signed-dbg-val.ll | 6 +- .../InstCombine/consecutive-fences.ll | 2 +- .../dbg-scalable-store-fixed-frag.ll | 4 +- 
.../InstCombine/dbg-simplify-alloca-size.ll | 2 +- .../Transforms/InstCombine/debuginfo-dce.ll | 18 +- .../Transforms/InstCombine/debuginfo-dce2.ll | 4 +- .../Transforms/InstCombine/debuginfo-sink.ll | 26 +- .../Transforms/InstCombine/debuginfo-skip.ll | 4 +- .../InstCombine/debuginfo-variables.ll | 36 +- llvm/test/Transforms/InstCombine/debuginfo.ll | 14 +- .../Transforms/InstCombine/debuginfo_add.ll | 6 +- .../erase-dbg-values-at-dead-alloc-site.ll | 4 +- .../InstCombine/lifetime-no-null-opt.ll | 2 +- llvm/test/Transforms/InstCombine/lifetime.ll | 2 +- .../InstCombine/lower-dbg-declare.ll | 6 +- llvm/test/Transforms/InstCombine/pr43893.ll | 6 +- .../InstCombine/salvage-dbg-declare.ll | 2 +- ...ion-introduces-unnecessary-poison-value.ll | 4 +- .../InstCombine/stacksave-debuginfo.ll | 10 +- .../InstCombine/unavailable-debug.ll | 2 +- .../JumpThreading/guard-split-debuginfo.ll | 8 +- .../JumpThreading/redundant-dbg-info.ll | 6 +- .../JumpThreading/thread-debug-info.ll | 18 +- .../LCSSA/rewrite-existing-dbg-values.ll | 14 +- llvm/test/Transforms/LICM/dbg-value-sink.ll | 2 +- llvm/test/Transforms/LICM/debug-value.ll | 2 +- llvm/test/Transforms/LICM/sinking-debugify.ll | 2 +- llvm/test/Transforms/LoopDeletion/diundef.ll | 7 +- .../over-defensive-undefing-dbg-values.ll | 4 +- .../X86/arithmetic-right-shift-until-zero.ll | 1208 ++++++------ .../LoopIdiom/X86/left-shift-until-bittest.ll | 394 ++-- ...ogical-right-shift-until-zero-debuginfo.ll | 42 +- llvm/test/Transforms/LoopIdiom/debug-line.ll | 8 +- .../LoopIdiom/memcpy-debugify-remarks.ll | 12 +- .../LoopIdiom/memset-debugify-remarks.ll | 8 +- .../LoopRotate/call-prepare-for-lto.ll | 4 +- .../LoopRotate/dbg-value-duplicates-2.ll | 26 +- .../LoopRotate/dbg-value-duplicates.ll | 8 +- llvm/test/Transforms/LoopRotate/dbgvalue.ll | 49 +- .../LoopRotate/delete-dbg-values.ll | 11 +- .../Transforms/LoopRotate/phi-dbgvalue.ll | 6 +- .../LoopStrengthReduce/X86/lsr-cond-dbg.ll | 2 +- .../LoopStrengthReduce/dbg-preserve-0.ll | 8 
+- .../LoopStrengthReduce/dbg-preserve-1.ll | 2 +- .../LoopStrengthReduce/dbg-preserve-2.ll | 4 +- .../debuginfo-scev-salvage-0.ll | 6 +- .../debuginfo-scev-salvage-1.ll | 6 +- .../debuginfo-scev-salvage-2.ll | 6 +- .../debuginfo-scev-salvage-3.ll | 6 +- .../debuginfo-scev-salvage-4.ll | 6 +- .../debuginfo-scev-salvage-5.ll | 8 +- .../Transforms/LoopStrengthReduce/pr51329.ll | 4 +- .../Transforms/LoopStrengthReduce/pr51656.ll | 2 +- .../Transforms/LoopStrengthReduce/pr52161.ll | 2 +- llvm/test/Transforms/LoopUnroll/debug-info.ll | 8 +- .../LoopUnroll/runtime-epilog-debuginfo.ll | 8 +- .../LoopUnroll/unroll-remove-redundant-dbg.ll | 2 +- .../LoopVectorize/dbg-outer-loop-vect.ll | 8 +- .../Transforms/LoopVectorize/discriminator.ll | 2 +- .../Transforms/Mem2Reg/ConvertDebugInfo.ll | 4 +- .../Transforms/Mem2Reg/ConvertDebugInfo2.ll | 8 +- .../dbg_declare_to_value_conversions.ll | 6 +- .../Transforms/Mem2Reg/debug-alloca-phi-2.ll | 8 +- .../Transforms/Mem2Reg/debug-alloca-phi.ll | 8 +- .../Transforms/Mem2Reg/debug-alloca-vla-1.ll | 4 +- .../Transforms/Mem2Reg/debug-alloca-vla-2.ll | 2 +- llvm/test/Transforms/MemCpyOpt/pr37967.ll | 4 +- .../mergefunc-preserve-debug-info.ll | 30 +- .../MergeFunc/no-merge-debug-thunks.ll | 8 +- llvm/test/Transforms/ObjCARC/basic.ll | 1 - ...e-that-exception-unwind-path-is-visited.ll | 5 +- .../Transforms/Reassociate/matching-binops.ll | 26 +- .../reassociate_dbgvalue_discard.ll | 18 +- .../reassociate_salvages_debug_info.ll | 2 +- ...f_intrinsics_when_deleting_instructions.ll | 4 +- llvm/test/Transforms/SCCP/loadtest.ll | 10 +- .../SLPVectorizer/AArch64/spillcost-di.ll | 10 +- .../SLPVectorizer/X86/debug_info.ll | 22 +- .../X86/schedule_budget_debug_info.ll | 16 +- llvm/test/Transforms/SROA/alignment.ll | 102 +- llvm/test/Transforms/SROA/dbg-inline.ll | 49 +- llvm/test/Transforms/SROA/dbg-single-piece.ll | 2 +- llvm/test/Transforms/SROA/vector-promotion.ll | 509 ++--- .../SafeStack/X86/debug-loc-dynamic.ll | 2 +- 
.../Transforms/SafeStack/X86/debug-loc.ll | 10 +- .../Transforms/SafeStack/X86/debug-loc2.ll | 10 +- llvm/test/Transforms/Scalarizer/dbginfo.ll | 6 +- .../SimpleLoopUnswitch/debuginfo.ll | 4 +- ...patible-invokes-of-landingpad-debuginfo.ll | 5 +- .../Transforms/SimplifyCFG/X86/pr39187-g.ll | 4 +- .../Transforms/SimplifyCFG/branch-fold-dbg.ll | 6 +- .../SimplifyCFG/hoist-dbgvalue-inlined.ll | 4 +- .../Transforms/SimplifyCFG/hoist-dbgvalue.ll | 12 +- .../SimplifyCFG/jump-threading-debuginfo.ll | 26 +- .../Transforms/SimplifyCFG/return-merge.ll | 6 +- .../SimplifyCFG/speculate-dbgvalue.ll | 20 +- .../SimplifyCFG/tail-merge-noreturn.ll | 12 +- .../SpeculativeExecution/PR46267.ll | 10 +- .../test/Transforms/Util/Debugify/loc-only.ll | 4 +- llvm/test/Transforms/Util/dbg-call-bitcast.ll | 8 +- llvm/test/Transforms/Util/dbg-user-of-aext.ll | 8 +- .../test/Transforms/Util/salvage-debuginfo.ll | 4 +- .../Inputs/various_ir_values.ll | 3 +- .../Inputs/various_ir_values.ll.expected | 3 +- .../various_ir_values.ll.funcsig.expected | 3 +- ...ious_ir_values.ll.funcsig.globals.expected | 3 +- ...us_ir_values.ll.funcsig.noglobals.expected | 3 +- ...lues.ll.funcsig.transitiveglobals.expected | 3 +- .../LLVMIR/di-expression-legalization.mlir | 6 +- polly/test/CodeGen/debug-intrinsics.ll | 10 +- 352 files changed, 5416 insertions(+), 5289 deletions(-) diff --git a/clang/test/CodeGen/2010-07-08-DeclDebugLineNo.c b/clang/test/CodeGen/2010-07-08-DeclDebugLineNo.c index 645403284b8b0d..e6b7aa0af81b29 100644 --- a/clang/test/CodeGen/2010-07-08-DeclDebugLineNo.c +++ b/clang/test/CodeGen/2010-07-08-DeclDebugLineNo.c @@ -1,8 +1,8 @@ // RUN: %clang_cc1 -emit-llvm -debug-info-kind=limited %s -o - | FileCheck %s // Insure that dbg.declare lines for locals refer to correct line number records. 
void foo(void) { - int l = 0; // line #4: CHECK: {{call.*llvm.dbg.declare.*%l.*\!dbg }}[[variable_l:![0-9]+]] - int p = 0; // line #5: CHECK: {{call.*llvm.dbg.declare.*%p.*\!dbg }}[[variable_p:![0-9]+]] + int l = 0; // line #4: CHECK: #dbg_declare({{.*%l.*}} [[variable_l:![0-9]+]] + int p = 0; // line #5: CHECK: #dbg_declare({{.*%p.*}} [[variable_p:![0-9]+]] } // Now match the line number records: // CHECK: {{^}}[[variable_l]] = !DILocation(line: 4, diff --git a/clang/test/CodeGen/assignment-tracking/assignment-tracking.cpp b/clang/test/CodeGen/assignment-tracking/assignment-tracking.cpp index b96ad7ca6fac33..fd5a5b24920ecf 100644 --- a/clang/test/CodeGen/assignment-tracking/assignment-tracking.cpp +++ b/clang/test/CodeGen/assignment-tracking/assignment-tracking.cpp @@ -20,16 +20,16 @@ Large L; void zeroInit() { int Z[3] = {0, 0, 0}; } // CHECK-LABEL: define dso_local void @_Z8zeroInitv // CHECK: %Z = alloca [3 x i32], align 4, !DIAssignID ![[ID_0:[0-9]+]] -// CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata ![[VAR_0:[0-9]+]], metadata !DIExpression(), metadata ![[ID_0]], metadata ptr %Z, metadata !DIExpression()) +// CHECK-NEXT: #dbg_assign(i1 undef, ![[VAR_0:[0-9]+]], !DIExpression(), ![[ID_0]], ptr %Z, !DIExpression(), // CHECK: @llvm.memset{{.*}}, !DIAssignID ![[ID_1:[0-9]+]] -// CHECK-NEXT: call void @llvm.dbg.assign(metadata i8 0, metadata ![[VAR_0]], metadata !DIExpression(), metadata ![[ID_1]], metadata ptr %Z, metadata !DIExpression()) +// CHECK-NEXT: #dbg_assign(i8 0, ![[VAR_0]], !DIExpression(), ![[ID_1]], ptr %Z, !DIExpression(), void memcpyInit() { int A[4] = {0, 1, 2, 3}; } // CHECK-LABEL: define dso_local void @_Z10memcpyInitv // CHECK: %A = alloca [4 x i32], align 16, !DIAssignID ![[ID_2:[0-9]+]] -// CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata ![[VAR_1:[0-9]+]], metadata !DIExpression(), metadata ![[ID_2]], metadata ptr %A, metadata !DIExpression()) +// CHECK-NEXT: #dbg_assign(i1 undef, ![[VAR_1:[0-9]+]], 
!DIExpression(), ![[ID_2]], ptr %A, !DIExpression(), // CHECK: @llvm.memcpy{{.*}}, !DIAssignID ![[ID_3:[0-9]+]] -// CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata ![[VAR_1]], metadata !DIExpression(), metadata ![[ID_3]], metadata ptr %A, metadata !DIExpression()) +// CHECK-NEXT: #dbg_assign(i1 undef, ![[VAR_1]], !DIExpression(), ![[ID_3]], ptr %A, !DIExpression(), void setField() { Outer O; @@ -37,9 +37,9 @@ void setField() { } // CHECK-LABEL: define dso_local void @_Z8setFieldv // CHECK: %O = alloca %struct.Outer, align 4, !DIAssignID ![[ID_4:[0-9]+]] -// CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata ![[VAR_2:[0-9]+]], metadata !DIExpression(), metadata ![[ID_4]], metadata ptr %O, metadata !DIExpression()) +// CHECK-NEXT: #dbg_assign(i1 undef, ![[VAR_2:[0-9]+]], !DIExpression(), ![[ID_4]], ptr %O, !DIExpression(), // CHECK: store i32 %0, ptr %B, align 4,{{.*}}!DIAssignID ![[ID_5:[0-9]+]] -// CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 %0, metadata ![[VAR_2]], metadata !DIExpression(DW_OP_LLVM_fragment, 32, 32), metadata ![[ID_5]], metadata ptr %B, metadata !DIExpression()) +// CHECK-NEXT: #dbg_assign(i32 %0, ![[VAR_2]], !DIExpression(DW_OP_LLVM_fragment, 32, 32), ![[ID_5]], ptr %B, !DIExpression(), void unknownOffset() { int A[2]; @@ -47,7 +47,7 @@ void unknownOffset() { } // CHECK-LABEL: define dso_local void @_Z13unknownOffsetv // CHECK: %A = alloca [2 x i32], align 4, !DIAssignID ![[ID_6:[0-9]+]] -// CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata ![[VAR_3:[0-9]+]], metadata !DIExpression(), metadata ![[ID_6]], metadata ptr %A, metadata !DIExpression()) +// CHECK-NEXT: #dbg_assign(i1 undef, ![[VAR_3:[0-9]+]], !DIExpression(), ![[ID_6]], ptr %A, !DIExpression(), Inner sharedAlloca() { if (Cond) { @@ -60,34 +60,34 @@ Inner sharedAlloca() { } // CHECK-LABEL: define dso_local i64 @_Z12sharedAllocav // CHECK: %retval = alloca %struct.Inner, align 4, !DIAssignID ![[ID_7:[0-9]+]] -// CHECK-NEXT: call 
void @llvm.dbg.assign(metadata i1 undef, metadata ![[VAR_4:[0-9]+]], metadata !DIExpression(), metadata ![[ID_7]], metadata ptr %retval, metadata !DIExpression()) -// CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata ![[VAR_5:[0-9]+]], metadata !DIExpression(), metadata ![[ID_7]], metadata ptr %retval, metadata !DIExpression()) +// CHECK-NEXT: #dbg_assign(i1 undef, ![[VAR_4:[0-9]+]], !DIExpression(), ![[ID_7]], ptr %retval, !DIExpression(), +// CHECK-NEXT: #dbg_assign(i1 undef, ![[VAR_5:[0-9]+]], !DIExpression(), ![[ID_7]], ptr %retval, !DIExpression(), // CHECK: if.then: // CHECK: call void @llvm.memcpy{{.*}}, !DIAssignID ![[ID_8:[0-9]+]] -// CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata ![[VAR_4]], metadata !DIExpression(), metadata ![[ID_8]], metadata ptr %retval, metadata !DIExpression()) -// CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata ![[VAR_5]], metadata !DIExpression(), metadata ![[ID_8]], metadata ptr %retval, metadata !DIExpression()) +// CHECK-NEXT: #dbg_assign(i1 undef, ![[VAR_4]], !DIExpression(), ![[ID_8]], ptr %retval, !DIExpression(), +// CHECK-NEXT: #dbg_assign(i1 undef, ![[VAR_5]], !DIExpression(), ![[ID_8]], ptr %retval, !DIExpression(), // CHECK: if.else: // CHECK: call void @llvm.memcpy{{.*}}, !DIAssignID ![[ID_9:[0-9]+]] -// CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata ![[VAR_4]], metadata !DIExpression(), metadata ![[ID_9]], metadata ptr %retval, metadata !DIExpression()) -// CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata ![[VAR_5]], metadata !DIExpression(), metadata ![[ID_9]], metadata ptr %retval, metadata !DIExpression()) +// CHECK-NEXT: #dbg_assign(i1 undef, ![[VAR_4]], !DIExpression(), ![[ID_9]], ptr %retval, !DIExpression(), +// CHECK-NEXT: #dbg_assign(i1 undef, ![[VAR_5]], !DIExpression(), ![[ID_9]], ptr %retval, !DIExpression(), Large sret() { Large X = L; return X; } // CHECK-LABEL: define dso_local void @_Z4sretv -// CHECK: 
llvm.dbg.declare +// CHECK: #dbg_declare void byval(Large X) {} // CHECK-LABEL: define dso_local void @_Z5byval5Large -// CHECK: llvm.dbg.declare +// CHECK: #dbg_declare LCopyCtor indirectReturn() { LCopyCtor R; return R; } // CHECK-LABEL: define dso_local void @_Z14indirectReturnv -// CHECK: call void @llvm.dbg.declare +// CHECK: #dbg_declare // CHECK-DAG: ![[VAR_0]] = !DILocalVariable(name: "Z", // CHECK-DAG: ![[VAR_1]] = !DILocalVariable(name: "A", diff --git a/clang/test/CodeGen/assignment-tracking/memcpy-fragment.cpp b/clang/test/CodeGen/assignment-tracking/memcpy-fragment.cpp index 126bc8f54eb437..85ec68066199d3 100644 --- a/clang/test/CodeGen/assignment-tracking/memcpy-fragment.cpp +++ b/clang/test/CodeGen/assignment-tracking/memcpy-fragment.cpp @@ -23,7 +23,7 @@ void fragmentWhole() __builtin_memcpy(&dest.ch, &src, sizeof(char)); } // CHECK: call void @llvm.memcpy{{.+}}, !DIAssignID ![[memberID:[0-9]+]] -// CHECK-NEXT: call void @llvm.dbg.assign(metadata{{.*}}undef, metadata !{{[0-9]+}}, metadata !DIExpression(DW_OP_LLVM_fragment, 32, 8), metadata ![[memberID]], metadata ptr %ch, metadata !DIExpression()) +// CHECK-NEXT: #dbg_assign({{.*}}undef, !{{[0-9]+}}, !DIExpression(DW_OP_LLVM_fragment, 32, 8), ![[memberID]], ptr %ch, !DIExpression(), // Write starting at a field and overlapping part of another. void fragmentWholeToPartial() @@ -38,7 +38,7 @@ void fragmentWholeToPartial() __builtin_memcpy(&dest.num1, &src, 5); } // CHECK: call void @llvm.memcpy{{.+}}, !DIAssignID ![[exceed:[0-9]+]] -// CHECK-NEXT: call void @llvm.dbg.assign(metadata{{.*}}undef, metadata !{{[0-9]+}}, metadata !DIExpression(DW_OP_LLVM_fragment, 0, 40), metadata ![[exceed]], metadata ptr %num1, metadata !DIExpression()) +// CHECK-NEXT: #dbg_assign({{.*}}undef, !{{[0-9]+}}, !DIExpression(DW_OP_LLVM_fragment, 0, 40), ![[exceed]], ptr %num1, !DIExpression(), // Write starting between fields. 
void fragmentPartialToWhole() @@ -54,4 +54,4 @@ void fragmentPartialToWhole() __builtin_memcpy((char*)&(dest.num2) + 3, &src, 5); } // CHECK: call void @llvm.memcpy{{.+}}, !DIAssignID ![[addendID:[0-9]+]] -// CHECK-NEXT: call void @llvm.dbg.assign(metadata{{.*}}undef, metadata !{{.*}}, metadata !DIExpression(DW_OP_LLVM_fragment, 56, 40), metadata ![[addendID]], metadata ptr %add.ptr, metadata !DIExpression()) +// CHECK-NEXT: #dbg_assign({{.*}}undef, !{{.*}}, !DIExpression(DW_OP_LLVM_fragment, 56, 40), ![[addendID]], ptr %add.ptr, !DIExpression(), diff --git a/clang/test/CodeGen/assignment-tracking/nested-scope.cpp b/clang/test/CodeGen/assignment-tracking/nested-scope.cpp index d94e42a139c1cc..7d918821b3e309 100644 --- a/clang/test/CodeGen/assignment-tracking/nested-scope.cpp +++ b/clang/test/CodeGen/assignment-tracking/nested-scope.cpp @@ -6,7 +6,7 @@ // Check that dbg.assign intrinsics get a !dbg with with the same scope as // their variable. -// CHECK: call void @llvm.dbg.assign({{.+}}, metadata [[local:![0-9]+]], {{.+}}, {{.+}}, {{.+}}), !dbg [[dbg:![0-9]+]] +// CHECK: #dbg_assign({{.+}}, [[local:![0-9]+]], {{.+}}, {{.+}}, {{.+}}, [[dbg:![0-9]+]] // CHECK-DAG: [[local]] = !DILocalVariable(name: "local", scope: [[scope:![0-9]+]], // CHECK-DAG: [[dbg]] = !DILocation({{.+}}, scope: [[scope]]) // CHECK-DAG: [[scope]] = distinct !DILexicalBlock diff --git a/clang/test/CodeGen/attr-nodebug.c b/clang/test/CodeGen/attr-nodebug.c index fde0c912b16dce..75b4089408fcb8 100644 --- a/clang/test/CodeGen/attr-nodebug.c +++ b/clang/test/CodeGen/attr-nodebug.c @@ -21,7 +21,7 @@ void t2(void) // Verify those things do occur normally. 
// CHECK-LABEL: @t2 -// CHECK: call{{.*}}llvm.dbg +// CHECK: #dbg_declare // CHECK: !dbg // CHECK: } diff --git a/clang/test/CodeGen/debug-info-block-decl.c b/clang/test/CodeGen/debug-info-block-decl.c index 8db13c377ede25..6e95ecc54fd5ad 100644 --- a/clang/test/CodeGen/debug-info-block-decl.c +++ b/clang/test/CodeGen/debug-info-block-decl.c @@ -4,7 +4,7 @@ // CHECK: define{{.*}}@main() // CHECK: store {{.*}}, !dbg ![[ASSIGNMENT:[0-9]+]] // CHECK: define {{.*}} @__main_block_invoke -// CHECK: , !dbg ![[BLOCK_ENTRY:[0-9]+]] +// CHECK: , ![[BLOCK_ENTRY:[0-9]+]]) int main(void) { diff --git a/clang/test/CodeGen/debug-info-block-expr.c b/clang/test/CodeGen/debug-info-block-expr.c index 6ca8a826cacfd1..712158f2715226 100644 --- a/clang/test/CodeGen/debug-info-block-expr.c +++ b/clang/test/CodeGen/debug-info-block-expr.c @@ -10,7 +10,7 @@ void noEscapeFunc(__attribute__((noescape)) BlockTy); // 'noescape') blocks. void test_escape_func(void) { // CHECK-LABEL: void @test_escape_func -// CHECK: call void @llvm.dbg.declare({{.*}}metadata ![[ESCAPE_VAR:[0-9]+]], metadata !DIExpression(DW_OP_plus_uconst, {{[0-9]+}}, DW_OP_deref, DW_OP_plus_uconst, {{[0-9]+}}){{.*}}) +// CHECK: #dbg_declare({{.*}}![[ESCAPE_VAR:[0-9]+]], !DIExpression(DW_OP_plus_uconst, {{[0-9]+}}, DW_OP_deref, DW_OP_plus_uconst, {{[0-9]+}}){{.*}}) __block int escape_var; // Blocks in dead code branches still capture __block variables. #ifdef DEAD_CODE @@ -22,7 +22,7 @@ void test_escape_func(void) { // Verify that the desired DIExpression are generated for noescape blocks. void test_noescape_func(void) { // CHECK-LABEL: void @test_noescape_func -// CHECK: call void @llvm.dbg.declare({{.*}}metadata ![[NOESCAPE_VAR:[0-9]+]], metadata !DIExpression()) +// CHECK: #dbg_declare({{.*}}![[NOESCAPE_VAR:[0-9]+]], !DIExpression(), __block int noescape_var; noEscapeFunc(^{ (void)noescape_var; }); } @@ -30,11 +30,11 @@ void test_noescape_func(void) { // Verify that the desired DIExpression are generated for blocks. 
void test_local_block(void) { // CHECK-LABEL: void @test_local_block -// CHECK: call void @llvm.dbg.declare({{.*}}metadata ![[BLOCK_VAR:[0-9]+]], metadata !DIExpression(DW_OP_plus_uconst, {{[0-9]+}}, DW_OP_deref, DW_OP_plus_uconst, {{[0-9]+}}){{.*}}) +// CHECK: #dbg_declare({{.*}}![[BLOCK_VAR:[0-9]+]], !DIExpression(DW_OP_plus_uconst, {{[0-9]+}}, DW_OP_deref, DW_OP_plus_uconst, {{[0-9]+}}){{.*}}) __block int block_var; // CHECK-LABEL: @__test_local_block_block_invoke -// CHECK: call void @llvm.dbg.declare({{.*}}!DIExpression(DW_OP_deref, DW_OP_plus_uconst, {{[0-9]+}}, DW_OP_deref, DW_OP_plus_uconst, {{[0-9]+}}, DW_OP_deref, DW_OP_plus_uconst, {{[0-9]+}}){{.*}}) +// CHECK: #dbg_declare({{.*}}!DIExpression(DW_OP_deref, DW_OP_plus_uconst, {{[0-9]+}}, DW_OP_deref, DW_OP_plus_uconst, {{[0-9]+}}, DW_OP_deref, DW_OP_plus_uconst, {{[0-9]+}}){{.*}}) ^ { block_var = 1; }(); } @@ -42,7 +42,7 @@ void test_local_block(void) { // in any block. void test_unused(void) { // CHECK-LABEL: void @test_unused -// CHECK: call void @llvm.dbg.declare({{.*}}metadata ![[UNUSED_VAR:[0-9]+]], metadata !DIExpression()) +// CHECK: #dbg_declare({{.*}}![[UNUSED_VAR:[0-9]+]], !DIExpression(), __block int unused_var; // Use i (not inside a block). 
++unused_var; diff --git a/clang/test/CodeGen/debug-info-block-vars.c b/clang/test/CodeGen/debug-info-block-vars.c index 4e007214706063..90d1d4b42223cd 100644 --- a/clang/test/CodeGen/debug-info-block-vars.c +++ b/clang/test/CodeGen/debug-info-block-vars.c @@ -8,14 +8,14 @@ // CHECK: %.block_descriptor.addr = alloca ptr, align 8 // CHECK: %block.addr = alloca ptr, align 8 // CHECK: store ptr %.block_descriptor, ptr %.block_descriptor.addr, align 8 -// CHECK: call void @llvm.dbg.declare(metadata ptr %.block_descriptor.addr, -// CHECK-SAME: metadata !DIExpression()) +// CHECK: #dbg_declare(ptr %.block_descriptor.addr, +// CHECK-SAME: !DIExpression(), // CHECK-OPT-NOT: alloca // Since the block address is not used anywhere in this function, // the optimizer (DeadArgElim) has replaced all the false uses // (i.e., metadata users) with poison. -// CHECK-OPT: call void @llvm.dbg.value(metadata ptr poison, -// CHECK-OPT-SAME: metadata !DIExpression()) +// CHECK-OPT: #dbg_value(ptr poison, +// CHECK-OPT-SAME: !DIExpression(), void f(void) { a(^{ b(); diff --git a/clang/test/CodeGen/debug-info-matrix-types.c b/clang/test/CodeGen/debug-info-matrix-types.c index bc0a70a9ec4fac..c16e35e8d33da0 100644 --- a/clang/test/CodeGen/debug-info-matrix-types.c +++ b/clang/test/CodeGen/debug-info-matrix-types.c @@ -3,8 +3,8 @@ typedef double dx2x3_t __attribute__((matrix_type(2, 3))); void load_store_double(dx2x3_t *a, dx2x3_t *b) { - // CHECK-DAG: @llvm.dbg.declare(metadata ptr %a.addr, metadata [[EXPR_A:![0-9]+]] - // CHECK-DAG: @llvm.dbg.declare(metadata ptr %b.addr, metadata [[EXPR_B:![0-9]+]] + // CHECK-DAG: #dbg_declare(ptr %a.addr, [[EXPR_A:![0-9]+]] + // CHECK-DAG: #dbg_declare(ptr %b.addr, [[EXPR_B:![0-9]+]] // CHECK: [[PTR_TY:![0-9]+]] = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: [[TYPEDEF:![0-9]+]], size: 64) // CHECK: [[TYPEDEF]] = !DIDerivedType(tag: DW_TAG_typedef, name: "dx2x3_t", {{.+}} baseType: [[MATRIX_TY:![0-9]+]]) // CHECK: [[MATRIX_TY]] = 
!DICompositeType(tag: DW_TAG_array_type, baseType: [[ELT_TY:![0-9]+]], size: 384, elements: [[ELEMENTS:![0-9]+]]) diff --git a/clang/test/CodeGen/debug-info-vla.c b/clang/test/CodeGen/debug-info-vla.c index 22b3930dfc88c9..e9494111d207f6 100644 --- a/clang/test/CodeGen/debug-info-vla.c +++ b/clang/test/CodeGen/debug-info-vla.c @@ -2,8 +2,8 @@ void testVLAwithSize(int s) { -// CHECK-DAG: dbg.declare({{.*}} %__vla_expr0, metadata ![[VLAEXPR:[0-9]+]] -// CHECK-DAG: dbg.declare({{.*}} %vla, metadata ![[VAR:[0-9]+]] +// CHECK-DAG: #dbg_declare({{.*}} %__vla_expr0, ![[VLAEXPR:[0-9]+]] +// CHECK-DAG: #dbg_declare({{.*}} %vla, ![[VAR:[0-9]+]] // CHECK-DAG: ![[VLAEXPR]] = !DILocalVariable(name: "__vla_expr0", {{.*}} flags: DIFlagArtificial // CHECK-DAG: ![[VAR]] = !DILocalVariable(name: "vla",{{.*}} line: [[@LINE+2]] // CHECK-DAG: !DISubrange(count: ![[VLAEXPR]]) diff --git a/clang/test/CodeGen/debug-label-inline.c b/clang/test/CodeGen/debug-label-inline.c index c0b089aad8eb99..972a32b5af32d8 100644 --- a/clang/test/CodeGen/debug-label-inline.c +++ b/clang/test/CodeGen/debug-label-inline.c @@ -16,7 +16,7 @@ int f2(void) { int result; result = f1(ga, gb); - // CHECK: call void @llvm.dbg.label(metadata [[LABEL_METADATA:!.*]]), !dbg [[LABEL_LOCATION:!.*]] + // CHECK: #dbg_label([[LABEL_METADATA:!.*]], [[LABEL_LOCATION:![0-9]+]] return result; } diff --git a/clang/test/CodeGen/debug-label.c b/clang/test/CodeGen/debug-label.c index 179132ecd6f0a3..662b1a7dd2062d 100644 --- a/clang/test/CodeGen/debug-label.c +++ b/clang/test/CodeGen/debug-label.c @@ -7,7 +7,7 @@ int f1(int a, int b) { int sum; top: - // CHECK: call void @llvm.dbg.label(metadata [[LABEL_METADATA:!.*]]), !dbg [[LABEL_LOCATION:!.*]] + // CHECK: #dbg_label([[LABEL_METADATA:!.*]], [[LABEL_LOCATION:![0-9]+]] sum = a + b; return sum; } diff --git a/clang/test/CodeGen/instrument-objc-method.m b/clang/test/CodeGen/instrument-objc-method.m index 2c9d1fc88554bd..34b4f84a0af39a 100644 --- 
a/clang/test/CodeGen/instrument-objc-method.m +++ b/clang/test/CodeGen/instrument-objc-method.m @@ -20,7 +20,6 @@ + (void)load __attribute__((no_instrument_function)) { - (void)dealloc __attribute__((no_instrument_function)) { } -// PREINLINE: declare void @llvm.dbg.declare(metadata, metadata, metadata) #2 // PREINLINE: attributes #0 = { {{.*}}"instrument-function-entry"="__cyg_profile_func_enter" // PREINLINE-NOT: attributes #0 = { {{.*}}"instrument-function-entry"="__cyg_profile_func_enter_bare" // PREINLINE-NOT: attributes #2 = { {{.*}}"__cyg_profile_func_enter" diff --git a/clang/test/CodeGenCUDA/debug-info-address-class.cu b/clang/test/CodeGenCUDA/debug-info-address-class.cu index 5e0f775cbe9e37..876d2de31664a0 100644 --- a/clang/test/CodeGenCUDA/debug-info-address-class.cu +++ b/clang/test/CodeGenCUDA/debug-info-address-class.cu @@ -14,12 +14,12 @@ __device__ __constant__ int FileVar2; __device__ void kernel1( // CHECK-DAG: ![[ARG:[0-9]+]] = !DILocalVariable(name: "Arg", arg: {{[0-9]+}}, scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}) - // CHECK-DAG: call void @llvm.dbg.declare(metadata ptr {{.*}}, metadata ![[ARG]], metadata !DIExpression()), !dbg !{{[0-9]+}} + // CHECK-DAG: #dbg_declare(ptr {{.*}}, ![[ARG]], !DIExpression(), !{{[0-9]+}} int Arg) { // CHECK-DAG: ![[FUNCVAR0:[0-9]+]] = distinct !DIGlobalVariable(name: "FuncVar0", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: true, isDefinition: true) // CHECK-DAG: !DIGlobalVariableExpression(var: ![[FUNCVAR0]], expr: !DIExpression(DW_OP_constu, 8, DW_OP_swap, DW_OP_xderef)) __shared__ int FuncVar0; // CHECK-DAG: ![[FUNCVAR1:[0-9]+]] = !DILocalVariable(name: "FuncVar1", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}) - // CHECK-DAG: call void @llvm.dbg.declare(metadata ptr {{.*}}, metadata ![[FUNCVAR1]], metadata !DIExpression()), !dbg !{{[0-9]+}} + // CHECK-DAG: #dbg_declare(ptr {{.*}}, ![[FUNCVAR1]], 
!DIExpression(), !{{[0-9]+}} int FuncVar1; } diff --git a/clang/test/CodeGenCXX/debug-info-inheriting-constructor.cpp b/clang/test/CodeGenCXX/debug-info-inheriting-constructor.cpp index 78b99f86ee2926..7918387edc79a6 100644 --- a/clang/test/CodeGenCXX/debug-info-inheriting-constructor.cpp +++ b/clang/test/CodeGenCXX/debug-info-inheriting-constructor.cpp @@ -10,10 +10,10 @@ struct B : A { A::A(int i, ...) {} // CHECK: define{{.*}} void @{{.*}}foo // CHECK-NOT: ret void -// CHECK: call void @llvm.dbg.declare +// CHECK: #dbg_declare // CHECK-NOT: ret void -// CHECK: call void @llvm.dbg.declare(metadata ptr %{{[^,]+}}, -// CHECK-SAME: metadata ![[THIS:[0-9]+]], metadata !DIExpression()), !dbg ![[LOC:[0-9]+]] +// CHECK: #dbg_declare(ptr %{{[^,]+}}, +// CHECK-SAME: ![[THIS:[0-9]+]], !DIExpression(), ![[LOC:[0-9]+]] // CHECK: ret void, !dbg ![[NOINL:[0-9]+]] // CHECK: ![[FOO:.*]] = distinct !DISubprogram(name: "foo" // CHECK-DAG: ![[A:.*]] = distinct !DISubprogram(name: "A", linkageName: "_ZN1BCI11AEiz" diff --git a/clang/test/CodeGenCXX/debug-info-nrvo.cpp b/clang/test/CodeGenCXX/debug-info-nrvo.cpp index 6916207b8806ac..b36e3719101408 100644 --- a/clang/test/CodeGenCXX/debug-info-nrvo.cpp +++ b/clang/test/CodeGenCXX/debug-info-nrvo.cpp @@ -27,9 +27,9 @@ int main() { // stored in the return register. 
// CHECK: %[[RESULT:.*]] = alloca ptr, align 8 -// CHECK: call void @llvm.dbg.declare(metadata ptr %[[RESULT]], -// CHECK-SAME: metadata !DIExpression(DW_OP_deref) +// CHECK: #dbg_declare(ptr %[[RESULT]], +// CHECK-SAME: !DIExpression(DW_OP_deref) // NOELIDE: %[[FOO:.*]] = alloca %struct.Foo, align 4 -// NOELIDE: call void @llvm.dbg.declare(metadata ptr %[[FOO]], -// NOELIDE-SAME: metadata !DIExpression() +// NOELIDE: #dbg_declare(ptr %[[FOO]], +// NOELIDE-SAME: !DIExpression() diff --git a/clang/test/CodeGenCXX/debug-info-range-for-var-names.cpp b/clang/test/CodeGenCXX/debug-info-range-for-var-names.cpp index 30291d74331a4b..1cc13e15451832 100644 --- a/clang/test/CodeGenCXX/debug-info-range-for-var-names.cpp +++ b/clang/test/CodeGenCXX/debug-info-range-for-var-names.cpp @@ -15,15 +15,15 @@ void test() { } } -// CHECK: call void @llvm.dbg.declare(metadata ptr %__range1, metadata ![[RANGE1:[0-9]+]] -// CHECK: call void @llvm.dbg.declare(metadata ptr {{.*}}, metadata ![[BEGIN1:[0-9]+]] -// CHECK: call void @llvm.dbg.declare(metadata ptr {{.*}}, metadata ![[END1:[0-9]+]] -// CHECK: call void @llvm.dbg.declare(metadata ptr %__range2, metadata ![[RANGE2:[0-9]+]] -// CHECK: call void @llvm.dbg.declare(metadata ptr {{.*}}, metadata ![[BEGIN2:[0-9]+]] -// CHECK: call void @llvm.dbg.declare(metadata ptr {{.*}}, metadata ![[END2:[0-9]+]] -// CHECK: call void @llvm.dbg.declare(metadata ptr %__range3, metadata ![[RANGE3:[0-9]+]] -// CHECK: call void @llvm.dbg.declare(metadata ptr {{.*}}, metadata ![[BEGIN3:[0-9]+]] -// CHECK: call void @llvm.dbg.declare(metadata ptr {{.*}}, metadata ![[END3:[0-9]+]] +// CHECK: #dbg_declare(ptr %__range1, ![[RANGE1:[0-9]+]] +// CHECK: #dbg_declare(ptr {{[^,]*}}, ![[BEGIN1:[0-9]+]] +// CHECK: #dbg_declare(ptr {{[^,]*}}, ![[END1:[0-9]+]] +// CHECK: #dbg_declare(ptr %__range2, ![[RANGE2:[0-9]+]] +// CHECK: #dbg_declare(ptr {{[^,]*}}, ![[BEGIN2:[0-9]+]] +// CHECK: #dbg_declare(ptr {{[^,]*}}, ![[END2:[0-9]+]] +// CHECK: #dbg_declare(ptr %__range3, 
![[RANGE3:[0-9]+]] +// CHECK: #dbg_declare(ptr {{[^,]*}}, ![[BEGIN3:[0-9]+]] +// CHECK: #dbg_declare(ptr {{[^,]*}}, ![[END3:[0-9]+]] // CHECK: ![[RANGE1]] = !DILocalVariable(name: "__range1", // CHECK: ![[BEGIN1]] = !DILocalVariable(name: "__begin1", // CHECK: ![[END1]] = !DILocalVariable(name: "__end1", diff --git a/clang/test/CodeGenCXX/debug-info-structured-binding-bitfield.cpp b/clang/test/CodeGenCXX/debug-info-structured-binding-bitfield.cpp index d9f5e3eacac37d..5d9041e0475d24 100644 --- a/clang/test/CodeGenCXX/debug-info-structured-binding-bitfield.cpp +++ b/clang/test/CodeGenCXX/debug-info-structured-binding-bitfield.cpp @@ -8,8 +8,8 @@ struct S0 { // CHECK-LABEL: define dso_local void @_Z3fS0v // CHECK: alloca %struct.S0, align 4 // CHECK-NEXT: [[TMP0:%.*]] = alloca %struct.S0, align 4 -// CHECK: call void @llvm.dbg.declare(metadata ptr [[TMP0]], metadata [[S0_A:![0-9]+]], metadata !DIExpression()) -// CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr [[TMP0]], metadata [[S0_B:![0-9]+]], metadata !DIExpression(DW_OP_plus_uconst, 2)) +// CHECK: #dbg_declare(ptr [[TMP0]], [[S0_A:![0-9]+]], !DIExpression(), +// CHECK-NEXT: #dbg_declare(ptr [[TMP0]], [[S0_B:![0-9]+]], !DIExpression(DW_OP_plus_uconst, 2), // void fS0() { S0 s0; @@ -24,8 +24,8 @@ struct S1 { // CHECK-LABEL: define dso_local void @_Z3fS1v // CHECK: alloca %struct.S1, align 4 // CHECK-NEXT: [[TMP0:%.*]] = alloca %struct.S1, align 4 -// CHECK: call void @llvm.dbg.declare(metadata ptr [[TMP0]], metadata [[S1_A:![0-9]+]], metadata !DIExpression()) -// CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr [[TMP0]], metadata [[S1_B:![0-9]+]], metadata !DIExpression(DW_OP_plus_uconst, 2)) +// CHECK: #dbg_declare(ptr [[TMP0]], [[S1_A:![0-9]+]], !DIExpression(), +// CHECK-NEXT: #dbg_declare(ptr [[TMP0]], [[S1_B:![0-9]+]], !DIExpression(DW_OP_plus_uconst, 2), // void fS1() { S1 s1; @@ -40,8 +40,8 @@ struct S2 { // CHECK-LABEL: define dso_local void @_Z3fS2v // CHECK: alloca %struct.S2, align 4 // 
CHECK-NEXT: [[TMP0:%.*]] = alloca %struct.S2, align 4 -// CHECK: call void @llvm.dbg.declare(metadata ptr [[TMP0]], metadata [[S2_A:![0-9]+]], metadata !DIExpression()) -// CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr [[TMP0]], metadata [[S2_B:![0-9]+]], metadata !DIExpression(DW_OP_plus_uconst, 1)) +// CHECK: #dbg_declare(ptr [[TMP0]], [[S2_A:![0-9]+]], !DIExpression(), +// CHECK-NEXT: #dbg_declare(ptr [[TMP0]], [[S2_B:![0-9]+]], !DIExpression(DW_OP_plus_uconst, 1), // void fS2() { S2 s2; @@ -56,8 +56,8 @@ struct S3 { // CHECK-LABEL: define dso_local void @_Z3fS3v // CHECK: alloca %struct.S3, align 4 // CHECK-NEXT: [[TMP0:%.*]] = alloca %struct.S3, align 4 -// CHECK: call void @llvm.dbg.declare(metadata ptr [[TMP0]], metadata [[S3_A:![0-9]+]], metadata !DIExpression()) -// CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr [[TMP0]], metadata [[S3_B:![0-9]+]], metadata !DIExpression(DW_OP_plus_uconst, 1)) +// CHECK: #dbg_declare(ptr [[TMP0]], [[S3_A:![0-9]+]], !DIExpression(), +// CHECK-NEXT: #dbg_declare(ptr [[TMP0]], [[S3_B:![0-9]+]], !DIExpression(DW_OP_plus_uconst, 1), // void fS3() { S3 s3; @@ -72,8 +72,8 @@ struct S4 { // CHECK-LABEL: define dso_local void @_Z3fS4v // CHECK: alloca %struct.S4, align 4 // CHECK-NEXT: [[TMP0:%.*]] = alloca %struct.S4, align 4 -// CHECK: call void @llvm.dbg.declare(metadata ptr [[TMP0]], metadata [[S4_A:![0-9]+]], metadata !DIExpression()) -// CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr [[TMP0]], metadata [[S4_B:![0-9]+]], metadata !DIExpression(DW_OP_plus_uconst, 1)) +// CHECK: #dbg_declare(ptr [[TMP0]], [[S4_A:![0-9]+]], !DIExpression(), +// CHECK-NEXT: #dbg_declare(ptr [[TMP0]], [[S4_B:![0-9]+]], !DIExpression(DW_OP_plus_uconst, 1), // void fS4() { S4 s4; @@ -88,8 +88,8 @@ struct S5 { // CHECK-LABEL: define dso_local void @_Z3fS5v // CHECK: alloca %struct.S5, align 4 // CHECK-NEXT: [[TMP0:%.*]] = alloca %struct.S5, align 4 -// CHECK: call void @llvm.dbg.declare(metadata ptr [[TMP0]], metadata 
[[S5_A:![0-9]+]], metadata !DIExpression()) -// CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr [[TMP0]], metadata [[S5_B:![0-9]+]], metadata !DIExpression(DW_OP_plus_uconst, 1)) +// CHECK: #dbg_declare(ptr [[TMP0]], [[S5_A:![0-9]+]], !DIExpression(), +// CHECK-NEXT: #dbg_declare(ptr [[TMP0]], [[S5_B:![0-9]+]], !DIExpression(DW_OP_plus_uconst, 1), // void fS5() { S5 s5; @@ -104,8 +104,8 @@ struct S6 { // CHECK-LABEL: define dso_local void @_Z3fS6v // CHECK: alloca %struct.S6, align 4 // CHECK-NEXT: [[TMP0:%.*]] = alloca %struct.S6, align 4 -// CHECK: call void @llvm.dbg.declare(metadata ptr [[TMP0]], metadata [[S6_A:![0-9]+]], metadata !DIExpression()) -// CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr [[TMP0]], metadata [[S6_B:![0-9]+]], metadata !DIExpression(DW_OP_plus_uconst, 2)) +// CHECK: #dbg_declare(ptr [[TMP0]], [[S6_A:![0-9]+]], !DIExpression(), +// CHECK-NEXT: #dbg_declare(ptr [[TMP0]], [[S6_B:![0-9]+]], !DIExpression(DW_OP_plus_uconst, 2), // void fS6() { S6 s6; @@ -120,8 +120,8 @@ struct S7 { // CHECK-LABEL: define dso_local void @_Z3fS7v // CHECK: alloca %struct.S7, align 4 // CHECK-NEXT: [[TMP0:%.*]] = alloca %struct.S7, align 4 -// CHECK: call void @llvm.dbg.declare(metadata ptr [[TMP0]], metadata [[S7_A:![0-9]+]], metadata !DIExpression()) -// CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr [[TMP0]], metadata [[S7_B:![0-9]+]], metadata !DIExpression(DW_OP_plus_uconst, 2)) +// CHECK: #dbg_declare(ptr [[TMP0]], [[S7_A:![0-9]+]], !DIExpression(), +// CHECK-NEXT: #dbg_declare(ptr [[TMP0]], [[S7_B:![0-9]+]], !DIExpression(DW_OP_plus_uconst, 2), // void fS7() { S7 s7; @@ -136,8 +136,8 @@ struct S8 { // CHECK-LABEL: define dso_local void @_Z3fS8v // CHECK: alloca %struct.S8, align 4 // CHECK-NEXT: [[TMP0:%.*]] = alloca %struct.S8, align 4 -// CHECK: call void @llvm.dbg.declare(metadata ptr [[TMP0]], metadata [[S8_A:![0-9]+]], metadata !DIExpression()) -// CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr [[TMP0]], metadata 
[[S8_B:![0-9]+]], metadata !DIExpression(DW_OP_plus_uconst, 2)) +// CHECK: #dbg_declare(ptr [[TMP0]], [[S8_A:![0-9]+]], !DIExpression(), +// CHECK-NEXT: #dbg_declare(ptr [[TMP0]], [[S8_B:![0-9]+]], !DIExpression(DW_OP_plus_uconst, 2), // void fS8() { S8 s8; @@ -152,8 +152,8 @@ struct S9 { // CHECK-LABEL: define dso_local void @_Z3fS9v // CHECK: alloca %struct.S9, align 4 // CHECK-NEXT: [[TMP0:%.*]] = alloca %struct.S9, align 4 -// CHECK: call void @llvm.dbg.declare(metadata ptr [[TMP0]], metadata [[S9_A:![0-9]+]], metadata !DIExpression()) -// CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr [[TMP0]], metadata [[S9_B:![0-9]+]], metadata !DIExpression(DW_OP_plus_uconst, 4)) +// CHECK: #dbg_declare(ptr [[TMP0]], [[S9_A:![0-9]+]], !DIExpression(), +// CHECK-NEXT: #dbg_declare(ptr [[TMP0]], [[S9_B:![0-9]+]], !DIExpression(DW_OP_plus_uconst, 4), // void fS9() { S9 s9; @@ -167,8 +167,8 @@ struct S10 { // CHECK-LABEL: define dso_local void @_Z4fS10v // CHECK: alloca %struct.S10, align 4 // CHECK-NEXT: [[TMP0:%.*]] = alloca %struct.S10, align 4 -// CHECK: call void @llvm.dbg.declare(metadata ptr [[TMP0]], metadata [[S10_A:![0-9]+]], metadata !DIExpression()) -// CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr [[TMP0]], metadata [[S10_B:![0-9]+]], metadata !DIExpression(DW_OP_plus_uconst, 1)) +// CHECK: #dbg_declare(ptr [[TMP0]], [[S10_A:![0-9]+]], !DIExpression(), +// CHECK-NEXT: #dbg_declare(ptr [[TMP0]], [[S10_B:![0-9]+]], !DIExpression(DW_OP_plus_uconst, 1), // S10() : x(0), y(0) {} }; @@ -189,7 +189,7 @@ struct S11 { // CHECK-LABEL: define dso_local void @_Z4fS11v // CHECK: alloca %struct.S11, align 4 // CHECK-NEXT: [[TMP0:%.*]] = alloca %struct.S11, align 4 -// CHECK-NOT: call void @llvm.dbg.declare(metadata ptr [[TMP0]] +// CHECK-NOT: #dbg_declare(ptr [[TMP0]] // void fS11() { S11 s11; @@ -204,8 +204,8 @@ struct S12 { // CHECK-LABEL: define dso_local void @_Z4fS12v // CHECK: alloca %struct.S12, align 4 // CHECK-NEXT: [[TMP0:%.*]] = alloca %struct.S12, align 
4 -// CHECK: call void @llvm.dbg.declare(metadata ptr [[TMP0]], metadata [[S12_A:![0-9]+]], metadata !DIExpression()) -// CHECK-NOT: call void @llvm.dbg.declare(metadata ptr [[TMP0]] +// CHECK: #dbg_declare(ptr [[TMP0]], [[S12_A:![0-9]+]], !DIExpression(), +// CHECK-NOT: #dbg_declare(ptr [[TMP0]] // void fS12() { S12 s12; @@ -220,7 +220,7 @@ struct __attribute__((packed)) S13 { // CHECK-LABEL: define dso_local void @_Z4fS13v // CHECK: alloca %struct.S13, align 1 // CHECK-NEXT: [[TMP0:%.*]] = alloca %struct.S13, align 1 -// CHECK-NOT: call void @llvm.dbg.declare(metadata ptr [[TMP0]] +// CHECK-NOT: #dbg_declare(ptr [[TMP0]] // void fS13() { S13 s13; diff --git a/clang/test/CodeGenCXX/debug-info-structured-binding.cpp b/clang/test/CodeGenCXX/debug-info-structured-binding.cpp index 7f3c3a08941abc..c88a5cdaa20e78 100644 --- a/clang/test/CodeGenCXX/debug-info-structured-binding.cpp +++ b/clang/test/CodeGenCXX/debug-info-structured-binding.cpp @@ -1,10 +1,10 @@ // RUN: %clang_cc1 -emit-llvm -debug-info-kind=standalone -triple %itanium_abi_triple %s -o - | FileCheck %s --implicit-check-not="call void @llvm.dbg.declare" -// CHECK: call void @llvm.dbg.declare(metadata ptr %{{[a-z]+}}, metadata ![[VAR_0:[0-9]+]], metadata !DIExpression()) -// CHECK: call void @llvm.dbg.declare(metadata ptr %{{[0-9]+}}, metadata ![[VAR_1:[0-9]+]], metadata !DIExpression()) -// CHECK: call void @llvm.dbg.declare(metadata ptr %{{[0-9]+}}, metadata ![[VAR_2:[0-9]+]], metadata !DIExpression(DW_OP_plus_uconst, 4)) -// CHECK: call void @llvm.dbg.declare(metadata ptr %{{[0-9]+}}, metadata ![[VAR_3:[0-9]+]], metadata !DIExpression(DW_OP_deref)) -// CHECK: call void @llvm.dbg.declare(metadata ptr %{{[0-9]+}}, metadata ![[VAR_4:[0-9]+]], metadata !DIExpression(DW_OP_deref, DW_OP_plus_uconst, 4)) +// CHECK: #dbg_declare(ptr %{{[a-z]+}}, ![[VAR_0:[0-9]+]], !DIExpression(), +// CHECK: #dbg_declare(ptr %{{[0-9]+}}, ![[VAR_1:[0-9]+]], !DIExpression(), +// CHECK: #dbg_declare(ptr %{{[0-9]+}}, 
![[VAR_2:[0-9]+]], !DIExpression(DW_OP_plus_uconst, 4), +// CHECK: #dbg_declare(ptr %{{[0-9]+}}, ![[VAR_3:[0-9]+]], !DIExpression(DW_OP_deref), +// CHECK: #dbg_declare(ptr %{{[0-9]+}}, ![[VAR_4:[0-9]+]], !DIExpression(DW_OP_deref, DW_OP_plus_uconst, 4), // CHECK: ![[VAR_0]] = !DILocalVariable(name: "a" // CHECK: ![[VAR_1]] = !DILocalVariable(name: "x1" // CHECK: ![[VAR_2]] = !DILocalVariable(name: "y1" diff --git a/clang/test/CodeGenCXX/debug-info.cpp b/clang/test/CodeGenCXX/debug-info.cpp index 89ab4889d8967f..8594a897ef7c09 100644 --- a/clang/test/CodeGenCXX/debug-info.cpp +++ b/clang/test/CodeGenCXX/debug-info.cpp @@ -10,7 +10,7 @@ // CHECK-NEXT: [[param_addr_storage:%.*]] = alloca ptr, align 8 // CHECK-NEXT: store // CHECK-NEXT: store ptr [[param]], ptr [[param_addr_storage]], align 8 -// CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr [[param_addr_storage]], metadata ![[F:[0-9]+]], metadata !DIExpression(DW_OP_deref)) +// CHECK-NEXT: #dbg_declare(ptr [[param_addr_storage]], ![[F:[0-9]+]], !DIExpression(DW_OP_deref), // !llvm.dbg.cu pulls in globals and their types first. // CHECK-NOT: !DIGlobalVariable(name: "c" diff --git a/clang/test/CodeGenCXX/linetable-eh.cpp b/clang/test/CodeGenCXX/linetable-eh.cpp index 13be13468f0478..362c62635a2610 100644 --- a/clang/test/CodeGenCXX/linetable-eh.cpp +++ b/clang/test/CodeGenCXX/linetable-eh.cpp @@ -3,11 +3,11 @@ // Test that emitting a landing pad does not affect the line table // entries for the code that triggered it. 
-// CHECK: call void @llvm.dbg.declare -// CHECK: call void @llvm.dbg.declare(metadata {{.*}}, metadata ![[CURRENT_ADDR:.*]], metadata !{{.*}}), !dbg ![[DBG1:.*]] +// CHECK: #dbg_declare +// CHECK: #dbg_declare({{.*}}, ![[CURRENT_ADDR:.*]], !{{.*}}, ![[DBG1:[0-9]+]] // CHECK: unwind label %{{.*}}, !dbg ![[DBG1]] // CHECK: store i64 %{{.*}}, ptr %current_address, align 8, !dbg ![[DBG4:.*]] -// CHECK-NEXT: call void @llvm.dbg.declare(metadata {{.*}}, metadata ![[FOUND_IT:.*]], metadata !{{.*}}), !dbg ![[DBG2:.*]] +// CHECK-NEXT: #dbg_declare({{.*}}, ![[FOUND_IT:.*]], !{{.*}}, ![[DBG2:[0-9]+]] // CHECK: = landingpad // CHECK-NEXT: cleanup, !dbg ![[DBG3:.*]] // CHECK-DAG: ![[CURRENT_ADDR]] = {{.*}}name: "current_address" diff --git a/clang/test/CodeGenCXX/trivial_abi_debuginfo.cpp b/clang/test/CodeGenCXX/trivial_abi_debuginfo.cpp index 3d93f20ee1b242..07d2c846244f58 100644 --- a/clang/test/CodeGenCXX/trivial_abi_debuginfo.cpp +++ b/clang/test/CodeGenCXX/trivial_abi_debuginfo.cpp @@ -14,13 +14,13 @@ struct __attribute__((trivial_abi)) Trivial { // CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_TRIVIAL:%.*]], align 4 // CHECK-NEXT: [[NRVO:%.*]] = alloca i1, align 1 // CHECK-NEXT: store i1 false, ptr [[NRVO]], align 1, !dbg [[DBG18:![0-9]+]] -// CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr [[RETVAL]], metadata [[META19:![0-9]+]], metadata !DIExpression()), !dbg [[DBG20:![0-9]+]] -// CHECK-NEXT: call void @_ZN7TrivialC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[RETVAL]]) #[[ATTR3:[0-9]+]], !dbg [[DBG20]] +// CHECK-NEXT: #dbg_declare(ptr [[RETVAL]], [[META19:![0-9]+]], !DIExpression(), [[META20:![0-9]+]]) +// CHECK-NEXT: call void @_ZN7TrivialC1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[RETVAL]]) #[[ATTR1:[0-9]+]], !dbg [[META20]] // CHECK-NEXT: store i1 true, ptr [[NRVO]], align 1, !dbg [[DBG21:![0-9]+]] // CHECK-NEXT: [[NRVO_VAL:%.*]] = load i1, ptr [[NRVO]], align 1, !dbg [[DBG22:![0-9]+]] // CHECK-NEXT: br i1 [[NRVO_VAL]], label 
[[NRVO_SKIPDTOR:%.*]], label [[NRVO_UNUSED:%.*]], !dbg [[DBG22]] // CHECK: nrvo.unused: -// CHECK-NEXT: call void @_ZN7TrivialD1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[RETVAL]]) #[[ATTR3]], !dbg [[DBG22]] +// CHECK-NEXT: call void @_ZN7TrivialD1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[RETVAL]]) #[[ATTR1]], !dbg [[DBG22]] // CHECK-NEXT: br label [[NRVO_SKIPDTOR]], !dbg [[DBG22]] // CHECK: nrvo.skipdtor: // CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_TRIVIAL]], ptr [[RETVAL]], i32 0, i32 0, !dbg [[DBG22]] diff --git a/clang/test/CodeGenObjC/2010-02-09-DbgSelf.m b/clang/test/CodeGenObjC/2010-02-09-DbgSelf.m index ad8b29a85ea14d..7696163d44460b 100644 --- a/clang/test/CodeGenObjC/2010-02-09-DbgSelf.m +++ b/clang/test/CodeGenObjC/2010-02-09-DbgSelf.m @@ -1,6 +1,6 @@ // RUN: %clang_cc1 -x objective-c -emit-llvm -debug-info-kind=limited < %s | FileCheck %s // Test to check that "self" argument is assigned a location. -// CHECK: call void @llvm.dbg.declare(metadata ptr %self.addr, metadata [[SELF:![0-9]*]], metadata !{{.*}}) +// CHECK: #dbg_declare(ptr %self.addr, [[SELF:![0-9]*]], !{{.*}}) // CHECK: [[SELF]] = !DILocalVariable(name: "self", arg: 1, @interface Foo diff --git a/clang/test/CodeGenObjC/debug-info-blocks.m b/clang/test/CodeGenObjC/debug-info-blocks.m index 59171da016da1e..de4eec4fe053a2 100644 --- a/clang/test/CodeGenObjC/debug-info-blocks.m +++ b/clang/test/CodeGenObjC/debug-info-blocks.m @@ -5,15 +5,15 @@ // CHECK: define {{.*}}_block_invoke // CHECK: store ptr %.block_descriptor, ptr %[[ALLOCA:block.addr]], align -// CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr %d, metadata ![[D:[0-9]+]], metadata !{{.*}}) -// CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr %[[ALLOCA]], metadata ![[SELF:[0-9]+]], metadata !{{.*}}) +// CHECK-NEXT: #dbg_declare(ptr %d, ![[D:[0-9]+]], !{{.*}}) +// CHECK-NEXT: #dbg_declare(ptr %[[ALLOCA]], ![[SELF:[0-9]+]], !{{.*}}) // Test that we do emit scope info for the helper 
functions, and that the // parameters to these functions are marked as artificial (so the debugger // doesn't accidentally step into the function). // CHECK: define {{.*}} @__copy_helper_block_{{.*}}(ptr noundef %0, ptr noundef %1) // CHECK-NOT: ret -// CHECK: call {{.*}}, !dbg ![[DBG_LINE:[0-9]+]] +// CHECK: #dbg_declare({{.+}}, ![[DBG_LINE:[0-9]+]] // CHECK-NOT: ret // CHECK: load {{.*}}, !dbg ![[DBG_LINE]] // CHECK: ret {{.*}}, !dbg ![[DBG_LINE]] diff --git a/clang/test/CodeGenObjC/debug-info-nested-blocks.m b/clang/test/CodeGenObjC/debug-info-nested-blocks.m index 3f35ef7003e218..de30eedac45972 100644 --- a/clang/test/CodeGenObjC/debug-info-nested-blocks.m +++ b/clang/test/CodeGenObjC/debug-info-nested-blocks.m @@ -23,4 +23,4 @@ void bar(void) { // // CHECK: define {{.*}}void @__bar_block_invoke_3(ptr noundef %.block_descriptor) // CHECK: %[[BLOCKADDR:.*]] = alloca ptr, align -// CHECK: call void @llvm.dbg.declare(metadata {{.*}}%[[BLOCKADDR]] +// CHECK: #dbg_declare({{.*}}%[[BLOCKADDR]] diff --git a/clang/test/CodeGenObjC/objc-fixed-enum.m b/clang/test/CodeGenObjC/objc-fixed-enum.m index 0c123ce6f69d89..6ac2ae6debee61 100644 --- a/clang/test/CodeGenObjC/objc-fixed-enum.m +++ b/clang/test/CodeGenObjC/objc-fixed-enum.m @@ -34,13 +34,13 @@ typedef NS_ENUM(NSInteger, Enum1) { int main(void) { Enum0 e0 = Enum0One; - // CHECK: call void @llvm.dbg.declare(metadata {{.*}}, metadata ![[ENUM0:[0-9]+]], metadata !{{.*}}) + // CHECK: #dbg_declare({{.*}}, ![[ENUM0:[0-9]+]], !{{.*}}) Enum1 e1 = Enum1One; - // CHECK: call void @llvm.dbg.declare(metadata {{.*}}, metadata ![[ENUM1:[0-9]+]], metadata !{{.*}}) + // CHECK: #dbg_declare({{.*}}, ![[ENUM1:[0-9]+]], !{{.*}}) Enum2 e2 = Enum2One; - // CHECK: call void @llvm.dbg.declare(metadata {{.*}}, metadata ![[ENUM2:[0-9]+]], metadata !{{.*}}) + // CHECK: #dbg_declare({{.*}}, ![[ENUM2:[0-9]+]], !{{.*}}) Enum3 e3 = Enum3One; - // CHECK: call void @llvm.dbg.declare(metadata {{.*}}, metadata ![[ENUM3:[0-9]+]], metadata !{{.*}}) + // 
CHECK: #dbg_declare({{.*}}, ![[ENUM3:[0-9]+]], !{{.*}}) // -Werror and the following line ensures that these enums are not // -treated as C++11 strongly typed enums. diff --git a/clang/test/CodeGenObjCXX/property-objects.mm b/clang/test/CodeGenObjCXX/property-objects.mm index 500a81698f99c2..c4958f634341bb 100644 --- a/clang/test/CodeGenObjCXX/property-objects.mm +++ b/clang/test/CodeGenObjCXX/property-objects.mm @@ -39,7 +39,7 @@ @implementation I // leaking over from the previous function emission by accident. // CHECK: define internal void @"\01-[I setBounds:]"({{.*}} { // CHECK-NOT: !dbg -// CHECK: call void @llvm.dbg.declare +// CHECK: #dbg_declare - (void)setFrame:(CGRect)frameRect {} - (CGRect)frame {return bounds;} @@ -158,7 +158,7 @@ void testB2(B *b) { // CHECK: define{{.*}} void @_Z6testB2P1B(ptr // CHECK: [[BVAR:%.*]] = alloca ptr, align 8 -// CHECK: call void @llvm.dbg.declare( +// CHECK: #dbg_declare( // CHECK: call void @_ZN2B3C1Ev( // CHECK-NEXT: [[T0:%.*]] = call i64 @_ZN2B3cv2B1Ev( // CHECK-NOT: call diff --git a/clang/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl b/clang/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl index ba645442f6a6f9..479e893000942f 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-debug-info-variable-expression.cl @@ -52,31 +52,31 @@ int *constant FileVar14 = 0; kernel void kernel1( // CHECK-DAG: ![[KERNELARG0:[0-9]+]] = !DILocalVariable(name: "KernelArg0", arg: {{[0-9]+}}, scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}) - // CHECK-DAG: call void @llvm.dbg.declare(metadata ptr addrspace(5) {{.*}}, metadata ![[KERNELARG0]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}} + // CHECK-DAG: #dbg_declare(ptr addrspace(5) {{.*}}, ![[KERNELARG0]], !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), !{{[0-9]+}} global int *KernelArg0, // CHECK-DAG: ![[KERNELARG1:[0-9]+]] 
= !DILocalVariable(name: "KernelArg1", arg: {{[0-9]+}}, scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}) - // CHECK-DAG: call void @llvm.dbg.declare(metadata ptr addrspace(5) {{.*}}, metadata ![[KERNELARG1]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}} + // CHECK-DAG: #dbg_declare(ptr addrspace(5) {{.*}}, ![[KERNELARG1]], !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), !{{[0-9]+}} constant int *KernelArg1, // CHECK-DAG: ![[KERNELARG2:[0-9]+]] = !DILocalVariable(name: "KernelArg2", arg: {{[0-9]+}}, scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}) - // CHECK-DAG: call void @llvm.dbg.declare(metadata ptr addrspace(5) {{.*}}, metadata ![[KERNELARG2]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}} + // CHECK-DAG: #dbg_declare(ptr addrspace(5) {{.*}}, ![[KERNELARG2]], !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), !{{[0-9]+}} local int *KernelArg2) { private int *Tmp0; int *Tmp1; // CHECK-DAG: ![[FUNCVAR0:[0-9]+]] = !DILocalVariable(name: "FuncVar0", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}) - // CHECK-DAG: call void @llvm.dbg.declare(metadata ptr addrspace(5) {{.*}}, metadata ![[FUNCVAR0]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}} + // CHECK-DAG: #dbg_declare(ptr addrspace(5) {{.*}}, ![[FUNCVAR0]], !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), !{{[0-9]+}} global int *FuncVar0 = KernelArg0; // CHECK-DAG: ![[FUNCVAR1:[0-9]+]] = !DILocalVariable(name: "FuncVar1", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}) - // CHECK-DAG: call void @llvm.dbg.declare(metadata ptr addrspace(5) {{.*}}, metadata ![[FUNCVAR1]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}} + // CHECK-DAG: #dbg_declare(ptr addrspace(5) {{.*}}, ![[FUNCVAR1]], !DIExpression(DW_OP_constu, 1, DW_OP_swap, 
DW_OP_xderef), !{{[0-9]+}} constant int *FuncVar1 = KernelArg1; // CHECK-DAG: ![[FUNCVAR2:[0-9]+]] = !DILocalVariable(name: "FuncVar2", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}) - // CHECK-DAG: call void @llvm.dbg.declare(metadata ptr addrspace(5) {{.*}}, metadata ![[FUNCVAR2]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}} + // CHECK-DAG: #dbg_declare(ptr addrspace(5) {{.*}}, ![[FUNCVAR2]], !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), !{{[0-9]+}} local int *FuncVar2 = KernelArg2; // CHECK-DAG: ![[FUNCVAR3:[0-9]+]] = !DILocalVariable(name: "FuncVar3", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}) - // CHECK-DAG: call void @llvm.dbg.declare(metadata ptr addrspace(5) {{.*}}, metadata ![[FUNCVAR3]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}} + // CHECK-DAG: #dbg_declare(ptr addrspace(5) {{.*}}, ![[FUNCVAR3]], !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), !{{[0-9]+}} private int *FuncVar3 = Tmp0; // CHECK-DAG: ![[FUNCVAR4:[0-9]+]] = !DILocalVariable(name: "FuncVar4", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}) - // CHECK-DAG: call void @llvm.dbg.declare(metadata ptr addrspace(5) {{.*}}, metadata ![[FUNCVAR4]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}} + // CHECK-DAG: #dbg_declare(ptr addrspace(5) {{.*}}, ![[FUNCVAR4]], !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), !{{[0-9]+}} int *FuncVar4 = Tmp1; // CHECK-DAG: ![[FUNCVAR5:[0-9]+]] = distinct !DIGlobalVariable(name: "FuncVar5", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}, isLocal: true, isDefinition: true) @@ -112,18 +112,18 @@ kernel void kernel1( int *local FuncVar14; FuncVar14 = Tmp1; // CHECK-DAG: ![[FUNCVAR15:[0-9]+]] = !DILocalVariable(name: "FuncVar15", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}) - 
// CHECK-DAG: call void @llvm.dbg.declare(metadata ptr addrspace(5) {{.*}}, metadata ![[FUNCVAR15]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}} + // CHECK-DAG: #dbg_declare(ptr addrspace(5) {{.*}}, ![[FUNCVAR15]], !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), !{{[0-9]+}} global int *private FuncVar15 = KernelArg0; // CHECK-DAG: ![[FUNCVAR16:[0-9]+]] = !DILocalVariable(name: "FuncVar16", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}) - // CHECK-DAG: call void @llvm.dbg.declare(metadata ptr addrspace(5) {{.*}}, metadata ![[FUNCVAR16]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}} + // CHECK-DAG: #dbg_declare(ptr addrspace(5) {{.*}}, ![[FUNCVAR16]], !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), !{{[0-9]+}} constant int *private FuncVar16 = KernelArg1; // CHECK-DAG: ![[FUNCVAR17:[0-9]+]] = !DILocalVariable(name: "FuncVar17", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}) - // CHECK-DAG: call void @llvm.dbg.declare(metadata ptr addrspace(5) {{.*}}, metadata ![[FUNCVAR17]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}} + // CHECK-DAG: #dbg_declare(ptr addrspace(5) {{.*}}, ![[FUNCVAR17]], !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), !{{[0-9]+}} local int *private FuncVar17 = KernelArg2; // CHECK-DAG: ![[FUNCVAR18:[0-9]+]] = !DILocalVariable(name: "FuncVar18", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}) - // CHECK-DAG: call void @llvm.dbg.declare(metadata ptr addrspace(5) {{.*}}, metadata ![[FUNCVAR18]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}} + // CHECK-DAG: #dbg_declare(ptr addrspace(5) {{.*}}, ![[FUNCVAR18]], !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), !{{[0-9]+}} private int *private FuncVar18 = Tmp0; // CHECK-DAG: ![[FUNCVAR19:[0-9]+]] = !DILocalVariable(name: 
"FuncVar19", scope: !{{[0-9]+}}, file: !{{[0-9]+}}, line: {{[0-9]+}}, type: !{{[0-9]+}}) - // CHECK-DAG: call void @llvm.dbg.declare(metadata ptr addrspace(5) {{.*}}, metadata ![[FUNCVAR19]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef)), !dbg !{{[0-9]+}} + // CHECK-DAG: #dbg_declare(ptr addrspace(5) {{.*}}, ![[FUNCVAR19]], !DIExpression(DW_OP_constu, 1, DW_OP_swap, DW_OP_xderef), !{{[0-9]+}} int *private FuncVar19 = Tmp1; } diff --git a/clang/test/CodeGenSYCL/debug-info-kernel-variables.cpp b/clang/test/CodeGenSYCL/debug-info-kernel-variables.cpp index 361ce0abb1044a..96c0dcfdb75b6d 100644 --- a/clang/test/CodeGenSYCL/debug-info-kernel-variables.cpp +++ b/clang/test/CodeGenSYCL/debug-info-kernel-variables.cpp @@ -34,15 +34,15 @@ int my_host() { // CHECK-SAME: { // CHECK: %my_param.addr = alloca i32, align 4 // CHECK: %my_local = alloca i32, align 4 -// CHECK: call void @llvm.dbg.declare( -// CHECK-SAME: metadata ptr %my_param.addr, -// CHECK-SAME: metadata [[MY_PARAM:![0-9]+]], -// CHECK-SAME: metadata !DIExpression(DW_OP_constu, 4, DW_OP_swap, DW_OP_xderef) +// CHECK: #dbg_declare( +// CHECK-SAME: ptr %my_param.addr, +// CHECK-SAME: [[MY_PARAM:![0-9]+]], +// CHECK-SAME: !DIExpression(DW_OP_constu, 4, DW_OP_swap, DW_OP_xderef) // CHECK-SAME: ) -// CHECK: call void @llvm.dbg.declare( -// CHECK-SAME: metadata ptr %my_local, -// CHECK-SAME: metadata [[MY_LOCAL:![0-9]+]], -// CHECK-SAME: metadata !DIExpression(DW_OP_constu, 4, DW_OP_swap, DW_OP_xderef) +// CHECK: #dbg_declare( +// CHECK-SAME: ptr %my_local, +// CHECK-SAME: [[MY_LOCAL:![0-9]+]], +// CHECK-SAME: !DIExpression(DW_OP_constu, 4, DW_OP_swap, DW_OP_xderef) // CHECK-SAME: ) // CHECK: } diff --git a/clang/test/OpenMP/debug-info-complex-byval.cpp b/clang/test/OpenMP/debug-info-complex-byval.cpp index 7f6f960b1f97e7..895d73c3d818a9 100644 --- a/clang/test/OpenMP/debug-info-complex-byval.cpp +++ b/clang/test/OpenMP/debug-info-complex-byval.cpp @@ -16,7 +16,7 @@ void a() { // CHECK1-NEXT: 
entry: // CHECK1-NEXT: [[B:%.*]] = alloca { float, float }, align 4 // CHECK1-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG13:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[B]], [[META11:![0-9]+]], !DIExpression(), [[META13:![0-9]+]]) // CHECK1-NEXT: [[TMP0:%.*]] = load { float, float }, ptr [[B]], align 4, !dbg [[DBG14:![0-9]+]] // CHECK1-NEXT: store { float, float } [[TMP0]], ptr [[B_CASTED]], align 4, !dbg [[DBG14]] // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[B_CASTED]], align 8, !dbg [[DBG14]] @@ -25,35 +25,35 @@ void a() { // // // CHECK1-LABEL: define {{[^@]+}}@_Z1av.omp_outlined_debug__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], <2 x float> noundef [[B_COERCE:%.*]]) #[[ATTR2:[0-9]+]] !dbg [[DBG16:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], <2 x float> noundef [[B_COERCE:%.*]]) #[[ATTR1:[0-9]+]] !dbg [[DBG16:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[B:%.*]] = alloca { float, float }, align 4 // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store <2 x float> [[B_COERCE]], ptr [[B]], align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META24:![0-9]+]], metadata !DIExpression()), !dbg [[DBG25:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META24:![0-9]+]], !DIExpression(), [[META25:![0-9]+]]) // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META26:![0-9]+]], metadata !DIExpression()), !dbg [[DBG25]] -// CHECK1-NEXT: 
call void @llvm.dbg.declare(metadata ptr [[B]], metadata [[META27:![0-9]+]], metadata !DIExpression()), !dbg [[DBG28:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META26:![0-9]+]], !DIExpression(), [[META25]]) +// CHECK1-NEXT: #dbg_declare(ptr [[B]], [[META27:![0-9]+]], !DIExpression(), [[META28:![0-9]+]]) // CHECK1-NEXT: ret void, !dbg [[DBG29:![0-9]+]] // // // CHECK1-LABEL: define {{[^@]+}}@_Z1av.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[B:%.*]]) #[[ATTR3:[0-9]+]] !dbg [[DBG30:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[B:%.*]]) #[[ATTR2:[0-9]+]] !dbg [[DBG30:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META34:![0-9]+]], metadata !DIExpression()), !dbg [[DBG35:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META34:![0-9]+]], !DIExpression(), [[META35:![0-9]+]]) // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META36:![0-9]+]], metadata !DIExpression()), !dbg [[DBG35]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META36:![0-9]+]], !DIExpression(), [[META35]]) // CHECK1-NEXT: store i64 [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META37:![0-9]+]], metadata !DIExpression()), !dbg [[DBG35]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META37:![0-9]+]], !DIExpression(), [[META35]]) // CHECK1-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG38:![0-9]+]] // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG38]] // CHECK1-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[B_ADDR]], align 4, !dbg [[DBG38]] -// CHECK1-NEXT: call void @_Z1av.omp_outlined_debug__(ptr [[TMP0]], ptr [[TMP1]], <2 x float> [[TMP2]]) #[[ATTR4:[0-9]+]], !dbg [[DBG38]] +// CHECK1-NEXT: call void @_Z1av.omp_outlined_debug__(ptr [[TMP0]], ptr [[TMP1]], <2 x float> [[TMP2]]) #[[ATTR3:[0-9]+]], !dbg [[DBG38]] // CHECK1-NEXT: ret void, !dbg [[DBG38]] // diff --git a/clang/test/OpenMP/debug-info-openmp-array.cpp b/clang/test/OpenMP/debug-info-openmp-array.cpp index 6c5c3bbffee343..3fc1bbc668c3b1 100644 --- a/clang/test/OpenMP/debug-info-openmp-array.cpp +++ b/clang/test/OpenMP/debug-info-openmp-array.cpp @@ -21,16 +21,16 @@ void f(int m) { // CHECK1-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store i32 [[M]], ptr [[M_ADDR]], align 4 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[M_ADDR]], metadata [[META12:![0-9]+]], metadata !DIExpression()), !dbg [[DBG13:![0-9]+]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META14:![0-9]+]], metadata !DIExpression()), !dbg [[DBG15:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[M_ADDR]], [[META12:![0-9]+]], !DIExpression(), [[META13:![0-9]+]]) +// CHECK1-NEXT: #dbg_declare(ptr [[I]], [[META14:![0-9]+]], !DIExpression(), [[META15:![0-9]+]]) // CHECK1-NEXT: [[TMP0:%.*]] = load i32, ptr [[M_ADDR]], align 4, !dbg [[DBG16:![0-9]+]] // CHECK1-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64, !dbg [[DBG17:![0-9]+]] // CHECK1-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0(), !dbg [[DBG17]] // CHECK1-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8, !dbg [[DBG17]] // CHECK1-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16, !dbg [[DBG17]] // CHECK1-NEXT: store i64 [[TMP1]], ptr 
[[__VLA_EXPR0]], align 8, !dbg [[DBG17]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[__VLA_EXPR0]], metadata [[META18:![0-9]+]], metadata !DIExpression()), !dbg [[DBG20:![0-9]+]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA]], metadata [[META21:![0-9]+]], metadata !DIExpression()), !dbg [[DBG25:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[__VLA_EXPR0]], [[META18:![0-9]+]], !DIExpression(), [[META20:![0-9]+]]) +// CHECK1-NEXT: #dbg_declare(ptr [[VLA]], [[META21:![0-9]+]], !DIExpression(), [[META25:![0-9]+]]) // CHECK1-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB4:[0-9]+]], i32 3, ptr @_Z1fi.omp_outlined, ptr [[M_ADDR]], i64 [[TMP1]], ptr [[VLA]]), !dbg [[DBG26:![0-9]+]] // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8, !dbg [[DBG27:![0-9]+]] // CHECK1-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP3]]), !dbg [[DBG27]] @@ -38,7 +38,7 @@ void f(int m) { // // // CHECK1-LABEL: define {{[^@]+}}@_Z1fi.omp_outlined_debug__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CEN:%.*]]) #[[ATTR3:[0-9]+]] !dbg [[DBG28:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CEN:%.*]]) #[[ATTR2:[0-9]+]] !dbg [[DBG28:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -56,44 +56,44 @@ void f(int m) { // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call 
void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META36:![0-9]+]], metadata !DIExpression()), !dbg [[DBG37:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META36:![0-9]+]], !DIExpression(), [[META37:![0-9]+]]) // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META38:![0-9]+]], metadata !DIExpression()), !dbg [[DBG37]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META38:![0-9]+]], !DIExpression(), [[META37]]) // CHECK1-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[M_ADDR]], metadata [[META39:![0-9]+]], metadata !DIExpression()), !dbg [[DBG40:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[M_ADDR]], [[META39:![0-9]+]], !DIExpression(), [[META40:![0-9]+]]) // CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META41:![0-9]+]], metadata !DIExpression()), !dbg [[DBG37]] +// CHECK1-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META41:![0-9]+]], !DIExpression(), [[META37]]) // CHECK1-NEXT: store ptr [[CEN]], ptr [[CEN_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[CEN_ADDR]], metadata [[META42:![0-9]+]], metadata !DIExpression()), !dbg [[DBG43:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[CEN_ADDR]], [[META42:![0-9]+]], !DIExpression(), [[META43:![0-9]+]]) // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[M_ADDR]], align 8, !dbg [[DBG44:![0-9]+]] // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG44]] // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[CEN_ADDR]], align 8, !dbg [[DBG44]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IV]], metadata [[META45:![0-9]+]], metadata !DIExpression()), !dbg [[DBG37]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr 
[[DOTCAPTURE_EXPR_]], metadata [[META46:![0-9]+]], metadata !DIExpression()), !dbg [[DBG37]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_IV]], [[META45:![0-9]+]], !DIExpression(), [[META37]]) +// CHECK1-NEXT: #dbg_declare(ptr [[DOTCAPTURE_EXPR_]], [[META46:![0-9]+]], !DIExpression(), [[META37]]) // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4, !dbg [[DBG47:![0-9]+]] // CHECK1-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4, !dbg [[DBG47]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTCAPTURE_EXPR_1]], metadata [[META46]], metadata !DIExpression()), !dbg [[DBG37]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTCAPTURE_EXPR_1]], [[META46]], !DIExpression(), [[META37]]) // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !dbg [[DBG47]] // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0, !dbg [[DBG44]] // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1, !dbg [[DBG44]] // CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1, !dbg [[DBG44]] // CHECK1-NEXT: store i32 [[SUB2]], ptr [[DOTCAPTURE_EXPR_1]], align 4, !dbg [[DBG44]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META48:![0-9]+]], metadata !DIExpression()), !dbg [[DBG37]] +// CHECK1-NEXT: #dbg_declare(ptr [[I]], [[META48:![0-9]+]], !DIExpression(), [[META37]]) // CHECK1-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[DBG49:![0-9]+]] // CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !dbg [[DBG47]] // CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]], !dbg [[DBG44]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]], !dbg [[DBG44]] // CHECK1: omp.precond.then: -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_LB]], metadata [[META50:![0-9]+]], metadata !DIExpression()), !dbg [[DBG37]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_LB]], [[META50:![0-9]+]], !DIExpression(), [[META37]]) // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg 
[[DBG51:![0-9]+]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_UB]], metadata [[META52:![0-9]+]], metadata !DIExpression()), !dbg [[DBG37]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_UB]], [[META52:![0-9]+]], !DIExpression(), [[META37]]) // CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4, !dbg [[DBG44]] // CHECK1-NEXT: store i32 [[TMP6]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG51]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_STRIDE]], metadata [[META53:![0-9]+]], metadata !DIExpression()), !dbg [[DBG37]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_STRIDE]], [[META53:![0-9]+]], !DIExpression(), [[META37]]) // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG51]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IS_LAST]], metadata [[META54:![0-9]+]], metadata !DIExpression()), !dbg [[DBG37]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_IS_LAST]], [[META54:![0-9]+]], !DIExpression(), [[META37]]) // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG51]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I3]], metadata [[META48]], metadata !DIExpression()), !dbg [[DBG37]] +// CHECK1-NEXT: #dbg_declare(ptr [[I3]], [[META48]], !DIExpression(), [[META37]]) // CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG44]] // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4, !dbg [[DBG44]] // CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG55:![0-9]+]] @@ -148,7 +148,7 @@ void f(int m) { // // // CHECK1-LABEL: define {{[^@]+}}@_Z1fi.omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 
4 dereferenceable(4) [[CEN:%.*]]) #[[ATTR3]] !dbg [[DBG65:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[M:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[CEN:%.*]]) #[[ATTR2]] !dbg [[DBG65:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -156,15 +156,15 @@ void f(int m) { // CHECK1-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[CEN_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META66:![0-9]+]], metadata !DIExpression()), !dbg [[DBG67:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META66:![0-9]+]], !DIExpression(), [[META67:![0-9]+]]) // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META68:![0-9]+]], metadata !DIExpression()), !dbg [[DBG67]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META68:![0-9]+]], !DIExpression(), [[META67]]) // CHECK1-NEXT: store ptr [[M]], ptr [[M_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[M_ADDR]], metadata [[META69:![0-9]+]], metadata !DIExpression()), !dbg [[DBG67]] +// CHECK1-NEXT: #dbg_declare(ptr [[M_ADDR]], [[META69:![0-9]+]], !DIExpression(), [[META67]]) // CHECK1-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG67]] +// CHECK1-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META70:![0-9]+]], !DIExpression(), [[META67]]) // CHECK1-NEXT: store ptr [[CEN]], ptr 
[[CEN_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[CEN_ADDR]], metadata [[META71:![0-9]+]], metadata !DIExpression()), !dbg [[DBG67]] +// CHECK1-NEXT: #dbg_declare(ptr [[CEN_ADDR]], [[META71:![0-9]+]], !DIExpression(), [[META67]]) // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[M_ADDR]], align 8, !dbg [[DBG72:![0-9]+]] // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG72]] // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[CEN_ADDR]], align 8, !dbg [[DBG72]] @@ -172,6 +172,6 @@ void f(int m) { // CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG72]] // CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[M_ADDR]], align 8, !dbg [[DBG72]] // CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CEN_ADDR]], align 8, !dbg [[DBG72]] -// CHECK1-NEXT: call void @_Z1fi.omp_outlined_debug__(ptr [[TMP3]], ptr [[TMP4]], ptr [[TMP5]], i64 [[TMP1]], ptr [[TMP6]]) #[[ATTR4:[0-9]+]], !dbg [[DBG72]] +// CHECK1-NEXT: call void @_Z1fi.omp_outlined_debug__(ptr [[TMP3]], ptr [[TMP4]], ptr [[TMP5]], i64 [[TMP1]], ptr [[TMP6]]) #[[ATTR3:[0-9]+]], !dbg [[DBG72]] // CHECK1-NEXT: ret void, !dbg [[DBG72]] // diff --git a/clang/test/OpenMP/debug_private.c b/clang/test/OpenMP/debug_private.c index e1d095b5de2baf..26b5358dae2fa5 100644 --- a/clang/test/OpenMP/debug_private.c +++ b/clang/test/OpenMP/debug_private.c @@ -11,10 +11,10 @@ // CHECK: define internal i32 @.omp_task_entry. 
-// CHECK: call void @llvm.dbg.declare(metadata ptr %.priv.ptr.addr.i, metadata [[PRIV1:![0-9]+]], metadata !DIExpression(DW_OP_deref)) -// CHECK: call void @llvm.dbg.declare(metadata ptr %.priv.ptr.addr1.i, metadata [[PRIV2:![0-9]+]], metadata !DIExpression(DW_OP_deref)) -// CHECK: call void @llvm.dbg.declare(metadata ptr %.firstpriv.ptr.addr.i, metadata [[FPRIV:![0-9]+]], metadata !DIExpression(DW_OP_deref)) -// NEG-NOT: call void @llvm.dbg.declare +// CHECK: #dbg_declare(ptr %.priv.ptr.addr.i, [[PRIV1:![0-9]+]], !DIExpression(DW_OP_deref), +// CHECK: #dbg_declare(ptr %.priv.ptr.addr1.i, [[PRIV2:![0-9]+]], !DIExpression(DW_OP_deref), +// CHECK: #dbg_declare(ptr %.firstpriv.ptr.addr.i, [[FPRIV:![0-9]+]], !DIExpression(DW_OP_deref), +// NEG-NOT: #dbg_declare // CHECK: [[PRIV1]] = !DILocalVariable(name: "priv1" // CHECK: [[PRIV2]] = !DILocalVariable(name: "priv2" diff --git a/clang/test/OpenMP/debug_task_shared.c b/clang/test/OpenMP/debug_task_shared.c index 7bbd080befec2a..da52c9a5dd8713 100644 --- a/clang/test/OpenMP/debug_task_shared.c +++ b/clang/test/OpenMP/debug_task_shared.c @@ -13,9 +13,9 @@ // CHECK-LABEL: define internal i32 @.omp_task_entry. 
// CHECK-DAG: [[CONTEXT:%[0-9]+]] = load ptr, ptr %__context.addr.i, align 8 -// CHECK-DAG: call void @llvm.dbg.declare(metadata ptr [[CONTEXT]], metadata [[SHARE2:![0-9]+]], metadata !DIExpression(DW_OP_deref)) -// CHECK-DAG: call void @llvm.dbg.declare(metadata ptr [[CONTEXT]], metadata [[SHARE3:![0-9]+]], metadata !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref)) -// CHECK-DAG: call void @llvm.dbg.declare(metadata ptr [[CONTEXT]], metadata [[SHARE1:![0-9]+]], metadata !DIExpression(DW_OP_plus_uconst, 16, DW_OP_deref)) +// CHECK-DAG: #dbg_declare(ptr [[CONTEXT]], [[SHARE2:![0-9]+]], !DIExpression(DW_OP_deref), +// CHECK-DAG: #dbg_declare(ptr [[CONTEXT]], [[SHARE3:![0-9]+]], !DIExpression(DW_OP_plus_uconst, 8, DW_OP_deref), +// CHECK-DAG: #dbg_declare(ptr [[CONTEXT]], [[SHARE1:![0-9]+]], !DIExpression(DW_OP_plus_uconst, 16, DW_OP_deref), // CHECK-DAG: [[SHARE2]] = !DILocalVariable(name: "share2" // CHECK-DAG: [[SHARE3]] = !DILocalVariable(name: "share3" @@ -23,7 +23,7 @@ // NEG-LABEL: define internal i32 @.omp_task_entry. 
// NEG: [[CONTEXT:%[0-9]+]] = load ptr, ptr %__context.addr.i, align 8 -// NEG-NOT: call void @llvm.dbg.declare(metadata ptr [[CONTEXT]], metadata {{![0-9]+}}, metadata !DIExpression(DW_OP_deref)) +// NEG-NOT: #dbg_declare(ptr [[CONTEXT]], {{![0-9]+}}, !DIExpression(DW_OP_deref), extern int printf(const char *, ...); diff --git a/clang/test/OpenMP/debug_threadprivate_copyin.c b/clang/test/OpenMP/debug_threadprivate_copyin.c index 8dcb9350e8eacd..ac86299ff1600d 100644 --- a/clang/test/OpenMP/debug_threadprivate_copyin.c +++ b/clang/test/OpenMP/debug_threadprivate_copyin.c @@ -7,12 +7,12 @@ // expected-no-diagnostics // CHECK: define internal void @main.omp_outlined_debug__( -// CHECK: call void @llvm.dbg.declare(metadata ptr %.global_tid..addr, -// CHECK: call void @llvm.dbg.declare(metadata ptr %.bound_tid..addr, -// CHECK: call void @llvm.dbg.declare(metadata ptr %nt.addr +// CHECK: #dbg_declare(ptr %.global_tid..addr, +// CHECK: #dbg_declare(ptr %.bound_tid..addr, +// CHECK: #dbg_declare(ptr %nt.addr // CHECK: store ptr %gbl_dynamic_int, ptr %gbl_dynamic_int.addr, align 8 -// CHECK-NOT: call void @llvm.dbg.declare(metadata ptr %gbl_dynamic_int.addr -// CHECK-NOT: call void @llvm.dbg.declare(metadata ptr %gbl_static_int.addr +// CHECK-NOT: #dbg_declare(ptr %gbl_dynamic_int.addr +// CHECK-NOT: #dbg_declare(ptr %gbl_static_int.addr extern int printf(const char *, ...); extern void omp_set_num_threads(int); diff --git a/clang/test/OpenMP/irbuilder_nested_parallel_for.c b/clang/test/OpenMP/irbuilder_nested_parallel_for.c index d737ab33e9ca45..7effefb46c8d7b 100644 --- a/clang/test/OpenMP/irbuilder_nested_parallel_for.c +++ b/clang/test/OpenMP/irbuilder_nested_parallel_for.c @@ -1535,8 +1535,8 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK-DEBUG: omp.par.region: -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata 
[[META21:![0-9]+]], metadata !DIExpression()), !dbg [[DBG26:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[DBG26]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I]], [[META21:![0-9]+]], !DIExpression(), [[META26:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[META26]] // CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG27:![0-9]+]] // CHECK-DEBUG-NEXT: store ptr [[I]], ptr [[TMP1]], align 8, !dbg [[DBG27]] // CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_0]], ptr [[AGG_CAPTURED1]], i32 0, i32 0, !dbg [[DBG27]] @@ -1586,7 +1586,7 @@ void parallel_for_2(float *r, int a, double b) { // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] !dbg [[DBG31:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] !dbg [[DBG31:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -1594,79 +1594,79 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META40:![0-9]+]], metadata !DIExpression()), !dbg [[DBG41:![0-9]+]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META39:![0-9]+]], !DIExpression(), [[META40:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata 
[[META42:![0-9]+]], metadata !DIExpression()), !dbg [[DBG41]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META41:![0-9]+]], !DIExpression(), [[META40]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META43:![0-9]+]], metadata !DIExpression()), !dbg [[DBG45:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG46:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG46]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG46]] -// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG45]] -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META48:![0-9]+]], metadata !DIExpression()), !dbg [[DBG49:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG49]] -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META50:![0-9]+]], metadata !DIExpression()), !dbg [[DBG49]] -// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG49]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG49]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG49]] -// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG49]] -// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG49]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META42:![0-9]+]], !DIExpression(), [[META44:![0-9]+]]) +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG45:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG45]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], 
align 4, !dbg [[DBG45]] +// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META44]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META47:![0-9]+]], !DIExpression(), [[META48:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META48]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META49:![0-9]+]], !DIExpression(), [[META48]]) +// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META48]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META48]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META48]] +// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META48]] +// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META48]] // CHECK-DEBUG: cond.true: -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG49]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG49]] -// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG49]] -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG49]] -// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG49]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG49]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG49]] -// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG49]] -// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG49]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META48]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META48]] +// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META48]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META48]] +// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 
[[TMP8]], 1, !dbg [[META48]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META48]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META48]] +// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META48]] +// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META48]] // CHECK-DEBUG: cond.false: -// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG49]] +// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META48]] // CHECK-DEBUG: cond.end: -// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG49]] -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG49]] -// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG49]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG51:![0-9]+]] +// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META48]] +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META48]] +// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META48]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG50:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.1 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG53:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG52:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void 
@llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META61:![0-9]+]], metadata !DIExpression()), !dbg [[DBG62:![0-9]+]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META60:![0-9]+]], !DIExpression(), [[META61:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META63:![0-9]+]], metadata !DIExpression()), !dbg [[DBG62]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META62:![0-9]+]], !DIExpression(), [[META61]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META64:![0-9]+]], metadata !DIExpression()), !dbg [[DBG62]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META63:![0-9]+]], !DIExpression(), [[META61]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG65:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG65]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG67:![0-9]+]] -// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG67]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG67]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG67]] -// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG62]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG65]] +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG64:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG64]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg 
[[DBG66:![0-9]+]] +// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG66]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG66]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG66]] +// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META61]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG64]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_1Pfid -// CHECK-DEBUG-SAME: (ptr noundef [[R:%.*]], i32 noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] !dbg [[DBG70:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef [[R:%.*]], i32 noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] !dbg [[DBG69:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[STRUCTARG17:%.*]] = alloca { ptr, ptr, ptr }, align 8 // CHECK-DEBUG-NEXT: [[R_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 // CHECK-DEBUG-NEXT: store ptr [[R]], ptr [[R_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[R_ADDR]], metadata [[META76:![0-9]+]], metadata !DIExpression()), !dbg [[DBG77:![0-9]+]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[R_ADDR]], [[META75:![0-9]+]], !DIExpression(), [[META76:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META78:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79:![0-9]+]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META77:![0-9]+]], !DIExpression(), [[META78:![0-9]+]]) // CHECK-DEBUG-NEXT: store double [[B]], ptr [[B_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META80:![0-9]+]], metadata !DIExpression()), !dbg [[DBG81:![0-9]+]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB6:[0-9]+]]), !dbg 
[[DBG82:![0-9]+]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META79:![0-9]+]], !DIExpression(), [[META80:![0-9]+]]) +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB6:[0-9]+]]), !dbg [[DBG81:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK-DEBUG: omp_parallel: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR18:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG17]], i32 0, i32 0 @@ -1675,16 +1675,16 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: store ptr [[B_ADDR]], ptr [[GEP_B_ADDR19]], align 8 // CHECK-DEBUG-NEXT: [[GEP_R_ADDR20:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG17]], i32 0, i32 2 // CHECK-DEBUG-NEXT: store ptr [[R_ADDR]], ptr [[GEP_R_ADDR20]], align 8 -// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB6]], i32 1, ptr @_Z14parallel_for_1Pfid..omp_par.4, ptr [[STRUCTARG17]]), !dbg [[DBG83:![0-9]+]] +// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB6]], i32 1, ptr @_Z14parallel_for_1Pfid..omp_par.4, ptr [[STRUCTARG17]]), !dbg [[DBG82:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT16:%.*]] // CHECK-DEBUG: omp.par.outlined.exit16: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // CHECK-DEBUG: omp.par.exit.split: -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG85:![0-9]+]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG84:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_1Pfid..omp_par.4 -// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG86:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG85:![0-9]+]] { // CHECK-DEBUG-NEXT: omp.par.entry: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 // CHECK-DEBUG-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 @@ -1699,7 +1699,7 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4 // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK-DEBUG: omp.par.region: -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB8:[0-9]+]]), !dbg [[DBG87:![0-9]+]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB8:[0-9]+]]), !dbg [[DBG86:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK-DEBUG: omp_parallel: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR1:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 @@ -1708,22 +1708,22 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: store ptr [[LOADGEP_B_ADDR]], ptr [[GEP_B_ADDR2]], align 8 // CHECK-DEBUG-NEXT: [[GEP_R_ADDR3:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2 // CHECK-DEBUG-NEXT: store ptr 
[[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR3]], align 8 -// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB8]], i32 1, ptr @_Z14parallel_for_1Pfid..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG89:![0-9]+]] +// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB8]], i32 1, ptr @_Z14parallel_for_1Pfid..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG88:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] // CHECK-DEBUG: omp.par.outlined.exit: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT7_SPLIT:%.*]] // CHECK-DEBUG: omp.par.exit7.split: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG93:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG92:![0-9]+]] // CHECK-DEBUG: omp.par.region.parallel.after: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK-DEBUG: omp.par.pre_finalize: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT16_EXITSTUB:%.*]], !dbg [[DBG93]] +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT16_EXITSTUB:%.*]], !dbg [[DBG92]] // CHECK-DEBUG: omp.par.outlined.exit16.exitStub: // CHECK-DEBUG-NEXT: ret void // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_1Pfid..omp_par -// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR2:%.*]], ptr noalias [[ZERO_ADDR3:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG94:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR2:%.*]], ptr noalias [[ZERO_ADDR3:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG93:![0-9]+]] { // CHECK-DEBUG-NEXT: omp.par.entry4: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 // CHECK-DEBUG-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 @@ -1745,65 +1745,65 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION5:%.*]] // CHECK-DEBUG: omp.par.region5: -// 
CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META95:![0-9]+]], metadata !DIExpression()), !dbg [[DBG100:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[DBG100]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG101:![0-9]+]] -// CHECK-DEBUG-NEXT: store ptr [[I]], ptr [[TMP2]], align 8, !dbg [[DBG101]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED12]], i32 0, i32 0, !dbg [[DBG101]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG102:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG101]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.2(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[DBG101]] -// CHECK-DEBUG-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[DBG101]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[DBG101]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I]], [[META94:![0-9]+]], !DIExpression(), [[META99:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[META99]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG100:![0-9]+]] +// CHECK-DEBUG-NEXT: store ptr [[I]], ptr [[TMP2]], align 8, !dbg [[DBG100]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED12]], i32 0, i32 0, !dbg [[DBG100]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG101:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG100]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.2(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[DBG100]] +// CHECK-DEBUG-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[DBG100]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg 
[[DBG100]] // CHECK-DEBUG: omp_loop.preheader: -// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG101]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT]], 1, !dbg [[DBG101]] -// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG101]] -// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE]], align 4, !dbg [[DBG101]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM14:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB10:[0-9]+]]), !dbg [[DBG101]] -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB10]], i32 [[OMP_GLOBAL_THREAD_NUM14]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0), !dbg [[DBG101]] -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG101]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG101]] -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG101]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG101]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[DBG101]] +// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG100]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT]], 1, !dbg [[DBG100]] +// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG100]] +// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE]], align 4, !dbg [[DBG100]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM14:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB10:[0-9]+]]), !dbg [[DBG100]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB10]], i32 [[OMP_GLOBAL_THREAD_NUM14]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0), !dbg [[DBG100]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG100]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr 
[[P_UPPERBOUND]], align 4, !dbg [[DBG100]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG100]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG100]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[DBG100]] // CHECK-DEBUG: omp_loop.header: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[DBG101]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[DBG101]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[DBG100]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[DBG100]] // CHECK-DEBUG: omp_loop.cond: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP9]], !dbg [[DBG101]] -// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG101]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP9]], !dbg [[DBG100]] +// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG100]] // CHECK-DEBUG: omp_loop.exit: -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB10]], i32 [[OMP_GLOBAL_THREAD_NUM14]]), !dbg [[DBG101]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM15:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB10]]), !dbg [[DBG103:![0-9]+]] -// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB11:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM15]]), !dbg [[DBG103]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER:%.*]], !dbg [[DBG101]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB10]], i32 [[OMP_GLOBAL_THREAD_NUM14]]), !dbg [[DBG100]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM15:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB10]]), !dbg [[DBG102:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr 
@[[GLOB11:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM15]]), !dbg [[DBG102]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER:%.*]], !dbg [[DBG100]] // CHECK-DEBUG: omp_loop.after: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION5_PARALLEL_AFTER:%.*]], !dbg [[DBG104:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION5_PARALLEL_AFTER:%.*]], !dbg [[DBG103:![0-9]+]] // CHECK-DEBUG: omp.par.region5.parallel.after: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE6:%.*]] // CHECK-DEBUG: omp.par.pre_finalize6: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG104]] +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG103]] // CHECK-DEBUG: omp_loop.body: -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP6]], !dbg [[DBG103]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.3(ptr [[I]], i32 [[TMP10]], ptr [[AGG_CAPTURED12]]), !dbg [[DBG101]] -// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG105:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP11]] to double, !dbg [[DBG105]] -// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG107:![0-9]+]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP12]], !dbg [[DBG108:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD]] to float, !dbg [[DBG105]] -// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG109:![0-9]+]] -// CHECK-DEBUG-NEXT: store float [[CONV13]], ptr [[TMP13]], align 4, !dbg [[DBG110:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC]], !dbg [[DBG101]] +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP6]], !dbg [[DBG102]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.3(ptr [[I]], i32 [[TMP10]], ptr [[AGG_CAPTURED12]]), !dbg [[DBG100]] +// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG104:![0-9]+]] +// CHECK-DEBUG-NEXT: 
[[CONV:%.*]] = sitofp i32 [[TMP11]] to double, !dbg [[DBG104]] +// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG106:![0-9]+]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP12]], !dbg [[DBG107:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD]] to float, !dbg [[DBG104]] +// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG108:![0-9]+]] +// CHECK-DEBUG-NEXT: store float [[CONV13]], ptr [[TMP13]], align 4, !dbg [[DBG109:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC]], !dbg [[DBG100]] // CHECK-DEBUG: omp_loop.inc: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG101]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[DBG101]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG100]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[DBG100]] // CHECK-DEBUG: omp.par.outlined.exit.exitStub: // CHECK-DEBUG-NEXT: ret void // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.2 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG111:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG110:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -1811,67 +1811,67 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META112:![0-9]+]], metadata !DIExpression()), 
!dbg [[DBG113:![0-9]+]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META111:![0-9]+]], !DIExpression(), [[META112:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META114:![0-9]+]], metadata !DIExpression()), !dbg [[DBG113]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META113:![0-9]+]], !DIExpression(), [[META112]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META115:![0-9]+]], metadata !DIExpression()), !dbg [[DBG117:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG118:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG118]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG118]] -// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG117]] -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META120:![0-9]+]], metadata !DIExpression()), !dbg [[DBG121:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG121]] -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META122:![0-9]+]], metadata !DIExpression()), !dbg [[DBG121]] -// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG121]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG121]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG121]] -// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG121]] -// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG121]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], 
[[META114:![0-9]+]], !DIExpression(), [[META116:![0-9]+]]) +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG117:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG117]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG117]] +// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META116]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META119:![0-9]+]], !DIExpression(), [[META120:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META120]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META121:![0-9]+]], !DIExpression(), [[META120]]) +// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META120]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META120]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META120]] +// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META120]] +// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META120]] // CHECK-DEBUG: cond.true: -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG121]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG121]] -// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG121]] -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG121]] -// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG121]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG121]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG121]] -// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG121]] -// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG121]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = 
load i32, ptr [[DOTSTOP]], align 4, !dbg [[META120]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META120]] +// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META120]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META120]] +// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META120]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META120]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META120]] +// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META120]] +// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META120]] // CHECK-DEBUG: cond.false: -// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG121]] +// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META120]] // CHECK-DEBUG: cond.end: -// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG121]] -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG121]] -// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG121]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG123:![0-9]+]] +// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META120]] +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META120]] +// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META120]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG122:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.3 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG125:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) 
#[[ATTR3]] !dbg [[DBG124:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META126:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127:![0-9]+]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META125:![0-9]+]], !DIExpression(), [[META126:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META128:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META127:![0-9]+]], !DIExpression(), [[META126]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META129:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META128:![0-9]+]], !DIExpression(), [[META126]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG130:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG130]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG132:![0-9]+]] -// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG132]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG132]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG132]] -// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 
4, !dbg [[DBG127]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG130]] +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG129:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG129]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG131:![0-9]+]] +// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG131]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG131]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG131]] +// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META126]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG129]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_2Pfid -// CHECK-DEBUG-SAME: (ptr noundef [[R:%.*]], i32 noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] !dbg [[DBG133:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef [[R:%.*]], i32 noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] !dbg [[DBG132:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr }, align 8 // CHECK-DEBUG-NEXT: [[R_ADDR:%.*]] = alloca ptr, align 8 @@ -1886,12 +1886,12 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[P_UPPERBOUND205:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[P_STRIDE206:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[R]], ptr [[R_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[R_ADDR]], metadata [[META134:![0-9]+]], metadata !DIExpression()), !dbg [[DBG135:![0-9]+]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[R_ADDR]], [[META133:![0-9]+]], !DIExpression(), [[META134:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META136:![0-9]+]], metadata !DIExpression()), !dbg 
[[DBG137:![0-9]+]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META135:![0-9]+]], !DIExpression(), [[META136:![0-9]+]]) // CHECK-DEBUG-NEXT: store double [[B]], ptr [[B_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META138:![0-9]+]], metadata !DIExpression()), !dbg [[DBG139:![0-9]+]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB13:[0-9]+]]), !dbg [[DBG140:![0-9]+]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META137:![0-9]+]], !DIExpression(), [[META138:![0-9]+]]) +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB13:[0-9]+]]), !dbg [[DBG139:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK-DEBUG: omp_parallel: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 @@ -1900,64 +1900,64 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: store ptr [[B_ADDR]], ptr [[GEP_B_ADDR]], align 8 // CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2 // CHECK-DEBUG-NEXT: store ptr [[R_ADDR]], ptr [[GEP_R_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB13]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.23, ptr [[STRUCTARG]]), !dbg [[DBG141:![0-9]+]] +// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB13]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.23, ptr [[STRUCTARG]]), !dbg [[DBG140:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT184:%.*]] // CHECK-DEBUG: omp.par.outlined.exit184: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // CHECK-DEBUG: omp.par.exit.split: -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I185]], metadata [[META145:![0-9]+]], metadata !DIExpression()), !dbg [[DBG148:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 0, ptr [[I185]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[AGG_CAPTURED186]], i32 0, i32 0, !dbg [[DBG149:![0-9]+]] -// CHECK-DEBUG-NEXT: store ptr [[I185]], ptr [[TMP0]], align 8, !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[AGG_CAPTURED187]], i32 0, i32 0, !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[I185]], align 4, !dbg [[DBG150:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4, !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.19(ptr [[DOTCOUNT_ADDR188]], ptr [[AGG_CAPTURED186]]), !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: [[DOTCOUNT189:%.*]] = load i32, ptr [[DOTCOUNT_ADDR188]], align 4, !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER190:%.*]], !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I185]], [[META144:![0-9]+]], !DIExpression(), [[META147:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 0, ptr [[I185]], align 4, !dbg [[META147]] +// CHECK-DEBUG-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], ptr [[AGG_CAPTURED186]], i32 0, i32 0, !dbg [[DBG148:![0-9]+]] +// CHECK-DEBUG-NEXT: store ptr [[I185]], ptr [[TMP0]], align 8, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], ptr [[AGG_CAPTURED187]], i32 0, i32 0, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[I185]], align 
4, !dbg [[DBG149:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 4, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.19(ptr [[DOTCOUNT_ADDR188]], ptr [[AGG_CAPTURED186]]), !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[DOTCOUNT189:%.*]] = load i32, ptr [[DOTCOUNT_ADDR188]], align 4, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER190:%.*]], !dbg [[DBG148]] // CHECK-DEBUG: omp_loop.preheader190: -// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND204]], align 4, !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1, !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[P_UPPERBOUND205]], align 4, !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE206]], align 4, !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM207:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB42:[0-9]+]]), !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]], i32 34, ptr [[P_LASTITER203]], ptr [[P_LOWERBOUND204]], ptr [[P_UPPERBOUND205]], ptr [[P_STRIDE206]], i32 1, i32 0), !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[P_LOWERBOUND204]], align 4, !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[P_UPPERBOUND205]], align 4, !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]], !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 1, !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191:%.*]], !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND204]], align 4, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[P_UPPERBOUND205]], align 4, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE206]], align 4, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM207:%.*]] = call i32 
@__kmpc_global_thread_num(ptr @[[GLOB42:[0-9]+]]), !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]], i32 34, ptr [[P_LASTITER203]], ptr [[P_LOWERBOUND204]], ptr [[P_UPPERBOUND205]], ptr [[P_STRIDE206]], i32 1, i32 0), !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[P_LOWERBOUND204]], align 4, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[P_UPPERBOUND205]], align 4, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]], !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 1, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191:%.*]], !dbg [[DBG148]] // CHECK-DEBUG: omp_loop.header191: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ], !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND192:%.*]], !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ], !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND192:%.*]], !dbg [[DBG148]] // CHECK-DEBUG: omp_loop.cond192: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], [[TMP7]], !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]], !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], [[TMP7]], !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]], !dbg [[DBG148]] // CHECK-DEBUG: omp_loop.body193: -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]], !dbg [[DBG151:![0-9]+]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.20(ptr [[I185]], i32 [[TMP8]], ptr [[AGG_CAPTURED187]]), !dbg [[DBG149]] 
-// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG152:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV200:%.*]] = sitofp i32 [[TMP9]] to double, !dbg [[DBG152]] -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load double, ptr [[B_ADDR]], align 8, !dbg [[DBG151]] -// CHECK-DEBUG-NEXT: [[ADD201:%.*]] = fadd double [[CONV200]], [[TMP10]], !dbg [[DBG153:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV202:%.*]] = fptrunc double [[ADD201]] to float, !dbg [[DBG152]] -// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load ptr, ptr [[R_ADDR]], align 8, !dbg [[DBG154:![0-9]+]] -// CHECK-DEBUG-NEXT: store float [[CONV202]], ptr [[TMP11]], align 4, !dbg [[DBG155:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC194]], !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]], !dbg [[DBG150:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.20(ptr [[I185]], i32 [[TMP8]], ptr [[AGG_CAPTURED187]]), !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG151:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV200:%.*]] = sitofp i32 [[TMP9]] to double, !dbg [[DBG151]] +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load double, ptr [[B_ADDR]], align 8, !dbg [[DBG150]] +// CHECK-DEBUG-NEXT: [[ADD201:%.*]] = fadd double [[CONV200]], [[TMP10]], !dbg [[DBG152:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV202:%.*]] = fptrunc double [[ADD201]] to float, !dbg [[DBG151]] +// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load ptr, ptr [[R_ADDR]], align 8, !dbg [[DBG153:![0-9]+]] +// CHECK-DEBUG-NEXT: store float [[CONV202]], ptr [[TMP11]], align 4, !dbg [[DBG154:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC194]], !dbg [[DBG148]] // CHECK-DEBUG: omp_loop.inc194: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1, !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191]], !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: br label 
[[OMP_LOOP_HEADER191]], !dbg [[DBG148]] // CHECK-DEBUG: omp_loop.exit195: -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]]), !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB42]]), !dbg [[DBG151]] -// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB43:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM208]]), !dbg [[DBG151]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER196:%.*]], !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]]), !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB42]]), !dbg [[DBG150]] +// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB43:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM208]]), !dbg [[DBG150]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER196:%.*]], !dbg [[DBG148]] // CHECK-DEBUG: omp_loop.after196: -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG156:![0-9]+]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG155:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_2Pfid..omp_par.23 -// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG157:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG156:![0-9]+]] { // CHECK-DEBUG-NEXT: omp.par.entry: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 // CHECK-DEBUG-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 @@ -1988,41 +1988,41 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[P_STRIDE181:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK-DEBUG: omp.par.region: -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata 
[[META158:![0-9]+]], metadata !DIExpression()), !dbg [[DBG162:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[DBG162]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG163:![0-9]+]] -// CHECK-DEBUG-NEXT: store ptr [[I]], ptr [[TMP2]], align 8, !dbg [[DBG163]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[AGG_CAPTURED1]], i32 0, i32 0, !dbg [[DBG163]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG164:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG163]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.5(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[DBG163]] -// CHECK-DEBUG-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[DBG163]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[DBG163]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I]], [[META157:![0-9]+]], !DIExpression(), [[META161:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 0, ptr [[I]], align 4, !dbg [[META161]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_3]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG162:![0-9]+]] +// CHECK-DEBUG-NEXT: store ptr [[I]], ptr [[TMP2]], align 8, !dbg [[DBG162]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], ptr [[AGG_CAPTURED1]], i32 0, i32 0, !dbg [[DBG162]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4, !dbg [[DBG163:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG162]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.5(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[DBG162]] +// CHECK-DEBUG-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[DBG162]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[DBG162]] // CHECK-DEBUG: omp_loop.preheader: -// CHECK-DEBUG-NEXT: store 
i32 0, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG163]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT]], 1, !dbg [[DBG163]] -// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG163]] -// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE]], align 4, !dbg [[DBG163]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15:[0-9]+]]), !dbg [[DBG163]] -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0), !dbg [[DBG163]] -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG163]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG163]] -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG163]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG163]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[DBG163]] +// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG162]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT]], 1, !dbg [[DBG162]] +// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG162]] +// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE]], align 4, !dbg [[DBG162]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15:[0-9]+]]), !dbg [[DBG162]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0), !dbg [[DBG162]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG162]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG162]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 
[[TMP7]], [[TMP6]], !dbg [[DBG162]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG162]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[DBG162]] // CHECK-DEBUG: omp_loop.header: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[DBG163]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[DBG163]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[DBG162]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[DBG162]] // CHECK-DEBUG: omp_loop.cond: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP9]], !dbg [[DBG163]] -// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG163]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP9]], !dbg [[DBG162]] +// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG162]] // CHECK-DEBUG: omp_loop.exit: -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]), !dbg [[DBG163]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15]]), !dbg [[DBG165:![0-9]+]] -// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB16:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]), !dbg [[DBG165]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER:%.*]], !dbg [[DBG163]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB15]], i32 [[OMP_GLOBAL_THREAD_NUM3]]), !dbg [[DBG162]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB15]]), !dbg [[DBG164:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB16:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM4]]), !dbg [[DBG164]] +// CHECK-DEBUG-NEXT: 
br label [[OMP_LOOP_AFTER:%.*]], !dbg [[DBG162]] // CHECK-DEBUG: omp_loop.after: -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB18:[0-9]+]]), !dbg [[DBG166:![0-9]+]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM5:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB18:[0-9]+]]), !dbg [[DBG165:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK-DEBUG: omp_parallel: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR215:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG214]], i32 0, i32 0 @@ -2031,84 +2031,84 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: store ptr [[LOADGEP_B_ADDR]], ptr [[GEP_B_ADDR216]], align 8 // CHECK-DEBUG-NEXT: [[GEP_R_ADDR217:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG214]], i32 0, i32 2 // CHECK-DEBUG-NEXT: store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR217]], align 8 -// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB18]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.22, ptr [[STRUCTARG214]]), !dbg [[DBG167:![0-9]+]] +// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB18]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.22, ptr [[STRUCTARG214]]), !dbg [[DBG166:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT159:%.*]] // CHECK-DEBUG: omp.par.outlined.exit159: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT11_SPLIT:%.*]] // CHECK-DEBUG: omp.par.exit11.split: -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I160]], metadata [[META171:![0-9]+]], metadata !DIExpression()), !dbg [[DBG174:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 0, ptr [[I160]], align 4, !dbg [[DBG174]] -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[AGG_CAPTURED161]], i32 0, i32 0, !dbg [[DBG175:![0-9]+]] -// CHECK-DEBUG-NEXT: store ptr [[I160]], ptr [[TMP10]], align 8, !dbg [[DBG175]] -// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[AGG_CAPTURED162]], i32 0, i32 0, !dbg [[DBG175]] -// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load i32, ptr [[I160]], align 4, !dbg [[DBG176:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4, !dbg [[DBG175]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.17(ptr [[DOTCOUNT_ADDR163]], ptr [[AGG_CAPTURED161]]), !dbg [[DBG175]] -// CHECK-DEBUG-NEXT: [[DOTCOUNT164:%.*]] = load i32, ptr [[DOTCOUNT_ADDR163]], align 4, !dbg [[DBG175]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER165:%.*]], !dbg [[DBG175]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I160]], [[META170:![0-9]+]], !DIExpression(), [[META173:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 0, ptr [[I160]], align 4, !dbg [[META173]] +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_15]], ptr [[AGG_CAPTURED161]], i32 0, i32 0, !dbg [[DBG174:![0-9]+]] +// CHECK-DEBUG-NEXT: store ptr [[I160]], ptr [[TMP10]], align 8, !dbg [[DBG174]] +// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_16]], ptr [[AGG_CAPTURED162]], i32 0, i32 0, !dbg [[DBG174]] +// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load i32, ptr 
[[I160]], align 4, !dbg [[DBG175:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4, !dbg [[DBG174]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.17(ptr [[DOTCOUNT_ADDR163]], ptr [[AGG_CAPTURED161]]), !dbg [[DBG174]] +// CHECK-DEBUG-NEXT: [[DOTCOUNT164:%.*]] = load i32, ptr [[DOTCOUNT_ADDR163]], align 4, !dbg [[DBG174]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER165:%.*]], !dbg [[DBG174]] // CHECK-DEBUG: omp_loop.preheader165: -// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND179]], align 4, !dbg [[DBG175]] -// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = sub i32 [[DOTCOUNT164]], 1, !dbg [[DBG175]] -// CHECK-DEBUG-NEXT: store i32 [[TMP13]], ptr [[P_UPPERBOUND180]], align 4, !dbg [[DBG175]] -// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE181]], align 4, !dbg [[DBG175]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM182:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB39:[0-9]+]]), !dbg [[DBG175]] -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB39]], i32 [[OMP_GLOBAL_THREAD_NUM182]], i32 34, ptr [[P_LASTITER178]], ptr [[P_LOWERBOUND179]], ptr [[P_UPPERBOUND180]], ptr [[P_STRIDE181]], i32 1, i32 0), !dbg [[DBG175]] -// CHECK-DEBUG-NEXT: [[TMP14:%.*]] = load i32, ptr [[P_LOWERBOUND179]], align 4, !dbg [[DBG175]] -// CHECK-DEBUG-NEXT: [[TMP15:%.*]] = load i32, ptr [[P_UPPERBOUND180]], align 4, !dbg [[DBG175]] -// CHECK-DEBUG-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], [[TMP14]], !dbg [[DBG175]] -// CHECK-DEBUG-NEXT: [[TMP17:%.*]] = add i32 [[TMP16]], 1, !dbg [[DBG175]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER166:%.*]], !dbg [[DBG175]] +// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND179]], align 4, !dbg [[DBG174]] +// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = sub i32 [[DOTCOUNT164]], 1, !dbg [[DBG174]] +// CHECK-DEBUG-NEXT: store i32 [[TMP13]], ptr [[P_UPPERBOUND180]], align 4, !dbg [[DBG174]] +// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE181]], align 4, !dbg [[DBG174]] +// CHECK-DEBUG-NEXT: 
[[OMP_GLOBAL_THREAD_NUM182:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB39:[0-9]+]]), !dbg [[DBG174]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB39]], i32 [[OMP_GLOBAL_THREAD_NUM182]], i32 34, ptr [[P_LASTITER178]], ptr [[P_LOWERBOUND179]], ptr [[P_UPPERBOUND180]], ptr [[P_STRIDE181]], i32 1, i32 0), !dbg [[DBG174]] +// CHECK-DEBUG-NEXT: [[TMP14:%.*]] = load i32, ptr [[P_LOWERBOUND179]], align 4, !dbg [[DBG174]] +// CHECK-DEBUG-NEXT: [[TMP15:%.*]] = load i32, ptr [[P_UPPERBOUND180]], align 4, !dbg [[DBG174]] +// CHECK-DEBUG-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], [[TMP14]], !dbg [[DBG174]] +// CHECK-DEBUG-NEXT: [[TMP17:%.*]] = add i32 [[TMP16]], 1, !dbg [[DBG174]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER166:%.*]], !dbg [[DBG174]] // CHECK-DEBUG: omp_loop.header166: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV172:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER165]] ], [ [[OMP_LOOP_NEXT174:%.*]], [[OMP_LOOP_INC169:%.*]] ], !dbg [[DBG175]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND167:%.*]], !dbg [[DBG175]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV172:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER165]] ], [ [[OMP_LOOP_NEXT174:%.*]], [[OMP_LOOP_INC169:%.*]] ], !dbg [[DBG174]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND167:%.*]], !dbg [[DBG174]] // CHECK-DEBUG: omp_loop.cond167: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP173:%.*]] = icmp ult i32 [[OMP_LOOP_IV172]], [[TMP17]], !dbg [[DBG175]] -// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP173]], label [[OMP_LOOP_BODY168:%.*]], label [[OMP_LOOP_EXIT170:%.*]], !dbg [[DBG175]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP173:%.*]] = icmp ult i32 [[OMP_LOOP_IV172]], [[TMP17]], !dbg [[DBG174]] +// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP173]], label [[OMP_LOOP_BODY168:%.*]], label [[OMP_LOOP_EXIT170:%.*]], !dbg [[DBG174]] // CHECK-DEBUG: omp_loop.exit170: -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB39]], i32 [[OMP_GLOBAL_THREAD_NUM182]]), !dbg [[DBG175]] -// CHECK-DEBUG-NEXT: 
[[OMP_GLOBAL_THREAD_NUM183:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB39]]), !dbg [[DBG177:![0-9]+]] -// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB40:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM183]]), !dbg [[DBG177]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER171:%.*]], !dbg [[DBG175]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB39]], i32 [[OMP_GLOBAL_THREAD_NUM182]]), !dbg [[DBG174]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM183:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB39]]), !dbg [[DBG176:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB40:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM183]]), !dbg [[DBG176]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER171:%.*]], !dbg [[DBG174]] // CHECK-DEBUG: omp_loop.after171: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG178:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG177:![0-9]+]] // CHECK-DEBUG: omp.par.region.parallel.after: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK-DEBUG: omp.par.pre_finalize: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT184_EXITSTUB:%.*]], !dbg [[DBG178]] +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT184_EXITSTUB:%.*]], !dbg [[DBG177]] // CHECK-DEBUG: omp_loop.body168: -// CHECK-DEBUG-NEXT: [[TMP18:%.*]] = add i32 [[OMP_LOOP_IV172]], [[TMP14]], !dbg [[DBG177]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.18(ptr [[I160]], i32 [[TMP18]], ptr [[AGG_CAPTURED162]]), !dbg [[DBG175]] -// CHECK-DEBUG-NEXT: [[TMP19:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG179:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV175:%.*]] = sitofp i32 [[TMP19]] to double, !dbg [[DBG179]] -// CHECK-DEBUG-NEXT: [[TMP20:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG177]] -// CHECK-DEBUG-NEXT: [[ADD176:%.*]] = fadd double [[CONV175]], [[TMP20]], !dbg [[DBG180:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV177:%.*]] = 
fptrunc double [[ADD176]] to float, !dbg [[DBG179]] -// CHECK-DEBUG-NEXT: [[TMP21:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG181:![0-9]+]] -// CHECK-DEBUG-NEXT: store float [[CONV177]], ptr [[TMP21]], align 4, !dbg [[DBG182:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC169]], !dbg [[DBG175]] +// CHECK-DEBUG-NEXT: [[TMP18:%.*]] = add i32 [[OMP_LOOP_IV172]], [[TMP14]], !dbg [[DBG176]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.18(ptr [[I160]], i32 [[TMP18]], ptr [[AGG_CAPTURED162]]), !dbg [[DBG174]] +// CHECK-DEBUG-NEXT: [[TMP19:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG178:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV175:%.*]] = sitofp i32 [[TMP19]] to double, !dbg [[DBG178]] +// CHECK-DEBUG-NEXT: [[TMP20:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG176]] +// CHECK-DEBUG-NEXT: [[ADD176:%.*]] = fadd double [[CONV175]], [[TMP20]], !dbg [[DBG179:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV177:%.*]] = fptrunc double [[ADD176]] to float, !dbg [[DBG178]] +// CHECK-DEBUG-NEXT: [[TMP21:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG180:![0-9]+]] +// CHECK-DEBUG-NEXT: store float [[CONV177]], ptr [[TMP21]], align 4, !dbg [[DBG181:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC169]], !dbg [[DBG174]] // CHECK-DEBUG: omp_loop.inc169: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT174]] = add nuw i32 [[OMP_LOOP_IV172]], 1, !dbg [[DBG175]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER166]], !dbg [[DBG175]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT174]] = add nuw i32 [[OMP_LOOP_IV172]], 1, !dbg [[DBG174]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER166]], !dbg [[DBG174]] // CHECK-DEBUG: omp_loop.body: -// CHECK-DEBUG-NEXT: [[TMP22:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP6]], !dbg [[DBG165]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.6(ptr [[I]], i32 [[TMP22]], ptr [[AGG_CAPTURED1]]), !dbg [[DBG163]] -// CHECK-DEBUG-NEXT: [[TMP23:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG183:![0-9]+]] 
-// CHECK-DEBUG-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP23]] to double, !dbg [[DBG183]] -// CHECK-DEBUG-NEXT: [[TMP24:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG165]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP24]], !dbg [[DBG184:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD]] to float, !dbg [[DBG183]] -// CHECK-DEBUG-NEXT: [[TMP25:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG185:![0-9]+]] -// CHECK-DEBUG-NEXT: store float [[CONV2]], ptr [[TMP25]], align 4, !dbg [[DBG186:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC]], !dbg [[DBG163]] +// CHECK-DEBUG-NEXT: [[TMP22:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP6]], !dbg [[DBG164]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.6(ptr [[I]], i32 [[TMP22]], ptr [[AGG_CAPTURED1]]), !dbg [[DBG162]] +// CHECK-DEBUG-NEXT: [[TMP23:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG182:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP23]] to double, !dbg [[DBG182]] +// CHECK-DEBUG-NEXT: [[TMP24:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG164]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = fadd double [[CONV]], [[TMP24]], !dbg [[DBG183:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV2:%.*]] = fptrunc double [[ADD]] to float, !dbg [[DBG182]] +// CHECK-DEBUG-NEXT: [[TMP25:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG184:![0-9]+]] +// CHECK-DEBUG-NEXT: store float [[CONV2]], ptr [[TMP25]], align 4, !dbg [[DBG185:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC]], !dbg [[DBG162]] // CHECK-DEBUG: omp_loop.inc: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG163]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[DBG163]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG162]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[DBG162]] // CHECK-DEBUG: omp.par.outlined.exit184.exitStub: // CHECK-DEBUG-NEXT: ret void // // 
// CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_2Pfid..omp_par.22 -// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR6:%.*]], ptr noalias [[ZERO_ADDR7:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG187:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR6:%.*]], ptr noalias [[ZERO_ADDR7:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG186:![0-9]+]] { // CHECK-DEBUG-NEXT: omp.par.entry8: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 // CHECK-DEBUG-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 @@ -2148,41 +2148,41 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[P_STRIDE156:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION9:%.*]] // CHECK-DEBUG: omp.par.region9: -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I16]], metadata [[META188:![0-9]+]], metadata !DIExpression()), !dbg [[DBG193:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 0, ptr [[I16]], align 4, !dbg [[DBG193]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED17]], i32 0, i32 0, !dbg [[DBG194:![0-9]+]] -// CHECK-DEBUG-NEXT: store ptr [[I16]], ptr [[TMP2]], align 8, !dbg [[DBG194]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[AGG_CAPTURED18]], i32 0, i32 0, !dbg [[DBG194]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I16]], align 4, !dbg [[DBG195:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG194]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.7(ptr [[DOTCOUNT_ADDR19]], ptr [[AGG_CAPTURED17]]), !dbg [[DBG194]] -// CHECK-DEBUG-NEXT: [[DOTCOUNT20:%.*]] = load i32, ptr [[DOTCOUNT_ADDR19]], align 4, !dbg [[DBG194]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER21:%.*]], !dbg [[DBG194]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I16]], [[META187:![0-9]+]], !DIExpression(), [[META192:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 0, ptr 
[[I16]], align 4, !dbg [[META192]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_5]], ptr [[AGG_CAPTURED17]], i32 0, i32 0, !dbg [[DBG193:![0-9]+]] +// CHECK-DEBUG-NEXT: store ptr [[I16]], ptr [[TMP2]], align 8, !dbg [[DBG193]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], ptr [[AGG_CAPTURED18]], i32 0, i32 0, !dbg [[DBG193]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I16]], align 4, !dbg [[DBG194:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG193]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.7(ptr [[DOTCOUNT_ADDR19]], ptr [[AGG_CAPTURED17]]), !dbg [[DBG193]] +// CHECK-DEBUG-NEXT: [[DOTCOUNT20:%.*]] = load i32, ptr [[DOTCOUNT_ADDR19]], align 4, !dbg [[DBG193]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER21:%.*]], !dbg [[DBG193]] // CHECK-DEBUG: omp_loop.preheader21: -// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND35]], align 4, !dbg [[DBG194]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT20]], 1, !dbg [[DBG194]] -// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND36]], align 4, !dbg [[DBG194]] -// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE37]], align 4, !dbg [[DBG194]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM38:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20:[0-9]+]]), !dbg [[DBG194]] -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB20]], i32 [[OMP_GLOBAL_THREAD_NUM38]], i32 34, ptr [[P_LASTITER34]], ptr [[P_LOWERBOUND35]], ptr [[P_UPPERBOUND36]], ptr [[P_STRIDE37]], i32 1, i32 0), !dbg [[DBG194]] -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND35]], align 4, !dbg [[DBG194]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND36]], align 4, !dbg [[DBG194]] -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG194]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG194]] -// CHECK-DEBUG-NEXT: br label 
[[OMP_LOOP_HEADER22:%.*]], !dbg [[DBG194]] +// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND35]], align 4, !dbg [[DBG193]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT20]], 1, !dbg [[DBG193]] +// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND36]], align 4, !dbg [[DBG193]] +// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE37]], align 4, !dbg [[DBG193]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM38:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20:[0-9]+]]), !dbg [[DBG193]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB20]], i32 [[OMP_GLOBAL_THREAD_NUM38]], i32 34, ptr [[P_LASTITER34]], ptr [[P_LOWERBOUND35]], ptr [[P_UPPERBOUND36]], ptr [[P_STRIDE37]], i32 1, i32 0), !dbg [[DBG193]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND35]], align 4, !dbg [[DBG193]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND36]], align 4, !dbg [[DBG193]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG193]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG193]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER22:%.*]], !dbg [[DBG193]] // CHECK-DEBUG: omp_loop.header22: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV28:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER21]] ], [ [[OMP_LOOP_NEXT30:%.*]], [[OMP_LOOP_INC25:%.*]] ], !dbg [[DBG194]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND23:%.*]], !dbg [[DBG194]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV28:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER21]] ], [ [[OMP_LOOP_NEXT30:%.*]], [[OMP_LOOP_INC25:%.*]] ], !dbg [[DBG193]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND23:%.*]], !dbg [[DBG193]] // CHECK-DEBUG: omp_loop.cond23: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP29:%.*]] = icmp ult i32 [[OMP_LOOP_IV28]], [[TMP9]], !dbg [[DBG194]] -// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP29]], label [[OMP_LOOP_BODY24:%.*]], label [[OMP_LOOP_EXIT26:%.*]], !dbg [[DBG194]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP29:%.*]] = icmp ult i32 
[[OMP_LOOP_IV28]], [[TMP9]], !dbg [[DBG193]] +// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP29]], label [[OMP_LOOP_BODY24:%.*]], label [[OMP_LOOP_EXIT26:%.*]], !dbg [[DBG193]] // CHECK-DEBUG: omp_loop.exit26: -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB20]], i32 [[OMP_GLOBAL_THREAD_NUM38]]), !dbg [[DBG194]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM39:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20]]), !dbg [[DBG196:![0-9]+]] -// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB21:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM39]]), !dbg [[DBG196]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER27:%.*]], !dbg [[DBG194]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB20]], i32 [[OMP_GLOBAL_THREAD_NUM38]]), !dbg [[DBG193]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM39:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20]]), !dbg [[DBG195:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB21:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM39]]), !dbg [[DBG195]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER27:%.*]], !dbg [[DBG193]] // CHECK-DEBUG: omp_loop.after27: -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM40:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB23:[0-9]+]]), !dbg [[DBG197:![0-9]+]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM40:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB23:[0-9]+]]), !dbg [[DBG196:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK-DEBUG: omp_parallel: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR1:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 @@ -2191,46 +2191,46 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: store ptr [[LOADGEP_B_ADDR]], ptr [[GEP_B_ADDR2]], align 8 // CHECK-DEBUG-NEXT: [[GEP_R_ADDR3:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 2 // CHECK-DEBUG-NEXT: store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR3]], align 8 -// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, 
...) @__kmpc_fork_call(ptr @[[GLOB23]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG198:![0-9]+]] +// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB23]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG197:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] // CHECK-DEBUG: omp.par.outlined.exit: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT46_SPLIT:%.*]] // CHECK-DEBUG: omp.par.exit46.split: -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I75]], metadata [[META202:![0-9]+]], metadata !DIExpression()), !dbg [[DBG205:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 0, ptr [[I75]], align 4, !dbg [[DBG205]] -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[AGG_CAPTURED76]], i32 0, i32 0, !dbg [[DBG206:![0-9]+]] -// CHECK-DEBUG-NEXT: store ptr [[I75]], ptr [[TMP10]], align 8, !dbg [[DBG206]] -// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[AGG_CAPTURED77]], i32 0, i32 0, !dbg [[DBG206]] -// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load i32, ptr [[I75]], align 4, !dbg [[DBG207:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4, !dbg [[DBG206]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.11(ptr [[DOTCOUNT_ADDR78]], ptr [[AGG_CAPTURED76]]), !dbg [[DBG206]] -// CHECK-DEBUG-NEXT: [[DOTCOUNT79:%.*]] = load i32, ptr [[DOTCOUNT_ADDR78]], align 4, !dbg [[DBG206]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER80:%.*]], !dbg [[DBG206]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I75]], [[META201:![0-9]+]], !DIExpression(), [[META204:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 0, ptr [[I75]], align 4, !dbg [[META204]] +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT_ANON_9]], ptr [[AGG_CAPTURED76]], i32 0, i32 0, !dbg [[DBG205:![0-9]+]] +// CHECK-DEBUG-NEXT: store ptr [[I75]], ptr [[TMP10]], align 8, !dbg [[DBG205]] +// CHECK-DEBUG-NEXT: 
[[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_10]], ptr [[AGG_CAPTURED77]], i32 0, i32 0, !dbg [[DBG205]] +// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load i32, ptr [[I75]], align 4, !dbg [[DBG206:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32 [[TMP12]], ptr [[TMP11]], align 4, !dbg [[DBG205]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.11(ptr [[DOTCOUNT_ADDR78]], ptr [[AGG_CAPTURED76]]), !dbg [[DBG205]] +// CHECK-DEBUG-NEXT: [[DOTCOUNT79:%.*]] = load i32, ptr [[DOTCOUNT_ADDR78]], align 4, !dbg [[DBG205]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER80:%.*]], !dbg [[DBG205]] // CHECK-DEBUG: omp_loop.preheader80: -// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND94]], align 4, !dbg [[DBG206]] -// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = sub i32 [[DOTCOUNT79]], 1, !dbg [[DBG206]] -// CHECK-DEBUG-NEXT: store i32 [[TMP13]], ptr [[P_UPPERBOUND95]], align 4, !dbg [[DBG206]] -// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE96]], align 4, !dbg [[DBG206]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM97:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB28:[0-9]+]]), !dbg [[DBG206]] -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB28]], i32 [[OMP_GLOBAL_THREAD_NUM97]], i32 34, ptr [[P_LASTITER93]], ptr [[P_LOWERBOUND94]], ptr [[P_UPPERBOUND95]], ptr [[P_STRIDE96]], i32 1, i32 0), !dbg [[DBG206]] -// CHECK-DEBUG-NEXT: [[TMP14:%.*]] = load i32, ptr [[P_LOWERBOUND94]], align 4, !dbg [[DBG206]] -// CHECK-DEBUG-NEXT: [[TMP15:%.*]] = load i32, ptr [[P_UPPERBOUND95]], align 4, !dbg [[DBG206]] -// CHECK-DEBUG-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], [[TMP14]], !dbg [[DBG206]] -// CHECK-DEBUG-NEXT: [[TMP17:%.*]] = add i32 [[TMP16]], 1, !dbg [[DBG206]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER81:%.*]], !dbg [[DBG206]] +// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND94]], align 4, !dbg [[DBG205]] +// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = sub i32 [[DOTCOUNT79]], 1, !dbg [[DBG205]] +// CHECK-DEBUG-NEXT: store i32 [[TMP13]], ptr [[P_UPPERBOUND95]], align 
4, !dbg [[DBG205]] +// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE96]], align 4, !dbg [[DBG205]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM97:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB28:[0-9]+]]), !dbg [[DBG205]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB28]], i32 [[OMP_GLOBAL_THREAD_NUM97]], i32 34, ptr [[P_LASTITER93]], ptr [[P_LOWERBOUND94]], ptr [[P_UPPERBOUND95]], ptr [[P_STRIDE96]], i32 1, i32 0), !dbg [[DBG205]] +// CHECK-DEBUG-NEXT: [[TMP14:%.*]] = load i32, ptr [[P_LOWERBOUND94]], align 4, !dbg [[DBG205]] +// CHECK-DEBUG-NEXT: [[TMP15:%.*]] = load i32, ptr [[P_UPPERBOUND95]], align 4, !dbg [[DBG205]] +// CHECK-DEBUG-NEXT: [[TMP16:%.*]] = sub i32 [[TMP15]], [[TMP14]], !dbg [[DBG205]] +// CHECK-DEBUG-NEXT: [[TMP17:%.*]] = add i32 [[TMP16]], 1, !dbg [[DBG205]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER81:%.*]], !dbg [[DBG205]] // CHECK-DEBUG: omp_loop.header81: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV87:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER80]] ], [ [[OMP_LOOP_NEXT89:%.*]], [[OMP_LOOP_INC84:%.*]] ], !dbg [[DBG206]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND82:%.*]], !dbg [[DBG206]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV87:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER80]] ], [ [[OMP_LOOP_NEXT89:%.*]], [[OMP_LOOP_INC84:%.*]] ], !dbg [[DBG205]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND82:%.*]], !dbg [[DBG205]] // CHECK-DEBUG: omp_loop.cond82: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP88:%.*]] = icmp ult i32 [[OMP_LOOP_IV87]], [[TMP17]], !dbg [[DBG206]] -// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP88]], label [[OMP_LOOP_BODY83:%.*]], label [[OMP_LOOP_EXIT85:%.*]], !dbg [[DBG206]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP88:%.*]] = icmp ult i32 [[OMP_LOOP_IV87]], [[TMP17]], !dbg [[DBG205]] +// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP88]], label [[OMP_LOOP_BODY83:%.*]], label [[OMP_LOOP_EXIT85:%.*]], !dbg [[DBG205]] // CHECK-DEBUG: omp_loop.exit85: -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB28]], i32 
[[OMP_GLOBAL_THREAD_NUM97]]), !dbg [[DBG206]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM98:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB28]]), !dbg [[DBG208:![0-9]+]] -// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB29:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM98]]), !dbg [[DBG208]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER86:%.*]], !dbg [[DBG206]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB28]], i32 [[OMP_GLOBAL_THREAD_NUM97]]), !dbg [[DBG205]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM98:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB28]]), !dbg [[DBG207:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB29:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM98]]), !dbg [[DBG207]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER86:%.*]], !dbg [[DBG205]] // CHECK-DEBUG: omp_loop.after86: -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM99:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB31:[0-9]+]]), !dbg [[DBG209:![0-9]+]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM99:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB31:[0-9]+]]), !dbg [[DBG208:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL213:%.*]] // CHECK-DEBUG: omp_parallel213: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR210:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG209]], i32 0, i32 0 @@ -2239,98 +2239,98 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: store ptr [[LOADGEP_B_ADDR]], ptr [[GEP_B_ADDR211]], align 8 // CHECK-DEBUG-NEXT: [[GEP_R_ADDR212:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[STRUCTARG209]], i32 0, i32 2 // CHECK-DEBUG-NEXT: store ptr [[LOADGEP_R_ADDR]], ptr [[GEP_R_ADDR212]], align 8 -// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB31]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.21, ptr [[STRUCTARG209]]), !dbg [[DBG210:![0-9]+]] +// CHECK-DEBUG-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB31]], i32 1, ptr @_Z14parallel_for_2Pfid..omp_par.21, ptr [[STRUCTARG209]]), !dbg [[DBG209:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT134:%.*]] // CHECK-DEBUG: omp.par.outlined.exit134: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT105_SPLIT:%.*]] // CHECK-DEBUG: omp.par.exit105.split: -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I135]], metadata [[META214:![0-9]+]], metadata !DIExpression()), !dbg [[DBG217:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 0, ptr [[I135]], align 4, !dbg [[DBG217]] -// CHECK-DEBUG-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[AGG_CAPTURED136]], i32 0, i32 0, !dbg [[DBG218:![0-9]+]] -// CHECK-DEBUG-NEXT: store ptr [[I135]], ptr [[TMP18]], align 8, !dbg [[DBG218]] -// CHECK-DEBUG-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[AGG_CAPTURED137]], i32 0, i32 0, !dbg [[DBG218]] -// CHECK-DEBUG-NEXT: [[TMP20:%.*]] = load i32, ptr [[I135]], align 4, !dbg [[DBG219:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4, !dbg [[DBG218]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.15(ptr [[DOTCOUNT_ADDR138]], ptr [[AGG_CAPTURED136]]), !dbg [[DBG218]] -// CHECK-DEBUG-NEXT: [[DOTCOUNT139:%.*]] = load i32, ptr [[DOTCOUNT_ADDR138]], align 4, !dbg [[DBG218]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER140:%.*]], !dbg [[DBG218]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I135]], [[META213:![0-9]+]], !DIExpression(), [[META216:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 0, ptr [[I135]], align 4, !dbg [[META216]] +// CHECK-DEBUG-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_13]], ptr [[AGG_CAPTURED136]], i32 0, i32 0, !dbg [[DBG217:![0-9]+]] +// CHECK-DEBUG-NEXT: store ptr [[I135]], ptr [[TMP18]], align 8, !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: [[TMP19:%.*]] = getelementptr inbounds [[STRUCT_ANON_14]], ptr [[AGG_CAPTURED137]], i32 0, i32 0, !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: [[TMP20:%.*]] = load i32, 
ptr [[I135]], align 4, !dbg [[DBG218:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32 [[TMP20]], ptr [[TMP19]], align 4, !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.15(ptr [[DOTCOUNT_ADDR138]], ptr [[AGG_CAPTURED136]]), !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: [[DOTCOUNT139:%.*]] = load i32, ptr [[DOTCOUNT_ADDR138]], align 4, !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER140:%.*]], !dbg [[DBG217]] // CHECK-DEBUG: omp_loop.preheader140: -// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND154]], align 4, !dbg [[DBG218]] -// CHECK-DEBUG-NEXT: [[TMP21:%.*]] = sub i32 [[DOTCOUNT139]], 1, !dbg [[DBG218]] -// CHECK-DEBUG-NEXT: store i32 [[TMP21]], ptr [[P_UPPERBOUND155]], align 4, !dbg [[DBG218]] -// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE156]], align 4, !dbg [[DBG218]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM157:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB36:[0-9]+]]), !dbg [[DBG218]] -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM157]], i32 34, ptr [[P_LASTITER153]], ptr [[P_LOWERBOUND154]], ptr [[P_UPPERBOUND155]], ptr [[P_STRIDE156]], i32 1, i32 0), !dbg [[DBG218]] -// CHECK-DEBUG-NEXT: [[TMP22:%.*]] = load i32, ptr [[P_LOWERBOUND154]], align 4, !dbg [[DBG218]] -// CHECK-DEBUG-NEXT: [[TMP23:%.*]] = load i32, ptr [[P_UPPERBOUND155]], align 4, !dbg [[DBG218]] -// CHECK-DEBUG-NEXT: [[TMP24:%.*]] = sub i32 [[TMP23]], [[TMP22]], !dbg [[DBG218]] -// CHECK-DEBUG-NEXT: [[TMP25:%.*]] = add i32 [[TMP24]], 1, !dbg [[DBG218]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER141:%.*]], !dbg [[DBG218]] +// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND154]], align 4, !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: [[TMP21:%.*]] = sub i32 [[DOTCOUNT139]], 1, !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: store i32 [[TMP21]], ptr [[P_UPPERBOUND155]], align 4, !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE156]], align 4, !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: 
[[OMP_GLOBAL_THREAD_NUM157:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB36:[0-9]+]]), !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM157]], i32 34, ptr [[P_LASTITER153]], ptr [[P_LOWERBOUND154]], ptr [[P_UPPERBOUND155]], ptr [[P_STRIDE156]], i32 1, i32 0), !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: [[TMP22:%.*]] = load i32, ptr [[P_LOWERBOUND154]], align 4, !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: [[TMP23:%.*]] = load i32, ptr [[P_UPPERBOUND155]], align 4, !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: [[TMP24:%.*]] = sub i32 [[TMP23]], [[TMP22]], !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: [[TMP25:%.*]] = add i32 [[TMP24]], 1, !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER141:%.*]], !dbg [[DBG217]] // CHECK-DEBUG: omp_loop.header141: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV147:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER140]] ], [ [[OMP_LOOP_NEXT149:%.*]], [[OMP_LOOP_INC144:%.*]] ], !dbg [[DBG218]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND142:%.*]], !dbg [[DBG218]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV147:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER140]] ], [ [[OMP_LOOP_NEXT149:%.*]], [[OMP_LOOP_INC144:%.*]] ], !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND142:%.*]], !dbg [[DBG217]] // CHECK-DEBUG: omp_loop.cond142: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP148:%.*]] = icmp ult i32 [[OMP_LOOP_IV147]], [[TMP25]], !dbg [[DBG218]] -// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP148]], label [[OMP_LOOP_BODY143:%.*]], label [[OMP_LOOP_EXIT145:%.*]], !dbg [[DBG218]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP148:%.*]] = icmp ult i32 [[OMP_LOOP_IV147]], [[TMP25]], !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP148]], label [[OMP_LOOP_BODY143:%.*]], label [[OMP_LOOP_EXIT145:%.*]], !dbg [[DBG217]] // CHECK-DEBUG: omp_loop.exit145: -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM157]]), !dbg [[DBG218]] -// CHECK-DEBUG-NEXT: 
[[OMP_GLOBAL_THREAD_NUM158:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB36]]), !dbg [[DBG220:![0-9]+]] -// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB37:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM158]]), !dbg [[DBG220]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER146:%.*]], !dbg [[DBG218]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB36]], i32 [[OMP_GLOBAL_THREAD_NUM157]]), !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM158:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB36]]), !dbg [[DBG219:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB37:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM158]]), !dbg [[DBG219]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER146:%.*]], !dbg [[DBG217]] // CHECK-DEBUG: omp_loop.after146: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION9_PARALLEL_AFTER:%.*]], !dbg [[DBG221:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION9_PARALLEL_AFTER:%.*]], !dbg [[DBG220:![0-9]+]] // CHECK-DEBUG: omp.par.region9.parallel.after: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE10:%.*]] // CHECK-DEBUG: omp.par.pre_finalize10: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT159_EXITSTUB:%.*]], !dbg [[DBG221]] +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT159_EXITSTUB:%.*]], !dbg [[DBG220]] // CHECK-DEBUG: omp_loop.body143: -// CHECK-DEBUG-NEXT: [[TMP26:%.*]] = add i32 [[OMP_LOOP_IV147]], [[TMP22]], !dbg [[DBG220]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.16(ptr [[I135]], i32 [[TMP26]], ptr [[AGG_CAPTURED137]]), !dbg [[DBG218]] -// CHECK-DEBUG-NEXT: [[TMP27:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG222:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV150:%.*]] = sitofp i32 [[TMP27]] to double, !dbg [[DBG222]] -// CHECK-DEBUG-NEXT: [[TMP28:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG220]] -// CHECK-DEBUG-NEXT: [[ADD151:%.*]] = fadd double [[CONV150]], [[TMP28]], !dbg [[DBG223:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV152:%.*]] = 
fptrunc double [[ADD151]] to float, !dbg [[DBG222]] -// CHECK-DEBUG-NEXT: [[TMP29:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG224:![0-9]+]] -// CHECK-DEBUG-NEXT: store float [[CONV152]], ptr [[TMP29]], align 4, !dbg [[DBG225:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC144]], !dbg [[DBG218]] +// CHECK-DEBUG-NEXT: [[TMP26:%.*]] = add i32 [[OMP_LOOP_IV147]], [[TMP22]], !dbg [[DBG219]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.16(ptr [[I135]], i32 [[TMP26]], ptr [[AGG_CAPTURED137]]), !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: [[TMP27:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG221:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV150:%.*]] = sitofp i32 [[TMP27]] to double, !dbg [[DBG221]] +// CHECK-DEBUG-NEXT: [[TMP28:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG219]] +// CHECK-DEBUG-NEXT: [[ADD151:%.*]] = fadd double [[CONV150]], [[TMP28]], !dbg [[DBG222:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV152:%.*]] = fptrunc double [[ADD151]] to float, !dbg [[DBG221]] +// CHECK-DEBUG-NEXT: [[TMP29:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG223:![0-9]+]] +// CHECK-DEBUG-NEXT: store float [[CONV152]], ptr [[TMP29]], align 4, !dbg [[DBG224:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC144]], !dbg [[DBG217]] // CHECK-DEBUG: omp_loop.inc144: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT149]] = add nuw i32 [[OMP_LOOP_IV147]], 1, !dbg [[DBG218]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER141]], !dbg [[DBG218]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT149]] = add nuw i32 [[OMP_LOOP_IV147]], 1, !dbg [[DBG217]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER141]], !dbg [[DBG217]] // CHECK-DEBUG: omp_loop.body83: -// CHECK-DEBUG-NEXT: [[TMP30:%.*]] = add i32 [[OMP_LOOP_IV87]], [[TMP14]], !dbg [[DBG208]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.12(ptr [[I75]], i32 [[TMP30]], ptr [[AGG_CAPTURED77]]), !dbg [[DBG206]] -// CHECK-DEBUG-NEXT: [[TMP31:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg 
[[DBG226:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV90:%.*]] = sitofp i32 [[TMP31]] to double, !dbg [[DBG226]] -// CHECK-DEBUG-NEXT: [[TMP32:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG208]] -// CHECK-DEBUG-NEXT: [[ADD91:%.*]] = fadd double [[CONV90]], [[TMP32]], !dbg [[DBG227:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV92:%.*]] = fptrunc double [[ADD91]] to float, !dbg [[DBG226]] -// CHECK-DEBUG-NEXT: [[TMP33:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG228:![0-9]+]] -// CHECK-DEBUG-NEXT: store float [[CONV92]], ptr [[TMP33]], align 4, !dbg [[DBG229:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC84]], !dbg [[DBG206]] +// CHECK-DEBUG-NEXT: [[TMP30:%.*]] = add i32 [[OMP_LOOP_IV87]], [[TMP14]], !dbg [[DBG207]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.12(ptr [[I75]], i32 [[TMP30]], ptr [[AGG_CAPTURED77]]), !dbg [[DBG205]] +// CHECK-DEBUG-NEXT: [[TMP31:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG225:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV90:%.*]] = sitofp i32 [[TMP31]] to double, !dbg [[DBG225]] +// CHECK-DEBUG-NEXT: [[TMP32:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG207]] +// CHECK-DEBUG-NEXT: [[ADD91:%.*]] = fadd double [[CONV90]], [[TMP32]], !dbg [[DBG226:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV92:%.*]] = fptrunc double [[ADD91]] to float, !dbg [[DBG225]] +// CHECK-DEBUG-NEXT: [[TMP33:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG227:![0-9]+]] +// CHECK-DEBUG-NEXT: store float [[CONV92]], ptr [[TMP33]], align 4, !dbg [[DBG228:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC84]], !dbg [[DBG205]] // CHECK-DEBUG: omp_loop.inc84: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT89]] = add nuw i32 [[OMP_LOOP_IV87]], 1, !dbg [[DBG206]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER81]], !dbg [[DBG206]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT89]] = add nuw i32 [[OMP_LOOP_IV87]], 1, !dbg [[DBG205]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER81]], !dbg [[DBG205]] // CHECK-DEBUG: 
omp_loop.body24: -// CHECK-DEBUG-NEXT: [[TMP34:%.*]] = add i32 [[OMP_LOOP_IV28]], [[TMP6]], !dbg [[DBG196]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.8(ptr [[I16]], i32 [[TMP34]], ptr [[AGG_CAPTURED18]]), !dbg [[DBG194]] -// CHECK-DEBUG-NEXT: [[TMP35:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG230:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV31:%.*]] = sitofp i32 [[TMP35]] to double, !dbg [[DBG230]] -// CHECK-DEBUG-NEXT: [[TMP36:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG196]] -// CHECK-DEBUG-NEXT: [[ADD32:%.*]] = fadd double [[CONV31]], [[TMP36]], !dbg [[DBG231:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV33:%.*]] = fptrunc double [[ADD32]] to float, !dbg [[DBG230]] -// CHECK-DEBUG-NEXT: [[TMP37:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG232:![0-9]+]] -// CHECK-DEBUG-NEXT: store float [[CONV33]], ptr [[TMP37]], align 4, !dbg [[DBG233:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC25]], !dbg [[DBG194]] +// CHECK-DEBUG-NEXT: [[TMP34:%.*]] = add i32 [[OMP_LOOP_IV28]], [[TMP6]], !dbg [[DBG195]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.8(ptr [[I16]], i32 [[TMP34]], ptr [[AGG_CAPTURED18]]), !dbg [[DBG193]] +// CHECK-DEBUG-NEXT: [[TMP35:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG229:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV31:%.*]] = sitofp i32 [[TMP35]] to double, !dbg [[DBG229]] +// CHECK-DEBUG-NEXT: [[TMP36:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG195]] +// CHECK-DEBUG-NEXT: [[ADD32:%.*]] = fadd double [[CONV31]], [[TMP36]], !dbg [[DBG230:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV33:%.*]] = fptrunc double [[ADD32]] to float, !dbg [[DBG229]] +// CHECK-DEBUG-NEXT: [[TMP37:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG231:![0-9]+]] +// CHECK-DEBUG-NEXT: store float [[CONV33]], ptr [[TMP37]], align 4, !dbg [[DBG232:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC25]], !dbg [[DBG193]] // CHECK-DEBUG: omp_loop.inc25: -// CHECK-DEBUG-NEXT: 
[[OMP_LOOP_NEXT30]] = add nuw i32 [[OMP_LOOP_IV28]], 1, !dbg [[DBG194]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER22]], !dbg [[DBG194]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT30]] = add nuw i32 [[OMP_LOOP_IV28]], 1, !dbg [[DBG193]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER22]], !dbg [[DBG193]] // CHECK-DEBUG: omp.par.outlined.exit159.exitStub: // CHECK-DEBUG-NEXT: ret void // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_2Pfid..omp_par.21 -// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR100:%.*]], ptr noalias [[ZERO_ADDR101:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG234:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR100:%.*]], ptr noalias [[ZERO_ADDR101:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG233:![0-9]+]] { // CHECK-DEBUG-NEXT: omp.par.entry102: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 // CHECK-DEBUG-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 @@ -2352,65 +2352,65 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[P_STRIDE131:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION103:%.*]] // CHECK-DEBUG: omp.par.region103: -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I110]], metadata [[META235:![0-9]+]], metadata !DIExpression()), !dbg [[DBG241:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 0, ptr [[I110]], align 4, !dbg [[DBG241]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[AGG_CAPTURED111]], i32 0, i32 0, !dbg [[DBG242:![0-9]+]] -// CHECK-DEBUG-NEXT: store ptr [[I110]], ptr [[TMP2]], align 8, !dbg [[DBG242]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[AGG_CAPTURED112]], i32 0, i32 0, !dbg [[DBG242]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I110]], align 4, !dbg [[DBG243:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG242]] -// CHECK-DEBUG-NEXT: call 
void @__captured_stmt.13(ptr [[DOTCOUNT_ADDR113]], ptr [[AGG_CAPTURED111]]), !dbg [[DBG242]] -// CHECK-DEBUG-NEXT: [[DOTCOUNT114:%.*]] = load i32, ptr [[DOTCOUNT_ADDR113]], align 4, !dbg [[DBG242]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER115:%.*]], !dbg [[DBG242]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I110]], [[META234:![0-9]+]], !DIExpression(), [[META240:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 0, ptr [[I110]], align 4, !dbg [[META240]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_11]], ptr [[AGG_CAPTURED111]], i32 0, i32 0, !dbg [[DBG241:![0-9]+]] +// CHECK-DEBUG-NEXT: store ptr [[I110]], ptr [[TMP2]], align 8, !dbg [[DBG241]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_12]], ptr [[AGG_CAPTURED112]], i32 0, i32 0, !dbg [[DBG241]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I110]], align 4, !dbg [[DBG242:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG241]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.13(ptr [[DOTCOUNT_ADDR113]], ptr [[AGG_CAPTURED111]]), !dbg [[DBG241]] +// CHECK-DEBUG-NEXT: [[DOTCOUNT114:%.*]] = load i32, ptr [[DOTCOUNT_ADDR113]], align 4, !dbg [[DBG241]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER115:%.*]], !dbg [[DBG241]] // CHECK-DEBUG: omp_loop.preheader115: -// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND129]], align 4, !dbg [[DBG242]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT114]], 1, !dbg [[DBG242]] -// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND130]], align 4, !dbg [[DBG242]] -// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE131]], align 4, !dbg [[DBG242]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM132:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB33:[0-9]+]]), !dbg [[DBG242]] -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB33]], i32 [[OMP_GLOBAL_THREAD_NUM132]], i32 34, ptr [[P_LASTITER128]], ptr [[P_LOWERBOUND129]], ptr [[P_UPPERBOUND130]], 
ptr [[P_STRIDE131]], i32 1, i32 0), !dbg [[DBG242]] -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND129]], align 4, !dbg [[DBG242]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND130]], align 4, !dbg [[DBG242]] -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG242]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG242]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER116:%.*]], !dbg [[DBG242]] +// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND129]], align 4, !dbg [[DBG241]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT114]], 1, !dbg [[DBG241]] +// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND130]], align 4, !dbg [[DBG241]] +// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE131]], align 4, !dbg [[DBG241]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM132:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB33:[0-9]+]]), !dbg [[DBG241]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB33]], i32 [[OMP_GLOBAL_THREAD_NUM132]], i32 34, ptr [[P_LASTITER128]], ptr [[P_LOWERBOUND129]], ptr [[P_UPPERBOUND130]], ptr [[P_STRIDE131]], i32 1, i32 0), !dbg [[DBG241]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND129]], align 4, !dbg [[DBG241]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND130]], align 4, !dbg [[DBG241]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG241]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG241]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER116:%.*]], !dbg [[DBG241]] // CHECK-DEBUG: omp_loop.header116: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV122:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER115]] ], [ [[OMP_LOOP_NEXT124:%.*]], [[OMP_LOOP_INC119:%.*]] ], !dbg [[DBG242]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND117:%.*]], !dbg [[DBG242]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV122:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER115]] ], [ 
[[OMP_LOOP_NEXT124:%.*]], [[OMP_LOOP_INC119:%.*]] ], !dbg [[DBG241]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND117:%.*]], !dbg [[DBG241]] // CHECK-DEBUG: omp_loop.cond117: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP123:%.*]] = icmp ult i32 [[OMP_LOOP_IV122]], [[TMP9]], !dbg [[DBG242]] -// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP123]], label [[OMP_LOOP_BODY118:%.*]], label [[OMP_LOOP_EXIT120:%.*]], !dbg [[DBG242]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP123:%.*]] = icmp ult i32 [[OMP_LOOP_IV122]], [[TMP9]], !dbg [[DBG241]] +// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP123]], label [[OMP_LOOP_BODY118:%.*]], label [[OMP_LOOP_EXIT120:%.*]], !dbg [[DBG241]] // CHECK-DEBUG: omp_loop.exit120: -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB33]], i32 [[OMP_GLOBAL_THREAD_NUM132]]), !dbg [[DBG242]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM133:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB33]]), !dbg [[DBG244:![0-9]+]] -// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB34:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM133]]), !dbg [[DBG244]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER121:%.*]], !dbg [[DBG242]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB33]], i32 [[OMP_GLOBAL_THREAD_NUM132]]), !dbg [[DBG241]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM133:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB33]]), !dbg [[DBG243:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB34:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM133]]), !dbg [[DBG243]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER121:%.*]], !dbg [[DBG241]] // CHECK-DEBUG: omp_loop.after121: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION103_PARALLEL_AFTER:%.*]], !dbg [[DBG245:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION103_PARALLEL_AFTER:%.*]], !dbg [[DBG244:![0-9]+]] // CHECK-DEBUG: omp.par.region103.parallel.after: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_PRE_FINALIZE104:%.*]] // CHECK-DEBUG: omp.par.pre_finalize104: -// CHECK-DEBUG-NEXT: br 
label [[OMP_PAR_OUTLINED_EXIT134_EXITSTUB:%.*]], !dbg [[DBG245]] +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT134_EXITSTUB:%.*]], !dbg [[DBG244]] // CHECK-DEBUG: omp_loop.body118: -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = add i32 [[OMP_LOOP_IV122]], [[TMP6]], !dbg [[DBG244]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.14(ptr [[I110]], i32 [[TMP10]], ptr [[AGG_CAPTURED112]]), !dbg [[DBG242]] -// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG246:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV125:%.*]] = sitofp i32 [[TMP11]] to double, !dbg [[DBG246]] -// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG244]] -// CHECK-DEBUG-NEXT: [[ADD126:%.*]] = fadd double [[CONV125]], [[TMP12]], !dbg [[DBG247:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV127:%.*]] = fptrunc double [[ADD126]] to float, !dbg [[DBG246]] -// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG248:![0-9]+]] -// CHECK-DEBUG-NEXT: store float [[CONV127]], ptr [[TMP13]], align 4, !dbg [[DBG249:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC119]], !dbg [[DBG242]] +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = add i32 [[OMP_LOOP_IV122]], [[TMP6]], !dbg [[DBG243]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.14(ptr [[I110]], i32 [[TMP10]], ptr [[AGG_CAPTURED112]]), !dbg [[DBG241]] +// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG245:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV125:%.*]] = sitofp i32 [[TMP11]] to double, !dbg [[DBG245]] +// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG243]] +// CHECK-DEBUG-NEXT: [[ADD126:%.*]] = fadd double [[CONV125]], [[TMP12]], !dbg [[DBG246:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV127:%.*]] = fptrunc double [[ADD126]] to float, !dbg [[DBG245]] +// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG247:![0-9]+]] +// CHECK-DEBUG-NEXT: store float 
[[CONV127]], ptr [[TMP13]], align 4, !dbg [[DBG248:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC119]], !dbg [[DBG241]] // CHECK-DEBUG: omp_loop.inc119: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT124]] = add nuw i32 [[OMP_LOOP_IV122]], 1, !dbg [[DBG242]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER116]], !dbg [[DBG242]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT124]] = add nuw i32 [[OMP_LOOP_IV122]], 1, !dbg [[DBG241]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER116]], !dbg [[DBG241]] // CHECK-DEBUG: omp.par.outlined.exit134.exitStub: // CHECK-DEBUG-NEXT: ret void // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_2Pfid..omp_par -// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR41:%.*]], ptr noalias [[ZERO_ADDR42:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG250:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR41:%.*]], ptr noalias [[ZERO_ADDR42:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG249:![0-9]+]] { // CHECK-DEBUG-NEXT: omp.par.entry43: // CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 // CHECK-DEBUG-NEXT: [[LOADGEP_A_ADDR:%.*]] = load ptr, ptr [[GEP_A_ADDR]], align 8 @@ -2432,65 +2432,65 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[P_STRIDE72:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION44:%.*]] // CHECK-DEBUG: omp.par.region44: -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I51]], metadata [[META251:![0-9]+]], metadata !DIExpression()), !dbg [[DBG257:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 0, ptr [[I51]], align 4, !dbg [[DBG257]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[AGG_CAPTURED52]], i32 0, i32 0, !dbg [[DBG258:![0-9]+]] -// CHECK-DEBUG-NEXT: store ptr [[I51]], ptr [[TMP2]], align 8, !dbg [[DBG258]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[AGG_CAPTURED53]], i32 0, i32 0, !dbg [[DBG258]] -// CHECK-DEBUG-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[I51]], align 4, !dbg [[DBG259:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG258]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.9(ptr [[DOTCOUNT_ADDR54]], ptr [[AGG_CAPTURED52]]), !dbg [[DBG258]] -// CHECK-DEBUG-NEXT: [[DOTCOUNT55:%.*]] = load i32, ptr [[DOTCOUNT_ADDR54]], align 4, !dbg [[DBG258]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER56:%.*]], !dbg [[DBG258]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[I51]], [[META250:![0-9]+]], !DIExpression(), [[META256:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 0, ptr [[I51]], align 4, !dbg [[META256]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_ANON_7]], ptr [[AGG_CAPTURED52]], i32 0, i32 0, !dbg [[DBG257:![0-9]+]] +// CHECK-DEBUG-NEXT: store ptr [[I51]], ptr [[TMP2]], align 8, !dbg [[DBG257]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_8]], ptr [[AGG_CAPTURED53]], i32 0, i32 0, !dbg [[DBG257]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[I51]], align 4, !dbg [[DBG258:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32 [[TMP4]], ptr [[TMP3]], align 4, !dbg [[DBG257]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.9(ptr [[DOTCOUNT_ADDR54]], ptr [[AGG_CAPTURED52]]), !dbg [[DBG257]] +// CHECK-DEBUG-NEXT: [[DOTCOUNT55:%.*]] = load i32, ptr [[DOTCOUNT_ADDR54]], align 4, !dbg [[DBG257]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER56:%.*]], !dbg [[DBG257]] // CHECK-DEBUG: omp_loop.preheader56: -// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND70]], align 4, !dbg [[DBG258]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT55]], 1, !dbg [[DBG258]] -// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND71]], align 4, !dbg [[DBG258]] -// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE72]], align 4, !dbg [[DBG258]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM73:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB25:[0-9]+]]), !dbg [[DBG258]] -// CHECK-DEBUG-NEXT: call void 
@__kmpc_for_static_init_4u(ptr @[[GLOB25]], i32 [[OMP_GLOBAL_THREAD_NUM73]], i32 34, ptr [[P_LASTITER69]], ptr [[P_LOWERBOUND70]], ptr [[P_UPPERBOUND71]], ptr [[P_STRIDE72]], i32 1, i32 0), !dbg [[DBG258]] -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND70]], align 4, !dbg [[DBG258]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND71]], align 4, !dbg [[DBG258]] -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG258]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG258]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER57:%.*]], !dbg [[DBG258]] +// CHECK-DEBUG-NEXT: store i32 0, ptr [[P_LOWERBOUND70]], align 4, !dbg [[DBG257]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = sub i32 [[DOTCOUNT55]], 1, !dbg [[DBG257]] +// CHECK-DEBUG-NEXT: store i32 [[TMP5]], ptr [[P_UPPERBOUND71]], align 4, !dbg [[DBG257]] +// CHECK-DEBUG-NEXT: store i32 1, ptr [[P_STRIDE72]], align 4, !dbg [[DBG257]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM73:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB25:[0-9]+]]), !dbg [[DBG257]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB25]], i32 [[OMP_GLOBAL_THREAD_NUM73]], i32 34, ptr [[P_LASTITER69]], ptr [[P_LOWERBOUND70]], ptr [[P_UPPERBOUND71]], ptr [[P_STRIDE72]], i32 1, i32 0), !dbg [[DBG257]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[P_LOWERBOUND70]], align 4, !dbg [[DBG257]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_UPPERBOUND71]], align 4, !dbg [[DBG257]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], [[TMP6]], !dbg [[DBG257]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], 1, !dbg [[DBG257]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER57:%.*]], !dbg [[DBG257]] // CHECK-DEBUG: omp_loop.header57: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV63:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER56]] ], [ [[OMP_LOOP_NEXT65:%.*]], [[OMP_LOOP_INC60:%.*]] ], !dbg [[DBG258]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND58:%.*]], 
!dbg [[DBG258]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV63:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER56]] ], [ [[OMP_LOOP_NEXT65:%.*]], [[OMP_LOOP_INC60:%.*]] ], !dbg [[DBG257]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND58:%.*]], !dbg [[DBG257]] // CHECK-DEBUG: omp_loop.cond58: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP64:%.*]] = icmp ult i32 [[OMP_LOOP_IV63]], [[TMP9]], !dbg [[DBG258]] -// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP64]], label [[OMP_LOOP_BODY59:%.*]], label [[OMP_LOOP_EXIT61:%.*]], !dbg [[DBG258]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP64:%.*]] = icmp ult i32 [[OMP_LOOP_IV63]], [[TMP9]], !dbg [[DBG257]] +// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP64]], label [[OMP_LOOP_BODY59:%.*]], label [[OMP_LOOP_EXIT61:%.*]], !dbg [[DBG257]] // CHECK-DEBUG: omp_loop.exit61: -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB25]], i32 [[OMP_GLOBAL_THREAD_NUM73]]), !dbg [[DBG258]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM74:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB25]]), !dbg [[DBG260:![0-9]+]] -// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB26:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM74]]), !dbg [[DBG260]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER62:%.*]], !dbg [[DBG258]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB25]], i32 [[OMP_GLOBAL_THREAD_NUM73]]), !dbg [[DBG257]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM74:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB25]]), !dbg [[DBG259:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(ptr @[[GLOB26:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM74]]), !dbg [[DBG259]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER62:%.*]], !dbg [[DBG257]] // CHECK-DEBUG: omp_loop.after62: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION44_PARALLEL_AFTER:%.*]], !dbg [[DBG261:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION44_PARALLEL_AFTER:%.*]], !dbg [[DBG260:![0-9]+]] // CHECK-DEBUG: omp.par.region44.parallel.after: // CHECK-DEBUG-NEXT: br label 
[[OMP_PAR_PRE_FINALIZE45:%.*]] // CHECK-DEBUG: omp.par.pre_finalize45: -// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG261]] +// CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG260]] // CHECK-DEBUG: omp_loop.body59: -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = add i32 [[OMP_LOOP_IV63]], [[TMP6]], !dbg [[DBG260]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.10(ptr [[I51]], i32 [[TMP10]], ptr [[AGG_CAPTURED53]]), !dbg [[DBG258]] -// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG262:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV66:%.*]] = sitofp i32 [[TMP11]] to double, !dbg [[DBG262]] -// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG260]] -// CHECK-DEBUG-NEXT: [[ADD67:%.*]] = fadd double [[CONV66]], [[TMP12]], !dbg [[DBG263:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV68:%.*]] = fptrunc double [[ADD67]] to float, !dbg [[DBG262]] -// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], align 8, !dbg [[DBG264:![0-9]+]] -// CHECK-DEBUG-NEXT: store float [[CONV68]], ptr [[TMP13]], align 4, !dbg [[DBG265:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC60]], !dbg [[DBG258]] +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = add i32 [[OMP_LOOP_IV63]], [[TMP6]], !dbg [[DBG259]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.10(ptr [[I51]], i32 [[TMP10]], ptr [[AGG_CAPTURED53]]), !dbg [[DBG257]] +// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load i32, ptr [[LOADGEP_A_ADDR]], align 4, !dbg [[DBG261:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV66:%.*]] = sitofp i32 [[TMP11]] to double, !dbg [[DBG261]] +// CHECK-DEBUG-NEXT: [[TMP12:%.*]] = load double, ptr [[LOADGEP_B_ADDR]], align 8, !dbg [[DBG259]] +// CHECK-DEBUG-NEXT: [[ADD67:%.*]] = fadd double [[CONV66]], [[TMP12]], !dbg [[DBG262:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV68:%.*]] = fptrunc double [[ADD67]] to float, !dbg [[DBG261]] +// CHECK-DEBUG-NEXT: [[TMP13:%.*]] = load ptr, ptr [[LOADGEP_R_ADDR]], 
align 8, !dbg [[DBG263:![0-9]+]] +// CHECK-DEBUG-NEXT: store float [[CONV68]], ptr [[TMP13]], align 4, !dbg [[DBG264:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC60]], !dbg [[DBG257]] // CHECK-DEBUG: omp_loop.inc60: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT65]] = add nuw i32 [[OMP_LOOP_IV63]], 1, !dbg [[DBG258]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER57]], !dbg [[DBG258]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT65]] = add nuw i32 [[OMP_LOOP_IV63]], 1, !dbg [[DBG257]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER57]], !dbg [[DBG257]] // CHECK-DEBUG: omp.par.outlined.exit.exitStub: // CHECK-DEBUG-NEXT: ret void // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.5 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG266:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG265:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -2498,67 +2498,67 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META267:![0-9]+]], metadata !DIExpression()), !dbg [[DBG268:![0-9]+]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META266:![0-9]+]], !DIExpression(), [[META267:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META269:![0-9]+]], metadata !DIExpression()), !dbg [[DBG268]] +// CHECK-DEBUG-NEXT: 
#dbg_declare(ptr [[__CONTEXT_ADDR]], [[META268:![0-9]+]], !DIExpression(), [[META267]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META270:![0-9]+]], metadata !DIExpression()), !dbg [[DBG272:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG273:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG273]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG273]] -// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG272]] -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META275:![0-9]+]], metadata !DIExpression()), !dbg [[DBG276:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG276]] -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META277:![0-9]+]], metadata !DIExpression()), !dbg [[DBG276]] -// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG276]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG276]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG276]] -// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG276]] -// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG276]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META269:![0-9]+]], !DIExpression(), [[META271:![0-9]+]]) +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_3:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG272:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG272]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG272]] +// CHECK-DEBUG-NEXT: store i32 
[[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META271]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META274:![0-9]+]], !DIExpression(), [[META275:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META275]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META276:![0-9]+]], !DIExpression(), [[META275]]) +// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META275]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META275]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META275]] +// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META275]] +// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META275]] // CHECK-DEBUG: cond.true: -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG276]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG276]] -// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG276]] -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG276]] -// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG276]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG276]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG276]] -// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG276]] -// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG276]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META275]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META275]] +// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META275]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META275]] +// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META275]] +// 
CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META275]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META275]] +// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META275]] +// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META275]] // CHECK-DEBUG: cond.false: -// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG276]] +// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META275]] // CHECK-DEBUG: cond.end: -// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG276]] -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG276]] -// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG276]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG278:![0-9]+]] +// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META275]] +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META275]] +// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META275]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG277:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.6 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG280:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG279:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr 
[[LOOPVAR_ADDR]], metadata [[META281:![0-9]+]], metadata !DIExpression()), !dbg [[DBG282:![0-9]+]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META280:![0-9]+]], !DIExpression(), [[META281:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META283:![0-9]+]], metadata !DIExpression()), !dbg [[DBG282]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META282:![0-9]+]], !DIExpression(), [[META281]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META284:![0-9]+]], metadata !DIExpression()), !dbg [[DBG282]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META283:![0-9]+]], !DIExpression(), [[META281]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG285:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG285]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG287:![0-9]+]] -// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG287]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG287]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG287]] -// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG282]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG285]] +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG284:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG284]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG286:![0-9]+]] +// 
CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG286]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG286]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG286]] +// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META281]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG284]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.7 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG288:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG287:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -2566,67 +2566,67 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META289:![0-9]+]], metadata !DIExpression()), !dbg [[DBG290:![0-9]+]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META288:![0-9]+]], !DIExpression(), [[META289:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META291:![0-9]+]], metadata !DIExpression()), !dbg [[DBG290]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META290:![0-9]+]], !DIExpression(), [[META289]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META292:![0-9]+]], 
metadata !DIExpression()), !dbg [[DBG294:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG295:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG295]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG295]] -// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG294]] -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META297:![0-9]+]], metadata !DIExpression()), !dbg [[DBG298:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG298]] -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META299:![0-9]+]], metadata !DIExpression()), !dbg [[DBG298]] -// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG298]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG298]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG298]] -// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG298]] -// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG298]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META291:![0-9]+]], !DIExpression(), [[META293:![0-9]+]]) +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_5:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG294:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG294]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG294]] +// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META293]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META296:![0-9]+]], !DIExpression(), [[META297:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META297]] +// CHECK-DEBUG-NEXT: 
#dbg_declare(ptr [[DOTSTEP]], [[META298:![0-9]+]], !DIExpression(), [[META297]]) +// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META297]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META297]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META297]] +// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META297]] +// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META297]] // CHECK-DEBUG: cond.true: -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG298]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG298]] -// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG298]] -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG298]] -// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG298]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG298]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG298]] -// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG298]] -// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG298]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META297]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META297]] +// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META297]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META297]] +// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META297]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META297]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META297]] +// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META297]] +// 
CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META297]] // CHECK-DEBUG: cond.false: -// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG298]] +// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META297]] // CHECK-DEBUG: cond.end: -// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG298]] -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG298]] -// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG298]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG300:![0-9]+]] +// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META297]] +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META297]] +// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META297]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG299:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.8 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG302:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG301:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META303:![0-9]+]], metadata !DIExpression()), !dbg [[DBG304:![0-9]+]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META302:![0-9]+]], !DIExpression(), [[META303:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], 
ptr [[LOGICAL_ADDR]], align 4 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META305:![0-9]+]], metadata !DIExpression()), !dbg [[DBG304]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META304:![0-9]+]], !DIExpression(), [[META303]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META306:![0-9]+]], metadata !DIExpression()), !dbg [[DBG304]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META305:![0-9]+]], !DIExpression(), [[META303]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG307:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG307]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG309:![0-9]+]] -// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG309]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG309]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG309]] -// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG304]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG307]] +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG306:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG306]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG308:![0-9]+]] +// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG308]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG308]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG308]] +// 
CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META303]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG306]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.9 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG310:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG309:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -2634,67 +2634,67 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META311:![0-9]+]], metadata !DIExpression()), !dbg [[DBG312:![0-9]+]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META310:![0-9]+]], !DIExpression(), [[META311:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META313:![0-9]+]], metadata !DIExpression()), !dbg [[DBG312]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META312:![0-9]+]], !DIExpression(), [[META311]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META314:![0-9]+]], metadata !DIExpression()), !dbg [[DBG316:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG317:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], 
align 8, !dbg [[DBG317]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG317]] -// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG316]] -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META319:![0-9]+]], metadata !DIExpression()), !dbg [[DBG320:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG320]] -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META321:![0-9]+]], metadata !DIExpression()), !dbg [[DBG320]] -// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG320]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG320]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG320]] -// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG320]] -// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG320]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META313:![0-9]+]], !DIExpression(), [[META315:![0-9]+]]) +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_7:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG316:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG316]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG316]] +// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META315]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META318:![0-9]+]], !DIExpression(), [[META319:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META319]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META320:![0-9]+]], !DIExpression(), [[META319]]) +// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META319]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META319]] +// 
CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META319]] +// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META319]] +// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META319]] // CHECK-DEBUG: cond.true: -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG320]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG320]] -// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG320]] -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG320]] -// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG320]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG320]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG320]] -// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG320]] -// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG320]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META319]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META319]] +// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META319]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META319]] +// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META319]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META319]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META319]] +// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META319]] +// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META319]] // CHECK-DEBUG: cond.false: -// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG320]] +// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META319]] // CHECK-DEBUG: cond.end: -// CHECK-DEBUG-NEXT: 
[[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG320]] -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG320]] -// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG320]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG322:![0-9]+]] +// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META319]] +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META319]] +// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META319]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG321:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.10 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG324:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG323:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META325:![0-9]+]], metadata !DIExpression()), !dbg [[DBG326:![0-9]+]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META324:![0-9]+]], !DIExpression(), [[META325:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META327:![0-9]+]], metadata !DIExpression()), !dbg [[DBG326]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], 
[[META326:![0-9]+]], !DIExpression(), [[META325]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META328:![0-9]+]], metadata !DIExpression()), !dbg [[DBG326]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META327:![0-9]+]], !DIExpression(), [[META325]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG329:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG329]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG331:![0-9]+]] -// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG331]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG331]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG331]] -// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG326]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG329]] +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_8:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG328:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG328]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG330:![0-9]+]] +// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG330]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG330]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG330]] +// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META325]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG328]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.11 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 
dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG332:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG331:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -2702,67 +2702,67 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META333:![0-9]+]], metadata !DIExpression()), !dbg [[DBG334:![0-9]+]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META332:![0-9]+]], !DIExpression(), [[META333:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META335:![0-9]+]], metadata !DIExpression()), !dbg [[DBG334]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META334:![0-9]+]], !DIExpression(), [[META333]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META336:![0-9]+]], metadata !DIExpression()), !dbg [[DBG338:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG339:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG339]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG339]] -// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG338]] -// CHECK-DEBUG-NEXT: call void 
@llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META341:![0-9]+]], metadata !DIExpression()), !dbg [[DBG342:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG342]] -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META343:![0-9]+]], metadata !DIExpression()), !dbg [[DBG342]] -// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG342]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG342]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG342]] -// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG342]] -// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG342]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META335:![0-9]+]], !DIExpression(), [[META337:![0-9]+]]) +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_9:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG338:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG338]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG338]] +// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META337]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META340:![0-9]+]], !DIExpression(), [[META341:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META341]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META342:![0-9]+]], !DIExpression(), [[META341]]) +// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META341]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META341]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META341]] +// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META341]] +// CHECK-DEBUG-NEXT: br i1 [[CMP]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META341]] // CHECK-DEBUG: cond.true: -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG342]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG342]] -// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG342]] -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG342]] -// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG342]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG342]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG342]] -// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG342]] -// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG342]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META341]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META341]] +// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META341]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META341]] +// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META341]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META341]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META341]] +// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META341]] +// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META341]] // CHECK-DEBUG: cond.false: -// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG342]] +// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META341]] // CHECK-DEBUG: cond.end: -// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG342]] -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG342]] -// CHECK-DEBUG-NEXT: store i32 
[[COND]], ptr [[TMP10]], align 4, !dbg [[DBG342]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG344:![0-9]+]] +// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META341]] +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META341]] +// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META341]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG343:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.12 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG346:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG345:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META347:![0-9]+]], metadata !DIExpression()), !dbg [[DBG348:![0-9]+]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META346:![0-9]+]], !DIExpression(), [[META347:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META349:![0-9]+]], metadata !DIExpression()), !dbg [[DBG348]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META348:![0-9]+]], !DIExpression(), [[META347]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata 
[[META350:![0-9]+]], metadata !DIExpression()), !dbg [[DBG348]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META349:![0-9]+]], !DIExpression(), [[META347]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG351:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG351]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG353:![0-9]+]] -// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG353]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG353]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG353]] -// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG348]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG351]] +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_10:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG350:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG350]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG352:![0-9]+]] +// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG352]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG352]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG352]] +// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META347]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG350]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.13 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG354:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef 
[[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG353:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -2770,67 +2770,67 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META355:![0-9]+]], metadata !DIExpression()), !dbg [[DBG356:![0-9]+]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META354:![0-9]+]], !DIExpression(), [[META355:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META357:![0-9]+]], metadata !DIExpression()), !dbg [[DBG356]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META356:![0-9]+]], !DIExpression(), [[META355]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META358:![0-9]+]], metadata !DIExpression()), !dbg [[DBG360:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG361:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG361]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG361]] -// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG360]] -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META363:![0-9]+]], metadata !DIExpression()), !dbg [[DBG364:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG364]] -// CHECK-DEBUG-NEXT: call 
void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META365:![0-9]+]], metadata !DIExpression()), !dbg [[DBG364]] -// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG364]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG364]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG364]] -// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG364]] -// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG364]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META357:![0-9]+]], !DIExpression(), [[META359:![0-9]+]]) +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_11:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG360:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG360]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG360]] +// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META359]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META362:![0-9]+]], !DIExpression(), [[META363:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META363]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META364:![0-9]+]], !DIExpression(), [[META363]]) +// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META363]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META363]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META363]] +// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META363]] +// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META363]] // CHECK-DEBUG: cond.true: -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG364]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], 
align 4, !dbg [[DBG364]] -// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG364]] -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG364]] -// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG364]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG364]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG364]] -// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG364]] -// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG364]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META363]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META363]] +// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META363]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META363]] +// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META363]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META363]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META363]] +// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META363]] +// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META363]] // CHECK-DEBUG: cond.false: -// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG364]] +// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META363]] // CHECK-DEBUG: cond.end: -// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG364]] -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG364]] -// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG364]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG366:![0-9]+]] +// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META363]] +// CHECK-DEBUG-NEXT: 
[[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META363]] +// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META363]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG365:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.14 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG368:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG367:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META369:![0-9]+]], metadata !DIExpression()), !dbg [[DBG370:![0-9]+]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META368:![0-9]+]], !DIExpression(), [[META369:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META371:![0-9]+]], metadata !DIExpression()), !dbg [[DBG370]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META370:![0-9]+]], !DIExpression(), [[META369]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META372:![0-9]+]], metadata !DIExpression()), !dbg [[DBG370]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META371:![0-9]+]], !DIExpression(), [[META369]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// 
CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG373:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG373]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG375:![0-9]+]] -// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG375]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG375]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG375]] -// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG370]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG373]] +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_12:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG372:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG372]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG374:![0-9]+]] +// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG374]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG374]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG374]] +// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META369]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG372]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.15 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG376:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG375:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -2838,67 +2838,67 @@ void parallel_for_2(float *r, 
int a, double b) { // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META377:![0-9]+]], metadata !DIExpression()), !dbg [[DBG378:![0-9]+]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META376:![0-9]+]], !DIExpression(), [[META377:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META379:![0-9]+]], metadata !DIExpression()), !dbg [[DBG378]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META378:![0-9]+]], !DIExpression(), [[META377]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META380:![0-9]+]], metadata !DIExpression()), !dbg [[DBG382:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG383:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG383]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG383]] -// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG382]] -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META385:![0-9]+]], metadata !DIExpression()), !dbg [[DBG386:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG386]] -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META387:![0-9]+]], metadata !DIExpression()), !dbg [[DBG386]] -// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG386]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 
4, !dbg [[DBG386]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG386]] -// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG386]] -// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG386]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META379:![0-9]+]], !DIExpression(), [[META381:![0-9]+]]) +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_13:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG382:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG382]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG382]] +// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META381]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META384:![0-9]+]], !DIExpression(), [[META385:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META385]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META386:![0-9]+]], !DIExpression(), [[META385]]) +// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META385]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META385]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META385]] +// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META385]] +// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META385]] // CHECK-DEBUG: cond.true: -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG386]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG386]] -// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG386]] -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG386]] -// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG386]] 
-// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG386]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG386]] -// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG386]] -// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG386]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META385]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META385]] +// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META385]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META385]] +// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META385]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META385]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META385]] +// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META385]] +// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META385]] // CHECK-DEBUG: cond.false: -// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG386]] +// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META385]] // CHECK-DEBUG: cond.end: -// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG386]] -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG386]] -// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG386]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG388:![0-9]+]] +// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META385]] +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META385]] +// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META385]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG387:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.16 
-// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG390:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG389:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META391:![0-9]+]], metadata !DIExpression()), !dbg [[DBG392:![0-9]+]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META390:![0-9]+]], !DIExpression(), [[META391:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META393:![0-9]+]], metadata !DIExpression()), !dbg [[DBG392]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META392:![0-9]+]], !DIExpression(), [[META391]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META394:![0-9]+]], metadata !DIExpression()), !dbg [[DBG392]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META393:![0-9]+]], !DIExpression(), [[META391]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG395:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG395]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr 
[[LOGICAL_ADDR]], align 4, !dbg [[DBG397:![0-9]+]] -// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG397]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG397]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG397]] -// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG392]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG395]] +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG394:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG394]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG396:![0-9]+]] +// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG396]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG396]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG396]] +// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META391]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG394]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.17 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG398:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG397:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -2906,67 +2906,67 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void 
@llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META399:![0-9]+]], metadata !DIExpression()), !dbg [[DBG400:![0-9]+]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META398:![0-9]+]], !DIExpression(), [[META399:![0-9]+]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META401:![0-9]+]], metadata !DIExpression()), !dbg [[DBG400]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META400:![0-9]+]], !DIExpression(), [[META399]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META402:![0-9]+]], metadata !DIExpression()), !dbg [[DBG404:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG405:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG405]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG405]] -// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG404]] -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META407:![0-9]+]], metadata !DIExpression()), !dbg [[DBG408:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG408]] -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META409:![0-9]+]], metadata !DIExpression()), !dbg [[DBG408]] -// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG408]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG408]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG408]] -// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG408]] -// CHECK-DEBUG-NEXT: br i1 [[CMP]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG408]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META401:![0-9]+]], !DIExpression(), [[META403:![0-9]+]]) +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_15:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG404:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG404]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG404]] +// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META403]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META406:![0-9]+]], !DIExpression(), [[META407:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META407]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META408:![0-9]+]], !DIExpression(), [[META407]]) +// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META407]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META407]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META407]] +// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META407]] +// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META407]] // CHECK-DEBUG: cond.true: -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG408]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG408]] -// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG408]] -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG408]] -// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG408]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG408]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG408]] -// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], 
!dbg [[DBG408]] -// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG408]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META407]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META407]] +// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META407]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META407]] +// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META407]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META407]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META407]] +// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META407]] +// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META407]] // CHECK-DEBUG: cond.false: -// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG408]] +// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META407]] // CHECK-DEBUG: cond.end: -// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG408]] -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG408]] -// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG408]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG410:![0-9]+]] +// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META407]] +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META407]] +// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META407]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG409:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.18 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG412:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull 
align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG411:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META413:![0-9]+]], metadata !DIExpression()), !dbg [[DBG414:![0-9]+]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META412:![0-9]+]], !DIExpression(), [[META413:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META415:![0-9]+]], metadata !DIExpression()), !dbg [[DBG414]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META414:![0-9]+]], !DIExpression(), [[META413]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META416:![0-9]+]], metadata !DIExpression()), !dbg [[DBG414]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META415:![0-9]+]], !DIExpression(), [[META413]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG417:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG417]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG419:![0-9]+]] -// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG419]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG419]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load 
ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG419]] -// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG414]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG417]] +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_16:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG416:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG416]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG418:![0-9]+]] +// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG418]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG418]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG418]] +// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META413]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG416]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.19 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG420:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG419:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -2974,61 +2974,61 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META421:![0-9]+]], metadata !DIExpression()), !dbg [[DBG422:![0-9]+]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META420:![0-9]+]], !DIExpression(), [[META421:![0-9]+]]) // CHECK-DEBUG-NEXT: 
store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META423:![0-9]+]], metadata !DIExpression()), !dbg [[DBG422]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META422:![0-9]+]], !DIExpression(), [[META421]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META424:![0-9]+]], metadata !DIExpression()), !dbg [[DBG426:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG427:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG427]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG427]] -// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG426]] -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META429:![0-9]+]], metadata !DIExpression()), !dbg [[DBG430:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[DBG430]] -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META431:![0-9]+]], metadata !DIExpression()), !dbg [[DBG430]] -// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG430]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG430]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG430]] -// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG430]] -// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG430]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META423:![0-9]+]], !DIExpression(), [[META425:![0-9]+]]) +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_17:%.*]], ptr [[TMP0]], i32 0, 
i32 0, !dbg [[DBG426:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG426]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG426]] +// CHECK-DEBUG-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META425]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META428:![0-9]+]], !DIExpression(), [[META429:![0-9]+]]) +// CHECK-DEBUG-NEXT: store i32 100, ptr [[DOTSTOP]], align 4, !dbg [[META429]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META430:![0-9]+]], !DIExpression(), [[META429]]) +// CHECK-DEBUG-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META429]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META429]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META429]] +// CHECK-DEBUG-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META429]] +// CHECK-DEBUG-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META429]] // CHECK-DEBUG: cond.true: -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG430]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG430]] -// CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG430]] -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG430]] -// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG430]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG430]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG430]] -// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG430]] -// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[DBG430]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META429]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META429]] +// 
CHECK-DEBUG-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META429]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META429]] +// CHECK-DEBUG-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META429]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META429]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META429]] +// CHECK-DEBUG-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META429]] +// CHECK-DEBUG-NEXT: br label [[COND_END:%.*]], !dbg [[META429]] // CHECK-DEBUG: cond.false: -// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[DBG430]] +// CHECK-DEBUG-NEXT: br label [[COND_END]], !dbg [[META429]] // CHECK-DEBUG: cond.end: -// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG430]] -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG430]] -// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG430]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG432:![0-9]+]] +// CHECK-DEBUG-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META429]] +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META429]] +// CHECK-DEBUG-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META429]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG431:![0-9]+]] // // // CHECK-DEBUG-LABEL: define {{[^@]+}}@__captured_stmt.20 -// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG434:![0-9]+]] { +// CHECK-DEBUG-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG433:![0-9]+]] { // CHECK-DEBUG-NEXT: entry: // CHECK-DEBUG-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: 
[[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK-DEBUG-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META435:![0-9]+]], metadata !DIExpression()), !dbg [[DBG436:![0-9]+]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META434:![0-9]+]], !DIExpression(), [[META435:![0-9]+]]) // CHECK-DEBUG-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META437:![0-9]+]], metadata !DIExpression()), !dbg [[DBG436]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META436:![0-9]+]], !DIExpression(), [[META435]]) // CHECK-DEBUG-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META438:![0-9]+]], metadata !DIExpression()), !dbg [[DBG436]] +// CHECK-DEBUG-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META437:![0-9]+]], !DIExpression(), [[META435]]) // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG439:![0-9]+]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG439]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG441:![0-9]+]] -// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG441]] -// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG441]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG441]] -// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG436]] -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG439]] +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_18:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG438:![0-9]+]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG438]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG440:![0-9]+]] +// CHECK-DEBUG-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG440]] +// CHECK-DEBUG-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG440]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG440]] +// CHECK-DEBUG-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META435]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG438]] // diff --git a/clang/test/OpenMP/nested_loop_codegen.cpp b/clang/test/OpenMP/nested_loop_codegen.cpp index 0eb76bc2e1c694..79b6a14f1cd667 100644 --- a/clang/test/OpenMP/nested_loop_codegen.cpp +++ b/clang/test/OpenMP/nested_loop_codegen.cpp @@ -260,15 +260,15 @@ int inline_decl() { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META12:![0-9]+]], metadata !DIExpression()), !dbg [[DBG13:![0-9]+]] -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[K]], metadata [[META14:![0-9]+]], metadata !DIExpression()), !dbg [[DBG15:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[I]], [[META12:![0-9]+]], !DIExpression(), [[META13:![0-9]+]]) +// CHECK2-NEXT: #dbg_declare(ptr [[K]], [[META14:![0-9]+]], !DIExpression(), [[META15:![0-9]+]]) // CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB6:[0-9]+]], i32 1, ptr @_Z12outline_declv.omp_outlined, ptr [[I]]), !dbg [[DBG16:![0-9]+]] // CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG17:![0-9]+]] // CHECK2-NEXT: ret i32 [[TMP0]], !dbg [[DBG18:![0-9]+]] // // // CHECK2-LABEL: define {{[^@]+}}@_Z12outline_declv.omp_outlined_debug__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR2:[0-9]+]] !dbg [[DBG19:![0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1:[0-9]+]] !dbg [[DBG19:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -281,11 +281,11 @@ int inline_decl() { // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META27:![0-9]+]], metadata !DIExpression()), !dbg [[DBG28:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META27:![0-9]+]], !DIExpression(), [[META28:![0-9]+]]) // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META29:![0-9]+]], metadata !DIExpression()), !dbg [[DBG28]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META29:![0-9]+]], !DIExpression(), [[META28]]) // CHECK2-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[I_ADDR]], metadata [[META30:![0-9]+]], metadata !DIExpression()), !dbg [[DBG31:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr 
[[I_ADDR]], [[META30:![0-9]+]], !DIExpression(), [[META31:![0-9]+]]) // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8, !dbg [[DBG32:![0-9]+]] // CHECK2-NEXT: store i32 0, ptr [[TMP0]], align 4, !dbg [[DBG33:![0-9]+]] // CHECK2-NEXT: br label [[FOR_COND:%.*]], !dbg [[DBG35:![0-9]+]] @@ -294,16 +294,16 @@ int inline_decl() { // CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 10, !dbg [[DBG38:![0-9]+]] // CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]], !dbg [[DBG39:![0-9]+]] // CHECK2: for.body: -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IV]], metadata [[META40:![0-9]+]], metadata !DIExpression()), !dbg [[DBG43:![0-9]+]] -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_LB]], metadata [[META44:![0-9]+]], metadata !DIExpression()), !dbg [[DBG43]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTOMP_IV]], [[META40:![0-9]+]], !DIExpression(), [[META43:![0-9]+]]) +// CHECK2-NEXT: #dbg_declare(ptr [[DOTOMP_LB]], [[META44:![0-9]+]], !DIExpression(), [[META43]]) // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG45:![0-9]+]] -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_UB]], metadata [[META46:![0-9]+]], metadata !DIExpression()), !dbg [[DBG43]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTOMP_UB]], [[META46:![0-9]+]], !DIExpression(), [[META43]]) // CHECK2-NEXT: store i32 4, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG45]] -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_STRIDE]], metadata [[META47:![0-9]+]], metadata !DIExpression()), !dbg [[DBG43]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTOMP_STRIDE]], [[META47:![0-9]+]], !DIExpression(), [[META43]]) // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG45]] -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IS_LAST]], metadata [[META48:![0-9]+]], metadata !DIExpression()), !dbg [[DBG43]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTOMP_IS_LAST]], [[META48:![0-9]+]], 
!DIExpression(), [[META43]]) // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG45]] -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[K]], metadata [[META49:![0-9]+]], metadata !DIExpression()), !dbg [[DBG43]] +// CHECK2-NEXT: #dbg_declare(ptr [[K]], [[META49:![0-9]+]], !DIExpression(), [[META43]]) // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG50:![0-9]+]] // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG50]] // CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG51:![0-9]+]] @@ -362,22 +362,22 @@ int inline_decl() { // // // CHECK2-LABEL: define {{[^@]+}}@_Z12outline_declv.omp_outlined -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR2]] !dbg [[DBG66:![0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] !dbg [[DBG66:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META67:![0-9]+]], metadata !DIExpression()), !dbg [[DBG68:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META67:![0-9]+]], !DIExpression(), [[META68:![0-9]+]]) // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata 
[[META69:![0-9]+]], metadata !DIExpression()), !dbg [[DBG68]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META69:![0-9]+]], !DIExpression(), [[META68]]) // CHECK2-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[I_ADDR]], metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG68]] +// CHECK2-NEXT: #dbg_declare(ptr [[I_ADDR]], [[META70:![0-9]+]], !DIExpression(), [[META68]]) // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8, !dbg [[DBG71:![0-9]+]] // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG71]] // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG71]] // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[I_ADDR]], align 8, !dbg [[DBG71]] -// CHECK2-NEXT: call void @_Z12outline_declv.omp_outlined_debug__(ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) #[[ATTR3:[0-9]+]], !dbg [[DBG71]] +// CHECK2-NEXT: call void @_Z12outline_declv.omp_outlined_debug__(ptr [[TMP1]], ptr [[TMP2]], ptr [[TMP3]]) #[[ATTR2:[0-9]+]], !dbg [[DBG71]] // CHECK2-NEXT: ret void, !dbg [[DBG71]] // // @@ -386,15 +386,15 @@ int inline_decl() { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[RES:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META75:![0-9]+]], metadata !DIExpression()), !dbg [[DBG76:![0-9]+]] -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[RES]], metadata [[META77:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[I]], [[META75:![0-9]+]], !DIExpression(), [[META76:![0-9]+]]) +// CHECK2-NEXT: #dbg_declare(ptr [[RES]], [[META77:![0-9]+]], !DIExpression(), [[META78:![0-9]+]]) // CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB13:[0-9]+]], i32 2, ptr @_Z11inline_declv.omp_outlined, ptr [[I]], ptr [[RES]]), !dbg [[DBG79:![0-9]+]] // CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[RES]], align 4, !dbg [[DBG80:![0-9]+]] // CHECK2-NEXT: ret i32 [[TMP0]], !dbg [[DBG81:![0-9]+]] // // // CHECK2-LABEL: define {{[^@]+}}@_Z11inline_declv.omp_outlined_debug__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[RES:%.*]]) #[[ATTR2]] !dbg [[DBG82:![0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[RES:%.*]]) #[[ATTR1]] !dbg [[DBG82:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -408,13 +408,13 @@ int inline_decl() { // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META85:![0-9]+]], metadata !DIExpression()), !dbg [[DBG86:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META85:![0-9]+]], !DIExpression(), [[META86:![0-9]+]]) // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META87:![0-9]+]], metadata !DIExpression()), !dbg [[DBG86]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META87:![0-9]+]], !DIExpression(), [[META86]]) // CHECK2-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr 
[[I_ADDR]], metadata [[META88:![0-9]+]], metadata !DIExpression()), !dbg [[DBG89:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[I_ADDR]], [[META88:![0-9]+]], !DIExpression(), [[META89:![0-9]+]]) // CHECK2-NEXT: store ptr [[RES]], ptr [[RES_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[RES_ADDR]], metadata [[META90:![0-9]+]], metadata !DIExpression()), !dbg [[DBG91:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[RES_ADDR]], [[META90:![0-9]+]], !DIExpression(), [[META91:![0-9]+]]) // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8, !dbg [[DBG92:![0-9]+]] // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RES_ADDR]], align 8, !dbg [[DBG92]] // CHECK2-NEXT: store i32 0, ptr [[TMP0]], align 4, !dbg [[DBG93:![0-9]+]] @@ -424,16 +424,16 @@ int inline_decl() { // CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10, !dbg [[DBG98:![0-9]+]] // CHECK2-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]], !dbg [[DBG99:![0-9]+]] // CHECK2: for.body: -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IV]], metadata [[META100:![0-9]+]], metadata !DIExpression()), !dbg [[DBG103:![0-9]+]] -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_LB]], metadata [[META104:![0-9]+]], metadata !DIExpression()), !dbg [[DBG103]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTOMP_IV]], [[META100:![0-9]+]], !DIExpression(), [[META103:![0-9]+]]) +// CHECK2-NEXT: #dbg_declare(ptr [[DOTOMP_LB]], [[META104:![0-9]+]], !DIExpression(), [[META103]]) // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG105:![0-9]+]] -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_UB]], metadata [[META106:![0-9]+]], metadata !DIExpression()), !dbg [[DBG103]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTOMP_UB]], [[META106:![0-9]+]], !DIExpression(), [[META103]]) // CHECK2-NEXT: store i32 4, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG105]] -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_STRIDE]], metadata 
[[META107:![0-9]+]], metadata !DIExpression()), !dbg [[DBG103]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTOMP_STRIDE]], [[META107:![0-9]+]], !DIExpression(), [[META103]]) // CHECK2-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG105]] -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IS_LAST]], metadata [[META108:![0-9]+]], metadata !DIExpression()), !dbg [[DBG103]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTOMP_IS_LAST]], [[META108:![0-9]+]], !DIExpression(), [[META103]]) // CHECK2-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG105]] -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[K]], metadata [[META109:![0-9]+]], metadata !DIExpression()), !dbg [[DBG103]] +// CHECK2-NEXT: #dbg_declare(ptr [[K]], [[META109:![0-9]+]], !DIExpression(), [[META103]]) // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG110:![0-9]+]] // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4, !dbg [[DBG110]] // CHECK2-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB8:[0-9]+]], i32 [[TMP4]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG111:![0-9]+]] @@ -492,27 +492,27 @@ int inline_decl() { // // // CHECK2-LABEL: define {{[^@]+}}@_Z11inline_declv.omp_outlined -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[RES:%.*]]) #[[ATTR2]] !dbg [[DBG125:![0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[I:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[RES:%.*]]) #[[ATTR1]] !dbg [[DBG125:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 
// CHECK2-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[RES_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META126:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META126:![0-9]+]], !DIExpression(), [[META127:![0-9]+]]) // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META128:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META128:![0-9]+]], !DIExpression(), [[META127]]) // CHECK2-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[I_ADDR]], metadata [[META129:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]] +// CHECK2-NEXT: #dbg_declare(ptr [[I_ADDR]], [[META129:![0-9]+]], !DIExpression(), [[META127]]) // CHECK2-NEXT: store ptr [[RES]], ptr [[RES_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[RES_ADDR]], metadata [[META130:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]] +// CHECK2-NEXT: #dbg_declare(ptr [[RES_ADDR]], [[META130:![0-9]+]], !DIExpression(), [[META127]]) // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[I_ADDR]], align 8, !dbg [[DBG131:![0-9]+]] // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[RES_ADDR]], align 8, !dbg [[DBG131]] // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG131]] // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG131]] // CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[I_ADDR]], align 8, !dbg [[DBG131]] // CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[RES_ADDR]], align 8, !dbg [[DBG131]] -// CHECK2-NEXT: call void 
@_Z11inline_declv.omp_outlined_debug__(ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], ptr [[TMP5]]) #[[ATTR3]], !dbg [[DBG131]] +// CHECK2-NEXT: call void @_Z11inline_declv.omp_outlined_debug__(ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], ptr [[TMP5]]) #[[ATTR2]], !dbg [[DBG131]] // CHECK2-NEXT: ret void, !dbg [[DBG131]] // // @@ -859,8 +859,8 @@ int inline_decl() { // CHECK4-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr }, align 8 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[K:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META14:![0-9]+]], metadata !DIExpression()), !dbg [[DBG15:![0-9]+]] -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata ptr [[K]], metadata [[META16:![0-9]+]], metadata !DIExpression()), !dbg [[DBG15]] +// CHECK4-NEXT: #dbg_declare(ptr [[I]], [[META14:![0-9]+]], !DIExpression(), [[META15:![0-9]+]]) +// CHECK4-NEXT: #dbg_declare(ptr [[K]], [[META16:![0-9]+]], !DIExpression(), [[META15]]) // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]), !dbg [[DBG17:![0-9]+]] // CHECK4-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK4: omp_parallel: @@ -964,7 +964,7 @@ int inline_decl() { // // // CHECK4-LABEL: define {{[^@]+}}@__captured_stmt -// CHECK4-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4:[0-9]+]] !dbg [[DBG39:![0-9]+]] { +// CHECK4-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3:[0-9]+]] !dbg [[DBG39:![0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -972,91 +972,91 @@ int inline_decl() { // CHECK4-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 -// CHECK4-NEXT: 
call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META48:![0-9]+]], metadata !DIExpression()), !dbg [[DBG49:![0-9]+]] +// CHECK4-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META47:![0-9]+]], !DIExpression(), [[META48:![0-9]+]]) // CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META50:![0-9]+]], metadata !DIExpression()), !dbg [[DBG49]] +// CHECK4-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META49:![0-9]+]], !DIExpression(), [[META48]]) // CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META51:![0-9]+]], metadata !DIExpression()), !dbg [[DBG53:![0-9]+]] -// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG54:![0-9]+]] -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG54]] -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG54]] -// CHECK4-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG53]] -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META56:![0-9]+]], metadata !DIExpression()), !dbg [[DBG57:![0-9]+]] -// CHECK4-NEXT: store i32 5, ptr [[DOTSTOP]], align 4, !dbg [[DBG57]] -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META58:![0-9]+]], metadata !DIExpression()), !dbg [[DBG57]] -// CHECK4-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG57]] -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG57]] -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG57]] -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG57]] -// CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG57]] +// CHECK4-NEXT: #dbg_declare(ptr [[DOTSTART]], 
[[META50:![0-9]+]], !DIExpression(), [[META52:![0-9]+]]) +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG53:![0-9]+]] +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG53]] +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG53]] +// CHECK4-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META52]] +// CHECK4-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META55:![0-9]+]], !DIExpression(), [[META56:![0-9]+]]) +// CHECK4-NEXT: store i32 5, ptr [[DOTSTOP]], align 4, !dbg [[META56]] +// CHECK4-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META57:![0-9]+]], !DIExpression(), [[META56]]) +// CHECK4-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META56]] +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META56]] +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META56]] +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META56]] +// CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META56]] // CHECK4: cond.true: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG57]] -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG57]] -// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG57]] -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG57]] -// CHECK4-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG57]] -// CHECK4-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG57]] -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG57]] -// CHECK4-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG57]] -// CHECK4-NEXT: br label [[COND_END:%.*]], !dbg [[DBG57]] +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META56]] +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META56]] +// 
CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META56]] +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META56]] +// CHECK4-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META56]] +// CHECK4-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META56]] +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META56]] +// CHECK4-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META56]] +// CHECK4-NEXT: br label [[COND_END:%.*]], !dbg [[META56]] // CHECK4: cond.false: -// CHECK4-NEXT: br label [[COND_END]], !dbg [[DBG57]] +// CHECK4-NEXT: br label [[COND_END]], !dbg [[META56]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG57]] -// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG57]] -// CHECK4-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG57]] -// CHECK4-NEXT: ret void, !dbg [[DBG59:![0-9]+]] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META56]] +// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META56]] +// CHECK4-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META56]] +// CHECK4-NEXT: ret void, !dbg [[DBG58:![0-9]+]] // // // CHECK4-LABEL: define {{[^@]+}}@__captured_stmt.1 -// CHECK4-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG61:![0-9]+]] { +// CHECK4-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG60:![0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: store ptr [[LOOPVAR]], ptr 
[[LOOPVAR_ADDR]], align 8 -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META69:![0-9]+]], metadata !DIExpression()), !dbg [[DBG70:![0-9]+]] +// CHECK4-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META68:![0-9]+]], !DIExpression(), [[META69:![0-9]+]]) // CHECK4-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META71:![0-9]+]], metadata !DIExpression()), !dbg [[DBG70]] +// CHECK4-NEXT: #dbg_declare(ptr [[LOGICAL_ADDR]], [[META70:![0-9]+]], !DIExpression(), [[META69]]) // CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META72:![0-9]+]], metadata !DIExpression()), !dbg [[DBG70]] +// CHECK4-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META71:![0-9]+]], !DIExpression(), [[META69]]) // CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG73:![0-9]+]] -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG73]] -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG75:![0-9]+]] -// CHECK4-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG75]] -// CHECK4-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG75]] -// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG75]] -// CHECK4-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG70]] -// CHECK4-NEXT: ret void, !dbg [[DBG73]] +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG72:![0-9]+]] +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG72]] +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG74:![0-9]+]] +// CHECK4-NEXT: [[MUL:%.*]] = mul i32 
1, [[TMP3]], !dbg [[DBG74]] +// CHECK4-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG74]] +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG74]] +// CHECK4-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META69]] +// CHECK4-NEXT: ret void, !dbg [[DBG72]] // // // CHECK4-LABEL: define {{[^@]+}}@_Z11inline_declv -// CHECK4-SAME: () #[[ATTR0]] !dbg [[DBG78:![0-9]+]] { +// CHECK4-SAME: () #[[ATTR0]] !dbg [[DBG77:![0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr }, align 8 // CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[RES:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META79:![0-9]+]], metadata !DIExpression()), !dbg [[DBG80:![0-9]+]] -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata ptr [[RES]], metadata [[META81:![0-9]+]], metadata !DIExpression()), !dbg [[DBG80]] -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB6:[0-9]+]]), !dbg [[DBG82:![0-9]+]] +// CHECK4-NEXT: #dbg_declare(ptr [[I]], [[META78:![0-9]+]], !DIExpression(), [[META79:![0-9]+]]) +// CHECK4-NEXT: #dbg_declare(ptr [[RES]], [[META80:![0-9]+]], !DIExpression(), [[META79]]) +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB6:[0-9]+]]), !dbg [[DBG81:![0-9]+]] // CHECK4-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK4: omp_parallel: // CHECK4-NEXT: [[GEP_I:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 0 // CHECK4-NEXT: store ptr [[I]], ptr [[GEP_I]], align 8 // CHECK4-NEXT: [[GEP_RES:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1 // CHECK4-NEXT: store ptr [[RES]], ptr [[GEP_RES]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB6]], i32 1, ptr @_Z11inline_declv..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG83:![0-9]+]] +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB6]], i32 1, ptr @_Z11inline_declv..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG82:![0-9]+]] // CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] // CHECK4: omp.par.outlined.exit: // CHECK4-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // CHECK4: omp.par.exit.split: -// CHECK4-NEXT: [[TMP0:%.*]] = load i32, ptr [[RES]], align 4, !dbg [[DBG85:![0-9]+]] -// CHECK4-NEXT: ret i32 [[TMP0]], !dbg [[DBG85]] +// CHECK4-NEXT: [[TMP0:%.*]] = load i32, ptr [[RES]], align 4, !dbg [[DBG84:![0-9]+]] +// CHECK4-NEXT: ret i32 [[TMP0]], !dbg [[DBG84]] // // // CHECK4-LABEL: define {{[^@]+}}@_Z11inline_declv..omp_par -// CHECK4-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG86:![0-9]+]] { +// CHECK4-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR1]] !dbg [[DBG85:![0-9]+]] { // CHECK4-NEXT: omp.par.entry: // CHECK4-NEXT: [[GEP_I:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 // CHECK4-NEXT: [[LOADGEP_I:%.*]] = load ptr, ptr [[GEP_I]], align 8 @@ -1076,75 +1076,75 @@ int inline_decl() { // CHECK4-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK4: omp.par.region: -// CHECK4-NEXT: store i32 0, ptr [[LOADGEP_I]], align 4, !dbg [[DBG87:![0-9]+]] -// CHECK4-NEXT: br label [[FOR_COND:%.*]], !dbg [[DBG87]] +// CHECK4-NEXT: store i32 0, ptr [[LOADGEP_I]], align 4, !dbg [[DBG86:![0-9]+]] +// CHECK4-NEXT: br label [[FOR_COND:%.*]], !dbg [[DBG86]] // CHECK4: for.cond: -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[LOADGEP_I]], align 4, !dbg [[DBG89:![0-9]+]] -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10, !dbg [[DBG89]] -// CHECK4-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]], !dbg [[DBG87]] +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[LOADGEP_I]], align 4, !dbg [[DBG88:![0-9]+]] +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 10, !dbg [[DBG88]] +// CHECK4-NEXT: 
br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]], !dbg [[DBG86]] // CHECK4: for.end: -// CHECK4-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG91:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG90:![0-9]+]] // CHECK4: omp.par.region.parallel.after: // CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK4: omp.par.pre_finalize: -// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG91]] +// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG90]] // CHECK4: for.body: -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata ptr [[K]], metadata [[META92:![0-9]+]], metadata !DIExpression()), !dbg [[DBG96:![0-9]+]] -// CHECK4-NEXT: store i32 0, ptr [[K]], align 4, !dbg [[DBG96]] -// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[DBG96]] -// CHECK4-NEXT: store ptr [[K]], ptr [[TMP3]], align 8, !dbg [[DBG96]] -// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr [[AGG_CAPTURED1]], i32 0, i32 0, !dbg [[DBG96]] -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG97:![0-9]+]] -// CHECK4-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4, !dbg [[DBG96]] -// CHECK4-NEXT: call void @__captured_stmt.2(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[DBG96]] -// CHECK4-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[DBG96]] -// CHECK4-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[DBG96]] +// CHECK4-NEXT: #dbg_declare(ptr [[K]], [[META91:![0-9]+]], !DIExpression(), [[META95:![0-9]+]]) +// CHECK4-NEXT: store i32 0, ptr [[K]], align 4, !dbg [[META95]] +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], ptr [[AGG_CAPTURED]], i32 0, i32 0, !dbg [[META95]] +// CHECK4-NEXT: store ptr [[K]], ptr [[TMP3]], align 8, !dbg [[META95]] +// CHECK4-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], ptr 
[[AGG_CAPTURED1]], i32 0, i32 0, !dbg [[META95]] +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[K]], align 4, !dbg [[DBG96:![0-9]+]] +// CHECK4-NEXT: store i32 [[TMP5]], ptr [[TMP4]], align 4, !dbg [[META95]] +// CHECK4-NEXT: call void @__captured_stmt.2(ptr [[DOTCOUNT_ADDR]], ptr [[AGG_CAPTURED]]), !dbg [[META95]] +// CHECK4-NEXT: [[DOTCOUNT:%.*]] = load i32, ptr [[DOTCOUNT_ADDR]], align 4, !dbg [[META95]] +// CHECK4-NEXT: br label [[OMP_LOOP_PREHEADER:%.*]], !dbg [[META95]] // CHECK4: omp_loop.preheader: -// CHECK4-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG96]] -// CHECK4-NEXT: [[TMP6:%.*]] = sub i32 [[DOTCOUNT]], 1, !dbg [[DBG96]] -// CHECK4-NEXT: store i32 [[TMP6]], ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG96]] -// CHECK4-NEXT: store i32 1, ptr [[P_STRIDE]], align 4, !dbg [[DBG96]] -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB8:[0-9]+]]), !dbg [[DBG96]] -// CHECK4-NEXT: call void @__kmpc_for_static_init_4u(ptr @[[GLOB8]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0), !dbg [[DBG96]] -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4, !dbg [[DBG96]] -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4, !dbg [[DBG96]] -// CHECK4-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], [[TMP7]], !dbg [[DBG96]] -// CHECK4-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 1, !dbg [[DBG96]] -// CHECK4-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[DBG96]] +// CHECK4-NEXT: store i32 0, ptr [[P_LOWERBOUND]], align 4, !dbg [[META95]] +// CHECK4-NEXT: [[TMP6:%.*]] = sub i32 [[DOTCOUNT]], 1, !dbg [[META95]] +// CHECK4-NEXT: store i32 [[TMP6]], ptr [[P_UPPERBOUND]], align 4, !dbg [[META95]] +// CHECK4-NEXT: store i32 1, ptr [[P_STRIDE]], align 4, !dbg [[META95]] +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB8:[0-9]+]]), !dbg [[META95]] +// CHECK4-NEXT: 
call void @__kmpc_for_static_init_4u(ptr @[[GLOB8]], i32 [[OMP_GLOBAL_THREAD_NUM2]], i32 34, ptr [[P_LASTITER]], ptr [[P_LOWERBOUND]], ptr [[P_UPPERBOUND]], ptr [[P_STRIDE]], i32 1, i32 0), !dbg [[META95]] +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[P_LOWERBOUND]], align 4, !dbg [[META95]] +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[P_UPPERBOUND]], align 4, !dbg [[META95]] +// CHECK4-NEXT: [[TMP9:%.*]] = sub i32 [[TMP8]], [[TMP7]], !dbg [[META95]] +// CHECK4-NEXT: [[TMP10:%.*]] = add i32 [[TMP9]], 1, !dbg [[META95]] +// CHECK4-NEXT: br label [[OMP_LOOP_HEADER:%.*]], !dbg [[META95]] // CHECK4: omp_loop.header: -// CHECK4-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[DBG96]] -// CHECK4-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[DBG96]] +// CHECK4-NEXT: [[OMP_LOOP_IV:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER]] ], [ [[OMP_LOOP_NEXT:%.*]], [[OMP_LOOP_INC:%.*]] ], !dbg [[META95]] +// CHECK4-NEXT: br label [[OMP_LOOP_COND:%.*]], !dbg [[META95]] // CHECK4: omp_loop.cond: -// CHECK4-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP10]], !dbg [[DBG96]] -// CHECK4-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG96]] +// CHECK4-NEXT: [[OMP_LOOP_CMP:%.*]] = icmp ult i32 [[OMP_LOOP_IV]], [[TMP10]], !dbg [[META95]] +// CHECK4-NEXT: br i1 [[OMP_LOOP_CMP]], label [[OMP_LOOP_BODY:%.*]], label [[OMP_LOOP_EXIT:%.*]], !dbg [[META95]] // CHECK4: omp_loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB8]], i32 [[OMP_GLOBAL_THREAD_NUM2]]), !dbg [[DBG96]] -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB8]]), !dbg [[DBG98:![0-9]+]] -// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB9:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM3]]), !dbg [[DBG98]] -// CHECK4-NEXT: br label [[OMP_LOOP_AFTER:%.*]], !dbg [[DBG96]] +// CHECK4-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB8]], 
i32 [[OMP_GLOBAL_THREAD_NUM2]]), !dbg [[META95]] +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB8]]), !dbg [[DBG97:![0-9]+]] +// CHECK4-NEXT: call void @__kmpc_barrier(ptr @[[GLOB9:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM3]]), !dbg [[DBG97]] +// CHECK4-NEXT: br label [[OMP_LOOP_AFTER:%.*]], !dbg [[META95]] // CHECK4: omp_loop.after: -// CHECK4-NEXT: br label [[FOR_INC:%.*]], !dbg [[DBG99:![0-9]+]] +// CHECK4-NEXT: br label [[FOR_INC:%.*]], !dbg [[DBG98:![0-9]+]] // CHECK4: for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[LOADGEP_I]], align 4, !dbg [[DBG89]] -// CHECK4-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP11]], 1, !dbg [[DBG89]] -// CHECK4-NEXT: store i32 [[INC4]], ptr [[LOADGEP_I]], align 4, !dbg [[DBG89]] -// CHECK4-NEXT: br label [[FOR_COND]], !dbg [[DBG89]], !llvm.loop [[LOOP100:![0-9]+]] +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, ptr [[LOADGEP_I]], align 4, !dbg [[DBG88]] +// CHECK4-NEXT: [[INC4:%.*]] = add nsw i32 [[TMP11]], 1, !dbg [[DBG88]] +// CHECK4-NEXT: store i32 [[INC4]], ptr [[LOADGEP_I]], align 4, !dbg [[DBG88]] +// CHECK4-NEXT: br label [[FOR_COND]], !dbg [[DBG88]], !llvm.loop [[LOOP99:![0-9]+]] // CHECK4: omp_loop.body: -// CHECK4-NEXT: [[TMP12:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP7]], !dbg [[DBG98]] -// CHECK4-NEXT: call void @__captured_stmt.3(ptr [[K]], i32 [[TMP12]], ptr [[AGG_CAPTURED1]]), !dbg [[DBG96]] -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[LOADGEP_RES]], align 4, !dbg [[DBG101:![0-9]+]] -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1, !dbg [[DBG101]] -// CHECK4-NEXT: store i32 [[INC]], ptr [[LOADGEP_RES]], align 4, !dbg [[DBG101]] -// CHECK4-NEXT: br label [[OMP_LOOP_INC]], !dbg [[DBG96]] +// CHECK4-NEXT: [[TMP12:%.*]] = add i32 [[OMP_LOOP_IV]], [[TMP7]], !dbg [[DBG97]] +// CHECK4-NEXT: call void @__captured_stmt.3(ptr [[K]], i32 [[TMP12]], ptr [[AGG_CAPTURED1]]), !dbg [[META95]] +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, ptr [[LOADGEP_RES]], align 4, !dbg 
[[DBG100:![0-9]+]] +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP13]], 1, !dbg [[DBG100]] +// CHECK4-NEXT: store i32 [[INC]], ptr [[LOADGEP_RES]], align 4, !dbg [[DBG100]] +// CHECK4-NEXT: br label [[OMP_LOOP_INC]], !dbg [[META95]] // CHECK4: omp_loop.inc: -// CHECK4-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[DBG96]] -// CHECK4-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[DBG96]] +// CHECK4-NEXT: [[OMP_LOOP_NEXT]] = add nuw i32 [[OMP_LOOP_IV]], 1, !dbg [[META95]] +// CHECK4-NEXT: br label [[OMP_LOOP_HEADER]], !dbg [[META95]] // CHECK4: omp.par.outlined.exit.exitStub: // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@__captured_stmt.2 -// CHECK4-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG103:![0-9]+]] { +// CHECK4-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[DISTANCE:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG102:![0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DISTANCE_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 @@ -1152,61 +1152,61 @@ int inline_decl() { // CHECK4-NEXT: [[DOTSTOP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[DOTSTEP:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store ptr [[DISTANCE]], ptr [[DISTANCE_ADDR]], align 8 -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata ptr [[DISTANCE_ADDR]], metadata [[META104:![0-9]+]], metadata !DIExpression()), !dbg [[DBG105:![0-9]+]] +// CHECK4-NEXT: #dbg_declare(ptr [[DISTANCE_ADDR]], [[META103:![0-9]+]], !DIExpression(), [[META104:![0-9]+]]) // CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META106:![0-9]+]], metadata !DIExpression()), !dbg [[DBG105]] +// CHECK4-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META105:![0-9]+]], !DIExpression(), [[META104]]) // CHECK4-NEXT: [[TMP0:%.*]] = load 
ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTART]], metadata [[META107:![0-9]+]], metadata !DIExpression()), !dbg [[DBG109:![0-9]+]] -// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG110:![0-9]+]] -// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG110]] -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG110]] -// CHECK4-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[DBG109]] -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTOP]], metadata [[META112:![0-9]+]], metadata !DIExpression()), !dbg [[DBG113:![0-9]+]] -// CHECK4-NEXT: store i32 5, ptr [[DOTSTOP]], align 4, !dbg [[DBG113]] -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTSTEP]], metadata [[META114:![0-9]+]], metadata !DIExpression()), !dbg [[DBG113]] -// CHECK4-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[DBG113]] -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG113]] -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG113]] -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[DBG113]] -// CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG113]] +// CHECK4-NEXT: #dbg_declare(ptr [[DOTSTART]], [[META106:![0-9]+]], !DIExpression(), [[META108:![0-9]+]]) +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG109:![0-9]+]] +// CHECK4-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG109]] +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG109]] +// CHECK4-NEXT: store i32 [[TMP3]], ptr [[DOTSTART]], align 4, !dbg [[META108]] +// CHECK4-NEXT: #dbg_declare(ptr [[DOTSTOP]], [[META111:![0-9]+]], !DIExpression(), [[META112:![0-9]+]]) +// CHECK4-NEXT: store i32 5, ptr [[DOTSTOP]], align 4, !dbg 
[[META112]] +// CHECK4-NEXT: #dbg_declare(ptr [[DOTSTEP]], [[META113:![0-9]+]], !DIExpression(), [[META112]]) +// CHECK4-NEXT: store i32 1, ptr [[DOTSTEP]], align 4, !dbg [[META112]] +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META112]] +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META112]] +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]], !dbg [[META112]] +// CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[META112]] // CHECK4: cond.true: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[DBG113]] -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[DBG113]] -// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[DBG113]] -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG113]] -// CHECK4-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[DBG113]] -// CHECK4-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[DBG113]] -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[DBG113]] -// CHECK4-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[DBG113]] -// CHECK4-NEXT: br label [[COND_END:%.*]], !dbg [[DBG113]] +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTSTOP]], align 4, !dbg [[META112]] +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTSTART]], align 4, !dbg [[META112]] +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], [[TMP7]], !dbg [[META112]] +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META112]] +// CHECK4-NEXT: [[SUB1:%.*]] = sub i32 [[TMP8]], 1, !dbg [[META112]] +// CHECK4-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]], !dbg [[META112]] +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTSTEP]], align 4, !dbg [[META112]] +// CHECK4-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], [[TMP9]], !dbg [[META112]] +// CHECK4-NEXT: br label [[COND_END:%.*]], !dbg [[META112]] // CHECK4: cond.false: -// CHECK4-NEXT: br 
label [[COND_END]], !dbg [[DBG113]] +// CHECK4-NEXT: br label [[COND_END]], !dbg [[META112]] // CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[DBG113]] -// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[DBG113]] -// CHECK4-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[DBG113]] -// CHECK4-NEXT: ret void, !dbg [[DBG115:![0-9]+]] +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[DIV]], [[COND_TRUE]] ], [ 0, [[COND_FALSE]] ], !dbg [[META112]] +// CHECK4-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DISTANCE_ADDR]], align 8, !dbg [[META112]] +// CHECK4-NEXT: store i32 [[COND]], ptr [[TMP10]], align 4, !dbg [[META112]] +// CHECK4-NEXT: ret void, !dbg [[DBG114:![0-9]+]] // // // CHECK4-LABEL: define {{[^@]+}}@__captured_stmt.3 -// CHECK4-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR4]] !dbg [[DBG117:![0-9]+]] { +// CHECK4-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[LOOPVAR:%.*]], i32 noundef [[LOGICAL:%.*]], ptr noalias noundef [[__CONTEXT:%.*]]) #[[ATTR3]] !dbg [[DBG116:![0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[LOOPVAR_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: [[LOGICAL_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[__CONTEXT_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: store ptr [[LOOPVAR]], ptr [[LOOPVAR_ADDR]], align 8 -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOOPVAR_ADDR]], metadata [[META118:![0-9]+]], metadata !DIExpression()), !dbg [[DBG119:![0-9]+]] +// CHECK4-NEXT: #dbg_declare(ptr [[LOOPVAR_ADDR]], [[META117:![0-9]+]], !DIExpression(), [[META118:![0-9]+]]) // CHECK4-NEXT: store i32 [[LOGICAL]], ptr [[LOGICAL_ADDR]], align 4 -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOGICAL_ADDR]], metadata [[META120:![0-9]+]], metadata !DIExpression()), !dbg [[DBG119]] +// CHECK4-NEXT: #dbg_declare(ptr 
[[LOGICAL_ADDR]], [[META119:![0-9]+]], !DIExpression(), [[META118]]) // CHECK4-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata ptr [[__CONTEXT_ADDR]], metadata [[META121:![0-9]+]], metadata !DIExpression()), !dbg [[DBG119]] +// CHECK4-NEXT: #dbg_declare(ptr [[__CONTEXT_ADDR]], [[META120:![0-9]+]], !DIExpression(), [[META118]]) // CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG122:![0-9]+]] -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG122]] -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG124:![0-9]+]] -// CHECK4-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG124]] -// CHECK4-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG124]] -// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG124]] -// CHECK4-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[DBG119]] -// CHECK4-NEXT: ret void, !dbg [[DBG122]] +// CHECK4-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG121:![0-9]+]] +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !dbg [[DBG121]] +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, ptr [[LOGICAL_ADDR]], align 4, !dbg [[DBG123:![0-9]+]] +// CHECK4-NEXT: [[MUL:%.*]] = mul i32 1, [[TMP3]], !dbg [[DBG123]] +// CHECK4-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[MUL]], !dbg [[DBG123]] +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[LOOPVAR_ADDR]], align 8, !dbg [[DBG123]] +// CHECK4-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4, !dbg [[META118]] +// CHECK4-NEXT: ret void, !dbg [[DBG121]] // diff --git a/clang/test/OpenMP/parallel_codegen.cpp b/clang/test/OpenMP/parallel_codegen.cpp index 9082f1c3232afc..41d43048d6a12b 100644 --- a/clang/test/OpenMP/parallel_codegen.cpp +++ 
b/clang/test/OpenMP/parallel_codegen.cpp @@ -115,7 +115,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP2]]) -// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr @global, align 4 // CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 @@ -123,7 +123,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: // CHECK1-NEXT: [[TMP4:%.*]] = landingpad { ptr, i32 } -// CHECK1-NEXT: catch ptr null +// CHECK1-NEXT: catch ptr null // CHECK1-NEXT: [[TMP5:%.*]] = extractvalue { ptr, i32 } [[TMP4]], 0 // CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP5]]) #[[ATTR6:[0-9]+]] // CHECK1-NEXT: unreachable @@ -186,7 +186,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP3]]) -// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4 // CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 @@ -194,7 +194,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: // CHECK1-NEXT: [[TMP5:%.*]] = landingpad { ptr, i32 } -// CHECK1-NEXT: catch ptr null +// CHECK1-NEXT: catch ptr null // CHECK1-NEXT: [[TMP6:%.*]] = extractvalue { ptr, i32 } [[TMP5]], 0 // CHECK1-NEXT: call void 
@__clang_call_terminate(ptr [[TMP6]]) #[[ATTR6]] // CHECK1-NEXT: unreachable @@ -233,7 +233,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 // CHECK1-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP2]]) -// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: // CHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr @global, align 4 // CHECK1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1 @@ -241,7 +241,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: // CHECK1-NEXT: [[TMP4:%.*]] = landingpad { ptr, i32 } -// CHECK1-NEXT: catch ptr null +// CHECK1-NEXT: catch ptr null // CHECK1-NEXT: [[TMP5:%.*]] = extractvalue { ptr, i32 } [[TMP4]], 0 // CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP5]]) #[[ATTR6]] // CHECK1-NEXT: unreachable @@ -278,7 +278,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP0]], align 8 // CHECK1-NEXT: invoke void @_Z3fooIPPcEvT_(ptr noundef [[TMP2]]) -// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] +// CHECK1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]] // CHECK1: invoke.cont: // CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = mul nsw i64 0, [[TMP1]] @@ -287,7 +287,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: ret void // CHECK1: terminate.lpad: // CHECK1-NEXT: [[TMP5:%.*]] = landingpad { ptr, i32 } -// CHECK1-NEXT: catch ptr null +// CHECK1-NEXT: catch ptr null // CHECK1-NEXT: [[TMP6:%.*]] = extractvalue { ptr, i32 } [[TMP5]], 0 // CHECK1-NEXT: call void @__clang_call_terminate(ptr [[TMP6]]) 
#[[ATTR6]] // CHECK1-NEXT: unreachable @@ -311,17 +311,17 @@ int main (int argc, char **argv) { // CHECK2-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[ARGC_ADDR]], metadata [[META18:![0-9]+]], metadata !DIExpression()), !dbg [[DBG19:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[ARGC_ADDR]], [[META18:![0-9]+]], !DIExpression(), [[META19:![0-9]+]]) // CHECK2-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[ARGV_ADDR]], metadata [[META20:![0-9]+]], metadata !DIExpression()), !dbg [[DBG21:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[ARGV_ADDR]], [[META20:![0-9]+]], !DIExpression(), [[META21:![0-9]+]]) // CHECK2-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !dbg [[DBG22:![0-9]+]] // CHECK2-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64, !dbg [[DBG23:![0-9]+]] // CHECK2-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0(), !dbg [[DBG23]] // CHECK2-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8, !dbg [[DBG23]] // CHECK2-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16, !dbg [[DBG23]] // CHECK2-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8, !dbg [[DBG23]] -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[__VLA_EXPR0]], metadata [[META24:![0-9]+]], metadata !DIExpression()), !dbg [[DBG26:![0-9]+]] -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA]], metadata [[META27:![0-9]+]], metadata !DIExpression()), !dbg [[DBG31:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[__VLA_EXPR0]], [[META24:![0-9]+]], !DIExpression(), [[META26:![0-9]+]]) +// CHECK2-NEXT: #dbg_declare(ptr [[VLA]], [[META27:![0-9]+]], !DIExpression(), [[META31:![0-9]+]]) // CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 2, ptr @main.omp_outlined, i64 [[TMP1]], ptr [[VLA]]), !dbg [[DBG32:![0-9]+]] // CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB5:[0-9]+]], i32 1, ptr @main.omp_outlined.2, i64 [[TMP1]]), !dbg [[DBG33:![0-9]+]] // CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB9:[0-9]+]], i32 2, ptr @main.omp_outlined.4, i64 [[TMP1]], ptr [[VLA]]), !dbg [[DBG34:![0-9]+]] @@ -335,26 +335,26 @@ int main (int argc, char **argv) { // // // CHECK2-LABEL: define {{[^@]+}}@main.omp_outlined_debug__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3:[0-9]+]] personality ptr @__gxx_personality_v0 !dbg [[DBG39:![0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2:[0-9]+]] personality ptr @__gxx_personality_v0 !dbg [[DBG39:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META47:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META47:![0-9]+]], !DIExpression(), [[META48:![0-9]+]]) // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META49:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48]] +// CHECK2-NEXT: 
#dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META49:![0-9]+]], !DIExpression(), [[META48]]) // CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META50:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48]] +// CHECK2-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META50:![0-9]+]], !DIExpression(), [[META48]]) // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META51:![0-9]+]], metadata !DIExpression()), !dbg [[DBG52:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META51:![0-9]+]], !DIExpression(), [[META52:![0-9]+]]) // CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG53:![0-9]+]] // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG53]] // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1, !dbg [[DBG54:![0-9]+]] // CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !dbg [[DBG54]] // CHECK2-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP2]]) -// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG53]] +// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG53]] // CHECK2: invoke.cont: // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr @global, align 4, !dbg [[DBG55:![0-9]+]] // CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1, !dbg [[DBG56:![0-9]+]] @@ -362,54 +362,54 @@ int main (int argc, char **argv) { // CHECK2-NEXT: ret void, !dbg [[DBG55]] // CHECK2: terminate.lpad: // CHECK2-NEXT: [[TMP4:%.*]] = landingpad { ptr, i32 } -// CHECK2-NEXT: catch ptr null, !dbg [[DBG53]] +// CHECK2-NEXT: catch ptr null, !dbg [[DBG53]] // CHECK2-NEXT: [[TMP5:%.*]] = extractvalue { ptr, i32 } [[TMP4]], 0, !dbg [[DBG53]] -// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP5]]) #[[ATTR7:[0-9]+]], !dbg [[DBG53]] 
+// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP5]]) #[[ATTR6:[0-9]+]], !dbg [[DBG53]] // CHECK2-NEXT: unreachable, !dbg [[DBG53]] // // // CHECK2-LABEL: define {{[^@]+}}@_Z3fooIiEvT_ -// CHECK2-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat !dbg [[DBG58:![0-9]+]] { +// CHECK2-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR3:[0-9]+]] comdat !dbg [[DBG58:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[ARGC_ADDR]], metadata [[META63:![0-9]+]], metadata !DIExpression()), !dbg [[DBG64:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[ARGC_ADDR]], [[META63:![0-9]+]], !DIExpression(), [[META64:![0-9]+]]) // CHECK2-NEXT: ret void, !dbg [[DBG65:![0-9]+]] // // // CHECK2-LABEL: define {{[^@]+}}@__clang_call_terminate -// CHECK2-SAME: (ptr noundef [[TMP0:%.*]]) #[[ATTR5:[0-9]+]] comdat { -// CHECK2-NEXT: [[TMP2:%.*]] = call ptr @__cxa_begin_catch(ptr [[TMP0]]) #[[ATTR6:[0-9]+]] -// CHECK2-NEXT: call void @_ZSt9terminatev() #[[ATTR7]] +// CHECK2-SAME: (ptr noundef [[TMP0:%.*]]) #[[ATTR4:[0-9]+]] comdat { +// CHECK2-NEXT: [[TMP2:%.*]] = call ptr @__cxa_begin_catch(ptr [[TMP0]]) #[[ATTR5:[0-9]+]] +// CHECK2-NEXT: call void @_ZSt9terminatev() #[[ATTR6]] // CHECK2-NEXT: unreachable // // // CHECK2-LABEL: define {{[^@]+}}@main.omp_outlined -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] !dbg [[DBG66:![0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] !dbg [[DBG66:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META67:![0-9]+]], metadata !DIExpression()), !dbg [[DBG68:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META67:![0-9]+]], !DIExpression(), [[META68:![0-9]+]]) // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META69:![0-9]+]], metadata !DIExpression()), !dbg [[DBG68]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META69:![0-9]+]], !DIExpression(), [[META68]]) // CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG68]] +// CHECK2-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META70:![0-9]+]], !DIExpression(), [[META68]]) // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META71:![0-9]+]], metadata !DIExpression()), !dbg [[DBG68]] +// CHECK2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META71:![0-9]+]], !DIExpression(), [[META68]]) // CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG72:![0-9]+]] // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG72]] // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG72]] // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG72]] // CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG72]] -// CHECK2-NEXT: call void @main.omp_outlined_debug__(ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP0]], ptr 
[[TMP4]]) #[[ATTR6]], !dbg [[DBG72]] +// CHECK2-NEXT: call void @main.omp_outlined_debug__(ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP0]], ptr [[TMP4]]) #[[ATTR5]], !dbg [[DBG72]] // CHECK2-NEXT: ret void, !dbg [[DBG72]] // // // CHECK2-LABEL: define {{[^@]+}}@main.omp_outlined_debug__.1 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR3]] !dbg [[DBG75:![0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR2]] !dbg [[DBG75:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -418,19 +418,19 @@ int main (int argc, char **argv) { // CHECK2-NEXT: [[SAVED_STACK:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META78:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META78:![0-9]+]], !DIExpression(), [[META79:![0-9]+]]) // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META80:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META80:![0-9]+]], !DIExpression(), [[META79]]) // CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META81:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]] +// CHECK2-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META81:![0-9]+]], !DIExpression(), [[META79]]) // CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr 
[[VLA_ADDR]], align 8, !dbg [[DBG82:![0-9]+]] -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[GLOBAL]], metadata [[META83:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]] +// CHECK2-NEXT: #dbg_declare(ptr [[GLOBAL]], [[META83:![0-9]+]], !DIExpression(), [[META79]]) // CHECK2-NEXT: [[TMP1:%.*]] = call ptr @llvm.stacksave.p0(), !dbg [[DBG82]] // CHECK2-NEXT: store ptr [[TMP1]], ptr [[SAVED_STACK]], align 8, !dbg [[DBG82]] // CHECK2-NEXT: [[VLA1:%.*]] = alloca i32, i64 [[TMP0]], align 16, !dbg [[DBG82]] // CHECK2-NEXT: store i64 [[TMP0]], ptr [[__VLA_EXPR0]], align 8, !dbg [[DBG82]] -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[__VLA_EXPR0]], metadata [[META84:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]] -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA1]], metadata [[META85:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]] +// CHECK2-NEXT: #dbg_declare(ptr [[__VLA_EXPR0]], [[META84:![0-9]+]], !DIExpression(), [[META79]]) +// CHECK2-NEXT: #dbg_declare(ptr [[VLA1]], [[META85:![0-9]+]], !DIExpression(), [[META79]]) // CHECK2-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3:[0-9]+]], i32 3, ptr @main.omp_outlined_debug__.1.omp_outlined, i64 [[TMP0]], ptr [[VLA1]], ptr [[GLOBAL]]), !dbg [[DBG82]] // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8, !dbg [[DBG86:![0-9]+]] // CHECK2-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP2]]), !dbg [[DBG86]] @@ -438,7 +438,7 @@ int main (int argc, char **argv) { // // // CHECK2-LABEL: define {{[^@]+}}@main.omp_outlined_debug__.1.omp_outlined_debug__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[GLOBAL:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 !dbg [[DBG89:![0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[GLOBAL:%.*]]) #[[ATTR2]] personality ptr @__gxx_personality_v0 !dbg [[DBG89:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -446,22 +446,22 @@ int main (int argc, char **argv) { // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[GLOBAL_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META92:![0-9]+]], metadata !DIExpression()), !dbg [[DBG93:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META92:![0-9]+]], !DIExpression(), [[META93:![0-9]+]]) // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META94:![0-9]+]], 
metadata !DIExpression()), !dbg [[DBG93]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META94:![0-9]+]], !DIExpression(), [[META93]]) // CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META95:![0-9]+]], metadata !DIExpression()), !dbg [[DBG93]] +// CHECK2-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META95:![0-9]+]], !DIExpression(), [[META93]]) // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META96:![0-9]+]], metadata !DIExpression()), !dbg [[DBG97:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META96:![0-9]+]], !DIExpression(), [[META97:![0-9]+]]) // CHECK2-NEXT: store ptr [[GLOBAL]], ptr [[GLOBAL_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[GLOBAL_ADDR]], metadata [[META98:![0-9]+]], metadata !DIExpression()), !dbg [[DBG99:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[GLOBAL_ADDR]], [[META98:![0-9]+]], !DIExpression(), [[META99:![0-9]+]]) // CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG100:![0-9]+]] // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG100]] // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ADDR]], align 8, !dbg [[DBG100]] // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1, !dbg [[DBG101:![0-9]+]] // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !dbg [[DBG101]] // CHECK2-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP3]]) -// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG100]] +// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG100]] // CHECK2: invoke.cont: // CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP2]], align 4, !dbg [[DBG102:![0-9]+]] // CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1, 
!dbg [[DBG103:![0-9]+]] @@ -469,14 +469,14 @@ int main (int argc, char **argv) { // CHECK2-NEXT: ret void, !dbg [[DBG102]] // CHECK2: terminate.lpad: // CHECK2-NEXT: [[TMP5:%.*]] = landingpad { ptr, i32 } -// CHECK2-NEXT: catch ptr null, !dbg [[DBG100]] +// CHECK2-NEXT: catch ptr null, !dbg [[DBG100]] // CHECK2-NEXT: [[TMP6:%.*]] = extractvalue { ptr, i32 } [[TMP5]], 0, !dbg [[DBG100]] -// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP6]]) #[[ATTR7]], !dbg [[DBG100]] +// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP6]]) #[[ATTR6]], !dbg [[DBG100]] // CHECK2-NEXT: unreachable, !dbg [[DBG100]] // // // CHECK2-LABEL: define {{[^@]+}}@main.omp_outlined_debug__.1.omp_outlined -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[GLOBAL:%.*]]) #[[ATTR3]] !dbg [[DBG105:![0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[GLOBAL:%.*]]) #[[ATTR2]] !dbg [[DBG105:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -484,15 +484,15 @@ int main (int argc, char **argv) { // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[GLOBAL_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META106:![0-9]+]], metadata !DIExpression()), !dbg [[DBG107:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META106:![0-9]+]], !DIExpression(), [[META107:![0-9]+]]) // CHECK2-NEXT: store ptr 
[[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META108:![0-9]+]], metadata !DIExpression()), !dbg [[DBG107]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META108:![0-9]+]], !DIExpression(), [[META107]]) // CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META109:![0-9]+]], metadata !DIExpression()), !dbg [[DBG107]] +// CHECK2-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META109:![0-9]+]], !DIExpression(), [[META107]]) // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META110:![0-9]+]], metadata !DIExpression()), !dbg [[DBG107]] +// CHECK2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META110:![0-9]+]], !DIExpression(), [[META107]]) // CHECK2-NEXT: store ptr [[GLOBAL]], ptr [[GLOBAL_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[GLOBAL_ADDR]], metadata [[META111:![0-9]+]], metadata !DIExpression()), !dbg [[DBG107]] +// CHECK2-NEXT: #dbg_declare(ptr [[GLOBAL_ADDR]], [[META111:![0-9]+]], !DIExpression(), [[META107]]) // CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG112:![0-9]+]] // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG112]] // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[GLOBAL_ADDR]], align 8, !dbg [[DBG112]] @@ -500,44 +500,44 @@ int main (int argc, char **argv) { // CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG112]] // CHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG112]] // CHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[GLOBAL_ADDR]], align 8, !dbg [[DBG112]] -// CHECK2-NEXT: call void @main.omp_outlined_debug__.1.omp_outlined_debug__(ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP0]], ptr [[TMP5]], ptr [[TMP6]]) #[[ATTR6]], !dbg [[DBG112]] +// 
CHECK2-NEXT: call void @main.omp_outlined_debug__.1.omp_outlined_debug__(ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP0]], ptr [[TMP5]], ptr [[TMP6]]) #[[ATTR5]], !dbg [[DBG112]] // CHECK2-NEXT: ret void, !dbg [[DBG112]] // // // CHECK2-LABEL: define {{[^@]+}}@main.omp_outlined.2 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR3]] !dbg [[DBG113:![0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR2]] !dbg [[DBG113:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META114:![0-9]+]], metadata !DIExpression()), !dbg [[DBG115:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META114:![0-9]+]], !DIExpression(), [[META115:![0-9]+]]) // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META116:![0-9]+]], metadata !DIExpression()), !dbg [[DBG115]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META116:![0-9]+]], !DIExpression(), [[META115]]) // CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META117:![0-9]+]], metadata !DIExpression()), !dbg [[DBG115]] +// CHECK2-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META117:![0-9]+]], !DIExpression(), [[META115]]) // CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG118:![0-9]+]] // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 
8, !dbg [[DBG118]] // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG118]] -// CHECK2-NEXT: call void @main.omp_outlined_debug__.1(ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP0]]) #[[ATTR6]], !dbg [[DBG118]] +// CHECK2-NEXT: call void @main.omp_outlined_debug__.1(ptr [[TMP1]], ptr [[TMP2]], i64 [[TMP0]]) #[[ATTR5]], !dbg [[DBG118]] // CHECK2-NEXT: ret void, !dbg [[DBG118]] // // // CHECK2-LABEL: define {{[^@]+}}@main.omp_outlined_debug__.3 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] !dbg [[DBG119:![0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] !dbg [[DBG119:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META120:![0-9]+]], metadata !DIExpression()), !dbg [[DBG121:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META120:![0-9]+]], !DIExpression(), [[META121:![0-9]+]]) // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META122:![0-9]+]], metadata !DIExpression()), !dbg [[DBG121]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META122:![0-9]+]], !DIExpression(), [[META121]]) // CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void 
@llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META123:![0-9]+]], metadata !DIExpression()), !dbg [[DBG121]] +// CHECK2-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META123:![0-9]+]], !DIExpression(), [[META121]]) // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META124:![0-9]+]], metadata !DIExpression()), !dbg [[DBG125:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META124:![0-9]+]], !DIExpression(), [[META125:![0-9]+]]) // CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG126:![0-9]+]] // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG126]] // CHECK2-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB7:[0-9]+]], i32 2, ptr @main.omp_outlined_debug__.3.omp_outlined, i64 [[TMP0]], ptr [[TMP1]]), !dbg [[DBG126]] @@ -545,26 +545,26 @@ int main (int argc, char **argv) { // // // CHECK2-LABEL: define {{[^@]+}}@main.omp_outlined_debug__.3.omp_outlined_debug__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 !dbg [[DBG128:![0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] personality ptr @__gxx_personality_v0 !dbg [[DBG128:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata 
[[META129:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META129:![0-9]+]], !DIExpression(), [[META130:![0-9]+]]) // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META131:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META131:![0-9]+]], !DIExpression(), [[META130]]) // CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META132:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130]] +// CHECK2-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META132:![0-9]+]], !DIExpression(), [[META130]]) // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META133:![0-9]+]], metadata !DIExpression()), !dbg [[DBG134:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META133:![0-9]+]], !DIExpression(), [[META134:![0-9]+]]) // CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG135:![0-9]+]] // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG135]] // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1, !dbg [[DBG136:![0-9]+]] // CHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !dbg [[DBG136]] // CHECK2-NEXT: invoke void @_Z3fooIiEvT_(i32 noundef [[TMP2]]) -// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG135]] +// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG135]] // CHECK2: invoke.cont: // CHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr @global, align 4, !dbg [[DBG137:![0-9]+]] // CHECK2-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i64 1, !dbg 
[[DBG138:![0-9]+]] @@ -572,66 +572,66 @@ int main (int argc, char **argv) { // CHECK2-NEXT: ret void, !dbg [[DBG137]] // CHECK2: terminate.lpad: // CHECK2-NEXT: [[TMP4:%.*]] = landingpad { ptr, i32 } -// CHECK2-NEXT: catch ptr null, !dbg [[DBG135]] +// CHECK2-NEXT: catch ptr null, !dbg [[DBG135]] // CHECK2-NEXT: [[TMP5:%.*]] = extractvalue { ptr, i32 } [[TMP4]], 0, !dbg [[DBG135]] -// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP5]]) #[[ATTR7]], !dbg [[DBG135]] +// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP5]]) #[[ATTR6]], !dbg [[DBG135]] // CHECK2-NEXT: unreachable, !dbg [[DBG135]] // // // CHECK2-LABEL: define {{[^@]+}}@main.omp_outlined_debug__.3.omp_outlined -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] !dbg [[DBG140:![0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR2]] !dbg [[DBG140:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META141:![0-9]+]], metadata !DIExpression()), !dbg [[DBG142:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META141:![0-9]+]], !DIExpression(), [[META142:![0-9]+]]) // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META143:![0-9]+]], metadata !DIExpression()), !dbg 
[[DBG142]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META143:![0-9]+]], !DIExpression(), [[META142]]) // CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG142]] +// CHECK2-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META144:![0-9]+]], !DIExpression(), [[META142]]) // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META145:![0-9]+]], metadata !DIExpression()), !dbg [[DBG142]] +// CHECK2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META145:![0-9]+]], !DIExpression(), [[META142]]) // CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG146:![0-9]+]] // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG146]] // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG146]] // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG146]] // CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG146]] -// CHECK2-NEXT: call void @main.omp_outlined_debug__.3.omp_outlined_debug__(ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP0]], ptr [[TMP4]]) #[[ATTR6]], !dbg [[DBG146]] +// CHECK2-NEXT: call void @main.omp_outlined_debug__.3.omp_outlined_debug__(ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP0]], ptr [[TMP4]]) #[[ATTR5]], !dbg [[DBG146]] // CHECK2-NEXT: ret void, !dbg [[DBG146]] // // // CHECK2-LABEL: define {{[^@]+}}@main.omp_outlined.4 -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR3]] !dbg [[DBG147:![0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], i64 noundef [[VLA:%.*]], ptr noundef nonnull align 4 dereferenceable(4) 
[[A:%.*]]) #[[ATTR2]] !dbg [[DBG147:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META148:![0-9]+]], metadata !DIExpression()), !dbg [[DBG149:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META148:![0-9]+]], !DIExpression(), [[META149:![0-9]+]]) // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META150:![0-9]+]], metadata !DIExpression()), !dbg [[DBG149]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META150:![0-9]+]], !DIExpression(), [[META149]]) // CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META151:![0-9]+]], metadata !DIExpression()), !dbg [[DBG149]] +// CHECK2-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META151:![0-9]+]], !DIExpression(), [[META149]]) // CHECK2-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META152:![0-9]+]], metadata !DIExpression()), !dbg [[DBG149]] +// CHECK2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META152:![0-9]+]], !DIExpression(), [[META149]]) // CHECK2-NEXT: [[TMP0:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG153:![0-9]+]] // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG153]] // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG153]] // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG153]] // 
CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG153]] -// CHECK2-NEXT: call void @main.omp_outlined_debug__.3(ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP0]], ptr [[TMP4]]) #[[ATTR6]], !dbg [[DBG153]] +// CHECK2-NEXT: call void @main.omp_outlined_debug__.3(ptr [[TMP2]], ptr [[TMP3]], i64 [[TMP0]], ptr [[TMP4]]) #[[ATTR5]], !dbg [[DBG153]] // CHECK2-NEXT: ret void, !dbg [[DBG153]] // // // CHECK2-LABEL: define {{[^@]+}}@_Z5tmainIPPcEiT_ -// CHECK2-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR4]] comdat !dbg [[DBG154:![0-9]+]] { +// CHECK2-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR3]] comdat !dbg [[DBG154:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[ARGC_ADDR]], metadata [[META159:![0-9]+]], metadata !DIExpression()), !dbg [[DBG160:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[ARGC_ADDR]], [[META159:![0-9]+]], !DIExpression(), [[META160:![0-9]+]]) // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8, !dbg [[DBG161:![0-9]+]] // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 0, !dbg [[DBG161]] // CHECK2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !dbg [[DBG161]] @@ -643,7 +643,7 @@ int main (int argc, char **argv) { // // // CHECK2-LABEL: define {{[^@]+}}@_Z5tmainIPPcEiT_.omp_outlined_debug__ -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR3]] personality ptr @__gxx_personality_v0 !dbg [[DBG165:![0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR2]] personality ptr @__gxx_personality_v0 !dbg [[DBG165:![0-9]+]] { // CHECK2-NEXT: 
entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -651,20 +651,20 @@ int main (int argc, char **argv) { // CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: [[VAR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META169:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META169:![0-9]+]], !DIExpression(), [[META170:![0-9]+]]) // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META171:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META171:![0-9]+]], !DIExpression(), [[META170]]) // CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[ARGC_ADDR]], metadata [[META172:![0-9]+]], metadata !DIExpression()), !dbg [[DBG173:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[ARGC_ADDR]], [[META172:![0-9]+]], !DIExpression(), [[META173:![0-9]+]]) // CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META174:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170]] +// CHECK2-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META174:![0-9]+]], !DIExpression(), [[META170]]) // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8, !dbg [[DBG175:![0-9]+]] // CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG175]] // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP0]], align 8, !dbg [[DBG176:![0-9]+]] // CHECK2-NEXT: invoke void @_Z3fooIPPcEvT_(ptr noundef [[TMP2]]) -// CHECK2-NEXT: to label 
[[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG178:![0-9]+]] +// CHECK2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG178:![0-9]+]] // CHECK2: invoke.cont: -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VAR]], metadata [[META179:![0-9]+]], metadata !DIExpression()), !dbg [[DBG186:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[VAR]], [[META179:![0-9]+]], !DIExpression(), [[META186:![0-9]+]]) // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[VAR]], align 8, !dbg [[DBG187:![0-9]+]] // CHECK2-NEXT: [[TMP4:%.*]] = mul nsw i64 0, [[TMP1]], !dbg [[DBG187]] // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 [[TMP4]], !dbg [[DBG187]] @@ -672,42 +672,42 @@ int main (int argc, char **argv) { // CHECK2-NEXT: ret void, !dbg [[DBG188:![0-9]+]] // CHECK2: terminate.lpad: // CHECK2-NEXT: [[TMP5:%.*]] = landingpad { ptr, i32 } -// CHECK2-NEXT: catch ptr null, !dbg [[DBG178]] +// CHECK2-NEXT: catch ptr null, !dbg [[DBG178]] // CHECK2-NEXT: [[TMP6:%.*]] = extractvalue { ptr, i32 } [[TMP5]], 0, !dbg [[DBG178]] -// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP6]]) #[[ATTR7]], !dbg [[DBG178]] +// CHECK2-NEXT: call void @__clang_call_terminate(ptr [[TMP6]]) #[[ATTR6]], !dbg [[DBG178]] // CHECK2-NEXT: unreachable, !dbg [[DBG178]] // // // CHECK2-LABEL: define {{[^@]+}}@_Z3fooIPPcEvT_ -// CHECK2-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR4]] comdat !dbg [[DBG189:![0-9]+]] { +// CHECK2-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR3]] comdat !dbg [[DBG189:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[ARGC_ADDR]], metadata [[META192:![0-9]+]], metadata !DIExpression()), !dbg [[DBG193:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[ARGC_ADDR]], [[META192:![0-9]+]], !DIExpression(), [[META193:![0-9]+]]) // CHECK2-NEXT: ret void, 
!dbg [[DBG194:![0-9]+]] // // // CHECK2-LABEL: define {{[^@]+}}@_Z5tmainIPPcEiT_.omp_outlined -// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR3]] !dbg [[DBG195:![0-9]+]] { +// CHECK2-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[ARGC:%.*]], i64 noundef [[VLA:%.*]]) #[[ATTR2]] !dbg [[DBG195:![0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 // CHECK2-NEXT: [[VLA_ADDR:%.*]] = alloca i64, align 8 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META196:![0-9]+]], metadata !DIExpression()), !dbg [[DBG197:![0-9]+]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META196:![0-9]+]], !DIExpression(), [[META197:![0-9]+]]) // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META198:![0-9]+]], metadata !DIExpression()), !dbg [[DBG197]] +// CHECK2-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META198:![0-9]+]], !DIExpression(), [[META197]]) // CHECK2-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[ARGC_ADDR]], metadata [[META199:![0-9]+]], metadata !DIExpression()), !dbg [[DBG197]] +// CHECK2-NEXT: #dbg_declare(ptr [[ARGC_ADDR]], [[META199:![0-9]+]], !DIExpression(), [[META197]]) // CHECK2-NEXT: store i64 [[VLA]], ptr [[VLA_ADDR]], align 8 -// CHECK2-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA_ADDR]], metadata [[META200:![0-9]+]], 
metadata !DIExpression()), !dbg [[DBG197]] +// CHECK2-NEXT: #dbg_declare(ptr [[VLA_ADDR]], [[META200:![0-9]+]], !DIExpression(), [[META197]]) // CHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8, !dbg [[DBG201:![0-9]+]] // CHECK2-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8, !dbg [[DBG201]] // CHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG201]] // CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG201]] // CHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8, !dbg [[DBG201]] -// CHECK2-NEXT: call void @_Z5tmainIPPcEiT_.omp_outlined_debug__(ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP1]]) #[[ATTR6]], !dbg [[DBG201]] +// CHECK2-NEXT: call void @_Z5tmainIPPcEiT_.omp_outlined_debug__(ptr [[TMP2]], ptr [[TMP3]], ptr [[TMP4]], i64 [[TMP1]]) #[[ATTR5]], !dbg [[DBG201]] // CHECK2-NEXT: ret void, !dbg [[DBG201]] // // @@ -812,7 +812,7 @@ int main (int argc, char **argv) { // // // CHECK3-LABEL: define {{[^@]+}}@_Z5tmainIPPcEiT_..omp_par -// CHECK3-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] +// CHECK3-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR6:[0-9]+]] { // CHECK3-NEXT: omp.par.entry: // CHECK3-NEXT: [[GEP__RELOADED:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 // CHECK3-NEXT: [[LOADGEP__RELOADED:%.*]] = load ptr, ptr [[GEP__RELOADED]], align 8 @@ -860,17 +860,17 @@ int main (int argc, char **argv) { // CHECK4-NEXT: [[__VLA_EXPR0:%.*]] = alloca i64, align 8 // CHECK4-NEXT: store i32 0, ptr [[RETVAL]], align 4 // CHECK4-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata ptr [[ARGC_ADDR]], metadata [[META18:![0-9]+]], metadata !DIExpression()), !dbg [[DBG19:![0-9]+]] +// CHECK4-NEXT: #dbg_declare(ptr [[ARGC_ADDR]], [[META18:![0-9]+]], !DIExpression(), [[META19:![0-9]+]]) // 
CHECK4-NEXT: store ptr [[ARGV]], ptr [[ARGV_ADDR]], align 8 -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata ptr [[ARGV_ADDR]], metadata [[META20:![0-9]+]], metadata !DIExpression()), !dbg [[DBG19]] +// CHECK4-NEXT: #dbg_declare(ptr [[ARGV_ADDR]], [[META20:![0-9]+]], !DIExpression(), [[META19]]) // CHECK4-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARGC_ADDR]], align 4, !dbg [[DBG21:![0-9]+]] // CHECK4-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64, !dbg [[DBG21]] // CHECK4-NEXT: [[TMP2:%.*]] = call ptr @llvm.stacksave.p0(), !dbg [[DBG21]] // CHECK4-NEXT: store ptr [[TMP2]], ptr [[SAVED_STACK]], align 8, !dbg [[DBG21]] // CHECK4-NEXT: [[VLA:%.*]] = alloca i32, i64 [[TMP1]], align 16, !dbg [[DBG21]] // CHECK4-NEXT: store i64 [[TMP1]], ptr [[__VLA_EXPR0]], align 8, !dbg [[DBG21]] -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata ptr [[__VLA_EXPR0]], metadata [[META22:![0-9]+]], metadata !DIExpression()), !dbg [[DBG24:![0-9]+]] -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata ptr [[VLA]], metadata [[META25:![0-9]+]], metadata !DIExpression()), !dbg [[DBG21]] +// CHECK4-NEXT: #dbg_declare(ptr [[__VLA_EXPR0]], [[META22:![0-9]+]], !DIExpression(), [[META24:![0-9]+]]) +// CHECK4-NEXT: #dbg_declare(ptr [[VLA]], [[META25:![0-9]+]], !DIExpression(), [[DBG21]]) // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]), !dbg [[DBG29:![0-9]+]] // CHECK4-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK4: omp_parallel: @@ -917,29 +917,29 @@ int main (int argc, char **argv) { // // // CHECK4-LABEL: define {{[^@]+}}@_Z3fooIiEvT_ -// CHECK4-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat !dbg [[DBG36:![0-9]+]] { +// CHECK4-SAME: (i32 noundef [[ARGC:%.*]]) #[[ATTR4:[0-9]+]] comdat !dbg [[DBG36:![0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store i32 [[ARGC]], ptr [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata ptr [[ARGC_ADDR]], metadata 
[[META41:![0-9]+]], metadata !DIExpression()), !dbg [[DBG42:![0-9]+]] -// CHECK4-NEXT: ret void, !dbg [[DBG42]] +// CHECK4-NEXT: #dbg_declare(ptr [[ARGC_ADDR]], [[META41:![0-9]+]], !DIExpression(), [[META42:![0-9]+]]) +// CHECK4-NEXT: ret void, !dbg [[META42]] // // // CHECK4-LABEL: define {{[^@]+}}@_Z5tmainIPPcEiT_ -// CHECK4-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR6:[0-9]+]] comdat !dbg [[DBG45:![0-9]+]] { +// CHECK4-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR5:[0-9]+]] comdat !dbg [[DBG43:![0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr }, align 8 // CHECK4-NEXT: [[DOTRELOADED:%.*]] = alloca i64, align 8 // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata ptr [[ARGC_ADDR]], metadata [[META50:![0-9]+]], metadata !DIExpression()), !dbg [[DBG51:![0-9]+]] -// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8, !dbg [[DBG52:![0-9]+]] -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 0, !dbg [[DBG52]] -// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !dbg [[DBG52]] -// CHECK4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 0, !dbg [[DBG52]] -// CHECK4-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1, !dbg [[DBG52]] -// CHECK4-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i64, !dbg [[DBG52]] -// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]), !dbg [[DBG53:![0-9]+]] +// CHECK4-NEXT: #dbg_declare(ptr [[ARGC_ADDR]], [[META48:![0-9]+]], !DIExpression(), [[META49:![0-9]+]]) +// CHECK4-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARGC_ADDR]], align 8, !dbg [[DBG50:![0-9]+]] +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i64 0, !dbg [[DBG50]] +// CHECK4-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARRAYIDX]], align 8, !dbg [[DBG50]] +// CHECK4-NEXT: 
[[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 0, !dbg [[DBG50]] +// CHECK4-NEXT: [[TMP2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1, !dbg [[DBG50]] +// CHECK4-NEXT: [[TMP3:%.*]] = zext i8 [[TMP2]] to i64, !dbg [[DBG50]] +// CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]), !dbg [[DBG51:![0-9]+]] // CHECK4-NEXT: store i64 [[TMP3]], ptr [[DOTRELOADED]], align 8 // CHECK4-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK4: omp_parallel: @@ -947,16 +947,16 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store ptr [[DOTRELOADED]], ptr [[GEP__RELOADED]], align 8 // CHECK4-NEXT: [[GEP_ARGC_ADDR:%.*]] = getelementptr { ptr, ptr }, ptr [[STRUCTARG]], i32 0, i32 1 // CHECK4-NEXT: store ptr [[ARGC_ADDR]], ptr [[GEP_ARGC_ADDR]], align 8 -// CHECK4-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @_Z5tmainIPPcEiT_..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG54:![0-9]+]] +// CHECK4-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB3]], i32 1, ptr @_Z5tmainIPPcEiT_..omp_par, ptr [[STRUCTARG]]), !dbg [[DBG52:![0-9]+]] // CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT:%.*]] // CHECK4: omp.par.outlined.exit: // CHECK4-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // CHECK4: omp.par.exit.split: -// CHECK4-NEXT: ret i32 0, !dbg [[DBG56:![0-9]+]] +// CHECK4-NEXT: ret i32 0, !dbg [[DBG54:![0-9]+]] // // // CHECK4-LABEL: define {{[^@]+}}@_Z5tmainIPPcEiT_..omp_par -// CHECK4-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] !dbg [[DBG57:![0-9]+]] { +// CHECK4-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]], ptr [[TMP0:%.*]]) #[[ATTR6:[0-9]+]] !dbg [[DBG55:![0-9]+]] { // CHECK4-NEXT: omp.par.entry: // CHECK4-NEXT: [[GEP__RELOADED:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 0 // CHECK4-NEXT: [[LOADGEP__RELOADED:%.*]] = load ptr, ptr [[GEP__RELOADED]], align 8 @@ -970,27 +970,27 @@ int main (int argc, char **argv) { // CHECK4-NEXT: [[TMP2:%.*]] = load i64, ptr [[LOADGEP__RELOADED]], align 8 // CHECK4-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK4: omp.par.region: -// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr [[LOADGEP_ARGC_ADDR]], align 8, !dbg [[DBG58:![0-9]+]] -// CHECK4-NEXT: call void @_Z3fooIPPcEvT_(ptr noundef [[TMP3]]), !dbg [[DBG58]] -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata ptr [[VAR]], metadata [[META60:![0-9]+]], metadata !DIExpression()), !dbg [[DBG67:![0-9]+]] -// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VAR]], align 8, !dbg [[DBG67]] -// CHECK4-NEXT: [[TMP5:%.*]] = mul nsw i64 0, [[TMP2]], !dbg [[DBG67]] -// CHECK4-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[TMP4]], i64 [[TMP5]], !dbg [[DBG67]] -// CHECK4-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX2]], i64 0, !dbg [[DBG67]] -// CHECK4-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG68:![0-9]+]] +// CHECK4-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[LOADGEP_ARGC_ADDR]], align 8, !dbg [[DBG56:![0-9]+]] +// CHECK4-NEXT: call void @_Z3fooIPPcEvT_(ptr noundef [[TMP3]]), !dbg [[DBG56]] +// CHECK4-NEXT: #dbg_declare(ptr [[VAR]], [[META58:![0-9]+]], !DIExpression(), [[META65:![0-9]+]]) +// CHECK4-NEXT: [[TMP4:%.*]] = load ptr, ptr [[VAR]], align 8, !dbg [[META65]] +// CHECK4-NEXT: [[TMP5:%.*]] = mul nsw i64 0, [[TMP2]], !dbg [[META65]] +// CHECK4-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[TMP4]], i64 [[TMP5]], !dbg [[META65]] +// CHECK4-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX2]], i64 0, !dbg [[META65]] +// CHECK4-NEXT: br label [[OMP_PAR_REGION_PARALLEL_AFTER:%.*]], !dbg [[DBG66:![0-9]+]] // CHECK4: omp.par.region.parallel.after: // CHECK4-NEXT: br label [[OMP_PAR_PRE_FINALIZE:%.*]] // CHECK4: omp.par.pre_finalize: -// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG68]] +// CHECK4-NEXT: br label [[OMP_PAR_OUTLINED_EXIT_EXITSTUB:%.*]], !dbg [[DBG66]] // CHECK4: omp.par.outlined.exit.exitStub: // CHECK4-NEXT: ret void // // // CHECK4-LABEL: define {{[^@]+}}@_Z3fooIPPcEvT_ -// CHECK4-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR5]] comdat !dbg [[DBG69:![0-9]+]] { +// CHECK4-SAME: (ptr noundef [[ARGC:%.*]]) #[[ATTR4]] comdat !dbg [[DBG69:![0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca ptr, align 8 // CHECK4-NEXT: store ptr [[ARGC]], ptr [[ARGC_ADDR]], align 8 -// CHECK4-NEXT: call void @llvm.dbg.declare(metadata ptr [[ARGC_ADDR]], metadata [[META72:![0-9]+]], metadata !DIExpression()), !dbg [[DBG73:![0-9]+]] -// CHECK4-NEXT: ret void, !dbg [[DBG73]] +// CHECK4-NEXT: #dbg_declare(ptr [[ARGC_ADDR]], [[META72:![0-9]+]], !DIExpression(), [[META73:![0-9]+]]) +// CHECK4-NEXT: ret void, !dbg [[META73]] // diff --git a/clang/test/OpenMP/target_parallel_debug_codegen.cpp b/clang/test/OpenMP/target_parallel_debug_codegen.cpp index e99c393a4021ca..7f5592841fa680 100644 --- a/clang/test/OpenMP/target_parallel_debug_codegen.cpp +++ 
b/clang/test/OpenMP/target_parallel_debug_codegen.cpp @@ -65,7 +65,7 @@ int main() { return 0; } // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_debug__ -// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG32:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG29:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8 @@ -78,52 +78,52 @@ int main() { // CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DYN_PTR_ADDR]], metadata [[META51:![0-9]+]], metadata !DIExpression()), !dbg [[DBG52:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META48:![0-9]+]], !DIExpression(), [[META49:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META53:![0-9]+]], metadata !DIExpression()), !dbg [[DBG54:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META50:![0-9]+]], !DIExpression(), [[META51:![0-9]+]]) // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META55:![0-9]+]], metadata !DIExpression()), !dbg [[DBG56:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META52:![0-9]+]], !DIExpression(), [[META53:![0-9]+]]) // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 
-// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META57:![0-9]+]], metadata !DIExpression()), !dbg [[DBG58:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META54:![0-9]+]], !DIExpression(), [[META55:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META59:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG61:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG61]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG61]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG61]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG61]] -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8, !dbg [[DBG61]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG61]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG61]] -// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG61]] -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG61]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG61]] -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG61]] -// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG61]] -// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG61]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META56:![0-9]+]], !DIExpression(), [[META57:![0-9]+]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG58:![0-9]+]] +// 
CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG58]] +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG58]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG58]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG58]] +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8, !dbg [[DBG58]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG58]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG58]] +// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG58]] +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG58]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG58]] +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG58]] +// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG58]] +// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG58]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG62:![0-9]+]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4, !dbg [[DBG62]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG62]] -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG62]] -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8, !dbg [[DBG62]] -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG62]] -// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP11]] to ptr, !dbg [[DBG62]] -// CHECK1-NEXT: 
store ptr [[TMP14]], ptr [[TMP13]], align 8, !dbg [[DBG62]] -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG62]] -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP15]], align 8, !dbg [[DBG62]] -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG62]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP16]], align 8, !dbg [[DBG62]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP9]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG62]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG63:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG65:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]), !dbg [[DBG59:![0-9]+]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG61:![0-9]+]] +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4, !dbg [[DBG61]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG61]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG61]] +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8, !dbg [[DBG61]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG61]] +// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP11]] to ptr, !dbg [[DBG61]] +// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !dbg [[DBG61]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG61]] +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP15]], align 8, !dbg [[DBG61]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg 
[[DBG61]] +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP16]], align 8, !dbg [[DBG61]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB3]], i32 [[TMP9]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG61]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG62:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG63:![0-9]+]] // CHECK1: worker.exit: -// CHECK1-NEXT: ret void, !dbg [[DBG61]] +// CHECK1-NEXT: ret void, !dbg [[DBG58]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_debug___omp_outlined_debug__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR2:[0-9]+]] !dbg [[DBG66:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR1:[0-9]+]] !dbg [[DBG64:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -140,83 +140,83 @@ int main() { // CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META73:![0-9]+]], metadata !DIExpression()), !dbg [[DBG74:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META71:![0-9]+]], !DIExpression(), [[META72:![0-9]+]]) // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// 
CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META75:![0-9]+]], metadata !DIExpression()), !dbg [[DBG74]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META73:![0-9]+]], !DIExpression(), [[META72]]) // CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META76:![0-9]+]], metadata !DIExpression()), !dbg [[DBG77:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META74:![0-9]+]], !DIExpression(), [[META75:![0-9]+]]) // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META78:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META76:![0-9]+]], !DIExpression(), [[META77:![0-9]+]]) // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META80:![0-9]+]], metadata !DIExpression()), !dbg [[DBG81:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META78:![0-9]+]], !DIExpression(), [[META79:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META82:![0-9]+]], metadata !DIExpression()), !dbg [[DBG83:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG84:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG84]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG84]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG84]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG84]] -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8, !dbg [[DBG84]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg 
[[DBG84]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG84]] -// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG84]] -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG84]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG84]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B3]], metadata [[META85:![0-9]+]], metadata !DIExpression()), !dbg [[DBG74]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B3]], ptr align 4 [[TMP4]], i64 400, i1 false), !dbg [[DBG84]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META86:![0-9]+]], metadata !DIExpression()), !dbg [[DBG89:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG90:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG90]] -// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX4]], i64 0, i64 1, !dbg [[DBG90]] -// CHECK1-NEXT: store ptr [[ARRAYIDX5]], ptr [[F]], align 8, !dbg [[DBG89]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META91:![0-9]+]], metadata !DIExpression()), !dbg [[DBG92:![0-9]+]] -// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[G]], align 8, !dbg [[DBG92]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META93:![0-9]+]], metadata !DIExpression()), !dbg [[DBG94:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B3]], i64 0, i64 1, !dbg [[DBG95:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX6]], i64 0, i64 1, !dbg [[DBG95]] -// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[H]], align 8, !dbg [[DBG94]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata 
[[META96:![0-9]+]], metadata !DIExpression()), !dbg [[DBG97:![0-9]+]] -// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG97]] -// CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG98:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B3]], i64 0, i64 0, !dbg [[DBG99:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG100:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP8]] to i64, !dbg [[DBG99]] -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX8]], i64 0, i64 [[IDXPROM]], !dbg [[DBG99]] -// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX9]], align 4, !dbg [[DBG101:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG102:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX10]], i64 0, i64 0, !dbg [[DBG102]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG103:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP9]] to i64, !dbg [[DBG102]] -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM12]], !dbg [[DBG102]] -// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX13]], align 4, !dbg [[DBG104:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG105:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX14]], i64 0, i64 0, !dbg [[DBG105]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG106:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP10]] to i64, !dbg [[DBG105]] -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX15]], i64 0, i64 [[IDXPROM16]], !dbg [[DBG105]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, 
ptr [[ARRAYIDX17]], align 4, !dbg [[DBG105]] -// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B3]], i64 0, i64 0, !dbg [[DBG107:![0-9]+]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG108:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP12]] to i64, !dbg [[DBG107]] -// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG107]] -// CHECK1-NEXT: store i32 [[TMP11]], ptr [[ARRAYIDX20]], align 4, !dbg [[DBG109:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B3]], i64 0, i64 0, !dbg [[DBG110:![0-9]+]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG111:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG110]] -// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG110]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX23]], align 4, !dbg [[DBG110]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i8, ptr [[TMP7]], align 1, !dbg [[DBG112:![0-9]+]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP15]] to i1, !dbg [[DBG112]] -// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG112]] -// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP14]], !dbg [[DBG112]] -// CHECK1-NEXT: [[TOBOOL24:%.*]] = icmp ne i32 [[OR]], 0, !dbg [[DBG112]] -// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL24]] to i8, !dbg [[DBG112]] -// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP7]], align 1, !dbg [[DBG112]] -// CHECK1-NEXT: ret void, !dbg [[DBG113:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META80:![0-9]+]], !DIExpression(), [[META81:![0-9]+]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG82:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG82]] 
+// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG82]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG82]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG82]] +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8, !dbg [[DBG82]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG82]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG82]] +// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG82]] +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG82]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG82]] +// CHECK1-NEXT: #dbg_declare(ptr [[B3]], [[META83:![0-9]+]], !DIExpression(), [[META72]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B3]], ptr align 4 [[TMP4]], i64 400, i1 false), !dbg [[DBG82]] +// CHECK1-NEXT: #dbg_declare(ptr [[F]], [[META84:![0-9]+]], !DIExpression(), [[META87:![0-9]+]]) +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG88:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG88]] +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX4]], i64 0, i64 1, !dbg [[DBG88]] +// CHECK1-NEXT: store ptr [[ARRAYIDX5]], ptr [[F]], align 8, !dbg [[META87]] +// CHECK1-NEXT: #dbg_declare(ptr [[G]], [[META89:![0-9]+]], !DIExpression(), [[META90:![0-9]+]]) +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[G]], align 8, !dbg [[META90]] +// CHECK1-NEXT: #dbg_declare(ptr [[H]], [[META91:![0-9]+]], !DIExpression(), [[META92:![0-9]+]]) +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B3]], i64 0, i64 1, !dbg [[DBG93:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x i32], ptr 
[[ARRAYIDX6]], i64 0, i64 1, !dbg [[DBG93]] +// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[H]], align 8, !dbg [[META92]] +// CHECK1-NEXT: #dbg_declare(ptr [[D]], [[META94:![0-9]+]], !DIExpression(), [[META95:![0-9]+]]) +// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[META95]] +// CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG96:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B3]], i64 0, i64 0, !dbg [[DBG97:![0-9]+]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG98:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP8]] to i64, !dbg [[DBG97]] +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX8]], i64 0, i64 [[IDXPROM]], !dbg [[DBG97]] +// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX9]], align 4, !dbg [[DBG99:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG100:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX10]], i64 0, i64 0, !dbg [[DBG100]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG101:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP9]] to i64, !dbg [[DBG100]] +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM12]], !dbg [[DBG100]] +// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX13]], align 4, !dbg [[DBG102:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG103:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX14]], i64 0, i64 0, !dbg [[DBG103]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG104:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP10]] to i64, !dbg [[DBG103]] +// CHECK1-NEXT: 
[[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX15]], i64 0, i64 [[IDXPROM16]], !dbg [[DBG103]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX17]], align 4, !dbg [[DBG103]] +// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B3]], i64 0, i64 0, !dbg [[DBG105:![0-9]+]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG106:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP12]] to i64, !dbg [[DBG105]] +// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG105]] +// CHECK1-NEXT: store i32 [[TMP11]], ptr [[ARRAYIDX20]], align 4, !dbg [[DBG107:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B3]], i64 0, i64 0, !dbg [[DBG108:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG109:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG108]] +// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG108]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX23]], align 4, !dbg [[DBG108]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i8, ptr [[TMP7]], align 1, !dbg [[DBG110:![0-9]+]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP15]] to i1, !dbg [[DBG110]] +// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG110]] +// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP14]], !dbg [[DBG110]] +// CHECK1-NEXT: [[TOBOOL24:%.*]] = icmp ne i32 [[OR]], 0, !dbg [[DBG110]] +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL24]] to i8, !dbg [[DBG110]] +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP7]], align 1, !dbg [[DBG110]] +// CHECK1-NEXT: ret void, !dbg [[DBG111:![0-9]+]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_debug___omp_outlined -// CHECK1-SAME: (ptr noalias 
noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR2]] !dbg [[DBG114:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR1]] !dbg [[DBG112:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -225,34 +225,34 @@ int main() { // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META121:![0-9]+]], metadata !DIExpression()), !dbg [[DBG122:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META119:![0-9]+]], !DIExpression(), [[META120:![0-9]+]]) // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META123:![0-9]+]], metadata !DIExpression()), !dbg [[DBG122]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META121:![0-9]+]], !DIExpression(), [[META120]]) // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META124:![0-9]+]], metadata !DIExpression()), !dbg [[DBG122]] +// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META122:![0-9]+]], !DIExpression(), [[META120]]) // CHECK1-NEXT: store i64 [[A]], ptr 
[[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META125:![0-9]+]], metadata !DIExpression()), !dbg [[DBG122]] +// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META123:![0-9]+]], !DIExpression(), [[META120]]) // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META126:![0-9]+]], metadata !DIExpression()), !dbg [[DBG122]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META124:![0-9]+]], !DIExpression(), [[META120]]) // CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META127:![0-9]+]], metadata !DIExpression()), !dbg [[DBG122]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG128:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG128]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG128]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG128]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG128]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG128]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG128]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG128]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG128]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG128]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG128]] -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_debug___omp_outlined_debug__(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr [[TMP7]], ptr addrspace(1) [[TMP10]]) #[[ATTR4:[0-9]+]], !dbg [[DBG128]] -// 
CHECK1-NEXT: ret void, !dbg [[DBG128]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META125:![0-9]+]], !DIExpression(), [[META120]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG126:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG126]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG126]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG126]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG126]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG126]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG126]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG126]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG126]] +// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG126]] +// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG126]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_debug___omp_outlined_debug__(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr [[TMP7]], ptr addrspace(1) [[TMP10]]) #[[ATTR3:[0-9]+]], !dbg [[DBG126]] +// CHECK1-NEXT: ret void, !dbg [[DBG126]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23 -// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR6:[0-9]+]] !dbg [[DBG129:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], 
ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5:[0-9]+]] !dbg [[DBG127:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 @@ -260,31 +260,31 @@ int main() { // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DYN_PTR_ADDR]], metadata [[META132:![0-9]+]], metadata !DIExpression()), !dbg [[DBG133:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META130:![0-9]+]], !DIExpression(), [[META131:![0-9]+]]) // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META134:![0-9]+]], metadata !DIExpression()), !dbg [[DBG133]] +// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META132:![0-9]+]], !DIExpression(), [[META131]]) // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META135:![0-9]+]], metadata !DIExpression()), !dbg [[DBG133]] +// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META133:![0-9]+]], !DIExpression(), [[META131]]) // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META136:![0-9]+]], metadata !DIExpression()), !dbg [[DBG133]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META134:![0-9]+]], !DIExpression(), [[META131]]) // CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META137:![0-9]+]], metadata !DIExpression()), !dbg [[DBG133]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG138:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG138]] -// 
CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG138]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8, !dbg [[DBG138]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG138]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG138]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG138]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG138]] -// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(1), !dbg [[DBG138]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG138]] -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_debug__(ptr [[TMP3]], ptr addrspace(1) [[TMP8]], i32 [[TMP5]], ptr [[TMP6]], ptr addrspace(1) [[TMP9]]) #[[ATTR4]], !dbg [[DBG138]] -// CHECK1-NEXT: ret void, !dbg [[DBG138]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META135:![0-9]+]], !DIExpression(), [[META131]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG136:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG136]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG136]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8, !dbg [[DBG136]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG136]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG136]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG136]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG136]] +// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(1), !dbg [[DBG136]] +// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG136]] +// CHECK1-NEXT: call void 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_debug__(ptr [[TMP3]], ptr addrspace(1) [[TMP8]], i32 [[TMP5]], ptr [[TMP6]], ptr addrspace(1) [[TMP9]]) #[[ATTR3]], !dbg [[DBG136]] +// CHECK1-NEXT: ret void, !dbg [[DBG136]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug__ -// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG139:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG137:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8 @@ -297,32 +297,32 @@ int main() { // CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DYN_PTR_ADDR]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG145:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META142:![0-9]+]], !DIExpression(), [[META143:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META146:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META144:![0-9]+]], !DIExpression(), [[META145:![0-9]+]]) // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META148:![0-9]+]], metadata !DIExpression()), !dbg 
[[DBG149:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META146:![0-9]+]], !DIExpression(), [[META147:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META150:![0-9]+]], metadata !DIExpression()), !dbg [[DBG151:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META148:![0-9]+]], !DIExpression(), [[META149:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META152:![0-9]+]], metadata !DIExpression()), !dbg [[DBG153:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG154:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG154]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG154]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG154]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG154]] -// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG154]] -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG154]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG154]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG154]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG154]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG154]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG154]] -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG154]] -// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG154]] -// 
CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG154]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META150:![0-9]+]], !DIExpression(), [[META151:![0-9]+]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG152:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG152]] +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG152]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG152]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG152]] +// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG152]] +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG152]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG152]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG152]] +// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG152]] +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG152]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG152]] +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG152]] +// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG152]] +// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG152]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB7:[0-9]+]]) +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB7:[0-9]+]]), !dbg [[DBG153:![0-9]+]] // CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG155:![0-9]+]] // CHECK1-NEXT: store i32 
[[TMP11]], ptr [[A_CASTED]], align 4, !dbg [[DBG155]] // CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG155]] @@ -337,13 +337,13 @@ int main() { // CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP17]], align 8, !dbg [[DBG155]] // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB7]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG155]] // CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG156:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG158:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG157:![0-9]+]] // CHECK1: worker.exit: -// CHECK1-NEXT: ret void, !dbg [[DBG154]] +// CHECK1-NEXT: ret void, !dbg [[DBG152]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug___omp_outlined_debug__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR2]] !dbg [[DBG159:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR1]] !dbg [[DBG158:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -359,74 +359,74 @@ int main() { // CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META162:![0-9]+]], metadata !DIExpression()), 
!dbg [[DBG163:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META161:![0-9]+]], !DIExpression(), [[META162:![0-9]+]]) // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META164:![0-9]+]], metadata !DIExpression()), !dbg [[DBG163]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META163:![0-9]+]], !DIExpression(), [[META162]]) // CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META165:![0-9]+]], metadata !DIExpression()), !dbg [[DBG166:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META164:![0-9]+]], !DIExpression(), [[META165:![0-9]+]]) // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META167:![0-9]+]], metadata !DIExpression()), !dbg [[DBG168:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META166:![0-9]+]], !DIExpression(), [[META167:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META169:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META168:![0-9]+]], !DIExpression(), [[META169:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META171:![0-9]+]], metadata !DIExpression()), !dbg [[DBG172:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG173:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG173]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG173]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[TMP]], align 8, !dbg [[DBG173]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG173]] -// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG173]] -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG173]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG173]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG173]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG173]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG173]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG173]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META174:![0-9]+]], metadata !DIExpression()), !dbg [[DBG176:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG177:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG177]] -// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX3]], i64 0, i64 1, !dbg [[DBG177]] -// CHECK1-NEXT: store ptr [[ARRAYIDX4]], ptr [[F]], align 8, !dbg [[DBG176]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META178:![0-9]+]], metadata !DIExpression()), !dbg [[DBG179:![0-9]+]] -// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[G]], align 8, !dbg [[DBG179]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META180:![0-9]+]], metadata !DIExpression()), !dbg [[DBG181:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 1, !dbg [[DBG182:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX5]], i64 0, i64 1, !dbg [[DBG182]] -// CHECK1-NEXT: store ptr 
[[ARRAYIDX6]], ptr [[H]], align 8, !dbg [[DBG181]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META183:![0-9]+]], metadata !DIExpression()), !dbg [[DBG184:![0-9]+]] -// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG184]] -// CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG185:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG186:![0-9]+]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG187:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64, !dbg [[DBG186]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 [[IDXPROM]], !dbg [[DBG186]] -// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX8]], align 4, !dbg [[DBG188:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG189:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX9]], i64 0, i64 0, !dbg [[DBG189]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG190:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP10]] to i64, !dbg [[DBG189]] -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX10]], i64 0, i64 [[IDXPROM11]], !dbg [[DBG189]] -// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG191:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG192:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG192]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG193:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP11]] to i64, !dbg [[DBG192]] -// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = 
getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG192]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG192]] -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG194:![0-9]+]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG195:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM18:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG194]] -// CHECK1-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX17]], i64 0, i64 [[IDXPROM18]], !dbg [[DBG194]] -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[ARRAYIDX19]], align 4, !dbg [[DBG196:![0-9]+]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i8, ptr [[TMP8]], align 1, !dbg [[DBG197:![0-9]+]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP14]] to i1, !dbg [[DBG197]] -// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG197]] -// CHECK1-NEXT: store i32 [[CONV]], ptr [[D]], align 4, !dbg [[DBG198:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG199:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META170:![0-9]+]], !DIExpression(), [[META171:![0-9]+]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG172:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG172]] +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG172]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG172]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG172]] +// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG172]] +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG172]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG172]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG172]] +// 
CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG172]] +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG172]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG172]] +// CHECK1-NEXT: #dbg_declare(ptr [[F]], [[META173:![0-9]+]], !DIExpression(), [[META175:![0-9]+]]) +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG176:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG176]] +// CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX3]], i64 0, i64 1, !dbg [[DBG176]] +// CHECK1-NEXT: store ptr [[ARRAYIDX4]], ptr [[F]], align 8, !dbg [[META175]] +// CHECK1-NEXT: #dbg_declare(ptr [[G]], [[META177:![0-9]+]], !DIExpression(), [[META178:![0-9]+]]) +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[G]], align 8, !dbg [[META178]] +// CHECK1-NEXT: #dbg_declare(ptr [[H]], [[META179:![0-9]+]], !DIExpression(), [[META180:![0-9]+]]) +// CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 1, !dbg [[DBG181:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX5]], i64 0, i64 1, !dbg [[DBG181]] +// CHECK1-NEXT: store ptr [[ARRAYIDX6]], ptr [[H]], align 8, !dbg [[META180]] +// CHECK1-NEXT: #dbg_declare(ptr [[D]], [[META182:![0-9]+]], !DIExpression(), [[META183:![0-9]+]]) +// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[META183]] +// CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG184:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG185:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG186:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64, !dbg [[DBG185]] +// CHECK1-NEXT: 
[[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 [[IDXPROM]], !dbg [[DBG185]] +// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX8]], align 4, !dbg [[DBG187:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG188:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX9]], i64 0, i64 0, !dbg [[DBG188]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG189:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP10]] to i64, !dbg [[DBG188]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX10]], i64 0, i64 [[IDXPROM11]], !dbg [[DBG188]] +// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG190:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG191:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG191]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG192:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP11]] to i64, !dbg [[DBG191]] +// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG191]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG191]] +// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG193:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG194:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM18:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG193]] +// CHECK1-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX17]], i64 0, i64 [[IDXPROM18]], !dbg [[DBG193]] +// CHECK1-NEXT: store i32 [[TMP12]], ptr 
[[ARRAYIDX19]], align 4, !dbg [[DBG195:![0-9]+]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i8, ptr [[TMP8]], align 1, !dbg [[DBG196:![0-9]+]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP14]] to i1, !dbg [[DBG196]] +// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG196]] +// CHECK1-NEXT: store i32 [[CONV]], ptr [[D]], align 4, !dbg [[DBG197:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG198:![0-9]+]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug___omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR2]] !dbg [[DBG200:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR1]] !dbg [[DBG199:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -435,35 +435,35 @@ int main() { // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META201:![0-9]+]], metadata !DIExpression()), !dbg [[DBG202:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META200:![0-9]+]], !DIExpression(), [[META201:![0-9]+]]) // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void 
@llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META203:![0-9]+]], metadata !DIExpression()), !dbg [[DBG202]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META202:![0-9]+]], !DIExpression(), [[META201]]) // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META204:![0-9]+]], metadata !DIExpression()), !dbg [[DBG202]] +// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META203:![0-9]+]], !DIExpression(), [[META201]]) // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META205:![0-9]+]], metadata !DIExpression()), !dbg [[DBG202]] +// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META204:![0-9]+]], !DIExpression(), [[META201]]) // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META206:![0-9]+]], metadata !DIExpression()), !dbg [[DBG202]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META205:![0-9]+]], !DIExpression(), [[META201]]) // CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META207:![0-9]+]], metadata !DIExpression()), !dbg [[DBG202]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG208:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG208]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG208]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG208]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG208]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG208]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG208]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[B_ADDR]], align 8, !dbg [[DBG208]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG208]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG208]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG208]] -// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG208]] -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug___omp_outlined_debug__(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]]) #[[ATTR4]], !dbg [[DBG208]] -// CHECK1-NEXT: ret void, !dbg [[DBG208]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META206:![0-9]+]], !DIExpression(), [[META201]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG207:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG207]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG207]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG207]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG207]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG207]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG207]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG207]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG207]] +// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG207]] +// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG207]] +// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG207]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug___omp_outlined_debug__(ptr 
[[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]]) #[[ATTR3]], !dbg [[DBG207]] +// CHECK1-NEXT: ret void, !dbg [[DBG207]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37 -// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR6]] !dbg [[DBG209:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5]] !dbg [[DBG208:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 @@ -471,32 +471,32 @@ int main() { // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DYN_PTR_ADDR]], metadata [[META210:![0-9]+]], metadata !DIExpression()), !dbg [[DBG211:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META209:![0-9]+]], !DIExpression(), [[META210:![0-9]+]]) // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META212:![0-9]+]], metadata !DIExpression()), !dbg [[DBG211]] +// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META211:![0-9]+]], !DIExpression(), [[META210]]) // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META213:![0-9]+]], metadata !DIExpression()), !dbg [[DBG211]] +// CHECK1-NEXT: 
#dbg_declare(ptr [[A_ADDR]], [[META212:![0-9]+]], !DIExpression(), [[META210]]) // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META214:![0-9]+]], metadata !DIExpression()), !dbg [[DBG211]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META213:![0-9]+]], !DIExpression(), [[META210]]) // CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META215:![0-9]+]], metadata !DIExpression()), !dbg [[DBG211]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG216:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG216]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG216]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8, !dbg [[DBG216]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG216]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG216]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG216]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG216]] -// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(1), !dbg [[DBG216]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG216]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG216]] -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug__(ptr [[TMP3]], ptr addrspace(1) [[TMP8]], i32 [[TMP5]], ptr addrspace(1) [[TMP9]], ptr addrspace(1) [[TMP10]]) #[[ATTR4]], !dbg [[DBG216]] -// CHECK1-NEXT: ret void, !dbg [[DBG216]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META214:![0-9]+]], !DIExpression(), [[META210]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg 
[[DBG215:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG215]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG215]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8, !dbg [[DBG215]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG215]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG215]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG215]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG215]] +// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(1), !dbg [[DBG215]] +// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG215]] +// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG215]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l37_debug__(ptr [[TMP3]], ptr addrspace(1) [[TMP8]], i32 [[TMP5]], ptr addrspace(1) [[TMP9]], ptr addrspace(1) [[TMP10]]) #[[ATTR3]], !dbg [[DBG215]] +// CHECK1-NEXT: ret void, !dbg [[DBG215]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug__ -// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG217:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG216:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8 @@ -509,53 +509,53 @@ int main() { // CHECK1-NEXT: 
[[_TMP3:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DYN_PTR_ADDR]], metadata [[META222:![0-9]+]], metadata !DIExpression()), !dbg [[DBG223:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META221:![0-9]+]], !DIExpression(), [[META222:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META224:![0-9]+]], metadata !DIExpression()), !dbg [[DBG225:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META223:![0-9]+]], !DIExpression(), [[META224:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META226:![0-9]+]], metadata !DIExpression()), !dbg [[DBG227:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META225:![0-9]+]], !DIExpression(), [[META226:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META228:![0-9]+]], metadata !DIExpression()), !dbg [[DBG229:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META227:![0-9]+]], !DIExpression(), [[META228:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META230:![0-9]+]], metadata !DIExpression()), !dbg [[DBG231:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG232:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG232]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG232]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg 
[[DBG232]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[A_ADDR]], align 8, !dbg [[DBG232]] -// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG232]] -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG232]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG232]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG232]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG232]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG232]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG232]] -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG232]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG232]] -// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG232]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG232]] -// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG232]] -// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG232]] -// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG232]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META229:![0-9]+]], !DIExpression(), [[META230:![0-9]+]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG231:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG231]] +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG231]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG231]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[A_ADDR]], align 8, !dbg [[DBG231]] 
+// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG231]] +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG231]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG231]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG231]] +// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG231]] +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG231]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG231]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG231]] +// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG231]] +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG231]] +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG231]] +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG231]] +// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG231]] +// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG231]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB11:[0-9]+]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG233:![0-9]+]] -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG233]] -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG233]] -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP15]], align 8, !dbg [[DBG233]] -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG233]] -// 
CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG233]] -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG233]] -// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG233]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB11]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG233]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG234:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB11:[0-9]+]]), !dbg [[DBG232:![0-9]+]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG234:![0-9]+]] +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG234]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG234]] +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP15]], align 8, !dbg [[DBG234]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG234]] +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG234]] +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG234]] +// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG234]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB11]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG234]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG235:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG236:![0-9]+]] // CHECK1: worker.exit: -// CHECK1-NEXT: ret void, !dbg [[DBG232]] +// CHECK1-NEXT: ret void, !dbg 
[[DBG231]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug___omp_outlined_debug__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR2]] !dbg [[DBG237:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR1]] !dbg [[DBG237:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -572,17 +572,17 @@ int main() { // CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META240:![0-9]+]], metadata !DIExpression()), !dbg [[DBG241:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META240:![0-9]+]], !DIExpression(), [[META241:![0-9]+]]) // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META242:![0-9]+]], metadata !DIExpression()), !dbg [[DBG241]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META242:![0-9]+]], !DIExpression(), [[META241]]) // CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META243:![0-9]+]], metadata !DIExpression()), !dbg [[DBG244:![0-9]+]] +// 
CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META243:![0-9]+]], !DIExpression(), [[META244:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META245:![0-9]+]], metadata !DIExpression()), !dbg [[DBG246:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META245:![0-9]+]], !DIExpression(), [[META246:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META247:![0-9]+]], metadata !DIExpression()), !dbg [[DBG248:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META247:![0-9]+]], !DIExpression(), [[META248:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META249:![0-9]+]], metadata !DIExpression()), !dbg [[DBG250:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META249:![0-9]+]], !DIExpression(), [[META250:![0-9]+]]) // CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG251:![0-9]+]] // CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG251]] // CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG251]] @@ -599,19 +599,19 @@ int main() { // CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG251]] // CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG251]] // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG251]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META252:![0-9]+]], metadata !DIExpression()), !dbg [[DBG254:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[F]], [[META252:![0-9]+]], !DIExpression(), [[META254:![0-9]+]]) // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 
1, !dbg [[DBG255:![0-9]+]] // CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG255]] // CHECK1-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX4]], i64 0, i64 1, !dbg [[DBG255]] -// CHECK1-NEXT: store ptr [[ARRAYIDX5]], ptr [[F]], align 8, !dbg [[DBG254]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META256:![0-9]+]], metadata !DIExpression()), !dbg [[DBG257:![0-9]+]] -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[G]], align 8, !dbg [[DBG257]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META258:![0-9]+]], metadata !DIExpression()), !dbg [[DBG259:![0-9]+]] +// CHECK1-NEXT: store ptr [[ARRAYIDX5]], ptr [[F]], align 8, !dbg [[META254]] +// CHECK1-NEXT: #dbg_declare(ptr [[G]], [[META256:![0-9]+]], !DIExpression(), [[META257:![0-9]+]]) +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[G]], align 8, !dbg [[META257]] +// CHECK1-NEXT: #dbg_declare(ptr [[H]], [[META258:![0-9]+]], !DIExpression(), [[META259:![0-9]+]]) // CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 1, !dbg [[DBG260:![0-9]+]] // CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX6]], i64 0, i64 1, !dbg [[DBG260]] -// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[H]], align 8, !dbg [[DBG259]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META261:![0-9]+]], metadata !DIExpression()), !dbg [[DBG262:![0-9]+]] -// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG262]] +// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[H]], align 8, !dbg [[META259]] +// CHECK1-NEXT: #dbg_declare(ptr [[D]], [[META261:![0-9]+]], !DIExpression(), [[META262:![0-9]+]]) +// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[META262]] // CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4, !dbg [[DBG263:![0-9]+]] // CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x 
[10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG264:![0-9]+]] // CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG265:![0-9]+]] @@ -647,7 +647,7 @@ int main() { // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug___omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR2]] !dbg [[DBG279:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR1]] !dbg [[DBG279:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -656,17 +656,17 @@ int main() { // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META282:![0-9]+]], metadata !DIExpression()), !dbg [[DBG283:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META282:![0-9]+]], !DIExpression(), [[META283:![0-9]+]]) // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META284:![0-9]+]], metadata !DIExpression()), !dbg [[DBG283]] +// CHECK1-NEXT: #dbg_declare(ptr 
[[DOTBOUND_TID__ADDR]], [[META284:![0-9]+]], !DIExpression(), [[META283]]) // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META285:![0-9]+]], metadata !DIExpression()), !dbg [[DBG283]] +// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META285:![0-9]+]], !DIExpression(), [[META283]]) // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META286:![0-9]+]], metadata !DIExpression()), !dbg [[DBG283]] +// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META286:![0-9]+]], !DIExpression(), [[META283]]) // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META287:![0-9]+]], metadata !DIExpression()), !dbg [[DBG283]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META287:![0-9]+]], !DIExpression(), [[META283]]) // CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META288:![0-9]+]], metadata !DIExpression()), !dbg [[DBG283]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META288:![0-9]+]], !DIExpression(), [[META283]]) // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG289:![0-9]+]] // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG289]] // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG289]] @@ -681,12 +681,12 @@ int main() { // CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG289]] // CHECK1-NEXT: [[TMP12:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG289]] // CHECK1-NEXT: [[TMP13:%.*]] = addrspacecast ptr [[TMP9]] to ptr addrspace(1), !dbg [[DBG289]] -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug___omp_outlined_debug__(ptr [[TMP4]], ptr [[TMP5]], ptr addrspace(1) 
[[TMP10]], ptr addrspace(1) [[TMP11]], ptr addrspace(1) [[TMP12]], ptr addrspace(1) [[TMP13]]) #[[ATTR4]], !dbg [[DBG289]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug___omp_outlined_debug__(ptr [[TMP4]], ptr [[TMP5]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]], ptr addrspace(1) [[TMP12]], ptr addrspace(1) [[TMP13]]) #[[ATTR3]], !dbg [[DBG289]] // CHECK1-NEXT: ret void, !dbg [[DBG289]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51 -// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR6]] !dbg [[DBG290:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5]] !dbg [[DBG290:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 @@ -694,15 +694,15 @@ int main() { // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DYN_PTR_ADDR]], metadata [[META293:![0-9]+]], metadata !DIExpression()), !dbg [[DBG294:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META293:![0-9]+]], !DIExpression(), [[META294:![0-9]+]]) // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META295:![0-9]+]], metadata !DIExpression()), !dbg 
[[DBG294]] +// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META295:![0-9]+]], !DIExpression(), [[META294]]) // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META296:![0-9]+]], metadata !DIExpression()), !dbg [[DBG294]] +// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META296:![0-9]+]], !DIExpression(), [[META294]]) // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META297:![0-9]+]], metadata !DIExpression()), !dbg [[DBG294]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META297:![0-9]+]], !DIExpression(), [[META294]]) // CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META298:![0-9]+]], metadata !DIExpression()), !dbg [[DBG294]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META298:![0-9]+]], !DIExpression(), [[META294]]) // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG299:![0-9]+]] // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG299]] // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG299]] @@ -716,6 +716,6 @@ int main() { // CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG299]] // CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG299]] // CHECK1-NEXT: [[TMP12:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG299]] -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug__(ptr [[TMP4]], ptr addrspace(1) [[TMP9]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]], ptr addrspace(1) [[TMP12]]) #[[ATTR4]], !dbg [[DBG299]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l51_debug__(ptr [[TMP4]], ptr addrspace(1) [[TMP9]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]], 
ptr addrspace(1) [[TMP12]]) #[[ATTR3]], !dbg [[DBG299]] // CHECK1-NEXT: ret void, !dbg [[DBG299]] // diff --git a/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp b/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp index 688456a6477647..e27cc0d536269d 100644 --- a/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_debug_codegen.cpp @@ -55,7 +55,7 @@ int main() { return 0; } // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug__ -// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]], i1 noundef zeroext [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG22:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]], i1 noundef zeroext [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG19:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8 @@ -69,58 +69,58 @@ int main() { // CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DYN_PTR_ADDR]], metadata [[META43:![0-9]+]], metadata !DIExpression()), !dbg [[DBG44:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META40:![0-9]+]], !DIExpression(), [[META41:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META45:![0-9]+]], metadata !DIExpression()), !dbg [[DBG46:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr 
[[C_ADDR]], [[META42:![0-9]+]], !DIExpression(), [[META43:![0-9]+]]) // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META47:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META44:![0-9]+]], !DIExpression(), [[META45:![0-9]+]]) // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META49:![0-9]+]], metadata !DIExpression()), !dbg [[DBG50:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META46:![0-9]+]], !DIExpression(), [[META47:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META51:![0-9]+]], metadata !DIExpression()), !dbg [[DBG52:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META48:![0-9]+]], !DIExpression(), [[META49:![0-9]+]]) // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[DOTCAPTURE_EXPR_]] to i8 // CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTCAPTURE_EXPR__ADDR]], metadata [[META53:![0-9]+]], metadata !DIExpression()), !dbg [[DBG54:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG55:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG55]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG55]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG55]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG55]] -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8, !dbg [[DBG55]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG55]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, 
!dbg [[DBG55]] -// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG55]] -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG55]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG55]] -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG55]] -// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG55]] -// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG55]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTCAPTURE_EXPR__ADDR]], [[META50:![0-9]+]], !DIExpression(), [[META51:![0-9]+]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG52:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG52]] +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG52]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG52]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG52]] +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8, !dbg [[DBG52]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG52]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG52]] +// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG52]] +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG52]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG52]] +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG52]] +// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG52]] +// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label 
[[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG52]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB6:[0-9]+]]) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG56:![0-9]+]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4, !dbg [[DBG56]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG56]] -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG56]] -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8, !dbg [[DBG56]] -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG56]] -// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP11]] to ptr, !dbg [[DBG56]] -// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !dbg [[DBG56]] -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG56]] -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP15]], align 8, !dbg [[DBG56]] -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG56]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP16]], align 8, !dbg [[DBG56]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !dbg [[DBG57:![0-9]+]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP17]] to i1, !dbg [[DBG57]] -// CHECK1-NEXT: [[TMP18:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG56]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB6]], i32 [[TMP9]], i32 [[TMP18]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG56]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG59:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG60:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 
@__kmpc_global_thread_num(ptr @[[GLOB6:[0-9]+]]), !dbg [[DBG53:![0-9]+]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG55:![0-9]+]] +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4, !dbg [[DBG55]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG55]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG55]] +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8, !dbg [[DBG55]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG55]] +// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP11]] to ptr, !dbg [[DBG55]] +// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !dbg [[DBG55]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG55]] +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP15]], align 8, !dbg [[DBG55]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG55]] +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP16]], align 8, !dbg [[DBG55]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !dbg [[DBG56:![0-9]+]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP17]] to i1, !dbg [[DBG56]] +// CHECK1-NEXT: [[TMP18:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG55]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB6]], i32 [[TMP9]], i32 [[TMP18]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG55]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG57:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG58:![0-9]+]] // CHECK1: worker.exit: -// CHECK1-NEXT: ret void, !dbg [[DBG55]] +// CHECK1-NEXT: ret void, !dbg [[DBG52]] // // // CHECK1-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined_debug__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR2:[0-9]+]] !dbg [[DBG61:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR1:[0-9]+]] !dbg [[DBG59:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -144,149 +144,149 @@ int main() { // CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META68:![0-9]+]], metadata !DIExpression()), !dbg [[DBG69:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META66:![0-9]+]], !DIExpression(), [[META67:![0-9]+]]) // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG69]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META68:![0-9]+]], !DIExpression(), [[META67]]) // CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META71:![0-9]+]], metadata !DIExpression()), !dbg [[DBG72:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META69:![0-9]+]], !DIExpression(), [[META70:![0-9]+]]) // CHECK1-NEXT: 
store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META73:![0-9]+]], metadata !DIExpression()), !dbg [[DBG74:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META71:![0-9]+]], !DIExpression(), [[META72:![0-9]+]]) // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META75:![0-9]+]], metadata !DIExpression()), !dbg [[DBG76:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META73:![0-9]+]], !DIExpression(), [[META74:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META77:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG79:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG79]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG79]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG79]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG79]] -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8, !dbg [[DBG79]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG79]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG79]] -// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG79]] -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG79]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG79]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IV]], metadata [[META80:![0-9]+]], metadata !DIExpression()), !dbg [[DBG69]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_LB]], metadata 
[[META81:![0-9]+]], metadata !DIExpression()), !dbg [[DBG69]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG82:![0-9]+]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_UB]], metadata [[META83:![0-9]+]], metadata !DIExpression()), !dbg [[DBG69]] -// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_STRIDE]], metadata [[META84:![0-9]+]], metadata !DIExpression()), !dbg [[DBG69]] -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IS_LAST]], metadata [[META85:![0-9]+]], metadata !DIExpression()), !dbg [[DBG69]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B4]], metadata [[META86:![0-9]+]], metadata !DIExpression()), !dbg [[DBG69]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B4]], ptr align 4 [[TMP4]], i64 400, i1 false), !dbg [[DBG79]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META87:![0-9]+]], metadata !DIExpression()), !dbg [[DBG69]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG79]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !dbg [[DBG79]] -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG88:![0-9]+]] -// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG79]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META75:![0-9]+]], !DIExpression(), [[META76:![0-9]+]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG77:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG77]] +// CHECK1-NEXT: 
store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG77]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG77]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG77]] +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8, !dbg [[DBG77]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG77]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG77]] +// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG77]] +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG77]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG77]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_IV]], [[META78:![0-9]+]], !DIExpression(), [[META67]]) +// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_LB]], [[META79:![0-9]+]], !DIExpression(), [[META67]]) +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG80:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_UB]], [[META81:![0-9]+]], !DIExpression(), [[META67]]) +// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG80]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_STRIDE]], [[META82:![0-9]+]], !DIExpression(), [[META67]]) +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG80]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_IS_LAST]], [[META83:![0-9]+]], !DIExpression(), [[META67]]) +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG80]] +// CHECK1-NEXT: #dbg_declare(ptr [[B4]], [[META84:![0-9]+]], !DIExpression(), [[META67]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B4]], ptr align 4 [[TMP4]], i64 400, i1 false), !dbg [[DBG77]] +// CHECK1-NEXT: #dbg_declare(ptr [[I]], [[META85:![0-9]+]], !DIExpression(), [[META67]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG77]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, 
!dbg [[DBG77]] +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG86:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG77]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9, !dbg [[DBG82]] -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG82]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG80]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9, !dbg [[DBG80]] +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG80]] // CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG82]] +// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG80]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG82]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG80]] +// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG80]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ], !dbg [[DBG82]] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]], !dbg [[DBG79]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label 
[[OMP_DISPATCH_END:%.*]], !dbg [[DBG79]] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ], !dbg [[DBG80]] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG80]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG80]] +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG80]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG80]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG80]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]], !dbg [[DBG77]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG77]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG79]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG77]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]], !dbg [[DBG79]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG79]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG80]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG80]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]], !dbg [[DBG77]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG77]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1, !dbg [[DBG89:![0-9]+]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG89]] -// CHECK1-NEXT: store i32 
[[ADD]], ptr [[I]], align 4, !dbg [[DBG89]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META90:![0-9]+]], metadata !DIExpression()), !dbg [[DBG93:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG94:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG94]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 1, !dbg [[DBG94]] -// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[F]], align 8, !dbg [[DBG93]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META95:![0-9]+]], metadata !DIExpression()), !dbg [[DBG96:![0-9]+]] -// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[G]], align 8, !dbg [[DBG96]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META97:![0-9]+]], metadata !DIExpression()), !dbg [[DBG98:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 1, !dbg [[DBG99:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX9]], i64 0, i64 1, !dbg [[DBG99]] -// CHECK1-NEXT: store ptr [[ARRAYIDX10]], ptr [[H]], align 8, !dbg [[DBG98]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META100:![0-9]+]], metadata !DIExpression()), !dbg [[DBG101:![0-9]+]] -// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG101]] -// CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG102:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG103:![0-9]+]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG104:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64, !dbg [[DBG103]] -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x 
i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM]], !dbg [[DBG103]] -// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG105:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG106:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG106]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG107:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG106]] -// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG106]] -// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG108:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG109:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX17]], i64 0, i64 0, !dbg [[DBG109]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG110:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG109]] -// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG109]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4, !dbg [[DBG109]] -// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG111:![0-9]+]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG112:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG111]] -// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG111]] -// CHECK1-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX23]], align 4, !dbg 
[[DBG113:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG114:![0-9]+]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG115:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM25:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG114]] -// CHECK1-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX24]], i64 0, i64 [[IDXPROM25]], !dbg [[DBG114]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX26]], align 4, !dbg [[DBG114]] -// CHECK1-NEXT: [[TMP25:%.*]] = load i8, ptr [[TMP7]], align 1, !dbg [[DBG116:![0-9]+]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP25]] to i1, !dbg [[DBG116]] -// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG116]] -// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP24]], !dbg [[DBG116]] -// CHECK1-NEXT: [[TOBOOL27:%.*]] = icmp ne i32 [[OR]], 0, !dbg [[DBG116]] -// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL27]] to i8, !dbg [[DBG116]] -// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP7]], align 1, !dbg [[DBG116]] -// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG117:![0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG80]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1, !dbg [[DBG87:![0-9]+]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG87]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG87]] +// CHECK1-NEXT: #dbg_declare(ptr [[F]], [[META88:![0-9]+]], !DIExpression(), [[META91:![0-9]+]]) +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG92:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG92]] +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 1, !dbg [[DBG92]] +// CHECK1-NEXT: store ptr [[ARRAYIDX8]], 
ptr [[F]], align 8, !dbg [[META91]] +// CHECK1-NEXT: #dbg_declare(ptr [[G]], [[META93:![0-9]+]], !DIExpression(), [[META94:![0-9]+]]) +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[G]], align 8, !dbg [[META94]] +// CHECK1-NEXT: #dbg_declare(ptr [[H]], [[META95:![0-9]+]], !DIExpression(), [[META96:![0-9]+]]) +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 1, !dbg [[DBG97:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX9]], i64 0, i64 1, !dbg [[DBG97]] +// CHECK1-NEXT: store ptr [[ARRAYIDX10]], ptr [[H]], align 8, !dbg [[META96]] +// CHECK1-NEXT: #dbg_declare(ptr [[D]], [[META98:![0-9]+]], !DIExpression(), [[META99:![0-9]+]]) +// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[META99]] +// CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG100:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG101:![0-9]+]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG102:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64, !dbg [[DBG101]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM]], !dbg [[DBG101]] +// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG103:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG104:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG104]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG105:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG104]] +// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG104]] +// CHECK1-NEXT: store i32 
11, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG106:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG107:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX17]], i64 0, i64 0, !dbg [[DBG107]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG108:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG107]] +// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG107]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4, !dbg [[DBG107]] +// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG109:![0-9]+]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG110:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG109]] +// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG109]] +// CHECK1-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX23]], align 4, !dbg [[DBG111:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG112:![0-9]+]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG113:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM25:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG112]] +// CHECK1-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX24]], i64 0, i64 [[IDXPROM25]], !dbg [[DBG112]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX26]], align 4, !dbg [[DBG112]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i8, ptr [[TMP7]], align 1, !dbg [[DBG114:![0-9]+]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP25]] to i1, !dbg [[DBG114]] +// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, 
!dbg [[DBG114]] +// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP24]], !dbg [[DBG114]] +// CHECK1-NEXT: [[TOBOOL27:%.*]] = icmp ne i32 [[OR]], 0, !dbg [[DBG114]] +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL27]] to i8, !dbg [[DBG114]] +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP7]], align 1, !dbg [[DBG114]] +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG115:![0-9]+]] // CHECK1: omp.body.continue: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG88]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG86]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP26]], 1, !dbg [[DBG79]] -// CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG79]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG88]], !llvm.loop [[LOOP118:![0-9]+]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG80]] +// CHECK1-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP26]], 1, !dbg [[DBG77]] +// CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG77]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG86]], !llvm.loop [[LOOP116:![0-9]+]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG88]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG86]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP27]], [[TMP28]], !dbg [[DBG79]] -// CHECK1-NEXT: store i32 [[ADD29]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG79]] -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: 
[[ADD30:%.*]] = add nsw i32 [[TMP29]], [[TMP30]], !dbg [[DBG79]] -// CHECK1-NEXT: store i32 [[ADD30]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG79]] -// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG88]], !llvm.loop [[LOOP120:![0-9]+]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG80]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG80]] +// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP27]], [[TMP28]], !dbg [[DBG77]] +// CHECK1-NEXT: store i32 [[ADD29]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG77]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG80]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG80]] +// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[TMP29]], [[TMP30]], !dbg [[DBG77]] +// CHECK1-NEXT: store i32 [[ADD30]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG77]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG86]], !llvm.loop [[LOOP118:![0-9]+]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB5:[0-9]+]], i32 [[TMP9]]), !dbg [[DBG119:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG121:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB5:[0-9]+]], i32 [[TMP9]]), !dbg [[DBG117:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG119:![0-9]+]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR2]] !dbg [[DBG122:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 
noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR1]] !dbg [[DBG120:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -295,34 +295,34 @@ int main() { // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META129:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META127:![0-9]+]], !DIExpression(), [[META128:![0-9]+]]) // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META131:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META129:![0-9]+]], !DIExpression(), [[META128]]) // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META132:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130]] +// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META130:![0-9]+]], !DIExpression(), [[META128]]) // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META133:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130]] +// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META131:![0-9]+]], !DIExpression(), [[META128]]) // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META134:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130]] +// CHECK1-NEXT: 
#dbg_declare(ptr [[B_ADDR]], [[META132:![0-9]+]], !DIExpression(), [[META128]]) // CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META135:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG136:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG136]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG136]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG136]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG136]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG136]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG136]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG136]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG136]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG136]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG136]] -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined_debug__(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr [[TMP7]], ptr addrspace(1) [[TMP10]]) #[[ATTR4:[0-9]+]], !dbg [[DBG136]] -// CHECK1-NEXT: ret void, !dbg [[DBG136]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META133:![0-9]+]], !DIExpression(), [[META128]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG134:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG134]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG134]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg 
[[DBG134]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG134]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG134]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG134]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG134]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG134]] +// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG134]] +// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG134]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined_debug__(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr [[TMP7]], ptr addrspace(1) [[TMP10]]) #[[ATTR3:[0-9]+]], !dbg [[DBG134]] +// CHECK1-NEXT: ret void, !dbg [[DBG134]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13 -// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR6:[0-9]+]] !dbg [[DBG137:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR5:[0-9]+]] !dbg [[DBG135:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 @@ -331,35 +331,35 @@ int main() { // CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // 
CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DYN_PTR_ADDR]], metadata [[META140:![0-9]+]], metadata !DIExpression()), !dbg [[DBG141:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META138:![0-9]+]], !DIExpression(), [[META139:![0-9]+]]) // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META142:![0-9]+]], metadata !DIExpression()), !dbg [[DBG141]] +// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META140:![0-9]+]], !DIExpression(), [[META139]]) // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META143:![0-9]+]], metadata !DIExpression()), !dbg [[DBG141]] +// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META141:![0-9]+]], !DIExpression(), [[META139]]) // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG141]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META142:![0-9]+]], !DIExpression(), [[META139]]) // CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META145:![0-9]+]], metadata !DIExpression()), !dbg [[DBG141]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META143:![0-9]+]], !DIExpression(), [[META139]]) // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTCAPTURE_EXPR__ADDR]], metadata [[META146:![0-9]+]], metadata !DIExpression()), !dbg [[DBG141]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG147:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[BB_ADDR]], align 8, !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8, !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !dbg [[DBG147]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1, !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(1), !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG147]] -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug__(ptr [[TMP3]], ptr addrspace(1) [[TMP9]], i32 [[TMP5]], ptr [[TMP6]], ptr addrspace(1) [[TMP10]], i1 [[TOBOOL]]) #[[ATTR4]], !dbg [[DBG147]] -// CHECK1-NEXT: ret void, !dbg [[DBG147]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTCAPTURE_EXPR__ADDR]], [[META144:![0-9]+]], !DIExpression(), [[META139]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG145:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG145]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG145]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8, !dbg [[DBG145]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG145]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG145]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG145]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG145]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !dbg [[DBG145]] +// CHECK1-NEXT: 
[[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1, !dbg [[DBG145]] +// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(1), !dbg [[DBG145]] +// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG145]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug__(ptr [[TMP3]], ptr addrspace(1) [[TMP9]], i32 [[TMP5]], ptr [[TMP6]], ptr addrspace(1) [[TMP10]], i1 [[TOBOOL]]) #[[ATTR3]], !dbg [[DBG145]] +// CHECK1-NEXT: ret void, !dbg [[DBG145]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug__ -// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG148:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG146:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8 @@ -372,32 +372,32 @@ int main() { // CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DYN_PTR_ADDR]], metadata [[META153:![0-9]+]], metadata !DIExpression()), !dbg [[DBG154:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META151:![0-9]+]], !DIExpression(), [[META152:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META155:![0-9]+]], metadata !DIExpression()), !dbg [[DBG156:![0-9]+]] +// CHECK1-NEXT: 
#dbg_declare(ptr [[C_ADDR]], [[META153:![0-9]+]], !DIExpression(), [[META154:![0-9]+]]) // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META157:![0-9]+]], metadata !DIExpression()), !dbg [[DBG158:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META155:![0-9]+]], !DIExpression(), [[META156:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META159:![0-9]+]], metadata !DIExpression()), !dbg [[DBG160:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META157:![0-9]+]], !DIExpression(), [[META158:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META161:![0-9]+]], metadata !DIExpression()), !dbg [[DBG162:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG163:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG163]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG163]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG163]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG163]] -// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG163]] -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG163]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG163]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG163]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG163]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG163]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg 
[[DBG163]] -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG163]] -// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG163]] -// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG163]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META159:![0-9]+]], !DIExpression(), [[META160:![0-9]+]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG161:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG161]] +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG161]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG161]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG161]] +// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG161]] +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG161]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG161]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG161]] +// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG161]] +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG161]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG161]] +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG161]] +// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG161]] +// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG161]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP10:%.*]] = call i32 
@__kmpc_global_thread_num(ptr @[[GLOB13:[0-9]+]]) +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB13:[0-9]+]]), !dbg [[DBG162:![0-9]+]] // CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG164:![0-9]+]] // CHECK1-NEXT: store i32 [[TMP11]], ptr [[A_CASTED]], align 4, !dbg [[DBG164]] // CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG164]] @@ -412,13 +412,13 @@ int main() { // CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP17]], align 8, !dbg [[DBG164]] // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB13]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG164]] // CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG165:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG167:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG166:![0-9]+]] // CHECK1: worker.exit: -// CHECK1-NEXT: ret void, !dbg [[DBG163]] +// CHECK1-NEXT: ret void, !dbg [[DBG161]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined_debug__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR2]] !dbg [[DBG168:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR1]] !dbg [[DBG167:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -441,140 +441,140 @@ int main() { // CHECK1-NEXT: [[H:%.*]] = 
alloca ptr, align 8 // CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META171:![0-9]+]], metadata !DIExpression()), !dbg [[DBG172:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META170:![0-9]+]], !DIExpression(), [[META171:![0-9]+]]) // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META173:![0-9]+]], metadata !DIExpression()), !dbg [[DBG172]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META172:![0-9]+]], !DIExpression(), [[META171]]) // CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META174:![0-9]+]], metadata !DIExpression()), !dbg [[DBG175:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META173:![0-9]+]], !DIExpression(), [[META174:![0-9]+]]) // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META176:![0-9]+]], metadata !DIExpression()), !dbg [[DBG177:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META175:![0-9]+]], !DIExpression(), [[META176:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META178:![0-9]+]], metadata !DIExpression()), !dbg [[DBG179:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META177:![0-9]+]], !DIExpression(), [[META178:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META180:![0-9]+]], metadata !DIExpression()), !dbg [[DBG181:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = 
load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG182:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG182]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG182]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG182]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG182]] -// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG182]] -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG182]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG182]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG182]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG182]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG182]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG182]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IV]], metadata [[META183:![0-9]+]], metadata !DIExpression()), !dbg [[DBG172]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_LB]], metadata [[META184:![0-9]+]], metadata !DIExpression()), !dbg [[DBG172]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG185:![0-9]+]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_UB]], metadata [[META186:![0-9]+]], metadata !DIExpression()), !dbg [[DBG172]] -// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_STRIDE]], metadata [[META187:![0-9]+]], metadata !DIExpression()), !dbg [[DBG172]] -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IS_LAST]], metadata [[META188:![0-9]+]], metadata !DIExpression()), !dbg [[DBG172]] 
-// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META189:![0-9]+]], metadata !DIExpression()), !dbg [[DBG172]] -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG182]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG182]] -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB10:[0-9]+]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG190:![0-9]+]] -// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG182]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META179:![0-9]+]], !DIExpression(), [[META180:![0-9]+]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG181:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG181]] +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG181]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG181]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG181]] +// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG181]] +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG181]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG181]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG181]] +// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG181]] +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG181]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG181]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_IV]], [[META182:![0-9]+]], !DIExpression(), [[META171]]) +// CHECK1-NEXT: #dbg_declare(ptr 
[[DOTOMP_LB]], [[META183:![0-9]+]], !DIExpression(), [[META171]]) +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG184:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_UB]], [[META185:![0-9]+]], !DIExpression(), [[META171]]) +// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_STRIDE]], [[META186:![0-9]+]], !DIExpression(), [[META171]]) +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_IS_LAST]], [[META187:![0-9]+]], !DIExpression(), [[META171]]) +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: #dbg_declare(ptr [[I]], [[META188:![0-9]+]], !DIExpression(), [[META171]]) +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG181]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG181]] +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB10:[0-9]+]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG189:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG181]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9, !dbg [[DBG185]] -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG185]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9, !dbg [[DBG184]] +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG184]] // CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG185]] +// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG184]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG185]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG184]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ], !dbg [[DBG185]] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]], !dbg [[DBG182]] -// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG182]] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ], !dbg [[DBG184]] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]], !dbg [[DBG181]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG181]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG182]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG181]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4, !dbg [[DBG185]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]], !dbg [[DBG182]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG182]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]], !dbg [[DBG181]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG181]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1, !dbg [[DBG191:![0-9]+]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG191]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG191]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META192:![0-9]+]], metadata !DIExpression()), !dbg [[DBG194:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG195:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG195]] -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX6]], i64 0, i64 1, !dbg [[DBG195]] -// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[F]], align 8, !dbg [[DBG194]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META196:![0-9]+]], metadata !DIExpression()), !dbg [[DBG197:![0-9]+]] -// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[G]], align 8, !dbg [[DBG197]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META198:![0-9]+]], metadata !DIExpression()), !dbg 
[[DBG199:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 1, !dbg [[DBG200:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX8]], i64 0, i64 1, !dbg [[DBG200]] -// CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[H]], align 8, !dbg [[DBG199]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META201:![0-9]+]], metadata !DIExpression()), !dbg [[DBG202:![0-9]+]] -// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG202]] -// CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG203:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG204:![0-9]+]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG205:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG204]] -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX10]], i64 0, i64 [[IDXPROM]], !dbg [[DBG204]] -// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX11]], align 4, !dbg [[DBG206:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG207:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX12]], i64 0, i64 0, !dbg [[DBG207]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG208:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG207]] -// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX13]], i64 0, i64 [[IDXPROM14]], !dbg [[DBG207]] -// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX15]], align 4, !dbg [[DBG209:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG210:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = 
getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX16]], i64 0, i64 0, !dbg [[DBG210]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG211:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM18:%.*]] = sext i32 [[TMP21]] to i64, !dbg [[DBG210]] -// CHECK1-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX17]], i64 0, i64 [[IDXPROM18]], !dbg [[DBG210]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX19]], align 4, !dbg [[DBG210]] -// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG212:![0-9]+]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG213:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM21:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG212]] -// CHECK1-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX20]], i64 0, i64 [[IDXPROM21]], !dbg [[DBG212]] -// CHECK1-NEXT: store i32 [[TMP22]], ptr [[ARRAYIDX22]], align 4, !dbg [[DBG214:![0-9]+]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i8, ptr [[TMP8]], align 1, !dbg [[DBG215:![0-9]+]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP24]] to i1, !dbg [[DBG215]] -// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG215]] -// CHECK1-NEXT: store i32 [[CONV]], ptr [[D]], align 4, !dbg [[DBG216:![0-9]+]] -// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG217:![0-9]+]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1, !dbg [[DBG190:![0-9]+]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG190]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG190]] +// CHECK1-NEXT: #dbg_declare(ptr [[F]], [[META191:![0-9]+]], !DIExpression(), [[META193:![0-9]+]]) +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG194:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = 
getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG194]] +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX6]], i64 0, i64 1, !dbg [[DBG194]] +// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[F]], align 8, !dbg [[META193]] +// CHECK1-NEXT: #dbg_declare(ptr [[G]], [[META195:![0-9]+]], !DIExpression(), [[META196:![0-9]+]]) +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[G]], align 8, !dbg [[META196]] +// CHECK1-NEXT: #dbg_declare(ptr [[H]], [[META197:![0-9]+]], !DIExpression(), [[META198:![0-9]+]]) +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 1, !dbg [[DBG199:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX8]], i64 0, i64 1, !dbg [[DBG199]] +// CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[H]], align 8, !dbg [[META198]] +// CHECK1-NEXT: #dbg_declare(ptr [[D]], [[META200:![0-9]+]], !DIExpression(), [[META201:![0-9]+]]) +// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[META201]] +// CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG202:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG203:![0-9]+]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG204:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG203]] +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX10]], i64 0, i64 [[IDXPROM]], !dbg [[DBG203]] +// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX11]], align 4, !dbg [[DBG205:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG206:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX12]], i64 0, i64 0, !dbg [[DBG206]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, 
!dbg [[DBG207:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG206]] +// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX13]], i64 0, i64 [[IDXPROM14]], !dbg [[DBG206]] +// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX15]], align 4, !dbg [[DBG208:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG209:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX16]], i64 0, i64 0, !dbg [[DBG209]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG210:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM18:%.*]] = sext i32 [[TMP21]] to i64, !dbg [[DBG209]] +// CHECK1-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX17]], i64 0, i64 [[IDXPROM18]], !dbg [[DBG209]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX19]], align 4, !dbg [[DBG209]] +// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG211:![0-9]+]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG212:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM21:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG211]] +// CHECK1-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX20]], i64 0, i64 [[IDXPROM21]], !dbg [[DBG211]] +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[ARRAYIDX22]], align 4, !dbg [[DBG213:![0-9]+]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i8, ptr [[TMP8]], align 1, !dbg [[DBG214:![0-9]+]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP24]] to i1, !dbg [[DBG214]] +// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG214]] +// CHECK1-NEXT: store i32 [[CONV]], ptr [[D]], align 4, !dbg [[DBG215:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG216:![0-9]+]] // CHECK1: omp.body.continue: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], 
!dbg [[DBG190]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG189]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP25]], 1, !dbg [[DBG182]] -// CHECK1-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG182]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG190]], !llvm.loop [[LOOP218:![0-9]+]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP25]], 1, !dbg [[DBG181]] +// CHECK1-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG181]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG189]], !llvm.loop [[LOOP217:![0-9]+]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG190]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG189]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP26]], [[TMP27]], !dbg [[DBG182]] -// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG182]] -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]], !dbg [[DBG182]] -// CHECK1-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG182]] -// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG190]], !llvm.loop [[LOOP220:![0-9]+]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 
[[TMP26]], [[TMP27]], !dbg [[DBG181]] +// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG181]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]], !dbg [[DBG181]] +// CHECK1-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG181]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG189]], !llvm.loop [[LOOP219:![0-9]+]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB12:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG219:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG221:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB12:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG218:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG220:![0-9]+]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR2]] !dbg [[DBG222:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR1]] !dbg [[DBG221:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -583,35 +583,35 @@ int main() { // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[BB_ADDR:%.*]] = 
alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META223:![0-9]+]], metadata !DIExpression()), !dbg [[DBG224:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META222:![0-9]+]], !DIExpression(), [[META223:![0-9]+]]) // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META225:![0-9]+]], metadata !DIExpression()), !dbg [[DBG224]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META224:![0-9]+]], !DIExpression(), [[META223]]) // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META226:![0-9]+]], metadata !DIExpression()), !dbg [[DBG224]] +// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META225:![0-9]+]], !DIExpression(), [[META223]]) // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META227:![0-9]+]], metadata !DIExpression()), !dbg [[DBG224]] +// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META226:![0-9]+]], !DIExpression(), [[META223]]) // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META228:![0-9]+]], metadata !DIExpression()), !dbg [[DBG224]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META227:![0-9]+]], !DIExpression(), [[META223]]) // CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META229:![0-9]+]], metadata !DIExpression()), !dbg [[DBG224]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG230:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG230]] 
-// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG230]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG230]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG230]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG230]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG230]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG230]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG230]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG230]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG230]] -// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG230]] -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined_debug__(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]]) #[[ATTR4]], !dbg [[DBG230]] -// CHECK1-NEXT: ret void, !dbg [[DBG230]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META228:![0-9]+]], !DIExpression(), [[META223]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG229:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG229]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG229]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG229]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG229]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG229]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG229]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG229]] 
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG229]] +// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG229]] +// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG229]] +// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG229]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined_debug__(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]]) #[[ATTR3]], !dbg [[DBG229]] +// CHECK1-NEXT: ret void, !dbg [[DBG229]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27 -// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR6]] !dbg [[DBG231:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5]] !dbg [[DBG230:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 @@ -619,32 +619,32 @@ int main() { // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DYN_PTR_ADDR]], metadata [[META234:![0-9]+]], metadata !DIExpression()), !dbg [[DBG235:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META233:![0-9]+]], !DIExpression(), [[META234:![0-9]+]]) // CHECK1-NEXT: store ptr 
[[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META236:![0-9]+]], metadata !DIExpression()), !dbg [[DBG235]] +// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META235:![0-9]+]], !DIExpression(), [[META234]]) // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META237:![0-9]+]], metadata !DIExpression()), !dbg [[DBG235]] +// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META236:![0-9]+]], !DIExpression(), [[META234]]) // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META238:![0-9]+]], metadata !DIExpression()), !dbg [[DBG235]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META237:![0-9]+]], !DIExpression(), [[META234]]) // CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META239:![0-9]+]], metadata !DIExpression()), !dbg [[DBG235]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG240:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG240]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG240]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8, !dbg [[DBG240]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG240]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG240]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG240]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG240]] -// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(1), !dbg [[DBG240]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG240]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] 
to ptr addrspace(1), !dbg [[DBG240]] -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug__(ptr [[TMP3]], ptr addrspace(1) [[TMP8]], i32 [[TMP5]], ptr addrspace(1) [[TMP9]], ptr addrspace(1) [[TMP10]]) #[[ATTR4]], !dbg [[DBG240]] -// CHECK1-NEXT: ret void, !dbg [[DBG240]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META238:![0-9]+]], !DIExpression(), [[META234]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG239:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG239]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG239]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8, !dbg [[DBG239]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG239]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG239]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG239]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG239]] +// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(1), !dbg [[DBG239]] +// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG239]] +// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG239]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug__(ptr [[TMP3]], ptr addrspace(1) [[TMP8]], i32 [[TMP5]], ptr addrspace(1) [[TMP9]], ptr addrspace(1) [[TMP10]]) #[[ATTR3]], !dbg [[DBG239]] +// CHECK1-NEXT: ret void, !dbg [[DBG239]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug__ -// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg 
[[DBG241:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG240:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8 @@ -657,53 +657,53 @@ int main() { // CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DYN_PTR_ADDR]], metadata [[META246:![0-9]+]], metadata !DIExpression()), !dbg [[DBG247:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META245:![0-9]+]], !DIExpression(), [[META246:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META248:![0-9]+]], metadata !DIExpression()), !dbg [[DBG249:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META247:![0-9]+]], !DIExpression(), [[META248:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META250:![0-9]+]], metadata !DIExpression()), !dbg [[DBG251:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META249:![0-9]+]], !DIExpression(), [[META250:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META252:![0-9]+]], metadata !DIExpression()), !dbg [[DBG253:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META251:![0-9]+]], !DIExpression(), [[META252:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// 
CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META254:![0-9]+]], metadata !DIExpression()), !dbg [[DBG255:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG256:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG256]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG256]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG256]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[A_ADDR]], align 8, !dbg [[DBG256]] -// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG256]] -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG256]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG256]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG256]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG256]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG256]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG256]] -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG256]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG256]] -// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG256]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG256]] -// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG256]] -// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG256]] -// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG256]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], 
[[META253:![0-9]+]], !DIExpression(), [[META254:![0-9]+]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG255:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG255]] +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[A_ADDR]], align 8, !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG255]] +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG255]] +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG255]] +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG255]] +// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG255]] +// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG255]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20:[0-9]+]]) -// CHECK1-NEXT: [[TMP14:%.*]] = 
getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG257:![0-9]+]] -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG257]] -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG257]] -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP15]], align 8, !dbg [[DBG257]] -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG257]] -// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG257]] -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG257]] -// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG257]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB20]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG257]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG258:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20:[0-9]+]]), !dbg [[DBG256:![0-9]+]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG258:![0-9]+]] +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG258]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG258]] +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP15]], align 8, !dbg [[DBG258]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG258]] +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG258]] +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG258]] +// CHECK1-NEXT: store ptr 
[[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG258]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB20]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG258]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG259:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG260:![0-9]+]] // CHECK1: worker.exit: -// CHECK1-NEXT: ret void, !dbg [[DBG256]] +// CHECK1-NEXT: ret void, !dbg [[DBG255]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined_debug__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR2]] !dbg [[DBG261:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR1]] !dbg [[DBG261:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -727,17 +727,17 @@ int main() { // CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META264:![0-9]+]], metadata !DIExpression()), !dbg [[DBG265:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META264:![0-9]+]], !DIExpression(), [[META265:![0-9]+]]) // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr 
[[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META266:![0-9]+]], metadata !DIExpression()), !dbg [[DBG265]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META266:![0-9]+]], !DIExpression(), [[META265]]) // CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META267:![0-9]+]], metadata !DIExpression()), !dbg [[DBG268:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META267:![0-9]+]], !DIExpression(), [[META268:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META269:![0-9]+]], metadata !DIExpression()), !dbg [[DBG270:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META269:![0-9]+]], !DIExpression(), [[META270:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META271:![0-9]+]], metadata !DIExpression()), !dbg [[DBG272:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META271:![0-9]+]], !DIExpression(), [[META272:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META273:![0-9]+]], metadata !DIExpression()), !dbg [[DBG274:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META273:![0-9]+]], !DIExpression(), [[META274:![0-9]+]]) // CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG275:![0-9]+]] // CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG275]] // CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG275]] @@ -754,16 +754,16 @@ int main() { // CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg 
[[DBG275]] // CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG275]] // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG275]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IV]], metadata [[META276:![0-9]+]], metadata !DIExpression()), !dbg [[DBG265]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_LB]], metadata [[META277:![0-9]+]], metadata !DIExpression()), !dbg [[DBG265]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_IV]], [[META276:![0-9]+]], !DIExpression(), [[META265]]) +// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_LB]], [[META277:![0-9]+]], !DIExpression(), [[META265]]) // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG278:![0-9]+]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_UB]], metadata [[META279:![0-9]+]], metadata !DIExpression()), !dbg [[DBG265]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_UB]], [[META279:![0-9]+]], !DIExpression(), [[META265]]) // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG278]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_STRIDE]], metadata [[META280:![0-9]+]], metadata !DIExpression()), !dbg [[DBG265]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_STRIDE]], [[META280:![0-9]+]], !DIExpression(), [[META265]]) // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG278]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IS_LAST]], metadata [[META281:![0-9]+]], metadata !DIExpression()), !dbg [[DBG265]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_IS_LAST]], [[META281:![0-9]+]], !DIExpression(), [[META265]]) // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG278]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META282:![0-9]+]], metadata !DIExpression()), !dbg [[DBG265]] +// CHECK1-NEXT: #dbg_declare(ptr [[I]], [[META282:![0-9]+]], !DIExpression(), [[META265]]) // CHECK1-NEXT: [[TMP12:%.*]] = load ptr, 
ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG275]] // CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !dbg [[DBG275]] // CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB17:[0-9]+]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG283:![0-9]+]] @@ -798,19 +798,19 @@ int main() { // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1, !dbg [[DBG284:![0-9]+]] // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG284]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG284]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META285:![0-9]+]], metadata !DIExpression()), !dbg [[DBG287:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[F]], [[META285:![0-9]+]], !DIExpression(), [[META287:![0-9]+]]) // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG288:![0-9]+]] // CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG288]] // CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 1, !dbg [[DBG288]] -// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[F]], align 8, !dbg [[DBG287]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META289:![0-9]+]], metadata !DIExpression()), !dbg [[DBG290:![0-9]+]] -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[G]], align 8, !dbg [[DBG290]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META291:![0-9]+]], metadata !DIExpression()), !dbg [[DBG292:![0-9]+]] +// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[F]], align 8, !dbg [[META287]] +// CHECK1-NEXT: #dbg_declare(ptr [[G]], [[META289:![0-9]+]], !DIExpression(), [[META290:![0-9]+]]) +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[G]], align 8, !dbg [[META290]] +// CHECK1-NEXT: #dbg_declare(ptr [[H]], 
[[META291:![0-9]+]], !DIExpression(), [[META292:![0-9]+]]) // CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 1, !dbg [[DBG293:![0-9]+]] // CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX9]], i64 0, i64 1, !dbg [[DBG293]] -// CHECK1-NEXT: store ptr [[ARRAYIDX10]], ptr [[H]], align 8, !dbg [[DBG292]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META294:![0-9]+]], metadata !DIExpression()), !dbg [[DBG295:![0-9]+]] -// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG295]] +// CHECK1-NEXT: store ptr [[ARRAYIDX10]], ptr [[H]], align 8, !dbg [[META292]] +// CHECK1-NEXT: #dbg_declare(ptr [[D]], [[META294:![0-9]+]], !DIExpression(), [[META295:![0-9]+]]) +// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[META295]] // CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4, !dbg [[DBG296:![0-9]+]] // CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG297:![0-9]+]] // CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG298:![0-9]+]] @@ -868,7 +868,7 @@ int main() { // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR2]] !dbg [[DBG316:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 
dereferenceable(1) [[BB:%.*]]) #[[ATTR1]] !dbg [[DBG316:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -877,17 +877,17 @@ int main() { // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META319:![0-9]+]], metadata !DIExpression()), !dbg [[DBG320:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META319:![0-9]+]], !DIExpression(), [[META320:![0-9]+]]) // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META321:![0-9]+]], metadata !DIExpression()), !dbg [[DBG320]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META321:![0-9]+]], !DIExpression(), [[META320]]) // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META322:![0-9]+]], metadata !DIExpression()), !dbg [[DBG320]] +// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META322:![0-9]+]], !DIExpression(), [[META320]]) // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META323:![0-9]+]], metadata !DIExpression()), !dbg [[DBG320]] +// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META323:![0-9]+]], !DIExpression(), [[META320]]) // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META324:![0-9]+]], metadata !DIExpression()), !dbg [[DBG320]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META324:![0-9]+]], !DIExpression(), [[META320]]) // CHECK1-NEXT: store ptr [[BB]], 
ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META325:![0-9]+]], metadata !DIExpression()), !dbg [[DBG320]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META325:![0-9]+]], !DIExpression(), [[META320]]) // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG326:![0-9]+]] // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG326]] // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG326]] @@ -902,12 +902,12 @@ int main() { // CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG326]] // CHECK1-NEXT: [[TMP12:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG326]] // CHECK1-NEXT: [[TMP13:%.*]] = addrspacecast ptr [[TMP9]] to ptr addrspace(1), !dbg [[DBG326]] -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined_debug__(ptr [[TMP4]], ptr [[TMP5]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]], ptr addrspace(1) [[TMP12]], ptr addrspace(1) [[TMP13]]) #[[ATTR4]], !dbg [[DBG326]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined_debug__(ptr [[TMP4]], ptr [[TMP5]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]], ptr addrspace(1) [[TMP12]], ptr addrspace(1) [[TMP13]]) #[[ATTR3]], !dbg [[DBG326]] // CHECK1-NEXT: ret void, !dbg [[DBG326]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41 -// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR6]] !dbg [[DBG327:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 
dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5]] !dbg [[DBG327:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 @@ -915,15 +915,15 @@ int main() { // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DYN_PTR_ADDR]], metadata [[META330:![0-9]+]], metadata !DIExpression()), !dbg [[DBG331:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META330:![0-9]+]], !DIExpression(), [[META331:![0-9]+]]) // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META332:![0-9]+]], metadata !DIExpression()), !dbg [[DBG331]] +// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META332:![0-9]+]], !DIExpression(), [[META331]]) // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META333:![0-9]+]], metadata !DIExpression()), !dbg [[DBG331]] +// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META333:![0-9]+]], !DIExpression(), [[META331]]) // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META334:![0-9]+]], metadata !DIExpression()), !dbg [[DBG331]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META334:![0-9]+]], !DIExpression(), [[META331]]) // CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META335:![0-9]+]], metadata !DIExpression()), !dbg [[DBG331]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META335:![0-9]+]], !DIExpression(), [[META331]]) // 
CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG336:![0-9]+]] // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG336]] // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG336]] @@ -937,6 +937,6 @@ int main() { // CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG336]] // CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG336]] // CHECK1-NEXT: [[TMP12:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG336]] -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug__(ptr [[TMP4]], ptr addrspace(1) [[TMP9]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]], ptr addrspace(1) [[TMP12]]) #[[ATTR4]], !dbg [[DBG336]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug__(ptr [[TMP4]], ptr addrspace(1) [[TMP9]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]], ptr addrspace(1) [[TMP12]]) #[[ATTR3]], !dbg [[DBG336]] // CHECK1-NEXT: ret void, !dbg [[DBG336]] // diff --git a/clang/test/OpenMP/target_parallel_generic_loop_codegen-3.cpp b/clang/test/OpenMP/target_parallel_generic_loop_codegen-3.cpp index 5a34dab3134197..ea0ef01f8161cd 100644 --- a/clang/test/OpenMP/target_parallel_generic_loop_codegen-3.cpp +++ b/clang/test/OpenMP/target_parallel_generic_loop_codegen-3.cpp @@ -55,7 +55,7 @@ int main() { return 0; } // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug__ -// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]], i1 noundef zeroext [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG22:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr noalias noundef [[B:%.*]], ptr 
addrspace(1) noalias noundef [[BB:%.*]], i1 noundef zeroext [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG19:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8 @@ -69,58 +69,58 @@ int main() { // CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DYN_PTR_ADDR]], metadata [[META43:![0-9]+]], metadata !DIExpression()), !dbg [[DBG44:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META40:![0-9]+]], !DIExpression(), [[META41:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META45:![0-9]+]], metadata !DIExpression()), !dbg [[DBG46:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META42:![0-9]+]], !DIExpression(), [[META43:![0-9]+]]) // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META47:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META44:![0-9]+]], !DIExpression(), [[META45:![0-9]+]]) // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META49:![0-9]+]], metadata !DIExpression()), !dbg [[DBG50:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META46:![0-9]+]], !DIExpression(), [[META47:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META51:![0-9]+]], metadata !DIExpression()), !dbg [[DBG52:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META48:![0-9]+]], 
!DIExpression(), [[META49:![0-9]+]]) // CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[DOTCAPTURE_EXPR_]] to i8 // CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 1 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTCAPTURE_EXPR__ADDR]], metadata [[META53:![0-9]+]], metadata !DIExpression()), !dbg [[DBG54:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG55:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG55]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG55]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG55]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG55]] -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8, !dbg [[DBG55]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG55]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG55]] -// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG55]] -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG55]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG55]] -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG55]] -// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG55]] -// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG55]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTCAPTURE_EXPR__ADDR]], [[META50:![0-9]+]], !DIExpression(), [[META51:![0-9]+]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG52:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG52]] +// CHECK1-NEXT: store ptr 
[[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG52]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG52]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG52]] +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8, !dbg [[DBG52]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG52]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG52]] +// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG52]] +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG52]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG52]] +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG52]] +// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP8]], -1, !dbg [[DBG52]] +// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG52]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB6:[0-9]+]]) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG56:![0-9]+]] -// CHECK1-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4, !dbg [[DBG56]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG56]] -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG56]] -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8, !dbg [[DBG56]] -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG56]] -// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP11]] to ptr, !dbg [[DBG56]] -// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !dbg [[DBG56]] -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x 
ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG56]] -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP15]], align 8, !dbg [[DBG56]] -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG56]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP16]], align 8, !dbg [[DBG56]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !dbg [[DBG57:![0-9]+]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP17]] to i1, !dbg [[DBG57]] -// CHECK1-NEXT: [[TMP18:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG56]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB6]], i32 [[TMP9]], i32 [[TMP18]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG56]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG59:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG60:![0-9]+]] +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB6:[0-9]+]]), !dbg [[DBG53:![0-9]+]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG55:![0-9]+]] +// CHECK1-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4, !dbg [[DBG55]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG55]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG55]] +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP12]], align 8, !dbg [[DBG55]] +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG55]] +// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP11]] to ptr, !dbg [[DBG55]] +// CHECK1-NEXT: store ptr [[TMP14]], ptr [[TMP13]], align 8, !dbg [[DBG55]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG55]] +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[TMP15]], 
align 8, !dbg [[DBG55]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG55]] +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP16]], align 8, !dbg [[DBG55]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !dbg [[DBG56:![0-9]+]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP17]] to i1, !dbg [[DBG56]] +// CHECK1-NEXT: [[TMP18:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG55]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB6]], i32 [[TMP9]], i32 [[TMP18]], i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG55]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG57:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG58:![0-9]+]] // CHECK1: worker.exit: -// CHECK1-NEXT: ret void, !dbg [[DBG55]] +// CHECK1-NEXT: ret void, !dbg [[DBG52]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined_debug__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR2:[0-9]+]] !dbg [[DBG61:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR1:[0-9]+]] !dbg [[DBG59:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -144,149 +144,149 @@ int main() { // CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META68:![0-9]+]], metadata !DIExpression()), !dbg [[DBG69:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META66:![0-9]+]], !DIExpression(), [[META67:![0-9]+]]) // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG69]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META68:![0-9]+]], !DIExpression(), [[META67]]) // CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META71:![0-9]+]], metadata !DIExpression()), !dbg [[DBG72:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META69:![0-9]+]], !DIExpression(), [[META70:![0-9]+]]) // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META73:![0-9]+]], metadata !DIExpression()), !dbg [[DBG74:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META71:![0-9]+]], !DIExpression(), [[META72:![0-9]+]]) // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META75:![0-9]+]], metadata !DIExpression()), !dbg [[DBG76:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META73:![0-9]+]], !DIExpression(), [[META74:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META77:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG79:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, 
!dbg [[DBG79]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG79]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG79]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG79]] -// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8, !dbg [[DBG79]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG79]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG79]] -// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG79]] -// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG79]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG79]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IV]], metadata [[META80:![0-9]+]], metadata !DIExpression()), !dbg [[DBG69]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_LB]], metadata [[META81:![0-9]+]], metadata !DIExpression()), !dbg [[DBG69]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG82:![0-9]+]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_UB]], metadata [[META83:![0-9]+]], metadata !DIExpression()), !dbg [[DBG69]] -// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_STRIDE]], metadata [[META84:![0-9]+]], metadata !DIExpression()), !dbg [[DBG69]] -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IS_LAST]], metadata [[META85:![0-9]+]], metadata !DIExpression()), !dbg [[DBG69]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B4]], metadata [[META86:![0-9]+]], metadata !DIExpression()), !dbg [[DBG69]] -// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B4]], 
ptr align 4 [[TMP4]], i64 400, i1 false), !dbg [[DBG79]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META87:![0-9]+]], metadata !DIExpression()), !dbg [[DBG69]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG79]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !dbg [[DBG79]] -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG88:![0-9]+]] -// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG79]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META75:![0-9]+]], !DIExpression(), [[META76:![0-9]+]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG77:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG77]] +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG77]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG77]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG77]] +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[_TMP1]], align 8, !dbg [[DBG77]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG77]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG77]] +// CHECK1-NEXT: [[TMP6:%.*]] = addrspacecast ptr addrspace(1) [[TMP5]] to ptr, !dbg [[DBG77]] +// CHECK1-NEXT: store ptr [[TMP6]], ptr [[_TMP2]], align 8, !dbg [[DBG77]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG77]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_IV]], [[META78:![0-9]+]], !DIExpression(), [[META67]]) +// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_LB]], [[META79:![0-9]+]], !DIExpression(), [[META67]]) +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG80:![0-9]+]] +// CHECK1-NEXT: 
#dbg_declare(ptr [[DOTOMP_UB]], [[META81:![0-9]+]], !DIExpression(), [[META67]]) +// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG80]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_STRIDE]], [[META82:![0-9]+]], !DIExpression(), [[META67]]) +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG80]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_IS_LAST]], [[META83:![0-9]+]], !DIExpression(), [[META67]]) +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG80]] +// CHECK1-NEXT: #dbg_declare(ptr [[B4]], [[META84:![0-9]+]], !DIExpression(), [[META67]]) +// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B4]], ptr align 4 [[TMP4]], i64 400, i1 false), !dbg [[DBG77]] +// CHECK1-NEXT: #dbg_declare(ptr [[I]], [[META85:![0-9]+]], !DIExpression(), [[META67]]) +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG77]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4, !dbg [[DBG77]] +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB3:[0-9]+]], i32 [[TMP9]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG86:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG77]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9, !dbg [[DBG82]] -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG82]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG80]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP10]], 9, !dbg [[DBG80]] +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG80]] // CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG82]] +// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG80]] // CHECK1: cond.false: -// 
CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG82]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG80]] +// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG80]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ], !dbg [[DBG82]] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]], !dbg [[DBG79]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG79]] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP11]], [[COND_FALSE]] ], !dbg [[DBG80]] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG80]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG80]] +// CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG80]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG80]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG80]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]], !dbg [[DBG77]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG77]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG79]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG77]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]], !dbg [[DBG79]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG79]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG80]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG80]] +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]], !dbg [[DBG77]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG77]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1, !dbg [[DBG89:![0-9]+]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG89]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG89]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META90:![0-9]+]], metadata !DIExpression()), !dbg [[DBG93:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG94:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG94]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 1, !dbg [[DBG94]] -// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[F]], align 8, !dbg [[DBG93]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META95:![0-9]+]], metadata !DIExpression()), !dbg [[DBG96:![0-9]+]] -// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[G]], align 8, !dbg [[DBG96]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META97:![0-9]+]], metadata !DIExpression()), !dbg [[DBG98:![0-9]+]] -// 
CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 1, !dbg [[DBG99:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX9]], i64 0, i64 1, !dbg [[DBG99]] -// CHECK1-NEXT: store ptr [[ARRAYIDX10]], ptr [[H]], align 8, !dbg [[DBG98]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META100:![0-9]+]], metadata !DIExpression()), !dbg [[DBG101:![0-9]+]] -// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG101]] -// CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG102:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG103:![0-9]+]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG104:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64, !dbg [[DBG103]] -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM]], !dbg [[DBG103]] -// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG105:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG106:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG106]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG107:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG106]] -// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG106]] -// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG108:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG109:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x 
[10 x i32]], ptr [[ARRAYIDX17]], i64 0, i64 0, !dbg [[DBG109]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG110:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG109]] -// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG109]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4, !dbg [[DBG109]] -// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG111:![0-9]+]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG112:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG111]] -// CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG111]] -// CHECK1-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX23]], align 4, !dbg [[DBG113:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG114:![0-9]+]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG115:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM25:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG114]] -// CHECK1-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX24]], i64 0, i64 [[IDXPROM25]], !dbg [[DBG114]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX26]], align 4, !dbg [[DBG114]] -// CHECK1-NEXT: [[TMP25:%.*]] = load i8, ptr [[TMP7]], align 1, !dbg [[DBG116:![0-9]+]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP25]] to i1, !dbg [[DBG116]] -// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG116]] -// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP24]], !dbg [[DBG116]] -// CHECK1-NEXT: [[TOBOOL27:%.*]] = icmp ne i32 [[OR]], 0, !dbg [[DBG116]] -// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL27]] to i8, !dbg [[DBG116]] -// CHECK1-NEXT: 
store i8 [[FROMBOOL]], ptr [[TMP7]], align 1, !dbg [[DBG116]] -// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG117:![0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG80]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1, !dbg [[DBG87:![0-9]+]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG87]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG87]] +// CHECK1-NEXT: #dbg_declare(ptr [[F]], [[META88:![0-9]+]], !DIExpression(), [[META91:![0-9]+]]) +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG92:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG92]] +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 1, !dbg [[DBG92]] +// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[F]], align 8, !dbg [[META91]] +// CHECK1-NEXT: #dbg_declare(ptr [[G]], [[META93:![0-9]+]], !DIExpression(), [[META94:![0-9]+]]) +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[G]], align 8, !dbg [[META94]] +// CHECK1-NEXT: #dbg_declare(ptr [[H]], [[META95:![0-9]+]], !DIExpression(), [[META96:![0-9]+]]) +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 1, !dbg [[DBG97:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX9]], i64 0, i64 1, !dbg [[DBG97]] +// CHECK1-NEXT: store ptr [[ARRAYIDX10]], ptr [[H]], align 8, !dbg [[META96]] +// CHECK1-NEXT: #dbg_declare(ptr [[D]], [[META98:![0-9]+]], !DIExpression(), [[META99:![0-9]+]]) +// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[META99]] +// CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG100:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG101:![0-9]+]] +// CHECK1-NEXT: 
[[TMP18:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG102:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64, !dbg [[DBG101]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX11]], i64 0, i64 [[IDXPROM]], !dbg [[DBG101]] +// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX12]], align 4, !dbg [[DBG103:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG104:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX13]], i64 0, i64 0, !dbg [[DBG104]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG105:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM15:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG104]] +// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX14]], i64 0, i64 [[IDXPROM15]], !dbg [[DBG104]] +// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX16]], align 4, !dbg [[DBG106:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG107:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX17]], i64 0, i64 0, !dbg [[DBG107]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG108:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM19:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG107]] +// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX18]], i64 0, i64 [[IDXPROM19]], !dbg [[DBG107]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX20]], align 4, !dbg [[DBG107]] +// CHECK1-NEXT: [[ARRAYIDX21:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG109:![0-9]+]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG110:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM22:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG109]] +// 
CHECK1-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX21]], i64 0, i64 [[IDXPROM22]], !dbg [[DBG109]] +// CHECK1-NEXT: store i32 [[TMP21]], ptr [[ARRAYIDX23]], align 4, !dbg [[DBG111:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX24:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[B4]], i64 0, i64 0, !dbg [[DBG112:![0-9]+]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG113:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM25:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG112]] +// CHECK1-NEXT: [[ARRAYIDX26:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX24]], i64 0, i64 [[IDXPROM25]], !dbg [[DBG112]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX26]], align 4, !dbg [[DBG112]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i8, ptr [[TMP7]], align 1, !dbg [[DBG114:![0-9]+]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP25]] to i1, !dbg [[DBG114]] +// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG114]] +// CHECK1-NEXT: [[OR:%.*]] = or i32 [[CONV]], [[TMP24]], !dbg [[DBG114]] +// CHECK1-NEXT: [[TOBOOL27:%.*]] = icmp ne i32 [[OR]], 0, !dbg [[DBG114]] +// CHECK1-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL27]] to i8, !dbg [[DBG114]] +// CHECK1-NEXT: store i8 [[FROMBOOL]], ptr [[TMP7]], align 1, !dbg [[DBG114]] +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG115:![0-9]+]] // CHECK1: omp.body.continue: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG88]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG86]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP26]], 1, !dbg [[DBG79]] -// CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG79]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG88]], !llvm.loop [[LOOP118:![0-9]+]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG80]] +// 
CHECK1-NEXT: [[ADD28:%.*]] = add nsw i32 [[TMP26]], 1, !dbg [[DBG77]] +// CHECK1-NEXT: store i32 [[ADD28]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG77]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG86]], !llvm.loop [[LOOP116:![0-9]+]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG88]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG86]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP27]], [[TMP28]], !dbg [[DBG79]] -// CHECK1-NEXT: store i32 [[ADD29]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG79]] -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG82]] -// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[TMP29]], [[TMP30]], !dbg [[DBG79]] -// CHECK1-NEXT: store i32 [[ADD30]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG79]] -// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG88]], !llvm.loop [[LOOP120:![0-9]+]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG80]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG80]] +// CHECK1-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP27]], [[TMP28]], !dbg [[DBG77]] +// CHECK1-NEXT: store i32 [[ADD29]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG77]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG80]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG80]] +// CHECK1-NEXT: [[ADD30:%.*]] = add nsw i32 [[TMP29]], [[TMP30]], !dbg [[DBG77]] +// CHECK1-NEXT: store i32 [[ADD30]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG77]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG86]], !llvm.loop [[LOOP118:![0-9]+]] // CHECK1: 
omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB5:[0-9]+]], i32 [[TMP9]]), !dbg [[DBG119:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG121:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB5:[0-9]+]], i32 [[TMP9]]), !dbg [[DBG117:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG119:![0-9]+]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR2]] !dbg [[DBG122:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR1]] !dbg [[DBG120:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -295,34 +295,34 @@ int main() { // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META129:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META127:![0-9]+]], !DIExpression(), [[META128:![0-9]+]]) // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata 
[[META131:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META129:![0-9]+]], !DIExpression(), [[META128]]) // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META132:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130]] +// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META130:![0-9]+]], !DIExpression(), [[META128]]) // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META133:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130]] +// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META131:![0-9]+]], !DIExpression(), [[META128]]) // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META134:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META132:![0-9]+]], !DIExpression(), [[META128]]) // CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META135:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG136:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG136]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG136]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG136]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG136]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG136]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG136]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG136]] -// CHECK1-NEXT: [[TMP8:%.*]] = 
load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG136]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG136]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG136]] -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined_debug__(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr [[TMP7]], ptr addrspace(1) [[TMP10]]) #[[ATTR4:[0-9]+]], !dbg [[DBG136]] -// CHECK1-NEXT: ret void, !dbg [[DBG136]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META133:![0-9]+]], !DIExpression(), [[META128]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG134:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG134]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG134]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG134]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG134]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG134]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG134]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG134]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG134]] +// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG134]] +// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG134]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug___omp_outlined_debug__(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr [[TMP7]], ptr addrspace(1) [[TMP10]]) #[[ATTR3:[0-9]+]], !dbg [[DBG134]] +// CHECK1-NEXT: ret void, !dbg [[DBG134]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13 
-// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR6:[0-9]+]] !dbg [[DBG137:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]], i64 noundef [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR5:[0-9]+]] !dbg [[DBG135:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 @@ -331,35 +331,35 @@ int main() { // CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTCAPTURE_EXPR__ADDR:%.*]] = alloca i64, align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DYN_PTR_ADDR]], metadata [[META140:![0-9]+]], metadata !DIExpression()), !dbg [[DBG141:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META138:![0-9]+]], !DIExpression(), [[META139:![0-9]+]]) // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META142:![0-9]+]], metadata !DIExpression()), !dbg [[DBG141]] +// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META140:![0-9]+]], !DIExpression(), [[META139]]) // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META143:![0-9]+]], metadata !DIExpression()), !dbg [[DBG141]] +// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META141:![0-9]+]], !DIExpression(), [[META139]]) // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void 
@llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG141]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META142:![0-9]+]], !DIExpression(), [[META139]]) // CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META145:![0-9]+]], metadata !DIExpression()), !dbg [[DBG141]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META143:![0-9]+]], !DIExpression(), [[META139]]) // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTCAPTURE_EXPR__ADDR]], metadata [[META146:![0-9]+]], metadata !DIExpression()), !dbg [[DBG141]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG147:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8, !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !dbg [[DBG147]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1, !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(1), !dbg [[DBG147]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG147]] -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug__(ptr [[TMP3]], ptr addrspace(1) [[TMP9]], i32 [[TMP5]], ptr [[TMP6]], ptr addrspace(1) [[TMP10]], i1 
[[TOBOOL]]) #[[ATTR4]], !dbg [[DBG147]] -// CHECK1-NEXT: ret void, !dbg [[DBG147]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTCAPTURE_EXPR__ADDR]], [[META144:![0-9]+]], !DIExpression(), [[META139]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG145:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG145]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG145]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8, !dbg [[DBG145]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG145]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG145]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG145]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG145]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR__ADDR]], align 1, !dbg [[DBG145]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP8]] to i1, !dbg [[DBG145]] +// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(1), !dbg [[DBG145]] +// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG145]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l13_debug__(ptr [[TMP3]], ptr addrspace(1) [[TMP9]], i32 [[TMP5]], ptr [[TMP6]], ptr addrspace(1) [[TMP10]], i1 [[TOBOOL]]) #[[ATTR3]], !dbg [[DBG145]] +// CHECK1-NEXT: ret void, !dbg [[DBG145]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug__ -// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG148:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) 
noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG146:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8 @@ -372,32 +372,32 @@ int main() { // CHECK1-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DYN_PTR_ADDR]], metadata [[META153:![0-9]+]], metadata !DIExpression()), !dbg [[DBG154:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META151:![0-9]+]], !DIExpression(), [[META152:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META155:![0-9]+]], metadata !DIExpression()), !dbg [[DBG156:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META153:![0-9]+]], !DIExpression(), [[META154:![0-9]+]]) // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META157:![0-9]+]], metadata !DIExpression()), !dbg [[DBG158:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META155:![0-9]+]], !DIExpression(), [[META156:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META159:![0-9]+]], metadata !DIExpression()), !dbg [[DBG160:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META157:![0-9]+]], !DIExpression(), [[META158:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META161:![0-9]+]], metadata !DIExpression()), !dbg [[DBG162:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), 
ptr [[C_ADDR]], align 8, !dbg [[DBG163:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG163]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG163]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG163]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG163]] -// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG163]] -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG163]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG163]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG163]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG163]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG163]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG163]] -// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG163]] -// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG163]] -// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG163]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META159:![0-9]+]], !DIExpression(), [[META160:![0-9]+]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG161:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG161]] +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG161]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG161]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG161]] +// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) 
[[TMP3]] to ptr, !dbg [[DBG161]] +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG161]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG161]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG161]] +// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG161]] +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG161]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG161]] +// CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG161]] +// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP9]], -1, !dbg [[DBG161]] +// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG161]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB13:[0-9]+]]) +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB13:[0-9]+]]), !dbg [[DBG162:![0-9]+]] // CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG164:![0-9]+]] // CHECK1-NEXT: store i32 [[TMP11]], ptr [[A_CASTED]], align 4, !dbg [[DBG164]] // CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED]], align 8, !dbg [[DBG164]] @@ -412,13 +412,13 @@ int main() { // CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP17]], align 8, !dbg [[DBG164]] // CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB13]], i32 [[TMP10]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG164]] // CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG165:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG167:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG166:![0-9]+]] // CHECK1: worker.exit: -// CHECK1-NEXT: ret void, !dbg 
[[DBG163]] +// CHECK1-NEXT: ret void, !dbg [[DBG161]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined_debug__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR2]] !dbg [[DBG168:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], i32 noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR1]] !dbg [[DBG167:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -441,140 +441,140 @@ int main() { // CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META171:![0-9]+]], metadata !DIExpression()), !dbg [[DBG172:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META170:![0-9]+]], !DIExpression(), [[META171:![0-9]+]]) // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META173:![0-9]+]], metadata !DIExpression()), !dbg [[DBG172]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META172:![0-9]+]], !DIExpression(), [[META171]]) // CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META174:![0-9]+]], metadata !DIExpression()), !dbg [[DBG175:![0-9]+]] +// 
CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META173:![0-9]+]], !DIExpression(), [[META174:![0-9]+]]) // CHECK1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META176:![0-9]+]], metadata !DIExpression()), !dbg [[DBG177:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META175:![0-9]+]], !DIExpression(), [[META176:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META178:![0-9]+]], metadata !DIExpression()), !dbg [[DBG179:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META177:![0-9]+]], !DIExpression(), [[META178:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META180:![0-9]+]], metadata !DIExpression()), !dbg [[DBG181:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG182:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG182]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG182]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG182]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG182]] -// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG182]] -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG182]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG182]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG182]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG182]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG182]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], 
align 8, !dbg [[DBG182]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IV]], metadata [[META183:![0-9]+]], metadata !DIExpression()), !dbg [[DBG172]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_LB]], metadata [[META184:![0-9]+]], metadata !DIExpression()), !dbg [[DBG172]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG185:![0-9]+]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_UB]], metadata [[META186:![0-9]+]], metadata !DIExpression()), !dbg [[DBG172]] -// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_STRIDE]], metadata [[META187:![0-9]+]], metadata !DIExpression()), !dbg [[DBG172]] -// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IS_LAST]], metadata [[META188:![0-9]+]], metadata !DIExpression()), !dbg [[DBG172]] -// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META189:![0-9]+]], metadata !DIExpression()), !dbg [[DBG172]] -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG182]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG182]] -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB10:[0-9]+]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG190:![0-9]+]] -// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG182]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META179:![0-9]+]], !DIExpression(), [[META180:![0-9]+]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG181:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG181]] +// 
CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG181]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG181]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG181]] +// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG181]] +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG181]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG181]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG181]] +// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG181]] +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG181]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG181]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_IV]], [[META182:![0-9]+]], !DIExpression(), [[META171]]) +// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_LB]], [[META183:![0-9]+]], !DIExpression(), [[META171]]) +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG184:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_UB]], [[META185:![0-9]+]], !DIExpression(), [[META171]]) +// CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_STRIDE]], [[META186:![0-9]+]], !DIExpression(), [[META171]]) +// CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_IS_LAST]], [[META187:![0-9]+]], !DIExpression(), [[META171]]) +// CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: #dbg_declare(ptr [[I]], [[META188:![0-9]+]], !DIExpression(), [[META171]]) +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG181]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4, !dbg [[DBG181]] +// CHECK1-NEXT: call void 
@__kmpc_for_static_init_4(ptr @[[GLOB10:[0-9]+]], i32 [[TMP10]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG189:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG181]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9, !dbg [[DBG185]] -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG185]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP11]], 9, !dbg [[DBG184]] +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG184]] // CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG185]] +// CHECK1-NEXT: br label [[COND_END:%.*]], !dbg [[DBG184]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG185]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: br label [[COND_END]], !dbg [[DBG184]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ], !dbg [[DBG185]] -// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]], !dbg [[DBG182]] -// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label 
[[OMP_DISPATCH_END:%.*]], !dbg [[DBG182]] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ], !dbg [[DBG184]] +// CHECK1-NEXT: store i32 [[COND]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: store i32 [[TMP13]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]], !dbg [[DBG181]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG181]] // CHECK1: omp.dispatch.body: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG182]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG181]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]], !dbg [[DBG182]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG182]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]], !dbg [[DBG181]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG181]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1, !dbg [[DBG191:![0-9]+]] -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG191]] -// 
CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG191]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META192:![0-9]+]], metadata !DIExpression()), !dbg [[DBG194:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG195:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG195]] -// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX6]], i64 0, i64 1, !dbg [[DBG195]] -// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[F]], align 8, !dbg [[DBG194]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META196:![0-9]+]], metadata !DIExpression()), !dbg [[DBG197:![0-9]+]] -// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[G]], align 8, !dbg [[DBG197]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META198:![0-9]+]], metadata !DIExpression()), !dbg [[DBG199:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 1, !dbg [[DBG200:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX8]], i64 0, i64 1, !dbg [[DBG200]] -// CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[H]], align 8, !dbg [[DBG199]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META201:![0-9]+]], metadata !DIExpression()), !dbg [[DBG202:![0-9]+]] -// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG202]] -// CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG203:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG204:![0-9]+]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG205:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG204]] -// CHECK1-NEXT: 
[[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX10]], i64 0, i64 [[IDXPROM]], !dbg [[DBG204]] -// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX11]], align 4, !dbg [[DBG206:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG207:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX12]], i64 0, i64 0, !dbg [[DBG207]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG208:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG207]] -// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX13]], i64 0, i64 [[IDXPROM14]], !dbg [[DBG207]] -// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX15]], align 4, !dbg [[DBG209:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG210:![0-9]+]] -// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX16]], i64 0, i64 0, !dbg [[DBG210]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG211:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM18:%.*]] = sext i32 [[TMP21]] to i64, !dbg [[DBG210]] -// CHECK1-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX17]], i64 0, i64 [[IDXPROM18]], !dbg [[DBG210]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX19]], align 4, !dbg [[DBG210]] -// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG212:![0-9]+]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG213:![0-9]+]] -// CHECK1-NEXT: [[IDXPROM21:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG212]] -// CHECK1-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX20]], i64 0, i64 [[IDXPROM21]], !dbg [[DBG212]] -// CHECK1-NEXT: store i32 
[[TMP22]], ptr [[ARRAYIDX22]], align 4, !dbg [[DBG214:![0-9]+]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i8, ptr [[TMP8]], align 1, !dbg [[DBG215:![0-9]+]] -// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP24]] to i1, !dbg [[DBG215]] -// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG215]] -// CHECK1-NEXT: store i32 [[CONV]], ptr [[D]], align 4, !dbg [[DBG216:![0-9]+]] -// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG217:![0-9]+]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1, !dbg [[DBG190:![0-9]+]] +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG190]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG190]] +// CHECK1-NEXT: #dbg_declare(ptr [[F]], [[META191:![0-9]+]], !DIExpression(), [[META193:![0-9]+]]) +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG194:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG194]] +// CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX6]], i64 0, i64 1, !dbg [[DBG194]] +// CHECK1-NEXT: store ptr [[ARRAYIDX7]], ptr [[F]], align 8, !dbg [[META193]] +// CHECK1-NEXT: #dbg_declare(ptr [[G]], [[META195:![0-9]+]], !DIExpression(), [[META196:![0-9]+]]) +// CHECK1-NEXT: store ptr [[A_ADDR]], ptr [[G]], align 8, !dbg [[META196]] +// CHECK1-NEXT: #dbg_declare(ptr [[H]], [[META197:![0-9]+]], !DIExpression(), [[META198:![0-9]+]]) +// CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 1, !dbg [[DBG199:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX8]], i64 0, i64 1, !dbg [[DBG199]] +// CHECK1-NEXT: store ptr [[ARRAYIDX9]], ptr [[H]], align 8, !dbg [[META198]] +// CHECK1-NEXT: #dbg_declare(ptr [[D]], 
[[META200:![0-9]+]], !DIExpression(), [[META201:![0-9]+]]) +// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[META201]] +// CHECK1-NEXT: store i32 5, ptr [[A_ADDR]], align 4, !dbg [[DBG202:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG203:![0-9]+]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG204:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG203]] +// CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX10]], i64 0, i64 [[IDXPROM]], !dbg [[DBG203]] +// CHECK1-NEXT: store i32 10, ptr [[ARRAYIDX11]], align 4, !dbg [[DBG205:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG206:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX12]], i64 0, i64 0, !dbg [[DBG206]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG207:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM14:%.*]] = sext i32 [[TMP20]] to i64, !dbg [[DBG206]] +// CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX13]], i64 0, i64 [[IDXPROM14]], !dbg [[DBG206]] +// CHECK1-NEXT: store i32 11, ptr [[ARRAYIDX15]], align 4, !dbg [[DBG208:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 0, !dbg [[DBG209:![0-9]+]] +// CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX16]], i64 0, i64 0, !dbg [[DBG209]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG210:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM18:%.*]] = sext i32 [[TMP21]] to i64, !dbg [[DBG209]] +// CHECK1-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX17]], i64 0, i64 [[IDXPROM18]], !dbg [[DBG209]] +// CHECK1-NEXT: [[TMP22:%.*]] = load 
i32, ptr [[ARRAYIDX19]], align 4, !dbg [[DBG209]] +// CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP5]], i64 0, i64 0, !dbg [[DBG211:![0-9]+]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG212:![0-9]+]] +// CHECK1-NEXT: [[IDXPROM21:%.*]] = sext i32 [[TMP23]] to i64, !dbg [[DBG211]] +// CHECK1-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX20]], i64 0, i64 [[IDXPROM21]], !dbg [[DBG211]] +// CHECK1-NEXT: store i32 [[TMP22]], ptr [[ARRAYIDX22]], align 4, !dbg [[DBG213:![0-9]+]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i8, ptr [[TMP8]], align 1, !dbg [[DBG214:![0-9]+]] +// CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP24]] to i1, !dbg [[DBG214]] +// CHECK1-NEXT: [[CONV:%.*]] = zext i1 [[TOBOOL]] to i32, !dbg [[DBG214]] +// CHECK1-NEXT: store i32 [[CONV]], ptr [[D]], align 4, !dbg [[DBG215:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG216:![0-9]+]] // CHECK1: omp.body.continue: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG190]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG189]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP25]], 1, !dbg [[DBG182]] -// CHECK1-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG182]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG190]], !llvm.loop [[LOOP218:![0-9]+]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP25]], 1, !dbg [[DBG181]] +// CHECK1-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !dbg [[DBG181]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG189]], !llvm.loop [[LOOP217:![0-9]+]] // CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG190]] +// CHECK1-NEXT: br label 
[[OMP_DISPATCH_INC:%.*]], !dbg [[DBG189]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP26]], [[TMP27]], !dbg [[DBG182]] -// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG182]] -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG185]] -// CHECK1-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]], !dbg [[DBG182]] -// CHECK1-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG182]] -// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG190]], !llvm.loop [[LOOP220:![0-9]+]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP26]], [[TMP27]], !dbg [[DBG181]] +// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_LB]], align 4, !dbg [[DBG181]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG184]] +// CHECK1-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP28]], [[TMP29]], !dbg [[DBG181]] +// CHECK1-NEXT: store i32 [[ADD25]], ptr [[DOTOMP_UB]], align 4, !dbg [[DBG181]] +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG189]], !llvm.loop [[LOOP219:![0-9]+]] // CHECK1: omp.dispatch.end: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB12:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG219:![0-9]+]] -// CHECK1-NEXT: ret void, !dbg [[DBG221:![0-9]+]] +// CHECK1-NEXT: call void @__kmpc_for_static_fini(ptr @[[GLOB12:[0-9]+]], i32 [[TMP10]]), !dbg [[DBG218:![0-9]+]] +// CHECK1-NEXT: ret void, !dbg [[DBG220:![0-9]+]] 
// // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR2]] !dbg [[DBG222:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR1]] !dbg [[DBG221:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -583,35 +583,35 @@ int main() { // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META223:![0-9]+]], metadata !DIExpression()), !dbg [[DBG224:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META222:![0-9]+]], !DIExpression(), [[META223:![0-9]+]]) // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META225:![0-9]+]], metadata !DIExpression()), !dbg [[DBG224]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META224:![0-9]+]], !DIExpression(), [[META223]]) // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META226:![0-9]+]], metadata !DIExpression()), !dbg 
[[DBG224]] +// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META225:![0-9]+]], !DIExpression(), [[META223]]) // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META227:![0-9]+]], metadata !DIExpression()), !dbg [[DBG224]] +// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META226:![0-9]+]], !DIExpression(), [[META223]]) // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META228:![0-9]+]], metadata !DIExpression()), !dbg [[DBG224]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META227:![0-9]+]], !DIExpression(), [[META223]]) // CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META229:![0-9]+]], metadata !DIExpression()), !dbg [[DBG224]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG230:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG230]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG230]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG230]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG230]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG230]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG230]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG230]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG230]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG230]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG230]] -// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG230]] -// CHECK1-NEXT: call 
void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined_debug__(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]]) #[[ATTR4]], !dbg [[DBG230]] -// CHECK1-NEXT: ret void, !dbg [[DBG230]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META228:![0-9]+]], !DIExpression(), [[META223]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG229:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG229]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG229]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG229]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTBOUND_TID__ADDR]], align 8, !dbg [[DBG229]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG229]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG229]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG229]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG229]] +// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP5]] to ptr addrspace(1), !dbg [[DBG229]] +// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG229]] +// CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG229]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug___omp_outlined_debug__(ptr [[TMP3]], ptr [[TMP4]], ptr addrspace(1) [[TMP9]], i32 [[TMP6]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]]) #[[ATTR3]], !dbg [[DBG229]] +// CHECK1-NEXT: ret void, !dbg [[DBG229]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27 -// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef 
nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR6]] !dbg [[DBG231:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], i64 noundef [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5]] !dbg [[DBG230:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 @@ -619,32 +619,32 @@ int main() { // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DYN_PTR_ADDR]], metadata [[META234:![0-9]+]], metadata !DIExpression()), !dbg [[DBG235:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META233:![0-9]+]], !DIExpression(), [[META234:![0-9]+]]) // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META236:![0-9]+]], metadata !DIExpression()), !dbg [[DBG235]] +// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META235:![0-9]+]], !DIExpression(), [[META234]]) // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META237:![0-9]+]], metadata !DIExpression()), !dbg [[DBG235]] +// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META236:![0-9]+]], !DIExpression(), [[META234]]) // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META238:![0-9]+]], metadata !DIExpression()), !dbg [[DBG235]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META237:![0-9]+]], !DIExpression(), [[META234]]) // CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], 
align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META239:![0-9]+]], metadata !DIExpression()), !dbg [[DBG235]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG240:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG240]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG240]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8, !dbg [[DBG240]] -// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG240]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG240]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG240]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG240]] -// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(1), !dbg [[DBG240]] -// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG240]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG240]] -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug__(ptr [[TMP3]], ptr addrspace(1) [[TMP8]], i32 [[TMP5]], ptr addrspace(1) [[TMP9]], ptr addrspace(1) [[TMP10]]) #[[ATTR4]], !dbg [[DBG240]] -// CHECK1-NEXT: ret void, !dbg [[DBG240]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META238:![0-9]+]], !DIExpression(), [[META234]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG239:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG239]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG239]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8, !dbg [[DBG239]] +// CHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG239]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG239]] 
+// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG239]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[BB_ADDR]], align 8, !dbg [[DBG239]] +// CHECK1-NEXT: [[TMP8:%.*]] = addrspacecast ptr [[TMP4]] to ptr addrspace(1), !dbg [[DBG239]] +// CHECK1-NEXT: [[TMP9:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG239]] +// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG239]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l27_debug__(ptr [[TMP3]], ptr addrspace(1) [[TMP8]], i32 [[TMP5]], ptr addrspace(1) [[TMP9]], ptr addrspace(1) [[TMP10]]) #[[ATTR3]], !dbg [[DBG239]] +// CHECK1-NEXT: ret void, !dbg [[DBG239]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug__ -// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG241:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR0]] !dbg [[DBG240:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr addrspace(1), align 8 @@ -657,53 +657,53 @@ int main() { // CHECK1-NEXT: [[_TMP3:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x ptr], align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DYN_PTR_ADDR]], metadata [[META246:![0-9]+]], metadata !DIExpression()), !dbg [[DBG247:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META245:![0-9]+]], !DIExpression(), 
[[META246:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META248:![0-9]+]], metadata !DIExpression()), !dbg [[DBG249:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META247:![0-9]+]], !DIExpression(), [[META248:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META250:![0-9]+]], metadata !DIExpression()), !dbg [[DBG251:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META249:![0-9]+]], !DIExpression(), [[META250:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META252:![0-9]+]], metadata !DIExpression()), !dbg [[DBG253:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META251:![0-9]+]], !DIExpression(), [[META252:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META254:![0-9]+]], metadata !DIExpression()), !dbg [[DBG255:![0-9]+]] -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG256:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG256]] -// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG256]] -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG256]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[A_ADDR]], align 8, !dbg [[DBG256]] -// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG256]] -// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG256]] -// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG256]] -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], 
align 8, !dbg [[DBG256]] -// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, !dbg [[DBG256]] -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG256]] -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG256]] -// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG256]] -// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG256]] -// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG256]] -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG256]] -// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG256]] -// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG256]] -// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG256]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META253:![0-9]+]], !DIExpression(), [[META254:![0-9]+]]) +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG255:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG255]] +// CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP]], align 8, !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr addrspace(1), ptr [[A_ADDR]], align 8, !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP4:%.*]] = addrspacecast ptr addrspace(1) [[TMP3]] to ptr, !dbg [[DBG255]] +// CHECK1-NEXT: store ptr [[TMP4]], ptr [[_TMP1]], align 8, !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[_TMP1]], align 8, !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr addrspace(1), ptr [[B_ADDR]], align 8, !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP7:%.*]] = addrspacecast ptr addrspace(1) [[TMP6]] to ptr, 
!dbg [[DBG255]] +// CHECK1-NEXT: store ptr [[TMP7]], ptr [[_TMP2]], align 8, !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[_TMP2]], align 8, !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP9:%.*]] = load ptr addrspace(1), ptr [[BB_ADDR]], align 8, !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG255]] +// CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG255]] +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_kernel_environment, ptr [[DYN_PTR]]), !dbg [[DBG255]] +// CHECK1-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP12]], -1, !dbg [[DBG255]] +// CHECK1-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]], !dbg [[DBG255]] // CHECK1: user_code.entry: -// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20:[0-9]+]]) -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG257:![0-9]+]] -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG257]] -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG257]] -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP15]], align 8, !dbg [[DBG257]] -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG257]] -// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG257]] -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG257]] -// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG257]] -// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB20]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG257]] -// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG258:![0-9]+]] +// CHECK1-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB20:[0-9]+]]), !dbg [[DBG256:![0-9]+]] +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 0, !dbg [[DBG258:![0-9]+]] +// CHECK1-NEXT: store ptr [[TMP2]], ptr [[TMP14]], align 8, !dbg [[DBG258]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 1, !dbg [[DBG258]] +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[TMP15]], align 8, !dbg [[DBG258]] +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 2, !dbg [[DBG258]] +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP16]], align 8, !dbg [[DBG258]] +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i64 0, i64 3, !dbg [[DBG258]] +// CHECK1-NEXT: store ptr [[TMP11]], ptr [[TMP17]], align 8, !dbg [[DBG258]] +// CHECK1-NEXT: call void @__kmpc_parallel_51(ptr @[[GLOB20]], i32 [[TMP13]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined, ptr null, ptr [[CAPTURED_VARS_ADDRS]], i64 4), !dbg [[DBG258]] +// CHECK1-NEXT: call void @__kmpc_target_deinit(), !dbg [[DBG259:![0-9]+]] // CHECK1-NEXT: ret void, !dbg [[DBG260:![0-9]+]] // CHECK1: worker.exit: -// CHECK1-NEXT: ret void, !dbg [[DBG256]] +// CHECK1-NEXT: ret void, !dbg [[DBG255]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined_debug__ -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr 
addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR2]] !dbg [[DBG261:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr addrspace(1) noalias noundef [[C:%.*]], ptr addrspace(1) noalias noundef [[A:%.*]], ptr addrspace(1) noalias noundef [[B:%.*]], ptr addrspace(1) noalias noundef [[BB:%.*]]) #[[ATTR1]] !dbg [[DBG261:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -727,17 +727,17 @@ int main() { // CHECK1-NEXT: [[H:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D:%.*]] = alloca i32, align 4 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META264:![0-9]+]], metadata !DIExpression()), !dbg [[DBG265:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META264:![0-9]+]], !DIExpression(), [[META265:![0-9]+]]) // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META266:![0-9]+]], metadata !DIExpression()), !dbg [[DBG265]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META266:![0-9]+]], !DIExpression(), [[META265]]) // CHECK1-NEXT: store ptr addrspace(1) [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META267:![0-9]+]], metadata !DIExpression()), !dbg [[DBG268:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META267:![0-9]+]], !DIExpression(), [[META268:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META269:![0-9]+]], metadata !DIExpression()), !dbg [[DBG270:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], 
[[META269:![0-9]+]], !DIExpression(), [[META270:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META271:![0-9]+]], metadata !DIExpression()), !dbg [[DBG272:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META271:![0-9]+]], !DIExpression(), [[META272:![0-9]+]]) // CHECK1-NEXT: store ptr addrspace(1) [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META273:![0-9]+]], metadata !DIExpression()), !dbg [[DBG274:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META273:![0-9]+]], !DIExpression(), [[META274:![0-9]+]]) // CHECK1-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr [[C_ADDR]], align 8, !dbg [[DBG275:![0-9]+]] // CHECK1-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(1) [[TMP0]] to ptr, !dbg [[DBG275]] // CHECK1-NEXT: store ptr [[TMP1]], ptr [[TMP]], align 8, !dbg [[DBG275]] @@ -754,16 +754,16 @@ int main() { // CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr addrspace(1) [[TMP9]] to ptr, !dbg [[DBG275]] // CHECK1-NEXT: store ptr [[TMP10]], ptr [[_TMP3]], align 8, !dbg [[DBG275]] // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[_TMP3]], align 8, !dbg [[DBG275]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IV]], metadata [[META276:![0-9]+]], metadata !DIExpression()), !dbg [[DBG265]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_LB]], metadata [[META277:![0-9]+]], metadata !DIExpression()), !dbg [[DBG265]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_IV]], [[META276:![0-9]+]], !DIExpression(), [[META265]]) +// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_LB]], [[META277:![0-9]+]], !DIExpression(), [[META265]]) // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4, !dbg [[DBG278:![0-9]+]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_UB]], metadata [[META279:![0-9]+]], metadata !DIExpression()), !dbg [[DBG265]] +// 
CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_UB]], [[META279:![0-9]+]], !DIExpression(), [[META265]]) // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4, !dbg [[DBG278]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_STRIDE]], metadata [[META280:![0-9]+]], metadata !DIExpression()), !dbg [[DBG265]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_STRIDE]], [[META280:![0-9]+]], !DIExpression(), [[META265]]) // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4, !dbg [[DBG278]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTOMP_IS_LAST]], metadata [[META281:![0-9]+]], metadata !DIExpression()), !dbg [[DBG265]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTOMP_IS_LAST]], [[META281:![0-9]+]], !DIExpression(), [[META265]]) // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG278]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META282:![0-9]+]], metadata !DIExpression()), !dbg [[DBG265]] +// CHECK1-NEXT: #dbg_declare(ptr [[I]], [[META282:![0-9]+]], !DIExpression(), [[META265]]) // CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG275]] // CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4, !dbg [[DBG275]] // CHECK1-NEXT: call void @__kmpc_for_static_init_4(ptr @[[GLOB17:[0-9]+]], i32 [[TMP13]], i32 33, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG283:![0-9]+]] @@ -798,19 +798,19 @@ int main() { // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1, !dbg [[DBG284:![0-9]+]] // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]], !dbg [[DBG284]] // CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !dbg [[DBG284]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[F]], metadata [[META285:![0-9]+]], metadata !DIExpression()), !dbg [[DBG287:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[F]], [[META285:![0-9]+]], !DIExpression(), [[META287:![0-9]+]]) // CHECK1-NEXT: [[ARRAYIDX:%.*]] 
= getelementptr inbounds [10 x [10 x [10 x i32]]], ptr [[TMP2]], i64 0, i64 1, !dbg [[DBG288:![0-9]+]] // CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[ARRAYIDX]], i64 0, i64 1, !dbg [[DBG288]] // CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX7]], i64 0, i64 1, !dbg [[DBG288]] -// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[F]], align 8, !dbg [[DBG287]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[G]], metadata [[META289:![0-9]+]], metadata !DIExpression()), !dbg [[DBG290:![0-9]+]] -// CHECK1-NEXT: store ptr [[TMP5]], ptr [[G]], align 8, !dbg [[DBG290]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[H]], metadata [[META291:![0-9]+]], metadata !DIExpression()), !dbg [[DBG292:![0-9]+]] +// CHECK1-NEXT: store ptr [[ARRAYIDX8]], ptr [[F]], align 8, !dbg [[META287]] +// CHECK1-NEXT: #dbg_declare(ptr [[G]], [[META289:![0-9]+]], !DIExpression(), [[META290:![0-9]+]]) +// CHECK1-NEXT: store ptr [[TMP5]], ptr [[G]], align 8, !dbg [[META290]] +// CHECK1-NEXT: #dbg_declare(ptr [[H]], [[META291:![0-9]+]], !DIExpression(), [[META292:![0-9]+]]) // CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 1, !dbg [[DBG293:![0-9]+]] // CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAYIDX9]], i64 0, i64 1, !dbg [[DBG293]] -// CHECK1-NEXT: store ptr [[ARRAYIDX10]], ptr [[H]], align 8, !dbg [[DBG292]] -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[D]], metadata [[META294:![0-9]+]], metadata !DIExpression()), !dbg [[DBG295:![0-9]+]] -// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[DBG295]] +// CHECK1-NEXT: store ptr [[ARRAYIDX10]], ptr [[H]], align 8, !dbg [[META292]] +// CHECK1-NEXT: #dbg_declare(ptr [[D]], [[META294:![0-9]+]], !DIExpression(), [[META295:![0-9]+]]) +// CHECK1-NEXT: store i32 15, ptr [[D]], align 4, !dbg [[META295]] // CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4, !dbg 
[[DBG296:![0-9]+]] // CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x [10 x i32]], ptr [[TMP8]], i64 0, i64 0, !dbg [[DBG297:![0-9]+]] // CHECK1-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP5]], align 4, !dbg [[DBG298:![0-9]+]] @@ -868,7 +868,7 @@ int main() { // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined -// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR2]] !dbg [[DBG316:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR1]] !dbg [[DBG316:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 @@ -877,17 +877,17 @@ int main() { // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTGLOBAL_TID__ADDR]], metadata [[META319:![0-9]+]], metadata !DIExpression()), !dbg [[DBG320:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTGLOBAL_TID__ADDR]], [[META319:![0-9]+]], !DIExpression(), [[META320:![0-9]+]]) // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DOTBOUND_TID__ADDR]], metadata [[META321:![0-9]+]], 
metadata !DIExpression()), !dbg [[DBG320]] +// CHECK1-NEXT: #dbg_declare(ptr [[DOTBOUND_TID__ADDR]], [[META321:![0-9]+]], !DIExpression(), [[META320]]) // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META322:![0-9]+]], metadata !DIExpression()), !dbg [[DBG320]] +// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META322:![0-9]+]], !DIExpression(), [[META320]]) // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META323:![0-9]+]], metadata !DIExpression()), !dbg [[DBG320]] +// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META323:![0-9]+]], !DIExpression(), [[META320]]) // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META324:![0-9]+]], metadata !DIExpression()), !dbg [[DBG320]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META324:![0-9]+]], !DIExpression(), [[META320]]) // CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META325:![0-9]+]], metadata !DIExpression()), !dbg [[DBG320]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META325:![0-9]+]], !DIExpression(), [[META320]]) // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG326:![0-9]+]] // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG326]] // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG326]] @@ -902,12 +902,12 @@ int main() { // CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG326]] // CHECK1-NEXT: [[TMP12:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG326]] // CHECK1-NEXT: [[TMP13:%.*]] = addrspacecast ptr [[TMP9]] to ptr addrspace(1), !dbg [[DBG326]] -// CHECK1-NEXT: call void 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined_debug__(ptr [[TMP4]], ptr [[TMP5]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]], ptr addrspace(1) [[TMP12]], ptr addrspace(1) [[TMP13]]) #[[ATTR4]], !dbg [[DBG326]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug___omp_outlined_debug__(ptr [[TMP4]], ptr [[TMP5]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]], ptr addrspace(1) [[TMP12]], ptr addrspace(1) [[TMP13]]) #[[ATTR3]], !dbg [[DBG326]] // CHECK1-NEXT: ret void, !dbg [[DBG326]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41 -// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR6]] !dbg [[DBG327:![0-9]+]] { +// CHECK1-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 4 dereferenceable(400) [[B:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[BB:%.*]]) #[[ATTR5]] !dbg [[DBG327:![0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 @@ -915,15 +915,15 @@ int main() { // CHECK1-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[BB_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[DYN_PTR_ADDR]], metadata [[META330:![0-9]+]], metadata !DIExpression()), !dbg [[DBG331:![0-9]+]] +// CHECK1-NEXT: #dbg_declare(ptr [[DYN_PTR_ADDR]], [[META330:![0-9]+]], !DIExpression(), [[META331:![0-9]+]]) // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 -// 
CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[C_ADDR]], metadata [[META332:![0-9]+]], metadata !DIExpression()), !dbg [[DBG331]] +// CHECK1-NEXT: #dbg_declare(ptr [[C_ADDR]], [[META332:![0-9]+]], !DIExpression(), [[META331]]) // CHECK1-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META333:![0-9]+]], metadata !DIExpression()), !dbg [[DBG331]] +// CHECK1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META333:![0-9]+]], !DIExpression(), [[META331]]) // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[B_ADDR]], metadata [[META334:![0-9]+]], metadata !DIExpression()), !dbg [[DBG331]] +// CHECK1-NEXT: #dbg_declare(ptr [[B_ADDR]], [[META334:![0-9]+]], !DIExpression(), [[META331]]) // CHECK1-NEXT: store ptr [[BB]], ptr [[BB_ADDR]], align 8 -// CHECK1-NEXT: call void @llvm.dbg.declare(metadata ptr [[BB_ADDR]], metadata [[META335:![0-9]+]], metadata !DIExpression()), !dbg [[DBG331]] +// CHECK1-NEXT: #dbg_declare(ptr [[BB_ADDR]], [[META335:![0-9]+]], !DIExpression(), [[META331]]) // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !dbg [[DBG336:![0-9]+]] // CHECK1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8, !dbg [[DBG336]] // CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !dbg [[DBG336]] @@ -937,6 +937,6 @@ int main() { // CHECK1-NEXT: [[TMP10:%.*]] = addrspacecast ptr [[TMP6]] to ptr addrspace(1), !dbg [[DBG336]] // CHECK1-NEXT: [[TMP11:%.*]] = addrspacecast ptr [[TMP7]] to ptr addrspace(1), !dbg [[DBG336]] // CHECK1-NEXT: [[TMP12:%.*]] = addrspacecast ptr [[TMP8]] to ptr addrspace(1), !dbg [[DBG336]] -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug__(ptr [[TMP4]], ptr addrspace(1) [[TMP9]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]], ptr addrspace(1) [[TMP12]]) #[[ATTR4]], !dbg [[DBG336]] +// CHECK1-NEXT: call void 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l41_debug__(ptr [[TMP4]], ptr addrspace(1) [[TMP9]], ptr addrspace(1) [[TMP10]], ptr addrspace(1) [[TMP11]], ptr addrspace(1) [[TMP12]]) #[[ATTR3]], !dbg [[DBG336]] // CHECK1-NEXT: ret void, !dbg [[DBG336]] // diff --git a/clang/test/OpenMP/taskgroup_task_reduction_codegen.cpp b/clang/test/OpenMP/taskgroup_task_reduction_codegen.cpp index 1e20171f9e0730..e51ee18d5ea8d9 100644 --- a/clang/test/OpenMP/taskgroup_task_reduction_codegen.cpp +++ b/clang/test/OpenMP/taskgroup_task_reduction_codegen.cpp @@ -109,7 +109,7 @@ int main(int argc, char **argv) { // CHECK-DAG: [[TMP26:%.+]] = getelementptr inbounds [[T1]], ptr [[GEPARGC]], i32 0, i32 6 // CHECK-DAG: call void @llvm.memset.p0.i64(ptr align 8 [[TMP26]], i8 0, i64 4, i1 false) // CHECK-DAG: [[TMP29:%.+]] = call ptr @__kmpc_taskred_init(i32 [[GTID]], i32 3, ptr [[RD_IN1]]) -// DEBUG-DAG: call void @llvm.dbg.declare(metadata ptr [[TD1]], +// DEBUG-DAG: #dbg_declare(ptr [[TD1]], // CHECK-DAG: store ptr [[TMP29]], ptr [[TD1]], // CHECK-DAG: call void @__kmpc_taskgroup(ptr {{[^,]+}}, i32 [[GTID]]) // CHECK-DAG: store ptr [[C]], ptr [[TMP30:%[^,]+]], diff --git a/clang/test/OpenMP/threadprivate_codegen.cpp b/clang/test/OpenMP/threadprivate_codegen.cpp index 7a6269954d39ef..b5eb4651d6c337 100644 --- a/clang/test/OpenMP/threadprivate_codegen.cpp +++ b/clang/test/OpenMP/threadprivate_codegen.cpp @@ -2845,9 +2845,9 @@ int foobar() { // SIMD2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // SIMD2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // SIMD2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// SIMD2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META123:![0-9]+]], metadata !DIExpression()), !dbg [[DBG125:![0-9]+]] +// SIMD2-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META123:![0-9]+]], !DIExpression(), [[META125:![0-9]+]]) // SIMD2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// SIMD2-NEXT: tail call void @llvm.dbg.declare(metadata ptr 
[[A_ADDR]], metadata [[META126:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127:![0-9]+]] +// SIMD2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META126:![0-9]+]], !DIExpression(), [[META127:![0-9]+]]) // SIMD2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // SIMD2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG128:![0-9]+]] // SIMD2-NEXT: call void @_ZN2S1C2Ei(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG128]] @@ -2859,7 +2859,7 @@ int foobar() { // SIMD2-NEXT: entry: // SIMD2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // SIMD2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// SIMD2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META131:![0-9]+]], metadata !DIExpression()), !dbg [[DBG132:![0-9]+]] +// SIMD2-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META131:![0-9]+]], !DIExpression(), [[META132:![0-9]+]]) // SIMD2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // SIMD2-NEXT: call void @_ZN2S1D2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR3]], !dbg [[DBG133:![0-9]+]] // SIMD2-NEXT: ret void, !dbg [[DBG134:![0-9]+]] @@ -2879,9 +2879,9 @@ int foobar() { // SIMD2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // SIMD2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // SIMD2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// SIMD2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META141:![0-9]+]], metadata !DIExpression()), !dbg [[DBG143:![0-9]+]] +// SIMD2-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META141:![0-9]+]], !DIExpression(), [[META143:![0-9]+]]) // SIMD2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// SIMD2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG145:![0-9]+]] +// SIMD2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META144:![0-9]+]], !DIExpression(), [[META145:![0-9]+]]) // SIMD2-NEXT: [[THIS1:%.*]] = load 
ptr, ptr [[THIS_ADDR]], align 8 // SIMD2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG146:![0-9]+]] // SIMD2-NEXT: call void @_ZN2S2C2Ei(ptr noundef nonnull align 8 dereferenceable(16) [[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG146]] @@ -2893,7 +2893,7 @@ int foobar() { // SIMD2-NEXT: entry: // SIMD2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // SIMD2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// SIMD2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META149:![0-9]+]], metadata !DIExpression()), !dbg [[DBG150:![0-9]+]] +// SIMD2-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META149:![0-9]+]], !DIExpression(), [[META150:![0-9]+]]) // SIMD2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // SIMD2-NEXT: call void @_ZN2S2D2Ev(ptr noundef nonnull align 8 dereferenceable(16) [[THIS1]]) #[[ATTR3]], !dbg [[DBG151:![0-9]+]] // SIMD2-NEXT: ret void, !dbg [[DBG152:![0-9]+]] @@ -2997,27 +2997,27 @@ int foobar() { // SIMD2-NEXT: entry: // SIMD2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // SIMD2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 -// SIMD2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[DOTADDR]], metadata [[META170:![0-9]+]], metadata !DIExpression()), !dbg [[DBG171:![0-9]+]] -// SIMD2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG171]] +// SIMD2-NEXT: #dbg_declare(ptr [[DOTADDR]], [[META170:![0-9]+]], !DIExpression(), [[META171:![0-9]+]]) +// SIMD2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[META171]] // SIMD2: arraydestroy.body: -// SIMD2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ getelementptr inbounds ([[STRUCT_S1:%.*]], ptr @arr_x, i64 6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG171]] -// SIMD2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG171]] -// SIMD2-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) 
[[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG171]] -// SIMD2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], @arr_x, !dbg [[DBG171]] -// SIMD2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG171]] +// SIMD2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ getelementptr inbounds ([[STRUCT_S1:%.*]], ptr @arr_x, i64 6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[META171]] +// SIMD2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[META171]] +// SIMD2-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[META171]] +// SIMD2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], @arr_x, !dbg [[META171]] +// SIMD2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[META171]] // SIMD2: arraydestroy.done1: -// SIMD2-NEXT: ret void, !dbg [[DBG171]] +// SIMD2-NEXT: ret void, !dbg [[META171]] // // // SIMD2-LABEL: define {{[^@]+}}@main -// SIMD2-SAME: () #[[ATTR5:[0-9]+]] personality ptr @__gxx_personality_v0 !dbg [[DBG52:![0-9]+]] { +// SIMD2-SAME: () #[[ATTR4:[0-9]+]] personality ptr @__gxx_personality_v0 !dbg [[DBG52:![0-9]+]] { // SIMD2-NEXT: entry: // SIMD2-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // SIMD2-NEXT: [[RES:%.*]] = alloca i32, align 4 // SIMD2-NEXT: [[EXN_SLOT:%.*]] = alloca ptr, align 8 // SIMD2-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4 // SIMD2-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// SIMD2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[RES]], metadata [[META172:![0-9]+]], metadata !DIExpression()), !dbg [[DBG173:![0-9]+]] +// SIMD2-NEXT: #dbg_declare(ptr [[RES]], [[META172:![0-9]+]], !DIExpression(), [[META173:![0-9]+]]) // SIMD2-NEXT: [[TMP0:%.*]] = load atomic i8, ptr @_ZGVZ4mainE2sm acquire, align 8, 
!dbg [[DBG174:![0-9]+]] // SIMD2-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG174]] // SIMD2-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !dbg [[DBG174]], !prof [[PROF175:![0-9]+]] @@ -3094,9 +3094,9 @@ int foobar() { // SIMD2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // SIMD2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // SIMD2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// SIMD2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META200:![0-9]+]], metadata !DIExpression()), !dbg [[DBG202:![0-9]+]] +// SIMD2-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META200:![0-9]+]], !DIExpression(), [[META202:![0-9]+]]) // SIMD2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// SIMD2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META203:![0-9]+]], metadata !DIExpression()), !dbg [[DBG204:![0-9]+]] +// SIMD2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META203:![0-9]+]], !DIExpression(), [[META204:![0-9]+]]) // SIMD2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // SIMD2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG205:![0-9]+]] // SIMD2-NEXT: call void @_ZZ4mainEN5SmainC2Ei(ptr noundef nonnull align 8 dereferenceable(24) [[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG205]] @@ -3108,7 +3108,7 @@ int foobar() { // SIMD2-NEXT: entry: // SIMD2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // SIMD2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// SIMD2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META208:![0-9]+]], metadata !DIExpression()), !dbg [[DBG209:![0-9]+]] +// SIMD2-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META208:![0-9]+]], !DIExpression(), [[META209:![0-9]+]]) // SIMD2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // SIMD2-NEXT: call void @_ZZ4mainEN5SmainD2Ev(ptr noundef nonnull align 8 dereferenceable(24) [[THIS1]]) #[[ATTR3]], !dbg [[DBG210:![0-9]+]] // 
SIMD2-NEXT: ret void, !dbg [[DBG211:![0-9]+]] @@ -3118,7 +3118,7 @@ int foobar() { // SIMD2-SAME: () #[[ATTR2]] !dbg [[DBG212:![0-9]+]] { // SIMD2-NEXT: entry: // SIMD2-NEXT: [[RES:%.*]] = alloca i32, align 4 -// SIMD2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[RES]], metadata [[META213:![0-9]+]], metadata !DIExpression()), !dbg [[DBG214:![0-9]+]] +// SIMD2-NEXT: #dbg_declare(ptr [[RES]], [[META213:![0-9]+]], !DIExpression(), [[META214:![0-9]+]]) // SIMD2-NEXT: [[TMP0:%.*]] = load i32, ptr @_ZN6Static1sE, align 4, !dbg [[DBG215:![0-9]+]] // SIMD2-NEXT: store i32 [[TMP0]], ptr [[RES]], align 4, !dbg [[DBG216:![0-9]+]] // SIMD2-NEXT: [[TMP1:%.*]] = load i32, ptr @_ZL3gs1, align 4, !dbg [[DBG217:![0-9]+]] @@ -3175,9 +3175,9 @@ int foobar() { // SIMD2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // SIMD2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // SIMD2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// SIMD2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META239:![0-9]+]], metadata !DIExpression()), !dbg [[DBG241:![0-9]+]] +// SIMD2-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META239:![0-9]+]], !DIExpression(), [[META241:![0-9]+]]) // SIMD2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// SIMD2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META242:![0-9]+]], metadata !DIExpression()), !dbg [[DBG243:![0-9]+]] +// SIMD2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META242:![0-9]+]], !DIExpression(), [[META243:![0-9]+]]) // SIMD2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // SIMD2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG244:![0-9]+]] // SIMD2-NEXT: call void @_ZN2S4C2Ei(ptr noundef nonnull align 4 dereferenceable(8) [[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG244]] @@ -3189,7 +3189,7 @@ int foobar() { // SIMD2-NEXT: entry: // SIMD2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // SIMD2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// SIMD2-NEXT: tail 
call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META247:![0-9]+]], metadata !DIExpression()), !dbg [[DBG248:![0-9]+]] +// SIMD2-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META247:![0-9]+]], !DIExpression(), [[META248:![0-9]+]]) // SIMD2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // SIMD2-NEXT: call void @_ZN2S4D2Ev(ptr noundef nonnull align 4 dereferenceable(8) [[THIS1]]) #[[ATTR3]], !dbg [[DBG249:![0-9]+]] // SIMD2-NEXT: ret void, !dbg [[DBG250:![0-9]+]] @@ -3201,9 +3201,9 @@ int foobar() { // SIMD2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // SIMD2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // SIMD2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// SIMD2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META252:![0-9]+]], metadata !DIExpression()), !dbg [[DBG253:![0-9]+]] +// SIMD2-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META252:![0-9]+]], !DIExpression(), [[META253:![0-9]+]]) // SIMD2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// SIMD2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META254:![0-9]+]], metadata !DIExpression()), !dbg [[DBG255:![0-9]+]] +// SIMD2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META254:![0-9]+]], !DIExpression(), [[META255:![0-9]+]]) // SIMD2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // SIMD2-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG256:![0-9]+]] // SIMD2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG257:![0-9]+]] @@ -3216,7 +3216,7 @@ int foobar() { // SIMD2-NEXT: entry: // SIMD2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // SIMD2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// SIMD2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META260:![0-9]+]], metadata !DIExpression()), !dbg [[DBG261:![0-9]+]] +// SIMD2-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META260:![0-9]+]], !DIExpression(), [[META261:![0-9]+]]) 
// SIMD2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // SIMD2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG262:![0-9]+]] // SIMD2-NEXT: store i32 0, ptr [[A]], align 4, !dbg [[DBG264:![0-9]+]] @@ -3229,9 +3229,9 @@ int foobar() { // SIMD2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // SIMD2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // SIMD2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// SIMD2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META267:![0-9]+]], metadata !DIExpression()), !dbg [[DBG268:![0-9]+]] +// SIMD2-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META267:![0-9]+]], !DIExpression(), [[META268:![0-9]+]]) // SIMD2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// SIMD2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META269:![0-9]+]], metadata !DIExpression()), !dbg [[DBG270:![0-9]+]] +// SIMD2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META269:![0-9]+]], !DIExpression(), [[META270:![0-9]+]]) // SIMD2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // SIMD2-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG271:![0-9]+]] // SIMD2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG272:![0-9]+]] @@ -3244,7 +3244,7 @@ int foobar() { // SIMD2-NEXT: entry: // SIMD2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // SIMD2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// SIMD2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META275:![0-9]+]], metadata !DIExpression()), !dbg [[DBG276:![0-9]+]] +// SIMD2-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META275:![0-9]+]], !DIExpression(), [[META276:![0-9]+]]) // SIMD2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // SIMD2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG277:![0-9]+]] // SIMD2-NEXT: store i32 0, ptr [[A]], 
align 8, !dbg [[DBG279:![0-9]+]] @@ -3257,9 +3257,9 @@ int foobar() { // SIMD2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // SIMD2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // SIMD2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// SIMD2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META282:![0-9]+]], metadata !DIExpression()), !dbg [[DBG283:![0-9]+]] +// SIMD2-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META282:![0-9]+]], !DIExpression(), [[META283:![0-9]+]]) // SIMD2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// SIMD2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META284:![0-9]+]], metadata !DIExpression()), !dbg [[DBG285:![0-9]+]] +// SIMD2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META284:![0-9]+]], !DIExpression(), [[META285:![0-9]+]]) // SIMD2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // SIMD2-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG286:![0-9]+]] // SIMD2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG287:![0-9]+]] @@ -3272,7 +3272,7 @@ int foobar() { // SIMD2-NEXT: entry: // SIMD2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // SIMD2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// SIMD2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META290:![0-9]+]], metadata !DIExpression()), !dbg [[DBG291:![0-9]+]] +// SIMD2-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META290:![0-9]+]], !DIExpression(), [[META291:![0-9]+]]) // SIMD2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // SIMD2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG292:![0-9]+]] // SIMD2-NEXT: store i32 0, ptr [[A]], align 8, !dbg [[DBG294:![0-9]+]] @@ -3285,9 +3285,9 @@ int foobar() { // SIMD2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // SIMD2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // SIMD2-NEXT: store ptr [[THIS]], ptr 
[[THIS_ADDR]], align 8 -// SIMD2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META297:![0-9]+]], metadata !DIExpression()), !dbg [[DBG298:![0-9]+]] +// SIMD2-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META297:![0-9]+]], !DIExpression(), [[META298:![0-9]+]]) // SIMD2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// SIMD2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META299:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300:![0-9]+]] +// SIMD2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META299:![0-9]+]], !DIExpression(), [[META300:![0-9]+]]) // SIMD2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // SIMD2-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG301:![0-9]+]] // SIMD2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG302:![0-9]+]] @@ -3300,7 +3300,7 @@ int foobar() { // SIMD2-NEXT: entry: // SIMD2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // SIMD2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// SIMD2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META305:![0-9]+]], metadata !DIExpression()), !dbg [[DBG306:![0-9]+]] +// SIMD2-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META305:![0-9]+]], !DIExpression(), [[META306:![0-9]+]]) // SIMD2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // SIMD2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG307:![0-9]+]] // SIMD2-NEXT: store i32 0, ptr [[A]], align 4, !dbg [[DBG309:![0-9]+]] @@ -4392,9 +4392,9 @@ int foobar() { // CHECK-TLS3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK-TLS3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-TLS3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK-TLS3-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META124:![0-9]+]], metadata !DIExpression()), !dbg [[DBG126:![0-9]+]] +// CHECK-TLS3-NEXT: 
#dbg_declare(ptr [[THIS_ADDR]], [[META124:![0-9]+]], !DIExpression(), [[META126:![0-9]+]]) // CHECK-TLS3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK-TLS3-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META127:![0-9]+]], metadata !DIExpression()), !dbg [[DBG128:![0-9]+]] +// CHECK-TLS3-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META127:![0-9]+]], !DIExpression(), [[META128:![0-9]+]]) // CHECK-TLS3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK-TLS3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG129:![0-9]+]] // CHECK-TLS3-NEXT: call void @_ZN2S1C2Ei(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG129]] @@ -4406,7 +4406,7 @@ int foobar() { // CHECK-TLS3-NEXT: entry: // CHECK-TLS3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK-TLS3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK-TLS3-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META132:![0-9]+]], metadata !DIExpression()), !dbg [[DBG133:![0-9]+]] +// CHECK-TLS3-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META132:![0-9]+]], !DIExpression(), [[META133:![0-9]+]]) // CHECK-TLS3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK-TLS3-NEXT: call void @_ZN2S1D2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR3]], !dbg [[DBG134:![0-9]+]] // CHECK-TLS3-NEXT: ret void, !dbg [[DBG135:![0-9]+]] @@ -4418,9 +4418,9 @@ int foobar() { // CHECK-TLS3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK-TLS3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-TLS3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK-TLS3-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META137:![0-9]+]], metadata !DIExpression()), !dbg [[DBG138:![0-9]+]] +// CHECK-TLS3-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META137:![0-9]+]], !DIExpression(), [[META138:![0-9]+]]) // CHECK-TLS3-NEXT: store i32 [[A]], ptr 
[[A_ADDR]], align 4 -// CHECK-TLS3-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META139:![0-9]+]], metadata !DIExpression()), !dbg [[DBG140:![0-9]+]] +// CHECK-TLS3-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META139:![0-9]+]], !DIExpression(), [[META140:![0-9]+]]) // CHECK-TLS3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK-TLS3-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG141:![0-9]+]] // CHECK-TLS3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG142:![0-9]+]] @@ -4433,7 +4433,7 @@ int foobar() { // CHECK-TLS3-NEXT: entry: // CHECK-TLS3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK-TLS3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK-TLS3-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META145:![0-9]+]], metadata !DIExpression()), !dbg [[DBG146:![0-9]+]] +// CHECK-TLS3-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META145:![0-9]+]], !DIExpression(), [[META146:![0-9]+]]) // CHECK-TLS3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK-TLS3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG147:![0-9]+]] // CHECK-TLS3-NEXT: store i32 0, ptr [[A]], align 4, !dbg [[DBG149:![0-9]+]] @@ -4454,9 +4454,9 @@ int foobar() { // CHECK-TLS3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK-TLS3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-TLS3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK-TLS3-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META157:![0-9]+]], metadata !DIExpression()), !dbg [[DBG159:![0-9]+]] +// CHECK-TLS3-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META157:![0-9]+]], !DIExpression(), [[META159:![0-9]+]]) // CHECK-TLS3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK-TLS3-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META160:![0-9]+]], 
metadata !DIExpression()), !dbg [[DBG161:![0-9]+]] +// CHECK-TLS3-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META160:![0-9]+]], !DIExpression(), [[META161:![0-9]+]]) // CHECK-TLS3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK-TLS3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG162:![0-9]+]] // CHECK-TLS3-NEXT: call void @_ZN2S2C2Ei(ptr noundef nonnull align 8 dereferenceable(16) [[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG162]] @@ -4468,7 +4468,7 @@ int foobar() { // CHECK-TLS3-NEXT: entry: // CHECK-TLS3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK-TLS3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK-TLS3-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META165:![0-9]+]], metadata !DIExpression()), !dbg [[DBG166:![0-9]+]] +// CHECK-TLS3-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META165:![0-9]+]], !DIExpression(), [[META166:![0-9]+]]) // CHECK-TLS3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK-TLS3-NEXT: call void @_ZN2S2D2Ev(ptr noundef nonnull align 8 dereferenceable(16) [[THIS1]]) #[[ATTR3]], !dbg [[DBG167:![0-9]+]] // CHECK-TLS3-NEXT: ret void, !dbg [[DBG168:![0-9]+]] @@ -4480,9 +4480,9 @@ int foobar() { // CHECK-TLS3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK-TLS3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-TLS3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK-TLS3-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META170:![0-9]+]], metadata !DIExpression()), !dbg [[DBG171:![0-9]+]] +// CHECK-TLS3-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META170:![0-9]+]], !DIExpression(), [[META171:![0-9]+]]) // CHECK-TLS3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK-TLS3-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META172:![0-9]+]], metadata !DIExpression()), !dbg [[DBG173:![0-9]+]] +// CHECK-TLS3-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META172:![0-9]+]], 
!DIExpression(), [[META173:![0-9]+]]) // CHECK-TLS3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK-TLS3-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG174:![0-9]+]] // CHECK-TLS3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG175:![0-9]+]] @@ -4495,7 +4495,7 @@ int foobar() { // CHECK-TLS3-NEXT: entry: // CHECK-TLS3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK-TLS3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK-TLS3-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META178:![0-9]+]], metadata !DIExpression()), !dbg [[DBG179:![0-9]+]] +// CHECK-TLS3-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META178:![0-9]+]], !DIExpression(), [[META179:![0-9]+]]) // CHECK-TLS3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK-TLS3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG180:![0-9]+]] // CHECK-TLS3-NEXT: store i32 0, ptr [[A]], align 8, !dbg [[DBG182:![0-9]+]] @@ -4600,25 +4600,25 @@ int foobar() { // CHECK-TLS3-NEXT: entry: // CHECK-TLS3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK-TLS3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 -// CHECK-TLS3-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[DOTADDR]], metadata [[META201:![0-9]+]], metadata !DIExpression()), !dbg [[DBG202:![0-9]+]] -// CHECK-TLS3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG202]] +// CHECK-TLS3-NEXT: #dbg_declare(ptr [[DOTADDR]], [[META201:![0-9]+]], !DIExpression(), [[META202:![0-9]+]]) +// CHECK-TLS3-NEXT: br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[META202]] // CHECK-TLS3: arraydestroy.body: -// CHECK-TLS3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ getelementptr inbounds ([[STRUCT_S1:%.*]], ptr @arr_x, i64 6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG202]] -// CHECK-TLS3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr 
inbounds [[STRUCT_S1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG202]] -// CHECK-TLS3-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG202]] -// CHECK-TLS3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], @arr_x, !dbg [[DBG202]] -// CHECK-TLS3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG202]] +// CHECK-TLS3-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ getelementptr inbounds ([[STRUCT_S1:%.*]], ptr @arr_x, i64 6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[META202]] +// CHECK-TLS3-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[META202]] +// CHECK-TLS3-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[META202]] +// CHECK-TLS3-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], @arr_x, !dbg [[META202]] +// CHECK-TLS3-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[META202]] // CHECK-TLS3: arraydestroy.done1: -// CHECK-TLS3-NEXT: ret void, !dbg [[DBG202]] +// CHECK-TLS3-NEXT: ret void, !dbg [[META202]] // // // CHECK-TLS3-LABEL: define {{[^@]+}}@main -// CHECK-TLS3-SAME: () #[[ATTR5:[0-9]+]] !dbg [[DBG52:![0-9]+]] { +// CHECK-TLS3-SAME: () #[[ATTR4:[0-9]+]] !dbg [[DBG52:![0-9]+]] { // CHECK-TLS3-NEXT: entry: // CHECK-TLS3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK-TLS3-NEXT: [[RES:%.*]] = alloca i32, align 4 // CHECK-TLS3-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK-TLS3-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[RES]], metadata [[META203:![0-9]+]], metadata !DIExpression()), !dbg [[DBG204:![0-9]+]] +// CHECK-TLS3-NEXT: #dbg_declare(ptr [[RES]], [[META203:![0-9]+]], !DIExpression(), [[META204:![0-9]+]]) // 
CHECK-TLS3-NEXT: [[TMP0:%.*]] = load i8, ptr @_ZGVZ4mainE2sm, align 1, !dbg [[DBG205:![0-9]+]] // CHECK-TLS3-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG205]] // CHECK-TLS3-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !dbg [[DBG205]], !prof [[PROF206:![0-9]+]] @@ -4687,7 +4687,7 @@ int foobar() { // // // CHECK-TLS3-LABEL: define {{[^@]+}}@_ZTWL3gs1 -// CHECK-TLS3-SAME: () #[[ATTR6:[0-9]+]] { +// CHECK-TLS3-SAME: () #[[ATTR5:[0-9]+]] { // CHECK-TLS3-NEXT: call void @_ZTHL3gs1() // CHECK-TLS3-NEXT: [[TMP1:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZL3gs1) // CHECK-TLS3-NEXT: ret ptr [[TMP1]] @@ -4699,9 +4699,9 @@ int foobar() { // CHECK-TLS3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK-TLS3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-TLS3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK-TLS3-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META237:![0-9]+]], metadata !DIExpression()), !dbg [[DBG239:![0-9]+]] +// CHECK-TLS3-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META237:![0-9]+]], !DIExpression(), [[META239:![0-9]+]]) // CHECK-TLS3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK-TLS3-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META240:![0-9]+]], metadata !DIExpression()), !dbg [[DBG241:![0-9]+]] +// CHECK-TLS3-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META240:![0-9]+]], !DIExpression(), [[META241:![0-9]+]]) // CHECK-TLS3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK-TLS3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG242:![0-9]+]] // CHECK-TLS3-NEXT: call void @_ZZ4mainEN5SmainC2Ei(ptr noundef nonnull align 8 dereferenceable(24) [[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG242]] @@ -4713,14 +4713,14 @@ int foobar() { // CHECK-TLS3-NEXT: entry: // CHECK-TLS3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK-TLS3-NEXT: store ptr [[THIS]], 
ptr [[THIS_ADDR]], align 8 -// CHECK-TLS3-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META245:![0-9]+]], metadata !DIExpression()), !dbg [[DBG246:![0-9]+]] +// CHECK-TLS3-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META245:![0-9]+]], !DIExpression(), [[META246:![0-9]+]]) // CHECK-TLS3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK-TLS3-NEXT: call void @_ZZ4mainEN5SmainD2Ev(ptr noundef nonnull align 8 dereferenceable(24) [[THIS1]]) #[[ATTR3]], !dbg [[DBG247:![0-9]+]] // CHECK-TLS3-NEXT: ret void, !dbg [[DBG248:![0-9]+]] // // // CHECK-TLS3-LABEL: define {{[^@]+}}@_ZTWN6Static1sE -// CHECK-TLS3-SAME: () #[[ATTR6]] comdat { +// CHECK-TLS3-SAME: () #[[ATTR5]] comdat { // CHECK-TLS3-NEXT: [[TMP1:%.*]] = icmp ne ptr @_ZTHN6Static1sE, null // CHECK-TLS3-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]] // CHECK-TLS3: 2: @@ -4732,7 +4732,7 @@ int foobar() { // // // CHECK-TLS3-LABEL: define {{[^@]+}}@_ZTW3gs3 -// CHECK-TLS3-SAME: () #[[ATTR6]] comdat { +// CHECK-TLS3-SAME: () #[[ATTR5]] comdat { // CHECK-TLS3-NEXT: [[TMP1:%.*]] = icmp ne ptr @_ZTH3gs3, null // CHECK-TLS3-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]] // CHECK-TLS3: 2: @@ -4744,14 +4744,14 @@ int foobar() { // // // CHECK-TLS3-LABEL: define {{[^@]+}}@_ZTW5arr_x -// CHECK-TLS3-SAME: () #[[ATTR6]] comdat { +// CHECK-TLS3-SAME: () #[[ATTR5]] comdat { // CHECK-TLS3-NEXT: call void @_ZTH5arr_x() // CHECK-TLS3-NEXT: [[TMP1:%.*]] = call align 16 ptr @llvm.threadlocal.address.p0(ptr align 16 @arr_x) // CHECK-TLS3-NEXT: ret ptr [[TMP1]] // // // CHECK-TLS3-LABEL: define {{[^@]+}}@_ZTWN2STI2S4E2stE -// CHECK-TLS3-SAME: () #[[ATTR6]] comdat { +// CHECK-TLS3-SAME: () #[[ATTR5]] comdat { // CHECK-TLS3-NEXT: call void @_ZTHN2STI2S4E2stE() // CHECK-TLS3-NEXT: [[TMP1:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZN2STI2S4E2stE) // CHECK-TLS3-NEXT: ret ptr [[TMP1]] @@ -4763,9 +4763,9 @@ int foobar() { // CHECK-TLS3-NEXT: 
[[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK-TLS3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-TLS3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK-TLS3-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META250:![0-9]+]], metadata !DIExpression()), !dbg [[DBG251:![0-9]+]] +// CHECK-TLS3-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META250:![0-9]+]], !DIExpression(), [[META251:![0-9]+]]) // CHECK-TLS3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK-TLS3-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META252:![0-9]+]], metadata !DIExpression()), !dbg [[DBG253:![0-9]+]] +// CHECK-TLS3-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META252:![0-9]+]], !DIExpression(), [[META253:![0-9]+]]) // CHECK-TLS3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK-TLS3-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG254:![0-9]+]] // CHECK-TLS3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG255:![0-9]+]] @@ -4778,7 +4778,7 @@ int foobar() { // CHECK-TLS3-NEXT: entry: // CHECK-TLS3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK-TLS3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK-TLS3-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META258:![0-9]+]], metadata !DIExpression()), !dbg [[DBG259:![0-9]+]] +// CHECK-TLS3-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META258:![0-9]+]], !DIExpression(), [[META259:![0-9]+]]) // CHECK-TLS3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK-TLS3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG260:![0-9]+]] // CHECK-TLS3-NEXT: store i32 0, ptr [[A]], align 8, !dbg [[DBG262:![0-9]+]] @@ -4789,7 +4789,7 @@ int foobar() { // CHECK-TLS3-SAME: () #[[ATTR1]] !dbg [[DBG264:![0-9]+]] { // CHECK-TLS3-NEXT: entry: // CHECK-TLS3-NEXT: [[RES:%.*]] = alloca i32, align 4 -// 
CHECK-TLS3-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[RES]], metadata [[META265:![0-9]+]], metadata !DIExpression()), !dbg [[DBG266:![0-9]+]] +// CHECK-TLS3-NEXT: #dbg_declare(ptr [[RES]], [[META265:![0-9]+]], !DIExpression(), [[META266:![0-9]+]]) // CHECK-TLS3-NEXT: [[TMP0:%.*]] = call ptr @_ZTWN6Static1sE(), !dbg [[DBG267:![0-9]+]] // CHECK-TLS3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG268:![0-9]+]] // CHECK-TLS3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG268]] @@ -4860,9 +4860,9 @@ int foobar() { // CHECK-TLS3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK-TLS3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-TLS3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK-TLS3-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META296:![0-9]+]], metadata !DIExpression()), !dbg [[DBG298:![0-9]+]] +// CHECK-TLS3-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META296:![0-9]+]], !DIExpression(), [[META298:![0-9]+]]) // CHECK-TLS3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK-TLS3-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META299:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300:![0-9]+]] +// CHECK-TLS3-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META299:![0-9]+]], !DIExpression(), [[META300:![0-9]+]]) // CHECK-TLS3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK-TLS3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG301:![0-9]+]] // CHECK-TLS3-NEXT: call void @_ZN2S4C2Ei(ptr noundef nonnull align 4 dereferenceable(8) [[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG301]] @@ -4874,7 +4874,7 @@ int foobar() { // CHECK-TLS3-NEXT: entry: // CHECK-TLS3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK-TLS3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK-TLS3-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META304:![0-9]+]], metadata 
!DIExpression()), !dbg [[DBG305:![0-9]+]] +// CHECK-TLS3-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META304:![0-9]+]], !DIExpression(), [[META305:![0-9]+]]) // CHECK-TLS3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK-TLS3-NEXT: call void @_ZN2S4D2Ev(ptr noundef nonnull align 4 dereferenceable(8) [[THIS1]]) #[[ATTR3]], !dbg [[DBG306:![0-9]+]] // CHECK-TLS3-NEXT: ret void, !dbg [[DBG307:![0-9]+]] @@ -4886,9 +4886,9 @@ int foobar() { // CHECK-TLS3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK-TLS3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-TLS3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK-TLS3-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META309:![0-9]+]], metadata !DIExpression()), !dbg [[DBG310:![0-9]+]] +// CHECK-TLS3-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META309:![0-9]+]], !DIExpression(), [[META310:![0-9]+]]) // CHECK-TLS3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK-TLS3-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META311:![0-9]+]], metadata !DIExpression()), !dbg [[DBG312:![0-9]+]] +// CHECK-TLS3-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META311:![0-9]+]], !DIExpression(), [[META312:![0-9]+]]) // CHECK-TLS3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK-TLS3-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG313:![0-9]+]] // CHECK-TLS3-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG314:![0-9]+]] @@ -4901,7 +4901,7 @@ int foobar() { // CHECK-TLS3-NEXT: entry: // CHECK-TLS3-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK-TLS3-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK-TLS3-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META317:![0-9]+]], metadata !DIExpression()), !dbg [[DBG318:![0-9]+]] +// CHECK-TLS3-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META317:![0-9]+]], !DIExpression(), 
[[META318:![0-9]+]]) // CHECK-TLS3-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK-TLS3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG319:![0-9]+]] // CHECK-TLS3-NEXT: store i32 0, ptr [[A]], align 4, !dbg [[DBG321:![0-9]+]] @@ -4936,7 +4936,7 @@ int foobar() { // CHECK-TLS4-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // CHECK-TLS4-NEXT: [[RES:%.*]] = alloca i32, align 4 // CHECK-TLS4-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// CHECK-TLS4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[RES]], metadata [[META116:![0-9]+]], metadata !DIExpression()), !dbg [[DBG117:![0-9]+]] +// CHECK-TLS4-NEXT: #dbg_declare(ptr [[RES]], [[META116:![0-9]+]], !DIExpression(), [[META117:![0-9]+]]) // CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i8, ptr @_ZGVZ4mainE2sm, align 1, !dbg [[DBG118:![0-9]+]] // CHECK-TLS4-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG118]] // CHECK-TLS4-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !dbg [[DBG118]], !prof [[PROF119:![0-9]+]] @@ -4945,7 +4945,7 @@ int foobar() { // CHECK-TLS4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP1]], i32 0, i32 0, !dbg [[DBG121:![0-9]+]] // CHECK-TLS4-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG121]] // CHECK-TLS4-NEXT: call void @_ZZ4mainEN5SmainC1Ei(ptr noundef nonnull align 8 dereferenceable(24) @_ZZ4mainE2sm, i32 noundef [[TMP2]]), !dbg [[DBG122:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP3:%.*]] = call i32 @__cxa_thread_atexit(ptr @_ZZ4mainEN5SmainD1Ev, ptr @_ZZ4mainE2sm, ptr @__dso_handle) #[[ATTR5:[0-9]+]], !dbg [[DBG118]] +// CHECK-TLS4-NEXT: [[TMP3:%.*]] = call i32 @__cxa_thread_atexit(ptr @_ZZ4mainEN5SmainD1Ev, ptr @_ZZ4mainE2sm, ptr @__dso_handle) #[[ATTR4:[0-9]+]], !dbg [[DBG118]] // CHECK-TLS4-NEXT: store i8 1, ptr @_ZGVZ4mainE2sm, align 1, !dbg [[DBG118]] // CHECK-TLS4-NEXT: br label [[INIT_END]], !dbg [[DBG118]] // CHECK-TLS4: init.end: 
@@ -5005,21 +5005,21 @@ int foobar() { // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZTWL3gs1 -// CHECK-TLS4-SAME: () #[[ATTR2:[0-9]+]] { +// CHECK-TLS4-SAME: () #[[ATTR1:[0-9]+]] { // CHECK-TLS4-NEXT: call void @_ZTHL3gs1() // CHECK-TLS4-NEXT: [[TMP1:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZL3gs1) // CHECK-TLS4-NEXT: ret ptr [[TMP1]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC1Ei -// CHECK-TLS4-SAME: (ptr noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR3:[0-9]+]] align 2 !dbg [[DBG149:![0-9]+]] { +// CHECK-TLS4-SAME: (ptr noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] align 2 !dbg [[DBG149:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK-TLS4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-TLS4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META150:![0-9]+]], metadata !DIExpression()), !dbg [[DBG152:![0-9]+]] +// CHECK-TLS4-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META150:![0-9]+]], !DIExpression(), [[META152:![0-9]+]]) // CHECK-TLS4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK-TLS4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META153:![0-9]+]], metadata !DIExpression()), !dbg [[DBG154:![0-9]+]] +// CHECK-TLS4-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META153:![0-9]+]], !DIExpression(), [[META154:![0-9]+]]) // CHECK-TLS4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG155:![0-9]+]] // CHECK-TLS4-NEXT: call void @_ZZ4mainEN5SmainC2Ei(ptr noundef nonnull align 8 dereferenceable(24) [[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG155]] @@ -5027,18 +5027,18 @@ int foobar() { // // // CHECK-TLS4-LABEL: define 
{{[^@]+}}@_ZZ4mainEN5SmainD1Ev -// CHECK-TLS4-SAME: (ptr noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR4:[0-9]+]] align 2 !dbg [[DBG157:![0-9]+]] { +// CHECK-TLS4-SAME: (ptr noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR3:[0-9]+]] align 2 !dbg [[DBG157:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK-TLS4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META158:![0-9]+]], metadata !DIExpression()), !dbg [[DBG159:![0-9]+]] +// CHECK-TLS4-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META158:![0-9]+]], !DIExpression(), [[META159:![0-9]+]]) // CHECK-TLS4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: call void @_ZZ4mainEN5SmainD2Ev(ptr noundef nonnull align 8 dereferenceable(24) [[THIS1]]) #[[ATTR5]], !dbg [[DBG160:![0-9]+]] +// CHECK-TLS4-NEXT: call void @_ZZ4mainEN5SmainD2Ev(ptr noundef nonnull align 8 dereferenceable(24) [[THIS1]]) #[[ATTR4]], !dbg [[DBG160:![0-9]+]] // CHECK-TLS4-NEXT: ret void, !dbg [[DBG161:![0-9]+]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZTWN6Static1sE -// CHECK-TLS4-SAME: () #[[ATTR2]] comdat { +// CHECK-TLS4-SAME: () #[[ATTR1]] comdat { // CHECK-TLS4-NEXT: [[TMP1:%.*]] = icmp ne ptr @_ZTHN6Static1sE, null // CHECK-TLS4-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]] // CHECK-TLS4: 2: @@ -5050,7 +5050,7 @@ int foobar() { // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZTW3gs3 -// CHECK-TLS4-SAME: () #[[ATTR2]] comdat { +// CHECK-TLS4-SAME: () #[[ATTR1]] comdat { // CHECK-TLS4-NEXT: [[TMP1:%.*]] = icmp ne ptr @_ZTH3gs3, null // CHECK-TLS4-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]] // CHECK-TLS4: 2: @@ -5062,36 +5062,36 @@ int foobar() { // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZTW5arr_x -// CHECK-TLS4-SAME: () #[[ATTR2]] comdat { +// CHECK-TLS4-SAME: () #[[ATTR1]] 
comdat { // CHECK-TLS4-NEXT: call void @_ZTH5arr_x() // CHECK-TLS4-NEXT: [[TMP1:%.*]] = call align 16 ptr @llvm.threadlocal.address.p0(ptr align 16 @arr_x) // CHECK-TLS4-NEXT: ret ptr [[TMP1]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZTWN2STIiE2stE -// CHECK-TLS4-SAME: () #[[ATTR2]] comdat { +// CHECK-TLS4-SAME: () #[[ATTR1]] comdat { // CHECK-TLS4-NEXT: [[TMP1:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZN2STIiE2stE) // CHECK-TLS4-NEXT: ret ptr [[TMP1]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZTWN2STIfE2stE -// CHECK-TLS4-SAME: () #[[ATTR2]] comdat { +// CHECK-TLS4-SAME: () #[[ATTR1]] comdat { // CHECK-TLS4-NEXT: [[TMP1:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZN2STIfE2stE) // CHECK-TLS4-NEXT: ret ptr [[TMP1]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZTWN2STI2S4E2stE -// CHECK-TLS4-SAME: () #[[ATTR2]] comdat { +// CHECK-TLS4-SAME: () #[[ATTR1]] comdat { // CHECK-TLS4-NEXT: call void @_ZTHN2STI2S4E2stE() // CHECK-TLS4-NEXT: [[TMP1:%.*]] = call align 4 ptr @llvm.threadlocal.address.p0(ptr align 4 @_ZN2STI2S4E2stE) // CHECK-TLS4-NEXT: ret ptr [[TMP1]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_Z6foobarv -// CHECK-TLS4-SAME: () #[[ATTR3]] !dbg [[DBG162:![0-9]+]] { +// CHECK-TLS4-SAME: () #[[ATTR2]] !dbg [[DBG162:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[RES:%.*]] = alloca i32, align 4 -// CHECK-TLS4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[RES]], metadata [[META163:![0-9]+]], metadata !DIExpression()), !dbg [[DBG164:![0-9]+]] +// CHECK-TLS4-NEXT: #dbg_declare(ptr [[RES]], [[META163:![0-9]+]], !DIExpression(), [[META164:![0-9]+]]) // CHECK-TLS4-NEXT: [[TMP0:%.*]] = call ptr @_ZTWN6Static1sE(), !dbg [[DBG165:![0-9]+]] // CHECK-TLS4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], ptr [[TMP0]], i32 0, i32 0, !dbg [[DBG166:![0-9]+]] // CHECK-TLS4-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG166]] @@ -5145,19 +5145,19 @@ int foobar() { // 
CHECK-TLS4-SAME: () #[[ATTR6:[0-9]+]] !dbg [[DBG188:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: call void @_ZN2S1C1Ei(ptr noundef nonnull align 4 dereferenceable(4) @_ZL3gs1, i32 noundef 5), !dbg [[DBG191:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP0:%.*]] = call i32 @__cxa_thread_atexit(ptr @_ZN2S1D1Ev, ptr @_ZL3gs1, ptr @__dso_handle) #[[ATTR5]], !dbg [[DBG193:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP0:%.*]] = call i32 @__cxa_thread_atexit(ptr @_ZN2S1D1Ev, ptr @_ZL3gs1, ptr @__dso_handle) #[[ATTR4]], !dbg [[DBG193:![0-9]+]] // CHECK-TLS4-NEXT: ret void, !dbg [[DBG194:![0-9]+]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S1C1Ei -// CHECK-TLS4-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG195:![0-9]+]] { +// CHECK-TLS4-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG195:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK-TLS4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-TLS4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META196:![0-9]+]], metadata !DIExpression()), !dbg [[DBG198:![0-9]+]] +// CHECK-TLS4-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META196:![0-9]+]], !DIExpression(), [[META198:![0-9]+]]) // CHECK-TLS4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK-TLS4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META199:![0-9]+]], metadata !DIExpression()), !dbg [[DBG200:![0-9]+]] +// CHECK-TLS4-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META199:![0-9]+]], !DIExpression(), [[META200:![0-9]+]]) // CHECK-TLS4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG201:![0-9]+]] // CHECK-TLS4-NEXT: call void 
@_ZN2S1C2Ei(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG201]] @@ -5165,25 +5165,25 @@ int foobar() { // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S1D1Ev -// CHECK-TLS4-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG203:![0-9]+]] { +// CHECK-TLS4-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG203:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK-TLS4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META204:![0-9]+]], metadata !DIExpression()), !dbg [[DBG205:![0-9]+]] +// CHECK-TLS4-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META204:![0-9]+]], !DIExpression(), [[META205:![0-9]+]]) // CHECK-TLS4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: call void @_ZN2S1D2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR5]], !dbg [[DBG206:![0-9]+]] +// CHECK-TLS4-NEXT: call void @_ZN2S1D2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]], !dbg [[DBG206:![0-9]+]] // CHECK-TLS4-NEXT: ret void, !dbg [[DBG207:![0-9]+]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S1C2Ei -// CHECK-TLS4-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG208:![0-9]+]] { +// CHECK-TLS4-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG208:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK-TLS4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-TLS4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: tail call void @llvm.dbg.declare(metadata 
ptr [[THIS_ADDR]], metadata [[META209:![0-9]+]], metadata !DIExpression()), !dbg [[DBG210:![0-9]+]] +// CHECK-TLS4-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META209:![0-9]+]], !DIExpression(), [[META210:![0-9]+]]) // CHECK-TLS4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK-TLS4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META211:![0-9]+]], metadata !DIExpression()), !dbg [[DBG212:![0-9]+]] +// CHECK-TLS4-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META211:![0-9]+]], !DIExpression(), [[META212:![0-9]+]]) // CHECK-TLS4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK-TLS4-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG213:![0-9]+]] // CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG214:![0-9]+]] @@ -5192,11 +5192,11 @@ int foobar() { // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S1D2Ev -// CHECK-TLS4-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG216:![0-9]+]] { +// CHECK-TLS4-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG216:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK-TLS4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META217:![0-9]+]], metadata !DIExpression()), !dbg [[DBG218:![0-9]+]] +// CHECK-TLS4-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META217:![0-9]+]], !DIExpression(), [[META218:![0-9]+]]) // CHECK-TLS4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK-TLS4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG219:![0-9]+]] // CHECK-TLS4-NEXT: store i32 0, ptr [[A]], align 4, !dbg [[DBG221:![0-9]+]] @@ -5207,19 +5207,19 @@ int foobar() { // CHECK-TLS4-SAME: () 
#[[ATTR6]] !dbg [[DBG223:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: call void @_ZN2S2C1Ei(ptr noundef nonnull align 8 dereferenceable(16) @_ZL3gs2, i32 noundef 27), !dbg [[DBG224:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2S2D1Ev, ptr @_ZL3gs2, ptr @__dso_handle) #[[ATTR5]], !dbg [[DBG226:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2S2D1Ev, ptr @_ZL3gs2, ptr @__dso_handle) #[[ATTR4]], !dbg [[DBG226:![0-9]+]] // CHECK-TLS4-NEXT: ret void, !dbg [[DBG227:![0-9]+]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S2C1Ei -// CHECK-TLS4-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG228:![0-9]+]] { +// CHECK-TLS4-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG228:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK-TLS4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-TLS4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META229:![0-9]+]], metadata !DIExpression()), !dbg [[DBG231:![0-9]+]] +// CHECK-TLS4-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META229:![0-9]+]], !DIExpression(), [[META231:![0-9]+]]) // CHECK-TLS4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK-TLS4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META232:![0-9]+]], metadata !DIExpression()), !dbg [[DBG233:![0-9]+]] +// CHECK-TLS4-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META232:![0-9]+]], !DIExpression(), [[META233:![0-9]+]]) // CHECK-TLS4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG234:![0-9]+]] // CHECK-TLS4-NEXT: call void @_ZN2S2C2Ei(ptr noundef nonnull align 
8 dereferenceable(16) [[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG234]] @@ -5227,25 +5227,25 @@ int foobar() { // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S2D1Ev -// CHECK-TLS4-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG236:![0-9]+]] { +// CHECK-TLS4-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG236:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK-TLS4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META237:![0-9]+]], metadata !DIExpression()), !dbg [[DBG238:![0-9]+]] +// CHECK-TLS4-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META237:![0-9]+]], !DIExpression(), [[META238:![0-9]+]]) // CHECK-TLS4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: call void @_ZN2S2D2Ev(ptr noundef nonnull align 8 dereferenceable(16) [[THIS1]]) #[[ATTR5]], !dbg [[DBG239:![0-9]+]] +// CHECK-TLS4-NEXT: call void @_ZN2S2D2Ev(ptr noundef nonnull align 8 dereferenceable(16) [[THIS1]]) #[[ATTR4]], !dbg [[DBG239:![0-9]+]] // CHECK-TLS4-NEXT: ret void, !dbg [[DBG240:![0-9]+]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S2C2Ei -// CHECK-TLS4-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG241:![0-9]+]] { +// CHECK-TLS4-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG241:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK-TLS4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-TLS4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata 
[[META242:![0-9]+]], metadata !DIExpression()), !dbg [[DBG243:![0-9]+]] +// CHECK-TLS4-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META242:![0-9]+]], !DIExpression(), [[META243:![0-9]+]]) // CHECK-TLS4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK-TLS4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META244:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245:![0-9]+]] +// CHECK-TLS4-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META244:![0-9]+]], !DIExpression(), [[META245:![0-9]+]]) // CHECK-TLS4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK-TLS4-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG246:![0-9]+]] // CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG247:![0-9]+]] @@ -5254,11 +5254,11 @@ int foobar() { // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S2D2Ev -// CHECK-TLS4-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG249:![0-9]+]] { +// CHECK-TLS4-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG249:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK-TLS4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META250:![0-9]+]], metadata !DIExpression()), !dbg [[DBG251:![0-9]+]] +// CHECK-TLS4-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META250:![0-9]+]], !DIExpression(), [[META251:![0-9]+]]) // CHECK-TLS4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK-TLS4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG252:![0-9]+]] // CHECK-TLS4-NEXT: store i32 0, ptr [[A]], align 8, !dbg [[DBG254:![0-9]+]] @@ -5299,7 +5299,7 @@ int foobar() { // CHECK-TLS4-NEXT: invoke void @_ZN2S1C1Ei(ptr noundef 
nonnull align 4 dereferenceable(4) getelementptr inbounds ([[STRUCT_S1]], ptr getelementptr inbounds ([3 x %struct.S1], ptr @arr_x, i64 1), i64 2), i32 noundef 6) // CHECK-TLS4-NEXT: to label [[INVOKE_CONT9:%.*]] unwind label [[LPAD6]], !dbg [[DBG266:![0-9]+]] // CHECK-TLS4: invoke.cont9: -// CHECK-TLS4-NEXT: [[TMP0:%.*]] = call i32 @__cxa_thread_atexit(ptr @__cxx_global_array_dtor, ptr null, ptr @__dso_handle) #[[ATTR5]], !dbg [[DBG267:![0-9]+]] +// CHECK-TLS4-NEXT: [[TMP0:%.*]] = call i32 @__cxa_thread_atexit(ptr @__cxx_global_array_dtor, ptr null, ptr @__dso_handle) #[[ATTR4]], !dbg [[DBG267:![0-9]+]] // CHECK-TLS4-NEXT: ret void, !dbg [[DBG267]] // CHECK-TLS4: lpad: // CHECK-TLS4-NEXT: [[TMP1:%.*]] = landingpad { ptr, i32 } @@ -5314,7 +5314,7 @@ int foobar() { // CHECK-TLS4: arraydestroy.body: // CHECK-TLS4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP4]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG259]] // CHECK-TLS4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG259]] -// CHECK-TLS4-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]], !dbg [[DBG259]] +// CHECK-TLS4-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]], !dbg [[DBG259]] // CHECK-TLS4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], @arr_x, !dbg [[DBG259]] // CHECK-TLS4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG259]] // CHECK-TLS4: arraydestroy.done4: @@ -5332,7 +5332,7 @@ int foobar() { // CHECK-TLS4: arraydestroy.body11: // CHECK-TLS4-NEXT: [[ARRAYDESTROY_ELEMENTPAST12:%.*]] = phi ptr [ [[TMP8]], [[LPAD6]] ], [ [[ARRAYDESTROY_ELEMENT13:%.*]], [[ARRAYDESTROY_BODY11]] ], !dbg [[DBG263]] // CHECK-TLS4-NEXT: [[ARRAYDESTROY_ELEMENT13]] = getelementptr inbounds [[STRUCT_S1]], ptr 
[[ARRAYDESTROY_ELEMENTPAST12]], i64 -1, !dbg [[DBG263]] -// CHECK-TLS4-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT13]]) #[[ATTR5]], !dbg [[DBG263]] +// CHECK-TLS4-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT13]]) #[[ATTR4]], !dbg [[DBG263]] // CHECK-TLS4-NEXT: [[ARRAYDESTROY_DONE14:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT13]], getelementptr inbounds ([3 x %struct.S1], ptr @arr_x, i64 1), !dbg [[DBG263]] // CHECK-TLS4-NEXT: br i1 [[ARRAYDESTROY_DONE14]], label [[ARRAYDESTROY_DONE15]], label [[ARRAYDESTROY_BODY11]], !dbg [[DBG263]] // CHECK-TLS4: arraydestroy.done15: @@ -5345,7 +5345,7 @@ int foobar() { // CHECK-TLS4: arraydestroy.body17: // CHECK-TLS4-NEXT: [[ARRAYDESTROY_ELEMENTPAST18:%.*]] = phi ptr [ [[PAD_ARRAYEND]], [[EHCLEANUP]] ], [ [[ARRAYDESTROY_ELEMENT19:%.*]], [[ARRAYDESTROY_BODY17]] ], !dbg [[DBG257]] // CHECK-TLS4-NEXT: [[ARRAYDESTROY_ELEMENT19]] = getelementptr inbounds [[STRUCT_S1]], ptr [[ARRAYDESTROY_ELEMENTPAST18]], i64 -1, !dbg [[DBG257]] -// CHECK-TLS4-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT19]]) #[[ATTR5]], !dbg [[DBG257]] +// CHECK-TLS4-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT19]]) #[[ATTR4]], !dbg [[DBG257]] // CHECK-TLS4-NEXT: [[ARRAYDESTROY_DONE20:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT19]], @arr_x, !dbg [[DBG257]] // CHECK-TLS4-NEXT: br i1 [[ARRAYDESTROY_DONE20]], label [[ARRAYDESTROY_DONE21]], label [[ARRAYDESTROY_BODY17]], !dbg [[DBG257]] // CHECK-TLS4: arraydestroy.done21: @@ -5363,27 +5363,27 @@ int foobar() { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // CHECK-TLS4-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 -// CHECK-TLS4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[DOTADDR]], metadata [[META273:![0-9]+]], metadata !DIExpression()), !dbg 
[[DBG274:![0-9]+]] -// CHECK-TLS4-NEXT: br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG274]] +// CHECK-TLS4-NEXT: #dbg_declare(ptr [[DOTADDR]], [[META273:![0-9]+]], !DIExpression(), [[META274:![0-9]+]]) +// CHECK-TLS4-NEXT: br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[META274]] // CHECK-TLS4: arraydestroy.body: -// CHECK-TLS4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ getelementptr inbounds ([[STRUCT_S1:%.*]], ptr @arr_x, i64 6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG274]] -// CHECK-TLS4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG274]] -// CHECK-TLS4-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR5]], !dbg [[DBG274]] -// CHECK-TLS4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], @arr_x, !dbg [[DBG274]] -// CHECK-TLS4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG274]] +// CHECK-TLS4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ getelementptr inbounds ([[STRUCT_S1:%.*]], ptr @arr_x, i64 6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[META274]] +// CHECK-TLS4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[META274]] +// CHECK-TLS4-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]], !dbg [[META274]] +// CHECK-TLS4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], @arr_x, !dbg [[META274]] +// CHECK-TLS4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[META274]] // CHECK-TLS4: arraydestroy.done1: -// CHECK-TLS4-NEXT: ret void, !dbg [[DBG274]] +// CHECK-TLS4-NEXT: ret void, !dbg [[META274]] // // // CHECK-TLS4-LABEL: define 
{{[^@]+}}@_ZZ4mainEN5SmainC2Ei -// CHECK-TLS4-SAME: (ptr noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR4]] align 2 !dbg [[DBG275:![0-9]+]] { +// CHECK-TLS4-SAME: (ptr noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR3]] align 2 !dbg [[DBG275:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK-TLS4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-TLS4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META276:![0-9]+]], metadata !DIExpression()), !dbg [[DBG277:![0-9]+]] +// CHECK-TLS4-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META276:![0-9]+]], !DIExpression(), [[META277:![0-9]+]]) // CHECK-TLS4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK-TLS4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META278:![0-9]+]], metadata !DIExpression()), !dbg [[DBG279:![0-9]+]] +// CHECK-TLS4-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META278:![0-9]+]], !DIExpression(), [[META279:![0-9]+]]) // CHECK-TLS4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK-TLS4-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG280:![0-9]+]] // CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG281:![0-9]+]] @@ -5392,11 +5392,11 @@ int foobar() { // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD2Ev -// CHECK-TLS4-SAME: (ptr noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] align 2 !dbg [[DBG283:![0-9]+]] { +// CHECK-TLS4-SAME: (ptr noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] align 2 !dbg [[DBG283:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK-TLS4-NEXT: store ptr [[THIS]], ptr 
[[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META284:![0-9]+]], metadata !DIExpression()), !dbg [[DBG285:![0-9]+]] +// CHECK-TLS4-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META284:![0-9]+]], !DIExpression(), [[META285:![0-9]+]]) // CHECK-TLS4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK-TLS4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG286:![0-9]+]] // CHECK-TLS4-NEXT: store i32 0, ptr [[A]], align 8, !dbg [[DBG288:![0-9]+]] @@ -5412,21 +5412,21 @@ int foobar() { // CHECK-TLS4: init.check: // CHECK-TLS4-NEXT: store i8 1, ptr @_ZGVN2STI2S4E2stE, align 8, !dbg [[DBG291]] // CHECK-TLS4-NEXT: call void @_ZN2S4C1Ei(ptr noundef nonnull align 4 dereferenceable(8) @_ZN2STI2S4E2stE, i32 noundef 23), !dbg [[DBG292:![0-9]+]] -// CHECK-TLS4-NEXT: [[TMP1:%.*]] = call i32 @__cxa_thread_atexit(ptr @_ZN2S4D1Ev, ptr @_ZN2STI2S4E2stE, ptr @__dso_handle) #[[ATTR5]], !dbg [[DBG291]] +// CHECK-TLS4-NEXT: [[TMP1:%.*]] = call i32 @__cxa_thread_atexit(ptr @_ZN2S4D1Ev, ptr @_ZN2STI2S4E2stE, ptr @__dso_handle) #[[ATTR4]], !dbg [[DBG291]] // CHECK-TLS4-NEXT: br label [[INIT_END]], !dbg [[DBG291]] // CHECK-TLS4: init.end: // CHECK-TLS4-NEXT: ret void, !dbg [[DBG294:![0-9]+]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S4C1Ei -// CHECK-TLS4-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG295:![0-9]+]] { +// CHECK-TLS4-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG295:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK-TLS4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-TLS4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: tail call void @llvm.dbg.declare(metadata ptr 
[[THIS_ADDR]], metadata [[META296:![0-9]+]], metadata !DIExpression()), !dbg [[DBG298:![0-9]+]] +// CHECK-TLS4-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META296:![0-9]+]], !DIExpression(), [[META298:![0-9]+]]) // CHECK-TLS4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK-TLS4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META299:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300:![0-9]+]] +// CHECK-TLS4-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META299:![0-9]+]], !DIExpression(), [[META300:![0-9]+]]) // CHECK-TLS4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG301:![0-9]+]] // CHECK-TLS4-NEXT: call void @_ZN2S4C2Ei(ptr noundef nonnull align 4 dereferenceable(8) [[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG301]] @@ -5434,25 +5434,25 @@ int foobar() { // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S4D1Ev -// CHECK-TLS4-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG303:![0-9]+]] { +// CHECK-TLS4-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG303:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK-TLS4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META304:![0-9]+]], metadata !DIExpression()), !dbg [[DBG305:![0-9]+]] +// CHECK-TLS4-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META304:![0-9]+]], !DIExpression(), [[META305:![0-9]+]]) // CHECK-TLS4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: call void @_ZN2S4D2Ev(ptr noundef nonnull align 4 dereferenceable(8) [[THIS1]]) #[[ATTR5]], !dbg [[DBG306:![0-9]+]] +// CHECK-TLS4-NEXT: call void @_ZN2S4D2Ev(ptr noundef nonnull align 4 dereferenceable(8) [[THIS1]]) #[[ATTR4]], !dbg 
[[DBG306:![0-9]+]] // CHECK-TLS4-NEXT: ret void, !dbg [[DBG307:![0-9]+]] // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S4C2Ei -// CHECK-TLS4-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG308:![0-9]+]] { +// CHECK-TLS4-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG308:![0-9]+]] { // CHECK-TLS4-NEXT: entry: // CHECK-TLS4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK-TLS4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-TLS4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META309:![0-9]+]], metadata !DIExpression()), !dbg [[DBG310:![0-9]+]] +// CHECK-TLS4-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META309:![0-9]+]], !DIExpression(), [[META310:![0-9]+]]) // CHECK-TLS4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK-TLS4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META311:![0-9]+]], metadata !DIExpression()), !dbg [[DBG312:![0-9]+]] +// CHECK-TLS4-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META311:![0-9]+]], !DIExpression(), [[META312:![0-9]+]]) // CHECK-TLS4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK-TLS4-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG313:![0-9]+]] // CHECK-TLS4-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG314:![0-9]+]] @@ -5461,11 +5461,11 @@ int foobar() { // // // CHECK-TLS4-LABEL: define {{[^@]+}}@_ZN2S4D2Ev -// CHECK-TLS4-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 !dbg [[DBG316:![0-9]+]] { +// CHECK-TLS4-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG316:![0-9]+]] { // CHECK-TLS4-NEXT: entry: 
// CHECK-TLS4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // CHECK-TLS4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// CHECK-TLS4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META317:![0-9]+]], metadata !DIExpression()), !dbg [[DBG318:![0-9]+]] +// CHECK-TLS4-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META317:![0-9]+]], !DIExpression(), [[META318:![0-9]+]]) // CHECK-TLS4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK-TLS4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG319:![0-9]+]] // CHECK-TLS4-NEXT: store i32 0, ptr [[A]], align 4, !dbg [[DBG321:![0-9]+]] @@ -5966,9 +5966,9 @@ int foobar() { // SIMD4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // SIMD4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // SIMD4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// SIMD4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META123:![0-9]+]], metadata !DIExpression()), !dbg [[DBG125:![0-9]+]] +// SIMD4-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META123:![0-9]+]], !DIExpression(), [[META125:![0-9]+]]) // SIMD4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// SIMD4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META126:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127:![0-9]+]] +// SIMD4-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META126:![0-9]+]], !DIExpression(), [[META127:![0-9]+]]) // SIMD4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // SIMD4-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG128:![0-9]+]] // SIMD4-NEXT: call void @_ZN2S1C2Ei(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG128]] @@ -5980,7 +5980,7 @@ int foobar() { // SIMD4-NEXT: entry: // SIMD4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // SIMD4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// SIMD4-NEXT: tail call void @llvm.dbg.declare(metadata ptr 
[[THIS_ADDR]], metadata [[META131:![0-9]+]], metadata !DIExpression()), !dbg [[DBG132:![0-9]+]] +// SIMD4-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META131:![0-9]+]], !DIExpression(), [[META132:![0-9]+]]) // SIMD4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // SIMD4-NEXT: call void @_ZN2S1D2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR3]], !dbg [[DBG133:![0-9]+]] // SIMD4-NEXT: ret void, !dbg [[DBG134:![0-9]+]] @@ -6000,9 +6000,9 @@ int foobar() { // SIMD4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // SIMD4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // SIMD4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// SIMD4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META141:![0-9]+]], metadata !DIExpression()), !dbg [[DBG143:![0-9]+]] +// SIMD4-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META141:![0-9]+]], !DIExpression(), [[META143:![0-9]+]]) // SIMD4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// SIMD4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG145:![0-9]+]] +// SIMD4-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META144:![0-9]+]], !DIExpression(), [[META145:![0-9]+]]) // SIMD4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // SIMD4-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG146:![0-9]+]] // SIMD4-NEXT: call void @_ZN2S2C2Ei(ptr noundef nonnull align 8 dereferenceable(16) [[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG146]] @@ -6014,7 +6014,7 @@ int foobar() { // SIMD4-NEXT: entry: // SIMD4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // SIMD4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// SIMD4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META149:![0-9]+]], metadata !DIExpression()), !dbg [[DBG150:![0-9]+]] +// SIMD4-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META149:![0-9]+]], !DIExpression(), [[META150:![0-9]+]]) // SIMD4-NEXT: [[THIS1:%.*]] 
= load ptr, ptr [[THIS_ADDR]], align 8 // SIMD4-NEXT: call void @_ZN2S2D2Ev(ptr noundef nonnull align 8 dereferenceable(16) [[THIS1]]) #[[ATTR3]], !dbg [[DBG151:![0-9]+]] // SIMD4-NEXT: ret void, !dbg [[DBG152:![0-9]+]] @@ -6118,27 +6118,27 @@ int foobar() { // SIMD4-NEXT: entry: // SIMD4-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // SIMD4-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 -// SIMD4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[DOTADDR]], metadata [[META170:![0-9]+]], metadata !DIExpression()), !dbg [[DBG171:![0-9]+]] -// SIMD4-NEXT: br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG171]] +// SIMD4-NEXT: #dbg_declare(ptr [[DOTADDR]], [[META170:![0-9]+]], !DIExpression(), [[META171:![0-9]+]]) +// SIMD4-NEXT: br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[META171]] // SIMD4: arraydestroy.body: -// SIMD4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ getelementptr inbounds ([[STRUCT_S1:%.*]], ptr @arr_x, i64 6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG171]] -// SIMD4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG171]] -// SIMD4-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG171]] -// SIMD4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], @arr_x, !dbg [[DBG171]] -// SIMD4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG171]] +// SIMD4-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ getelementptr inbounds ([[STRUCT_S1:%.*]], ptr @arr_x, i64 6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[META171]] +// SIMD4-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[META171]] +// SIMD4-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) 
[[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[META171]] +// SIMD4-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], @arr_x, !dbg [[META171]] +// SIMD4-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[META171]] // SIMD4: arraydestroy.done1: -// SIMD4-NEXT: ret void, !dbg [[DBG171]] +// SIMD4-NEXT: ret void, !dbg [[META171]] // // // SIMD4-LABEL: define {{[^@]+}}@main -// SIMD4-SAME: () #[[ATTR5:[0-9]+]] personality ptr @__gxx_personality_v0 !dbg [[DBG52:![0-9]+]] { +// SIMD4-SAME: () #[[ATTR4:[0-9]+]] personality ptr @__gxx_personality_v0 !dbg [[DBG52:![0-9]+]] { // SIMD4-NEXT: entry: // SIMD4-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // SIMD4-NEXT: [[RES:%.*]] = alloca i32, align 4 // SIMD4-NEXT: [[EXN_SLOT:%.*]] = alloca ptr, align 8 // SIMD4-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4 // SIMD4-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// SIMD4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[RES]], metadata [[META172:![0-9]+]], metadata !DIExpression()), !dbg [[DBG173:![0-9]+]] +// SIMD4-NEXT: #dbg_declare(ptr [[RES]], [[META172:![0-9]+]], !DIExpression(), [[META173:![0-9]+]]) // SIMD4-NEXT: [[TMP0:%.*]] = load atomic i8, ptr @_ZGVZ4mainE2sm acquire, align 8, !dbg [[DBG174:![0-9]+]] // SIMD4-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP0]], 0, !dbg [[DBG174]] // SIMD4-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !dbg [[DBG174]], !prof [[PROF175:![0-9]+]] @@ -6215,9 +6215,9 @@ int foobar() { // SIMD4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // SIMD4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // SIMD4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// SIMD4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META200:![0-9]+]], metadata !DIExpression()), !dbg [[DBG202:![0-9]+]] +// SIMD4-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META200:![0-9]+]], !DIExpression(), [[META202:![0-9]+]]) // 
SIMD4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// SIMD4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META203:![0-9]+]], metadata !DIExpression()), !dbg [[DBG204:![0-9]+]] +// SIMD4-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META203:![0-9]+]], !DIExpression(), [[META204:![0-9]+]]) // SIMD4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // SIMD4-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG205:![0-9]+]] // SIMD4-NEXT: call void @_ZZ4mainEN5SmainC2Ei(ptr noundef nonnull align 8 dereferenceable(24) [[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG205]] @@ -6229,7 +6229,7 @@ int foobar() { // SIMD4-NEXT: entry: // SIMD4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // SIMD4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// SIMD4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META208:![0-9]+]], metadata !DIExpression()), !dbg [[DBG209:![0-9]+]] +// SIMD4-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META208:![0-9]+]], !DIExpression(), [[META209:![0-9]+]]) // SIMD4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // SIMD4-NEXT: call void @_ZZ4mainEN5SmainD2Ev(ptr noundef nonnull align 8 dereferenceable(24) [[THIS1]]) #[[ATTR3]], !dbg [[DBG210:![0-9]+]] // SIMD4-NEXT: ret void, !dbg [[DBG211:![0-9]+]] @@ -6239,7 +6239,7 @@ int foobar() { // SIMD4-SAME: () #[[ATTR2]] !dbg [[DBG212:![0-9]+]] { // SIMD4-NEXT: entry: // SIMD4-NEXT: [[RES:%.*]] = alloca i32, align 4 -// SIMD4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[RES]], metadata [[META213:![0-9]+]], metadata !DIExpression()), !dbg [[DBG214:![0-9]+]] +// SIMD4-NEXT: #dbg_declare(ptr [[RES]], [[META213:![0-9]+]], !DIExpression(), [[META214:![0-9]+]]) // SIMD4-NEXT: [[TMP0:%.*]] = load i32, ptr @_ZN6Static1sE, align 4, !dbg [[DBG215:![0-9]+]] // SIMD4-NEXT: store i32 [[TMP0]], ptr [[RES]], align 4, !dbg [[DBG216:![0-9]+]] // SIMD4-NEXT: [[TMP1:%.*]] = load i32, ptr @_ZL3gs1, align 4, !dbg [[DBG217:![0-9]+]] @@ 
-6296,9 +6296,9 @@ int foobar() { // SIMD4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // SIMD4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // SIMD4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// SIMD4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META239:![0-9]+]], metadata !DIExpression()), !dbg [[DBG241:![0-9]+]] +// SIMD4-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META239:![0-9]+]], !DIExpression(), [[META241:![0-9]+]]) // SIMD4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// SIMD4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META242:![0-9]+]], metadata !DIExpression()), !dbg [[DBG243:![0-9]+]] +// SIMD4-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META242:![0-9]+]], !DIExpression(), [[META243:![0-9]+]]) // SIMD4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // SIMD4-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG244:![0-9]+]] // SIMD4-NEXT: call void @_ZN2S4C2Ei(ptr noundef nonnull align 4 dereferenceable(8) [[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG244]] @@ -6310,7 +6310,7 @@ int foobar() { // SIMD4-NEXT: entry: // SIMD4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // SIMD4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// SIMD4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META247:![0-9]+]], metadata !DIExpression()), !dbg [[DBG248:![0-9]+]] +// SIMD4-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META247:![0-9]+]], !DIExpression(), [[META248:![0-9]+]]) // SIMD4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // SIMD4-NEXT: call void @_ZN2S4D2Ev(ptr noundef nonnull align 4 dereferenceable(8) [[THIS1]]) #[[ATTR3]], !dbg [[DBG249:![0-9]+]] // SIMD4-NEXT: ret void, !dbg [[DBG250:![0-9]+]] @@ -6322,9 +6322,9 @@ int foobar() { // SIMD4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // SIMD4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // SIMD4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// SIMD4-NEXT: tail call void 
@llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META252:![0-9]+]], metadata !DIExpression()), !dbg [[DBG253:![0-9]+]] +// SIMD4-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META252:![0-9]+]], !DIExpression(), [[META253:![0-9]+]]) // SIMD4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// SIMD4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META254:![0-9]+]], metadata !DIExpression()), !dbg [[DBG255:![0-9]+]] +// SIMD4-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META254:![0-9]+]], !DIExpression(), [[META255:![0-9]+]]) // SIMD4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // SIMD4-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG256:![0-9]+]] // SIMD4-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG257:![0-9]+]] @@ -6337,7 +6337,7 @@ int foobar() { // SIMD4-NEXT: entry: // SIMD4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // SIMD4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// SIMD4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META260:![0-9]+]], metadata !DIExpression()), !dbg [[DBG261:![0-9]+]] +// SIMD4-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META260:![0-9]+]], !DIExpression(), [[META261:![0-9]+]]) // SIMD4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // SIMD4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG262:![0-9]+]] // SIMD4-NEXT: store i32 0, ptr [[A]], align 4, !dbg [[DBG264:![0-9]+]] @@ -6350,9 +6350,9 @@ int foobar() { // SIMD4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // SIMD4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // SIMD4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// SIMD4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META267:![0-9]+]], metadata !DIExpression()), !dbg [[DBG268:![0-9]+]] +// SIMD4-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META267:![0-9]+]], !DIExpression(), 
[[META268:![0-9]+]]) // SIMD4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// SIMD4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META269:![0-9]+]], metadata !DIExpression()), !dbg [[DBG270:![0-9]+]] +// SIMD4-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META269:![0-9]+]], !DIExpression(), [[META270:![0-9]+]]) // SIMD4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // SIMD4-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG271:![0-9]+]] // SIMD4-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG272:![0-9]+]] @@ -6365,7 +6365,7 @@ int foobar() { // SIMD4-NEXT: entry: // SIMD4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // SIMD4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// SIMD4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META275:![0-9]+]], metadata !DIExpression()), !dbg [[DBG276:![0-9]+]] +// SIMD4-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META275:![0-9]+]], !DIExpression(), [[META276:![0-9]+]]) // SIMD4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // SIMD4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG277:![0-9]+]] // SIMD4-NEXT: store i32 0, ptr [[A]], align 8, !dbg [[DBG279:![0-9]+]] @@ -6378,9 +6378,9 @@ int foobar() { // SIMD4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // SIMD4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // SIMD4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// SIMD4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META282:![0-9]+]], metadata !DIExpression()), !dbg [[DBG283:![0-9]+]] +// SIMD4-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META282:![0-9]+]], !DIExpression(), [[META283:![0-9]+]]) // SIMD4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// SIMD4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META284:![0-9]+]], metadata !DIExpression()), !dbg 
[[DBG285:![0-9]+]] +// SIMD4-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META284:![0-9]+]], !DIExpression(), [[META285:![0-9]+]]) // SIMD4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // SIMD4-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG286:![0-9]+]] // SIMD4-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG287:![0-9]+]] @@ -6393,7 +6393,7 @@ int foobar() { // SIMD4-NEXT: entry: // SIMD4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // SIMD4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// SIMD4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META290:![0-9]+]], metadata !DIExpression()), !dbg [[DBG291:![0-9]+]] +// SIMD4-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META290:![0-9]+]], !DIExpression(), [[META291:![0-9]+]]) // SIMD4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // SIMD4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG292:![0-9]+]] // SIMD4-NEXT: store i32 0, ptr [[A]], align 8, !dbg [[DBG294:![0-9]+]] @@ -6406,9 +6406,9 @@ int foobar() { // SIMD4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // SIMD4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // SIMD4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// SIMD4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META297:![0-9]+]], metadata !DIExpression()), !dbg [[DBG298:![0-9]+]] +// SIMD4-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META297:![0-9]+]], !DIExpression(), [[META298:![0-9]+]]) // SIMD4-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// SIMD4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META299:![0-9]+]], metadata !DIExpression()), !dbg [[DBG300:![0-9]+]] +// SIMD4-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META299:![0-9]+]], !DIExpression(), [[META300:![0-9]+]]) // SIMD4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // SIMD4-NEXT: [[A2:%.*]] = 
getelementptr inbounds [[STRUCT_S4:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG301:![0-9]+]] // SIMD4-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG302:![0-9]+]] @@ -6421,7 +6421,7 @@ int foobar() { // SIMD4-NEXT: entry: // SIMD4-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // SIMD4-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// SIMD4-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META305:![0-9]+]], metadata !DIExpression()), !dbg [[DBG306:![0-9]+]] +// SIMD4-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META305:![0-9]+]], !DIExpression(), [[META306:![0-9]+]]) // SIMD4-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // SIMD4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG307:![0-9]+]] // SIMD4-NEXT: store i32 0, ptr [[A]], align 4, !dbg [[DBG309:![0-9]+]] @@ -6442,7 +6442,7 @@ int foobar() { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // DEBUG1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 -// DEBUG1-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[DOTADDR]], metadata [[META118:![0-9]+]], metadata !DIExpression()), !dbg [[DBG120:![0-9]+]] +// DEBUG1-NEXT: #dbg_declare(ptr [[DOTADDR]], [[META118:![0-9]+]], !DIExpression(), [[META120:![0-9]+]]) // DEBUG1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !dbg [[DBG121:![0-9]+]] // DEBUG1-NEXT: call void @_ZN2S1C1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[TMP1]], i32 noundef 5), !dbg [[DBG122:![0-9]+]] // DEBUG1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !dbg [[DBG121]] @@ -6450,14 +6450,14 @@ int foobar() { // // // DEBUG1-LABEL: define {{[^@]+}}@_ZN2S1C1Ei -// DEBUG1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] comdat align 2 !dbg [[DBG123:![0-9]+]] { +// DEBUG1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) 
unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 !dbg [[DBG123:![0-9]+]] { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // DEBUG1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // DEBUG1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// DEBUG1-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META124:![0-9]+]], metadata !DIExpression()), !dbg [[DBG126:![0-9]+]] +// DEBUG1-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META124:![0-9]+]], !DIExpression(), [[META126:![0-9]+]]) // DEBUG1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// DEBUG1-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META127:![0-9]+]], metadata !DIExpression()), !dbg [[DBG128:![0-9]+]] +// DEBUG1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META127:![0-9]+]], !DIExpression(), [[META128:![0-9]+]]) // DEBUG1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // DEBUG1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG129:![0-9]+]] // DEBUG1-NEXT: call void @_ZN2S1C2Ei(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG129]] @@ -6469,20 +6469,20 @@ int foobar() { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // DEBUG1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 -// DEBUG1-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[DOTADDR]], metadata [[META132:![0-9]+]], metadata !DIExpression()), !dbg [[DBG133:![0-9]+]] -// DEBUG1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !dbg [[DBG133]] -// DEBUG1-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR4:[0-9]+]], !dbg [[DBG133]] +// DEBUG1-NEXT: #dbg_declare(ptr [[DOTADDR]], [[META132:![0-9]+]], !DIExpression(), [[META133:![0-9]+]]) +// DEBUG1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !dbg [[META133]] +// DEBUG1-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TMP1]]) 
#[[ATTR3:[0-9]+]], !dbg [[META133]] // DEBUG1-NEXT: ret void, !dbg [[DBG134:![0-9]+]] // // // DEBUG1-LABEL: define {{[^@]+}}@_ZN2S1D1Ev -// DEBUG1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR3:[0-9]+]] comdat align 2 !dbg [[DBG135:![0-9]+]] { +// DEBUG1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] comdat align 2 !dbg [[DBG135:![0-9]+]] { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // DEBUG1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// DEBUG1-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META136:![0-9]+]], metadata !DIExpression()), !dbg [[DBG137:![0-9]+]] +// DEBUG1-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META136:![0-9]+]], !DIExpression(), [[META137:![0-9]+]]) // DEBUG1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// DEBUG1-NEXT: call void @_ZN2S1D2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]], !dbg [[DBG138:![0-9]+]] +// DEBUG1-NEXT: call void @_ZN2S1D2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR3]], !dbg [[DBG138:![0-9]+]] // DEBUG1-NEXT: ret void, !dbg [[DBG139:![0-9]+]] // // @@ -6504,7 +6504,7 @@ int foobar() { // DEBUG1-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4 // DEBUG1-NEXT: [[ARRAYINIT_ENDOFINIT7:%.*]] = alloca ptr, align 8 // DEBUG1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 -// DEBUG1-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[DOTADDR]], metadata [[META143:![0-9]+]], metadata !DIExpression()), !dbg [[DBG144:![0-9]+]] +// DEBUG1-NEXT: #dbg_declare(ptr [[DOTADDR]], [[META143:![0-9]+]], !DIExpression(), [[META144:![0-9]+]]) // DEBUG1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !dbg [[DBG145:![0-9]+]] // DEBUG1-NEXT: store ptr [[TMP1]], ptr [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG146:![0-9]+]] // DEBUG1-NEXT: store ptr [[TMP1]], ptr [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg 
[[DBG147:![0-9]+]] @@ -6541,36 +6541,36 @@ int foobar() { // DEBUG1-NEXT: ret ptr [[TMP2]], !dbg [[DBG145]] // DEBUG1: lpad: // DEBUG1-NEXT: [[TMP3:%.*]] = landingpad { ptr, i32 } -// DEBUG1-NEXT: cleanup, !dbg [[DBG144]] -// DEBUG1-NEXT: [[TMP4:%.*]] = extractvalue { ptr, i32 } [[TMP3]], 0, !dbg [[DBG144]] -// DEBUG1-NEXT: store ptr [[TMP4]], ptr [[EXN_SLOT]], align 8, !dbg [[DBG144]] -// DEBUG1-NEXT: [[TMP5:%.*]] = extractvalue { ptr, i32 } [[TMP3]], 1, !dbg [[DBG144]] -// DEBUG1-NEXT: store i32 [[TMP5]], ptr [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG144]] +// DEBUG1-NEXT: cleanup, !dbg [[META144]] +// DEBUG1-NEXT: [[TMP4:%.*]] = extractvalue { ptr, i32 } [[TMP3]], 0, !dbg [[META144]] +// DEBUG1-NEXT: store ptr [[TMP4]], ptr [[EXN_SLOT]], align 8, !dbg [[META144]] +// DEBUG1-NEXT: [[TMP5:%.*]] = extractvalue { ptr, i32 } [[TMP3]], 1, !dbg [[META144]] +// DEBUG1-NEXT: store i32 [[TMP5]], ptr [[EHSELECTOR_SLOT]], align 4, !dbg [[META144]] // DEBUG1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG147]] // DEBUG1-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP6]], !dbg [[DBG147]] // DEBUG1-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG147]] // DEBUG1: arraydestroy.body: // DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG147]] // DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG147]] -// DEBUG1-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]], !dbg [[DBG147]] +// DEBUG1-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG147]] // DEBUG1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[TMP1]], !dbg [[DBG147]] // 
DEBUG1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG147]] // DEBUG1: arraydestroy.done5: // DEBUG1-NEXT: br label [[EHCLEANUP:%.*]], !dbg [[DBG147]] // DEBUG1: lpad8: // DEBUG1-NEXT: [[TMP7:%.*]] = landingpad { ptr, i32 } -// DEBUG1-NEXT: cleanup, !dbg [[DBG144]] -// DEBUG1-NEXT: [[TMP8:%.*]] = extractvalue { ptr, i32 } [[TMP7]], 0, !dbg [[DBG144]] -// DEBUG1-NEXT: store ptr [[TMP8]], ptr [[EXN_SLOT]], align 8, !dbg [[DBG144]] -// DEBUG1-NEXT: [[TMP9:%.*]] = extractvalue { ptr, i32 } [[TMP7]], 1, !dbg [[DBG144]] -// DEBUG1-NEXT: store i32 [[TMP9]], ptr [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG144]] +// DEBUG1-NEXT: cleanup, !dbg [[META144]] +// DEBUG1-NEXT: [[TMP8:%.*]] = extractvalue { ptr, i32 } [[TMP7]], 0, !dbg [[META144]] +// DEBUG1-NEXT: store ptr [[TMP8]], ptr [[EXN_SLOT]], align 8, !dbg [[META144]] +// DEBUG1-NEXT: [[TMP9:%.*]] = extractvalue { ptr, i32 } [[TMP7]], 1, !dbg [[META144]] +// DEBUG1-NEXT: store i32 [[TMP9]], ptr [[EHSELECTOR_SLOT]], align 4, !dbg [[META144]] // DEBUG1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[ARRAYINIT_ENDOFINIT7]], align 8, !dbg [[DBG151]] // DEBUG1-NEXT: [[ARRAYDESTROY_ISEMPTY14:%.*]] = icmp eq ptr [[ARRAYINIT_ELEMENT6]], [[TMP10]], !dbg [[DBG151]] // DEBUG1-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY14]], label [[ARRAYDESTROY_DONE19:%.*]], label [[ARRAYDESTROY_BODY15:%.*]], !dbg [[DBG151]] // DEBUG1: arraydestroy.body15: // DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENTPAST16:%.*]] = phi ptr [ [[TMP10]], [[LPAD8]] ], [ [[ARRAYDESTROY_ELEMENT17:%.*]], [[ARRAYDESTROY_BODY15]] ], !dbg [[DBG151]] // DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENT17]] = getelementptr inbounds [[STRUCT_S1]], ptr [[ARRAYDESTROY_ELEMENTPAST16]], i64 -1, !dbg [[DBG151]] -// DEBUG1-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT17]]) #[[ATTR4]], !dbg [[DBG151]] +// DEBUG1-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT17]]) 
#[[ATTR3]], !dbg [[DBG151]] // DEBUG1-NEXT: [[ARRAYDESTROY_DONE18:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT17]], [[ARRAYINIT_ELEMENT6]], !dbg [[DBG151]] // DEBUG1-NEXT: br i1 [[ARRAYDESTROY_DONE18]], label [[ARRAYDESTROY_DONE19]], label [[ARRAYDESTROY_BODY15]], !dbg [[DBG151]] // DEBUG1: arraydestroy.done19: @@ -6584,7 +6584,7 @@ int foobar() { // DEBUG1: arraydestroy.body21: // DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENTPAST22:%.*]] = phi ptr [ [[PAD_ARRAYEND]], [[EHCLEANUP]] ], [ [[ARRAYDESTROY_ELEMENT23:%.*]], [[ARRAYDESTROY_BODY21]] ], !dbg [[DBG146]] // DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENT23]] = getelementptr inbounds [[STRUCT_S1]], ptr [[ARRAYDESTROY_ELEMENTPAST22]], i64 -1, !dbg [[DBG146]] -// DEBUG1-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT23]]) #[[ATTR4]], !dbg [[DBG146]] +// DEBUG1-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT23]]) #[[ATTR3]], !dbg [[DBG146]] // DEBUG1-NEXT: [[ARRAYDESTROY_DONE24:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT23]], [[PAD_ARRAYBEGIN]], !dbg [[DBG146]] // DEBUG1-NEXT: br i1 [[ARRAYDESTROY_DONE24]], label [[ARRAYDESTROY_DONE25]], label [[ARRAYDESTROY_BODY21]], !dbg [[DBG146]] // DEBUG1: arraydestroy.done25: @@ -6602,16 +6602,16 @@ int foobar() { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // DEBUG1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 -// DEBUG1-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[DOTADDR]], metadata [[META156:![0-9]+]], metadata !DIExpression()), !dbg [[DBG157:![0-9]+]] -// DEBUG1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !dbg [[DBG157]] -// DEBUG1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP1]], i64 6, !dbg [[DBG157]] -// DEBUG1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG157]] +// DEBUG1-NEXT: #dbg_declare(ptr [[DOTADDR]], [[META156:![0-9]+]], !DIExpression(), [[META157:![0-9]+]]) +// DEBUG1-NEXT: [[TMP1:%.*]] = 
load ptr, ptr [[DOTADDR]], align 8, !dbg [[META157]] +// DEBUG1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP1]], i64 6, !dbg [[META157]] +// DEBUG1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[META157]] // DEBUG1: arraydestroy.body: -// DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG157]] -// DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG157]] -// DEBUG1-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]], !dbg [[DBG157]] -// DEBUG1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[TMP1]], !dbg [[DBG157]] -// DEBUG1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG157]] +// DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[META157]] +// DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[META157]] +// DEBUG1-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[META157]] +// DEBUG1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[TMP1]], !dbg [[META157]] +// DEBUG1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[META157]] // DEBUG1: arraydestroy.done1: // DEBUG1-NEXT: ret void, !dbg [[DBG158:![0-9]+]] // @@ -6628,19 +6628,19 @@ int foobar() { // DEBUG1-SAME: () #[[ATTR0]] !dbg [[DBG161:![0-9]+]] { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: call void @_ZN2S1C1Ei(ptr noundef nonnull align 4 dereferenceable(4) @_ZL3gs1, i32 noundef 5), !dbg [[DBG164:![0-9]+]] -// DEBUG1-NEXT: [[TMP0:%.*]] = call i32 
@__cxa_atexit(ptr @_ZN2S1D1Ev, ptr @_ZL3gs1, ptr @__dso_handle) #[[ATTR4]], !dbg [[DBG166:![0-9]+]] +// DEBUG1-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2S1D1Ev, ptr @_ZL3gs1, ptr @__dso_handle) #[[ATTR3]], !dbg [[DBG166:![0-9]+]] // DEBUG1-NEXT: ret void, !dbg [[DBG167:![0-9]+]] // // // DEBUG1-LABEL: define {{[^@]+}}@_ZN2S1C2Ei -// DEBUG1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG168:![0-9]+]] { +// DEBUG1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG168:![0-9]+]] { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // DEBUG1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // DEBUG1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// DEBUG1-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META169:![0-9]+]], metadata !DIExpression()), !dbg [[DBG170:![0-9]+]] +// DEBUG1-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META169:![0-9]+]], !DIExpression(), [[META170:![0-9]+]]) // DEBUG1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// DEBUG1-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META171:![0-9]+]], metadata !DIExpression()), !dbg [[DBG172:![0-9]+]] +// DEBUG1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META171:![0-9]+]], !DIExpression(), [[META172:![0-9]+]]) // DEBUG1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // DEBUG1-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG173:![0-9]+]] // DEBUG1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG174:![0-9]+]] @@ -6649,11 +6649,11 @@ int foobar() { // // // DEBUG1-LABEL: define {{[^@]+}}@_ZN2S1D2Ev -// DEBUG1-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG176:![0-9]+]] { +// DEBUG1-SAME: (ptr 
noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG176:![0-9]+]] { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // DEBUG1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// DEBUG1-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META177:![0-9]+]], metadata !DIExpression()), !dbg [[DBG178:![0-9]+]] +// DEBUG1-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META177:![0-9]+]], !DIExpression(), [[META178:![0-9]+]]) // DEBUG1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // DEBUG1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG179:![0-9]+]] // DEBUG1-NEXT: store i32 0, ptr [[A]], align 4, !dbg [[DBG181:![0-9]+]] @@ -6664,19 +6664,19 @@ int foobar() { // DEBUG1-SAME: () #[[ATTR0]] !dbg [[DBG183:![0-9]+]] { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: call void @_ZN2S2C1Ei(ptr noundef nonnull align 8 dereferenceable(16) @_ZL3gs2, i32 noundef 27), !dbg [[DBG184:![0-9]+]] -// DEBUG1-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2S2D1Ev, ptr @_ZL3gs2, ptr @__dso_handle) #[[ATTR4]], !dbg [[DBG186:![0-9]+]] +// DEBUG1-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2S2D1Ev, ptr @_ZL3gs2, ptr @__dso_handle) #[[ATTR3]], !dbg [[DBG186:![0-9]+]] // DEBUG1-NEXT: ret void, !dbg [[DBG187:![0-9]+]] // // // DEBUG1-LABEL: define {{[^@]+}}@_ZN2S2C1Ei -// DEBUG1-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG188:![0-9]+]] { +// DEBUG1-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 !dbg [[DBG188:![0-9]+]] { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // DEBUG1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // DEBUG1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// DEBUG1-NEXT: tail call void 
@llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META189:![0-9]+]], metadata !DIExpression()), !dbg [[DBG191:![0-9]+]] +// DEBUG1-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META189:![0-9]+]], !DIExpression(), [[META191:![0-9]+]]) // DEBUG1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// DEBUG1-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META192:![0-9]+]], metadata !DIExpression()), !dbg [[DBG193:![0-9]+]] +// DEBUG1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META192:![0-9]+]], !DIExpression(), [[META193:![0-9]+]]) // DEBUG1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // DEBUG1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG194:![0-9]+]] // DEBUG1-NEXT: call void @_ZN2S2C2Ei(ptr noundef nonnull align 8 dereferenceable(16) [[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG194]] @@ -6684,25 +6684,25 @@ int foobar() { // // // DEBUG1-LABEL: define {{[^@]+}}@_ZN2S2D1Ev -// DEBUG1-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG196:![0-9]+]] { +// DEBUG1-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG196:![0-9]+]] { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // DEBUG1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// DEBUG1-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META197:![0-9]+]], metadata !DIExpression()), !dbg [[DBG198:![0-9]+]] +// DEBUG1-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META197:![0-9]+]], !DIExpression(), [[META198:![0-9]+]]) // DEBUG1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// DEBUG1-NEXT: call void @_ZN2S2D2Ev(ptr noundef nonnull align 8 dereferenceable(16) [[THIS1]]) #[[ATTR4]], !dbg [[DBG199:![0-9]+]] +// DEBUG1-NEXT: call void @_ZN2S2D2Ev(ptr noundef nonnull align 8 dereferenceable(16) [[THIS1]]) #[[ATTR3]], !dbg [[DBG199:![0-9]+]] // DEBUG1-NEXT: ret void, 
!dbg [[DBG200:![0-9]+]] // // // DEBUG1-LABEL: define {{[^@]+}}@_ZN2S2C2Ei -// DEBUG1-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG201:![0-9]+]] { +// DEBUG1-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG201:![0-9]+]] { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // DEBUG1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // DEBUG1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// DEBUG1-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META202:![0-9]+]], metadata !DIExpression()), !dbg [[DBG203:![0-9]+]] +// DEBUG1-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META202:![0-9]+]], !DIExpression(), [[META203:![0-9]+]]) // DEBUG1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// DEBUG1-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META204:![0-9]+]], metadata !DIExpression()), !dbg [[DBG205:![0-9]+]] +// DEBUG1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META204:![0-9]+]], !DIExpression(), [[META205:![0-9]+]]) // DEBUG1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // DEBUG1-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG206:![0-9]+]] // DEBUG1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG207:![0-9]+]] @@ -6711,11 +6711,11 @@ int foobar() { // // // DEBUG1-LABEL: define {{[^@]+}}@_ZN2S2D2Ev -// DEBUG1-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG209:![0-9]+]] { +// DEBUG1-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG209:![0-9]+]] { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // DEBUG1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// 
DEBUG1-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META210:![0-9]+]], metadata !DIExpression()), !dbg [[DBG211:![0-9]+]] +// DEBUG1-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META210:![0-9]+]], !DIExpression(), [[META211:![0-9]+]]) // DEBUG1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // DEBUG1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG212:![0-9]+]] // DEBUG1-NEXT: store i32 0, ptr [[A]], align 8, !dbg [[DBG214:![0-9]+]] @@ -6756,7 +6756,7 @@ int foobar() { // DEBUG1-NEXT: invoke void @_ZN2S1C1Ei(ptr noundef nonnull align 4 dereferenceable(4) getelementptr inbounds ([[STRUCT_S1]], ptr getelementptr inbounds ([3 x %struct.S1], ptr @arr_x, i64 1), i64 2), i32 noundef 6) // DEBUG1-NEXT: to label [[INVOKE_CONT9:%.*]] unwind label [[LPAD6]], !dbg [[DBG226:![0-9]+]] // DEBUG1: invoke.cont9: -// DEBUG1-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @__cxx_global_array_dtor, ptr null, ptr @__dso_handle) #[[ATTR4]], !dbg [[DBG227:![0-9]+]] +// DEBUG1-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @__cxx_global_array_dtor, ptr null, ptr @__dso_handle) #[[ATTR3]], !dbg [[DBG227:![0-9]+]] // DEBUG1-NEXT: ret void, !dbg [[DBG227]] // DEBUG1: lpad: // DEBUG1-NEXT: [[TMP1:%.*]] = landingpad { ptr, i32 } @@ -6771,7 +6771,7 @@ int foobar() { // DEBUG1: arraydestroy.body: // DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP4]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG219]] // DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG219]] -// DEBUG1-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]], !dbg [[DBG219]] +// DEBUG1-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG219]] // DEBUG1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = 
icmp eq ptr [[ARRAYDESTROY_ELEMENT]], @arr_x, !dbg [[DBG219]] // DEBUG1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG219]] // DEBUG1: arraydestroy.done4: @@ -6789,7 +6789,7 @@ int foobar() { // DEBUG1: arraydestroy.body11: // DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENTPAST12:%.*]] = phi ptr [ [[TMP8]], [[LPAD6]] ], [ [[ARRAYDESTROY_ELEMENT13:%.*]], [[ARRAYDESTROY_BODY11]] ], !dbg [[DBG223]] // DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENT13]] = getelementptr inbounds [[STRUCT_S1]], ptr [[ARRAYDESTROY_ELEMENTPAST12]], i64 -1, !dbg [[DBG223]] -// DEBUG1-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT13]]) #[[ATTR4]], !dbg [[DBG223]] +// DEBUG1-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT13]]) #[[ATTR3]], !dbg [[DBG223]] // DEBUG1-NEXT: [[ARRAYDESTROY_DONE14:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT13]], getelementptr inbounds ([3 x %struct.S1], ptr @arr_x, i64 1), !dbg [[DBG223]] // DEBUG1-NEXT: br i1 [[ARRAYDESTROY_DONE14]], label [[ARRAYDESTROY_DONE15]], label [[ARRAYDESTROY_BODY11]], !dbg [[DBG223]] // DEBUG1: arraydestroy.done15: @@ -6802,7 +6802,7 @@ int foobar() { // DEBUG1: arraydestroy.body17: // DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENTPAST18:%.*]] = phi ptr [ [[PAD_ARRAYEND]], [[EHCLEANUP]] ], [ [[ARRAYDESTROY_ELEMENT19:%.*]], [[ARRAYDESTROY_BODY17]] ], !dbg [[DBG217]] // DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENT19]] = getelementptr inbounds [[STRUCT_S1]], ptr [[ARRAYDESTROY_ELEMENTPAST18]], i64 -1, !dbg [[DBG217]] -// DEBUG1-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT19]]) #[[ATTR4]], !dbg [[DBG217]] +// DEBUG1-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT19]]) #[[ATTR3]], !dbg [[DBG217]] // DEBUG1-NEXT: [[ARRAYDESTROY_DONE20:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT19]], @arr_x, !dbg [[DBG217]] // DEBUG1-NEXT: br i1 
[[ARRAYDESTROY_DONE20]], label [[ARRAYDESTROY_DONE21]], label [[ARRAYDESTROY_BODY17]], !dbg [[DBG217]] // DEBUG1: arraydestroy.done21: @@ -6820,20 +6820,20 @@ int foobar() { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // DEBUG1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 -// DEBUG1-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[DOTADDR]], metadata [[META232:![0-9]+]], metadata !DIExpression()), !dbg [[DBG233:![0-9]+]] -// DEBUG1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG233]] +// DEBUG1-NEXT: #dbg_declare(ptr [[DOTADDR]], [[META232:![0-9]+]], !DIExpression(), [[META233:![0-9]+]]) +// DEBUG1-NEXT: br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[META233]] // DEBUG1: arraydestroy.body: -// DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ getelementptr inbounds ([[STRUCT_S1:%.*]], ptr @arr_x, i64 6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG233]] -// DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG233]] -// DEBUG1-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]], !dbg [[DBG233]] -// DEBUG1-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], @arr_x, !dbg [[DBG233]] -// DEBUG1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG233]] +// DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ getelementptr inbounds ([[STRUCT_S1:%.*]], ptr @arr_x, i64 6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[META233]] +// DEBUG1-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[META233]] +// DEBUG1-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[META233]] +// DEBUG1-NEXT: 
[[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], @arr_x, !dbg [[META233]] +// DEBUG1-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[META233]] // DEBUG1: arraydestroy.done1: -// DEBUG1-NEXT: ret void, !dbg [[DBG233]] +// DEBUG1-NEXT: ret void, !dbg [[META233]] // // // DEBUG1-LABEL: define {{[^@]+}}@main -// DEBUG1-SAME: () #[[ATTR5:[0-9]+]] personality ptr @__gxx_personality_v0 !dbg [[DBG52:![0-9]+]] { +// DEBUG1-SAME: () #[[ATTR4:[0-9]+]] personality ptr @__gxx_personality_v0 !dbg [[DBG52:![0-9]+]] { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // DEBUG1-NEXT: [[RES:%.*]] = alloca i32, align 4 @@ -6841,12 +6841,12 @@ int foobar() { // DEBUG1-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4 // DEBUG1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB9:[0-9]+]]), !dbg [[DBG234:![0-9]+]] // DEBUG1-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// DEBUG1-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[RES]], metadata [[META235:![0-9]+]], metadata !DIExpression()), !dbg [[DBG236:![0-9]+]] +// DEBUG1-NEXT: #dbg_declare(ptr [[RES]], [[META235:![0-9]+]], !DIExpression(), [[META236:![0-9]+]]) // DEBUG1-NEXT: [[TMP1:%.*]] = load atomic i8, ptr @_ZGVZ4mainE2sm acquire, align 8, !dbg [[DBG237:![0-9]+]] // DEBUG1-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP1]], 0, !dbg [[DBG237]] // DEBUG1-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !dbg [[DBG237]], !prof [[PROF238:![0-9]+]] // DEBUG1: init.check: -// DEBUG1-NEXT: [[TMP2:%.*]] = call i32 @__cxa_guard_acquire(ptr @_ZGVZ4mainE2sm) #[[ATTR4]], !dbg [[DBG237]] +// DEBUG1-NEXT: [[TMP2:%.*]] = call i32 @__cxa_guard_acquire(ptr @_ZGVZ4mainE2sm) #[[ATTR3]], !dbg [[DBG237]] // DEBUG1-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0, !dbg [[DBG237]] // DEBUG1-NEXT: br i1 [[TOBOOL]], label [[INIT:%.*]], label [[INIT_END]], !dbg [[DBG237]] // DEBUG1: init: @@ 
-6858,8 +6858,8 @@ int foobar() { // DEBUG1-NEXT: invoke void @_ZZ4mainEN5SmainC1Ei(ptr noundef nonnull align 8 dereferenceable(24) @_ZZ4mainE2sm, i32 noundef [[TMP5]]) // DEBUG1-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]], !dbg [[DBG240:![0-9]+]] // DEBUG1: invoke.cont: -// DEBUG1-NEXT: [[TMP6:%.*]] = call i32 @__cxa_atexit(ptr @_ZZ4mainEN5SmainD1Ev, ptr @_ZZ4mainE2sm, ptr @__dso_handle) #[[ATTR4]], !dbg [[DBG237]] -// DEBUG1-NEXT: call void @__cxa_guard_release(ptr @_ZGVZ4mainE2sm) #[[ATTR4]], !dbg [[DBG237]] +// DEBUG1-NEXT: [[TMP6:%.*]] = call i32 @__cxa_atexit(ptr @_ZZ4mainEN5SmainD1Ev, ptr @_ZZ4mainE2sm, ptr @__dso_handle) #[[ATTR3]], !dbg [[DBG237]] +// DEBUG1-NEXT: call void @__cxa_guard_release(ptr @_ZGVZ4mainE2sm) #[[ATTR3]], !dbg [[DBG237]] // DEBUG1-NEXT: br label [[INIT_END]], !dbg [[DBG237]] // DEBUG1: init.end: // DEBUG1-NEXT: [[TMP7:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB11:[0-9]+]], i32 [[TMP0]], ptr @_ZN6Static1sE, i64 8, ptr @_ZN6Static1sE.cache.), !dbg [[DBG241:![0-9]+]] @@ -6922,7 +6922,7 @@ int foobar() { // DEBUG1-NEXT: store ptr [[TMP34]], ptr [[EXN_SLOT]], align 8, !dbg [[DBG267]] // DEBUG1-NEXT: [[TMP35:%.*]] = extractvalue { ptr, i32 } [[TMP33]], 1, !dbg [[DBG267]] // DEBUG1-NEXT: store i32 [[TMP35]], ptr [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG267]] -// DEBUG1-NEXT: call void @__cxa_guard_abort(ptr @_ZGVZ4mainE2sm) #[[ATTR4]], !dbg [[DBG237]] +// DEBUG1-NEXT: call void @__cxa_guard_abort(ptr @_ZGVZ4mainE2sm) #[[ATTR3]], !dbg [[DBG237]] // DEBUG1-NEXT: br label [[EH_RESUME:%.*]], !dbg [[DBG237]] // DEBUG1: eh.resume: // DEBUG1-NEXT: [[EXN:%.*]] = load ptr, ptr [[EXN_SLOT]], align 8, !dbg [[DBG237]] @@ -6938,7 +6938,7 @@ int foobar() { // DEBUG1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // DEBUG1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB5:[0-9]+]]), !dbg [[DBG269:![0-9]+]] // DEBUG1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 -// DEBUG1-NEXT: tail call void 
@llvm.dbg.declare(metadata ptr [[DOTADDR]], metadata [[META270:![0-9]+]], metadata !DIExpression()), !dbg [[DBG271:![0-9]+]] +// DEBUG1-NEXT: #dbg_declare(ptr [[DOTADDR]], [[META270:![0-9]+]], !DIExpression(), [[META271:![0-9]+]]) // DEBUG1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !dbg [[DBG272:![0-9]+]] // DEBUG1-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB5]], i32 [[TMP1]], ptr @_ZL3gs1, i64 4, ptr @_ZL3gs1.cache.), !dbg [[DBG269]] // DEBUG1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP3]], i32 0, i32 0, !dbg [[DBG273:![0-9]+]] @@ -6949,14 +6949,14 @@ int foobar() { // // // DEBUG1-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC1Ei -// DEBUG1-SAME: (ptr noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG275:![0-9]+]] { +// DEBUG1-SAME: (ptr noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 !dbg [[DBG275:![0-9]+]] { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // DEBUG1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // DEBUG1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// DEBUG1-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META276:![0-9]+]], metadata !DIExpression()), !dbg [[DBG278:![0-9]+]] +// DEBUG1-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META276:![0-9]+]], !DIExpression(), [[META278:![0-9]+]]) // DEBUG1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// DEBUG1-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META279:![0-9]+]], metadata !DIExpression()), !dbg [[DBG280:![0-9]+]] +// DEBUG1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META279:![0-9]+]], !DIExpression(), [[META280:![0-9]+]]) // DEBUG1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // DEBUG1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG281:![0-9]+]] // DEBUG1-NEXT: call void 
@_ZZ4mainEN5SmainC2Ei(ptr noundef nonnull align 8 dereferenceable(24) [[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG281]] @@ -6968,32 +6968,32 @@ int foobar() { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // DEBUG1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 -// DEBUG1-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[DOTADDR]], metadata [[META284:![0-9]+]], metadata !DIExpression()), !dbg [[DBG285:![0-9]+]] -// DEBUG1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !dbg [[DBG285]] -// DEBUG1-NEXT: call void @_ZZ4mainEN5SmainD1Ev(ptr noundef nonnull align 8 dereferenceable(24) [[TMP1]]) #[[ATTR4]], !dbg [[DBG285]] +// DEBUG1-NEXT: #dbg_declare(ptr [[DOTADDR]], [[META284:![0-9]+]], !DIExpression(), [[META285:![0-9]+]]) +// DEBUG1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !dbg [[META285]] +// DEBUG1-NEXT: call void @_ZZ4mainEN5SmainD1Ev(ptr noundef nonnull align 8 dereferenceable(24) [[TMP1]]) #[[ATTR3]], !dbg [[META285]] // DEBUG1-NEXT: ret void, !dbg [[DBG286:![0-9]+]] // // // DEBUG1-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD1Ev -// DEBUG1-SAME: (ptr noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] align 2 !dbg [[DBG287:![0-9]+]] { +// DEBUG1-SAME: (ptr noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG287:![0-9]+]] { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // DEBUG1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// DEBUG1-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META288:![0-9]+]], metadata !DIExpression()), !dbg [[DBG289:![0-9]+]] +// DEBUG1-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META288:![0-9]+]], !DIExpression(), [[META289:![0-9]+]]) // DEBUG1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// DEBUG1-NEXT: call void @_ZZ4mainEN5SmainD2Ev(ptr noundef nonnull align 8 dereferenceable(24) [[THIS1]]) #[[ATTR4]], !dbg 
[[DBG290:![0-9]+]] +// DEBUG1-NEXT: call void @_ZZ4mainEN5SmainD2Ev(ptr noundef nonnull align 8 dereferenceable(24) [[THIS1]]) #[[ATTR3]], !dbg [[DBG290:![0-9]+]] // DEBUG1-NEXT: ret void, !dbg [[DBG291:![0-9]+]] // // // DEBUG1-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC2Ei -// DEBUG1-SAME: (ptr noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR3]] align 2 !dbg [[DBG292:![0-9]+]] { +// DEBUG1-SAME: (ptr noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG292:![0-9]+]] { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // DEBUG1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // DEBUG1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// DEBUG1-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META293:![0-9]+]], metadata !DIExpression()), !dbg [[DBG294:![0-9]+]] +// DEBUG1-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META293:![0-9]+]], !DIExpression(), [[META294:![0-9]+]]) // DEBUG1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// DEBUG1-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META295:![0-9]+]], metadata !DIExpression()), !dbg [[DBG296:![0-9]+]] +// DEBUG1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META295:![0-9]+]], !DIExpression(), [[META296:![0-9]+]]) // DEBUG1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // DEBUG1-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG297:![0-9]+]] // DEBUG1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG298:![0-9]+]] @@ -7002,11 +7002,11 @@ int foobar() { // // // DEBUG1-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD2Ev -// DEBUG1-SAME: (ptr noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] align 2 !dbg [[DBG300:![0-9]+]] { +// DEBUG1-SAME: (ptr noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]]) unnamed_addr 
#[[ATTR2]] align 2 !dbg [[DBG300:![0-9]+]] { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // DEBUG1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// DEBUG1-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META301:![0-9]+]], metadata !DIExpression()), !dbg [[DBG302:![0-9]+]] +// DEBUG1-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META301:![0-9]+]], !DIExpression(), [[META302:![0-9]+]]) // DEBUG1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // DEBUG1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG303:![0-9]+]] // DEBUG1-NEXT: store i32 0, ptr [[A]], align 8, !dbg [[DBG305:![0-9]+]] @@ -7014,11 +7014,11 @@ int foobar() { // // // DEBUG1-LABEL: define {{[^@]+}}@_Z6foobarv -// DEBUG1-SAME: () #[[ATTR3]] !dbg [[DBG307:![0-9]+]] { +// DEBUG1-SAME: () #[[ATTR2]] !dbg [[DBG307:![0-9]+]] { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: [[RES:%.*]] = alloca i32, align 4 // DEBUG1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB27:[0-9]+]]), !dbg [[DBG308:![0-9]+]] -// DEBUG1-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[RES]], metadata [[META309:![0-9]+]], metadata !DIExpression()), !dbg [[DBG310:![0-9]+]] +// DEBUG1-NEXT: #dbg_declare(ptr [[RES]], [[META309:![0-9]+]], !DIExpression(), [[META310:![0-9]+]]) // DEBUG1-NEXT: [[TMP1:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB27]], i32 [[TMP0]], ptr @_ZN6Static1sE, i64 8, ptr @_ZN6Static1sE.cache.), !dbg [[DBG308]] // DEBUG1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], ptr [[TMP1]], i32 0, i32 0, !dbg [[DBG311:![0-9]+]] // DEBUG1-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG311]] @@ -7079,7 +7079,7 @@ int foobar() { // DEBUG1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB41:[0-9]+]]), !dbg [[DBG334]] // DEBUG1-NEXT: call void @__kmpc_threadprivate_register(ptr @[[GLOB41]], ptr @_ZN2STI2S4E2stE, ptr 
@.__kmpc_global_ctor_..9, ptr null, ptr @.__kmpc_global_dtor_..10), !dbg [[DBG334]] // DEBUG1-NEXT: call void @_ZN2S4C1Ei(ptr noundef nonnull align 4 dereferenceable(8) @_ZN2STI2S4E2stE, i32 noundef 23), !dbg [[DBG335:![0-9]+]] -// DEBUG1-NEXT: [[TMP2:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2S4D1Ev, ptr @_ZN2STI2S4E2stE, ptr @__dso_handle) #[[ATTR4]], !dbg [[DBG334]] +// DEBUG1-NEXT: [[TMP2:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2S4D1Ev, ptr @_ZN2STI2S4E2stE, ptr @__dso_handle) #[[ATTR3]], !dbg [[DBG334]] // DEBUG1-NEXT: br label [[INIT_END]], !dbg [[DBG334]] // DEBUG1: init.end: // DEBUG1-NEXT: ret void, !dbg [[DBG337:![0-9]+]] @@ -7090,7 +7090,7 @@ int foobar() { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // DEBUG1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 -// DEBUG1-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[DOTADDR]], metadata [[META339:![0-9]+]], metadata !DIExpression()), !dbg [[DBG340:![0-9]+]] +// DEBUG1-NEXT: #dbg_declare(ptr [[DOTADDR]], [[META339:![0-9]+]], !DIExpression(), [[META340:![0-9]+]]) // DEBUG1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !dbg [[DBG341:![0-9]+]] // DEBUG1-NEXT: call void @_ZN2S4C1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[TMP1]], i32 noundef 23), !dbg [[DBG342:![0-9]+]] // DEBUG1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !dbg [[DBG341]] @@ -7098,14 +7098,14 @@ int foobar() { // // // DEBUG1-LABEL: define {{[^@]+}}@_ZN2S4C1Ei -// DEBUG1-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG343:![0-9]+]] { +// DEBUG1-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 !dbg [[DBG343:![0-9]+]] { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // DEBUG1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // DEBUG1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], 
align 8 -// DEBUG1-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META344:![0-9]+]], metadata !DIExpression()), !dbg [[DBG346:![0-9]+]] +// DEBUG1-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META344:![0-9]+]], !DIExpression(), [[META346:![0-9]+]]) // DEBUG1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// DEBUG1-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META347:![0-9]+]], metadata !DIExpression()), !dbg [[DBG348:![0-9]+]] +// DEBUG1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META347:![0-9]+]], !DIExpression(), [[META348:![0-9]+]]) // DEBUG1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // DEBUG1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG349:![0-9]+]] // DEBUG1-NEXT: call void @_ZN2S4C2Ei(ptr noundef nonnull align 4 dereferenceable(8) [[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG349]] @@ -7117,32 +7117,32 @@ int foobar() { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // DEBUG1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 -// DEBUG1-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[DOTADDR]], metadata [[META352:![0-9]+]], metadata !DIExpression()), !dbg [[DBG353:![0-9]+]] -// DEBUG1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !dbg [[DBG353]] -// DEBUG1-NEXT: call void @_ZN2S4D1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[TMP1]]) #[[ATTR4]], !dbg [[DBG353]] +// DEBUG1-NEXT: #dbg_declare(ptr [[DOTADDR]], [[META352:![0-9]+]], !DIExpression(), [[META353:![0-9]+]]) +// DEBUG1-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !dbg [[META353]] +// DEBUG1-NEXT: call void @_ZN2S4D1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[TMP1]]) #[[ATTR3]], !dbg [[META353]] // DEBUG1-NEXT: ret void, !dbg [[DBG354:![0-9]+]] // // // DEBUG1-LABEL: define {{[^@]+}}@_ZN2S4D1Ev -// DEBUG1-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG355:![0-9]+]] { +// 
DEBUG1-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG355:![0-9]+]] { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // DEBUG1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// DEBUG1-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META356:![0-9]+]], metadata !DIExpression()), !dbg [[DBG357:![0-9]+]] +// DEBUG1-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META356:![0-9]+]], !DIExpression(), [[META357:![0-9]+]]) // DEBUG1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// DEBUG1-NEXT: call void @_ZN2S4D2Ev(ptr noundef nonnull align 4 dereferenceable(8) [[THIS1]]) #[[ATTR4]], !dbg [[DBG358:![0-9]+]] +// DEBUG1-NEXT: call void @_ZN2S4D2Ev(ptr noundef nonnull align 4 dereferenceable(8) [[THIS1]]) #[[ATTR3]], !dbg [[DBG358:![0-9]+]] // DEBUG1-NEXT: ret void, !dbg [[DBG359:![0-9]+]] // // // DEBUG1-LABEL: define {{[^@]+}}@_ZN2S4C2Ei -// DEBUG1-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG360:![0-9]+]] { +// DEBUG1-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG360:![0-9]+]] { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // DEBUG1-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // DEBUG1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// DEBUG1-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META361:![0-9]+]], metadata !DIExpression()), !dbg [[DBG362:![0-9]+]] +// DEBUG1-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META361:![0-9]+]], !DIExpression(), [[META362:![0-9]+]]) // DEBUG1-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// DEBUG1-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META363:![0-9]+]], metadata !DIExpression()), !dbg [[DBG364:![0-9]+]] +// 
DEBUG1-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META363:![0-9]+]], !DIExpression(), [[META364:![0-9]+]]) // DEBUG1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // DEBUG1-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG365:![0-9]+]] // DEBUG1-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG366:![0-9]+]] @@ -7151,11 +7151,11 @@ int foobar() { // // // DEBUG1-LABEL: define {{[^@]+}}@_ZN2S4D2Ev -// DEBUG1-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG368:![0-9]+]] { +// DEBUG1-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG368:![0-9]+]] { // DEBUG1-NEXT: entry: // DEBUG1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // DEBUG1-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// DEBUG1-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META369:![0-9]+]], metadata !DIExpression()), !dbg [[DBG370:![0-9]+]] +// DEBUG1-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META369:![0-9]+]], !DIExpression(), [[META370:![0-9]+]]) // DEBUG1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // DEBUG1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG371:![0-9]+]] // DEBUG1-NEXT: store i32 0, ptr [[A]], align 4, !dbg [[DBG373:![0-9]+]] @@ -7179,7 +7179,7 @@ int foobar() { // DEBUG2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]), !dbg [[DBG119:![0-9]+]] // DEBUG2-NEXT: call void @__kmpc_threadprivate_register(ptr @[[GLOB1]], ptr @_ZL3gs1, ptr @.__kmpc_global_ctor_., ptr null, ptr @.__kmpc_global_dtor_.), !dbg [[DBG119]] // DEBUG2-NEXT: call void @_ZN2S1C1Ei(ptr noundef nonnull align 4 dereferenceable(4) @_ZL3gs1, i32 noundef 5), !dbg [[DBG120:![0-9]+]] -// DEBUG2-NEXT: [[TMP1:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2S1D1Ev, ptr @_ZL3gs1, ptr @__dso_handle) 
#[[ATTR4:[0-9]+]], !dbg [[DBG119]] +// DEBUG2-NEXT: [[TMP1:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2S1D1Ev, ptr @_ZL3gs1, ptr @__dso_handle) #[[ATTR3:[0-9]+]], !dbg [[DBG119]] // DEBUG2-NEXT: ret void, !dbg [[DBG122:![0-9]+]] // // @@ -7188,7 +7188,7 @@ int foobar() { // DEBUG2-NEXT: entry: // DEBUG2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // DEBUG2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 -// DEBUG2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[DOTADDR]], metadata [[META125:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127:![0-9]+]] +// DEBUG2-NEXT: #dbg_declare(ptr [[DOTADDR]], [[META125:![0-9]+]], !DIExpression(), [[META127:![0-9]+]]) // DEBUG2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !dbg [[DBG128:![0-9]+]] // DEBUG2-NEXT: call void @_ZN2S1C1Ei(ptr noundef nonnull align 4 dereferenceable(4) [[TMP1]], i32 noundef 5), !dbg [[DBG129:![0-9]+]] // DEBUG2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !dbg [[DBG128]] @@ -7196,14 +7196,14 @@ int foobar() { // // // DEBUG2-LABEL: define {{[^@]+}}@_ZN2S1C1Ei -// DEBUG2-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] comdat align 2 !dbg [[DBG130:![0-9]+]] { +// DEBUG2-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1:[0-9]+]] comdat align 2 !dbg [[DBG130:![0-9]+]] { // DEBUG2-NEXT: entry: // DEBUG2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // DEBUG2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // DEBUG2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// DEBUG2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META131:![0-9]+]], metadata !DIExpression()), !dbg [[DBG133:![0-9]+]] +// DEBUG2-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META131:![0-9]+]], !DIExpression(), [[META133:![0-9]+]]) // DEBUG2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// DEBUG2-NEXT: tail call void @llvm.dbg.declare(metadata ptr 
[[A_ADDR]], metadata [[META134:![0-9]+]], metadata !DIExpression()), !dbg [[DBG135:![0-9]+]] +// DEBUG2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META134:![0-9]+]], !DIExpression(), [[META135:![0-9]+]]) // DEBUG2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // DEBUG2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG136:![0-9]+]] // DEBUG2-NEXT: call void @_ZN2S1C2Ei(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG136]] @@ -7215,20 +7215,20 @@ int foobar() { // DEBUG2-NEXT: entry: // DEBUG2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // DEBUG2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 -// DEBUG2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[DOTADDR]], metadata [[META139:![0-9]+]], metadata !DIExpression()), !dbg [[DBG140:![0-9]+]] -// DEBUG2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !dbg [[DBG140]] -// DEBUG2-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR4]], !dbg [[DBG140]] +// DEBUG2-NEXT: #dbg_declare(ptr [[DOTADDR]], [[META139:![0-9]+]], !DIExpression(), [[META140:![0-9]+]]) +// DEBUG2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !dbg [[META140]] +// DEBUG2-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR3]], !dbg [[META140]] // DEBUG2-NEXT: ret void, !dbg [[DBG141:![0-9]+]] // // // DEBUG2-LABEL: define {{[^@]+}}@_ZN2S1D1Ev -// DEBUG2-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR3:[0-9]+]] comdat align 2 !dbg [[DBG142:![0-9]+]] { +// DEBUG2-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2:[0-9]+]] comdat align 2 !dbg [[DBG142:![0-9]+]] { // DEBUG2-NEXT: entry: // DEBUG2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // DEBUG2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// DEBUG2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata 
[[META143:![0-9]+]], metadata !DIExpression()), !dbg [[DBG144:![0-9]+]] +// DEBUG2-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META143:![0-9]+]], !DIExpression(), [[META144:![0-9]+]]) // DEBUG2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// DEBUG2-NEXT: call void @_ZN2S1D2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR4]], !dbg [[DBG145:![0-9]+]] +// DEBUG2-NEXT: call void @_ZN2S1D2Ev(ptr noundef nonnull align 4 dereferenceable(4) [[THIS1]]) #[[ATTR3]], !dbg [[DBG145:![0-9]+]] // DEBUG2-NEXT: ret void, !dbg [[DBG146:![0-9]+]] // // @@ -7236,19 +7236,19 @@ int foobar() { // DEBUG2-SAME: () #[[ATTR0]] !dbg [[DBG147:![0-9]+]] { // DEBUG2-NEXT: entry: // DEBUG2-NEXT: call void @_ZN2S2C1Ei(ptr noundef nonnull align 8 dereferenceable(16) @_ZL3gs2, i32 noundef 27), !dbg [[DBG148:![0-9]+]] -// DEBUG2-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2S2D1Ev, ptr @_ZL3gs2, ptr @__dso_handle) #[[ATTR4]], !dbg [[DBG150:![0-9]+]] +// DEBUG2-NEXT: [[TMP0:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2S2D1Ev, ptr @_ZL3gs2, ptr @__dso_handle) #[[ATTR3]], !dbg [[DBG150:![0-9]+]] // DEBUG2-NEXT: ret void, !dbg [[DBG151:![0-9]+]] // // // DEBUG2-LABEL: define {{[^@]+}}@_ZN2S2C1Ei -// DEBUG2-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG152:![0-9]+]] { +// DEBUG2-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 !dbg [[DBG152:![0-9]+]] { // DEBUG2-NEXT: entry: // DEBUG2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // DEBUG2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // DEBUG2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// DEBUG2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META153:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155:![0-9]+]] +// DEBUG2-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META153:![0-9]+]], !DIExpression(), 
[[META155:![0-9]+]]) // DEBUG2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// DEBUG2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META156:![0-9]+]], metadata !DIExpression()), !dbg [[DBG157:![0-9]+]] +// DEBUG2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META156:![0-9]+]], !DIExpression(), [[META157:![0-9]+]]) // DEBUG2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // DEBUG2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG158:![0-9]+]] // DEBUG2-NEXT: call void @_ZN2S2C2Ei(ptr noundef nonnull align 8 dereferenceable(16) [[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG158]] @@ -7256,13 +7256,13 @@ int foobar() { // // // DEBUG2-LABEL: define {{[^@]+}}@_ZN2S2D1Ev -// DEBUG2-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG160:![0-9]+]] { +// DEBUG2-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG160:![0-9]+]] { // DEBUG2-NEXT: entry: // DEBUG2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // DEBUG2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// DEBUG2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META161:![0-9]+]], metadata !DIExpression()), !dbg [[DBG162:![0-9]+]] +// DEBUG2-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META161:![0-9]+]], !DIExpression(), [[META162:![0-9]+]]) // DEBUG2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// DEBUG2-NEXT: call void @_ZN2S2D2Ev(ptr noundef nonnull align 8 dereferenceable(16) [[THIS1]]) #[[ATTR4]], !dbg [[DBG163:![0-9]+]] +// DEBUG2-NEXT: call void @_ZN2S2D2Ev(ptr noundef nonnull align 8 dereferenceable(16) [[THIS1]]) #[[ATTR3]], !dbg [[DBG163:![0-9]+]] // DEBUG2-NEXT: ret void, !dbg [[DBG164:![0-9]+]] // // @@ -7302,7 +7302,7 @@ int foobar() { // DEBUG2-NEXT: invoke void @_ZN2S1C1Ei(ptr noundef nonnull align 4 dereferenceable(4) getelementptr inbounds ([[STRUCT_S1]], ptr getelementptr 
inbounds ([3 x %struct.S1], ptr @arr_x, i64 1), i64 2), i32 noundef 6) // DEBUG2-NEXT: to label [[INVOKE_CONT9:%.*]] unwind label [[LPAD6]], !dbg [[DBG176:![0-9]+]] // DEBUG2: invoke.cont9: -// DEBUG2-NEXT: [[TMP1:%.*]] = call i32 @__cxa_atexit(ptr @__cxx_global_array_dtor, ptr null, ptr @__dso_handle) #[[ATTR4]], !dbg [[DBG166]] +// DEBUG2-NEXT: [[TMP1:%.*]] = call i32 @__cxa_atexit(ptr @__cxx_global_array_dtor, ptr null, ptr @__dso_handle) #[[ATTR3]], !dbg [[DBG166]] // DEBUG2-NEXT: ret void, !dbg [[DBG166]] // DEBUG2: lpad: // DEBUG2-NEXT: [[TMP2:%.*]] = landingpad { ptr, i32 } @@ -7317,7 +7317,7 @@ int foobar() { // DEBUG2: arraydestroy.body: // DEBUG2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP5]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG169]] // DEBUG2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG169]] -// DEBUG2-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]], !dbg [[DBG169]] +// DEBUG2-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG169]] // DEBUG2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], @arr_x, !dbg [[DBG169]] // DEBUG2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE4]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG169]] // DEBUG2: arraydestroy.done4: @@ -7335,7 +7335,7 @@ int foobar() { // DEBUG2: arraydestroy.body11: // DEBUG2-NEXT: [[ARRAYDESTROY_ELEMENTPAST12:%.*]] = phi ptr [ [[TMP9]], [[LPAD6]] ], [ [[ARRAYDESTROY_ELEMENT13:%.*]], [[ARRAYDESTROY_BODY11]] ], !dbg [[DBG173]] // DEBUG2-NEXT: [[ARRAYDESTROY_ELEMENT13]] = getelementptr inbounds [[STRUCT_S1]], ptr [[ARRAYDESTROY_ELEMENTPAST12]], i64 -1, !dbg [[DBG173]] -// DEBUG2-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT13]]) #[[ATTR4]], !dbg 
[[DBG173]] +// DEBUG2-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT13]]) #[[ATTR3]], !dbg [[DBG173]] // DEBUG2-NEXT: [[ARRAYDESTROY_DONE14:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT13]], getelementptr inbounds ([3 x %struct.S1], ptr @arr_x, i64 1), !dbg [[DBG173]] // DEBUG2-NEXT: br i1 [[ARRAYDESTROY_DONE14]], label [[ARRAYDESTROY_DONE15]], label [[ARRAYDESTROY_BODY11]], !dbg [[DBG173]] // DEBUG2: arraydestroy.done15: @@ -7348,7 +7348,7 @@ int foobar() { // DEBUG2: arraydestroy.body17: // DEBUG2-NEXT: [[ARRAYDESTROY_ELEMENTPAST18:%.*]] = phi ptr [ [[PAD_ARRAYEND]], [[EHCLEANUP]] ], [ [[ARRAYDESTROY_ELEMENT19:%.*]], [[ARRAYDESTROY_BODY17]] ], !dbg [[DBG167]] // DEBUG2-NEXT: [[ARRAYDESTROY_ELEMENT19]] = getelementptr inbounds [[STRUCT_S1]], ptr [[ARRAYDESTROY_ELEMENTPAST18]], i64 -1, !dbg [[DBG167]] -// DEBUG2-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT19]]) #[[ATTR4]], !dbg [[DBG167]] +// DEBUG2-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT19]]) #[[ATTR3]], !dbg [[DBG167]] // DEBUG2-NEXT: [[ARRAYDESTROY_DONE20:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT19]], @arr_x, !dbg [[DBG167]] // DEBUG2-NEXT: br i1 [[ARRAYDESTROY_DONE20]], label [[ARRAYDESTROY_DONE21]], label [[ARRAYDESTROY_BODY17]], !dbg [[DBG167]] // DEBUG2: arraydestroy.done21: @@ -7371,7 +7371,7 @@ int foobar() { // DEBUG2-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4 // DEBUG2-NEXT: [[ARRAYINIT_ENDOFINIT7:%.*]] = alloca ptr, align 8 // DEBUG2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 -// DEBUG2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[DOTADDR]], metadata [[META179:![0-9]+]], metadata !DIExpression()), !dbg [[DBG180:![0-9]+]] +// DEBUG2-NEXT: #dbg_declare(ptr [[DOTADDR]], [[META179:![0-9]+]], !DIExpression(), [[META180:![0-9]+]]) // DEBUG2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !dbg [[DBG181:![0-9]+]] // 
DEBUG2-NEXT: store ptr [[TMP1]], ptr [[ARRAYINIT_ENDOFINIT]], align 8, !dbg [[DBG182:![0-9]+]] // DEBUG2-NEXT: store ptr [[TMP1]], ptr [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG183:![0-9]+]] @@ -7408,36 +7408,36 @@ int foobar() { // DEBUG2-NEXT: ret ptr [[TMP2]], !dbg [[DBG181]] // DEBUG2: lpad: // DEBUG2-NEXT: [[TMP3:%.*]] = landingpad { ptr, i32 } -// DEBUG2-NEXT: cleanup, !dbg [[DBG180]] -// DEBUG2-NEXT: [[TMP4:%.*]] = extractvalue { ptr, i32 } [[TMP3]], 0, !dbg [[DBG180]] -// DEBUG2-NEXT: store ptr [[TMP4]], ptr [[EXN_SLOT]], align 8, !dbg [[DBG180]] -// DEBUG2-NEXT: [[TMP5:%.*]] = extractvalue { ptr, i32 } [[TMP3]], 1, !dbg [[DBG180]] -// DEBUG2-NEXT: store i32 [[TMP5]], ptr [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG180]] +// DEBUG2-NEXT: cleanup, !dbg [[META180]] +// DEBUG2-NEXT: [[TMP4:%.*]] = extractvalue { ptr, i32 } [[TMP3]], 0, !dbg [[META180]] +// DEBUG2-NEXT: store ptr [[TMP4]], ptr [[EXN_SLOT]], align 8, !dbg [[META180]] +// DEBUG2-NEXT: [[TMP5:%.*]] = extractvalue { ptr, i32 } [[TMP3]], 1, !dbg [[META180]] +// DEBUG2-NEXT: store i32 [[TMP5]], ptr [[EHSELECTOR_SLOT]], align 4, !dbg [[META180]] // DEBUG2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[ARRAYINIT_ENDOFINIT1]], align 8, !dbg [[DBG183]] // DEBUG2-NEXT: [[ARRAYDESTROY_ISEMPTY:%.*]] = icmp eq ptr [[TMP1]], [[TMP6]], !dbg [[DBG183]] // DEBUG2-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY]], label [[ARRAYDESTROY_DONE5:%.*]], label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG183]] // DEBUG2: arraydestroy.body: // DEBUG2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP6]], [[LPAD]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG183]] // DEBUG2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG183]] -// DEBUG2-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]], !dbg [[DBG183]] +// DEBUG2-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) 
[[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[DBG183]] // DEBUG2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[TMP1]], !dbg [[DBG183]] // DEBUG2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE5]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG183]] // DEBUG2: arraydestroy.done5: // DEBUG2-NEXT: br label [[EHCLEANUP:%.*]], !dbg [[DBG183]] // DEBUG2: lpad8: // DEBUG2-NEXT: [[TMP7:%.*]] = landingpad { ptr, i32 } -// DEBUG2-NEXT: cleanup, !dbg [[DBG180]] -// DEBUG2-NEXT: [[TMP8:%.*]] = extractvalue { ptr, i32 } [[TMP7]], 0, !dbg [[DBG180]] -// DEBUG2-NEXT: store ptr [[TMP8]], ptr [[EXN_SLOT]], align 8, !dbg [[DBG180]] -// DEBUG2-NEXT: [[TMP9:%.*]] = extractvalue { ptr, i32 } [[TMP7]], 1, !dbg [[DBG180]] -// DEBUG2-NEXT: store i32 [[TMP9]], ptr [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG180]] +// DEBUG2-NEXT: cleanup, !dbg [[META180]] +// DEBUG2-NEXT: [[TMP8:%.*]] = extractvalue { ptr, i32 } [[TMP7]], 0, !dbg [[META180]] +// DEBUG2-NEXT: store ptr [[TMP8]], ptr [[EXN_SLOT]], align 8, !dbg [[META180]] +// DEBUG2-NEXT: [[TMP9:%.*]] = extractvalue { ptr, i32 } [[TMP7]], 1, !dbg [[META180]] +// DEBUG2-NEXT: store i32 [[TMP9]], ptr [[EHSELECTOR_SLOT]], align 4, !dbg [[META180]] // DEBUG2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[ARRAYINIT_ENDOFINIT7]], align 8, !dbg [[DBG187]] // DEBUG2-NEXT: [[ARRAYDESTROY_ISEMPTY14:%.*]] = icmp eq ptr [[ARRAYINIT_ELEMENT6]], [[TMP10]], !dbg [[DBG187]] // DEBUG2-NEXT: br i1 [[ARRAYDESTROY_ISEMPTY14]], label [[ARRAYDESTROY_DONE19:%.*]], label [[ARRAYDESTROY_BODY15:%.*]], !dbg [[DBG187]] // DEBUG2: arraydestroy.body15: // DEBUG2-NEXT: [[ARRAYDESTROY_ELEMENTPAST16:%.*]] = phi ptr [ [[TMP10]], [[LPAD8]] ], [ [[ARRAYDESTROY_ELEMENT17:%.*]], [[ARRAYDESTROY_BODY15]] ], !dbg [[DBG187]] // DEBUG2-NEXT: [[ARRAYDESTROY_ELEMENT17]] = getelementptr inbounds [[STRUCT_S1]], ptr [[ARRAYDESTROY_ELEMENTPAST16]], i64 -1, !dbg [[DBG187]] -// DEBUG2-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) 
[[ARRAYDESTROY_ELEMENT17]]) #[[ATTR4]], !dbg [[DBG187]] +// DEBUG2-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT17]]) #[[ATTR3]], !dbg [[DBG187]] // DEBUG2-NEXT: [[ARRAYDESTROY_DONE18:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT17]], [[ARRAYINIT_ELEMENT6]], !dbg [[DBG187]] // DEBUG2-NEXT: br i1 [[ARRAYDESTROY_DONE18]], label [[ARRAYDESTROY_DONE19]], label [[ARRAYDESTROY_BODY15]], !dbg [[DBG187]] // DEBUG2: arraydestroy.done19: @@ -7451,7 +7451,7 @@ int foobar() { // DEBUG2: arraydestroy.body21: // DEBUG2-NEXT: [[ARRAYDESTROY_ELEMENTPAST22:%.*]] = phi ptr [ [[PAD_ARRAYEND]], [[EHCLEANUP]] ], [ [[ARRAYDESTROY_ELEMENT23:%.*]], [[ARRAYDESTROY_BODY21]] ], !dbg [[DBG182]] // DEBUG2-NEXT: [[ARRAYDESTROY_ELEMENT23]] = getelementptr inbounds [[STRUCT_S1]], ptr [[ARRAYDESTROY_ELEMENTPAST22]], i64 -1, !dbg [[DBG182]] -// DEBUG2-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT23]]) #[[ATTR4]], !dbg [[DBG182]] +// DEBUG2-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT23]]) #[[ATTR3]], !dbg [[DBG182]] // DEBUG2-NEXT: [[ARRAYDESTROY_DONE24:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT23]], [[PAD_ARRAYBEGIN]], !dbg [[DBG182]] // DEBUG2-NEXT: br i1 [[ARRAYDESTROY_DONE24]], label [[ARRAYDESTROY_DONE25]], label [[ARRAYDESTROY_BODY21]], !dbg [[DBG182]] // DEBUG2: arraydestroy.done25: @@ -7469,16 +7469,16 @@ int foobar() { // DEBUG2-NEXT: entry: // DEBUG2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // DEBUG2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 -// DEBUG2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[DOTADDR]], metadata [[META192:![0-9]+]], metadata !DIExpression()), !dbg [[DBG193:![0-9]+]] -// DEBUG2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !dbg [[DBG193]] -// DEBUG2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP1]], i64 6, !dbg [[DBG193]] -// DEBUG2-NEXT: br label 
[[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG193]] +// DEBUG2-NEXT: #dbg_declare(ptr [[DOTADDR]], [[META192:![0-9]+]], !DIExpression(), [[META193:![0-9]+]]) +// DEBUG2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !dbg [[META193]] +// DEBUG2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP1]], i64 6, !dbg [[META193]] +// DEBUG2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[META193]] // DEBUG2: arraydestroy.body: -// DEBUG2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG193]] -// DEBUG2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG193]] -// DEBUG2-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]], !dbg [[DBG193]] -// DEBUG2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[TMP1]], !dbg [[DBG193]] -// DEBUG2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG193]] +// DEBUG2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ [[TMP2]], [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[META193]] +// DEBUG2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[META193]] +// DEBUG2-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[META193]] +// DEBUG2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], [[TMP1]], !dbg [[META193]] +// DEBUG2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[META193]] // DEBUG2: arraydestroy.done1: // DEBUG2-NEXT: ret void, !dbg [[DBG194:![0-9]+]] // @@ -7488,20 +7488,20 @@ int foobar() { // DEBUG2-NEXT: entry: // DEBUG2-NEXT: [[DOTADDR:%.*]] = alloca ptr, 
align 8 // DEBUG2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 -// DEBUG2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[DOTADDR]], metadata [[META198:![0-9]+]], metadata !DIExpression()), !dbg [[DBG199:![0-9]+]] -// DEBUG2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[DBG199]] +// DEBUG2-NEXT: #dbg_declare(ptr [[DOTADDR]], [[META198:![0-9]+]], !DIExpression(), [[META199:![0-9]+]]) +// DEBUG2-NEXT: br label [[ARRAYDESTROY_BODY:%.*]], !dbg [[META199]] // DEBUG2: arraydestroy.body: -// DEBUG2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ getelementptr inbounds ([[STRUCT_S1:%.*]], ptr @arr_x, i64 6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[DBG199]] -// DEBUG2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[DBG199]] -// DEBUG2-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR4]], !dbg [[DBG199]] -// DEBUG2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], @arr_x, !dbg [[DBG199]] -// DEBUG2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[DBG199]] +// DEBUG2-NEXT: [[ARRAYDESTROY_ELEMENTPAST:%.*]] = phi ptr [ getelementptr inbounds ([[STRUCT_S1:%.*]], ptr @arr_x, i64 6), [[ENTRY:%.*]] ], [ [[ARRAYDESTROY_ELEMENT:%.*]], [[ARRAYDESTROY_BODY]] ], !dbg [[META199]] +// DEBUG2-NEXT: [[ARRAYDESTROY_ELEMENT]] = getelementptr inbounds [[STRUCT_S1]], ptr [[ARRAYDESTROY_ELEMENTPAST]], i64 -1, !dbg [[META199]] +// DEBUG2-NEXT: call void @_ZN2S1D1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[ARRAYDESTROY_ELEMENT]]) #[[ATTR3]], !dbg [[META199]] +// DEBUG2-NEXT: [[ARRAYDESTROY_DONE:%.*]] = icmp eq ptr [[ARRAYDESTROY_ELEMENT]], @arr_x, !dbg [[META199]] +// DEBUG2-NEXT: br i1 [[ARRAYDESTROY_DONE]], label [[ARRAYDESTROY_DONE1:%.*]], label [[ARRAYDESTROY_BODY]], !dbg [[META199]] // DEBUG2: arraydestroy.done1: -// 
DEBUG2-NEXT: ret void, !dbg [[DBG199]] +// DEBUG2-NEXT: ret void, !dbg [[META199]] // // // DEBUG2-LABEL: define {{[^@]+}}@main -// DEBUG2-SAME: () #[[ATTR5:[0-9]+]] personality ptr @__gxx_personality_v0 !dbg [[DBG52:![0-9]+]] { +// DEBUG2-SAME: () #[[ATTR4:[0-9]+]] personality ptr @__gxx_personality_v0 !dbg [[DBG52:![0-9]+]] { // DEBUG2-NEXT: entry: // DEBUG2-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 // DEBUG2-NEXT: [[RES:%.*]] = alloca i32, align 4 @@ -7509,12 +7509,12 @@ int foobar() { // DEBUG2-NEXT: [[EHSELECTOR_SLOT:%.*]] = alloca i32, align 4 // DEBUG2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB9:[0-9]+]]), !dbg [[DBG200:![0-9]+]] // DEBUG2-NEXT: store i32 0, ptr [[RETVAL]], align 4 -// DEBUG2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[RES]], metadata [[META201:![0-9]+]], metadata !DIExpression()), !dbg [[DBG202:![0-9]+]] +// DEBUG2-NEXT: #dbg_declare(ptr [[RES]], [[META201:![0-9]+]], !DIExpression(), [[META202:![0-9]+]]) // DEBUG2-NEXT: [[TMP1:%.*]] = load atomic i8, ptr @_ZGVZ4mainE2sm acquire, align 8, !dbg [[DBG203:![0-9]+]] // DEBUG2-NEXT: [[GUARD_UNINITIALIZED:%.*]] = icmp eq i8 [[TMP1]], 0, !dbg [[DBG203]] // DEBUG2-NEXT: br i1 [[GUARD_UNINITIALIZED]], label [[INIT_CHECK:%.*]], label [[INIT_END:%.*]], !dbg [[DBG203]], !prof [[PROF204:![0-9]+]] // DEBUG2: init.check: -// DEBUG2-NEXT: [[TMP2:%.*]] = call i32 @__cxa_guard_acquire(ptr @_ZGVZ4mainE2sm) #[[ATTR4]], !dbg [[DBG203]] +// DEBUG2-NEXT: [[TMP2:%.*]] = call i32 @__cxa_guard_acquire(ptr @_ZGVZ4mainE2sm) #[[ATTR3]], !dbg [[DBG203]] // DEBUG2-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0, !dbg [[DBG203]] // DEBUG2-NEXT: br i1 [[TOBOOL]], label [[INIT:%.*]], label [[INIT_END]], !dbg [[DBG203]] // DEBUG2: init: @@ -7526,8 +7526,8 @@ int foobar() { // DEBUG2-NEXT: invoke void @_ZZ4mainEN5SmainC1Ei(ptr noundef nonnull align 8 dereferenceable(24) @_ZZ4mainE2sm, i32 noundef [[TMP5]]) // DEBUG2-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[LPAD:%.*]], !dbg 
[[DBG206:![0-9]+]] // DEBUG2: invoke.cont: -// DEBUG2-NEXT: [[TMP6:%.*]] = call i32 @__cxa_atexit(ptr @_ZZ4mainEN5SmainD1Ev, ptr @_ZZ4mainE2sm, ptr @__dso_handle) #[[ATTR4]], !dbg [[DBG203]] -// DEBUG2-NEXT: call void @__cxa_guard_release(ptr @_ZGVZ4mainE2sm) #[[ATTR4]], !dbg [[DBG203]] +// DEBUG2-NEXT: [[TMP6:%.*]] = call i32 @__cxa_atexit(ptr @_ZZ4mainEN5SmainD1Ev, ptr @_ZZ4mainE2sm, ptr @__dso_handle) #[[ATTR3]], !dbg [[DBG203]] +// DEBUG2-NEXT: call void @__cxa_guard_release(ptr @_ZGVZ4mainE2sm) #[[ATTR3]], !dbg [[DBG203]] // DEBUG2-NEXT: br label [[INIT_END]], !dbg [[DBG203]] // DEBUG2: init.end: // DEBUG2-NEXT: [[TMP7:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB11:[0-9]+]], i32 [[TMP0]], ptr @_ZN6Static1sE, i64 8, ptr @_ZN6Static1sE.cache.), !dbg [[DBG207:![0-9]+]] @@ -7590,7 +7590,7 @@ int foobar() { // DEBUG2-NEXT: store ptr [[TMP34]], ptr [[EXN_SLOT]], align 8, !dbg [[DBG233]] // DEBUG2-NEXT: [[TMP35:%.*]] = extractvalue { ptr, i32 } [[TMP33]], 1, !dbg [[DBG233]] // DEBUG2-NEXT: store i32 [[TMP35]], ptr [[EHSELECTOR_SLOT]], align 4, !dbg [[DBG233]] -// DEBUG2-NEXT: call void @__cxa_guard_abort(ptr @_ZGVZ4mainE2sm) #[[ATTR4]], !dbg [[DBG203]] +// DEBUG2-NEXT: call void @__cxa_guard_abort(ptr @_ZGVZ4mainE2sm) #[[ATTR3]], !dbg [[DBG203]] // DEBUG2-NEXT: br label [[EH_RESUME:%.*]], !dbg [[DBG203]] // DEBUG2: eh.resume: // DEBUG2-NEXT: [[EXN:%.*]] = load ptr, ptr [[EXN_SLOT]], align 8, !dbg [[DBG203]] @@ -7606,7 +7606,7 @@ int foobar() { // DEBUG2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // DEBUG2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB5:[0-9]+]]), !dbg [[DBG235:![0-9]+]] // DEBUG2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 -// DEBUG2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[DOTADDR]], metadata [[META236:![0-9]+]], metadata !DIExpression()), !dbg [[DBG237:![0-9]+]] +// DEBUG2-NEXT: #dbg_declare(ptr [[DOTADDR]], [[META236:![0-9]+]], !DIExpression(), [[META237:![0-9]+]]) // DEBUG2-NEXT: [[TMP2:%.*]] 
= load ptr, ptr [[DOTADDR]], align 8, !dbg [[DBG238:![0-9]+]] // DEBUG2-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB5]], i32 [[TMP1]], ptr @_ZL3gs1, i64 4, ptr @_ZL3gs1.cache.), !dbg [[DBG235]] // DEBUG2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[TMP3]], i32 0, i32 0, !dbg [[DBG239:![0-9]+]] @@ -7617,14 +7617,14 @@ int foobar() { // // // DEBUG2-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC1Ei -// DEBUG2-SAME: (ptr noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG241:![0-9]+]] { +// DEBUG2-SAME: (ptr noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] align 2 !dbg [[DBG241:![0-9]+]] { // DEBUG2-NEXT: entry: // DEBUG2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // DEBUG2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // DEBUG2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// DEBUG2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META242:![0-9]+]], metadata !DIExpression()), !dbg [[DBG244:![0-9]+]] +// DEBUG2-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META242:![0-9]+]], !DIExpression(), [[META244:![0-9]+]]) // DEBUG2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// DEBUG2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META245:![0-9]+]], metadata !DIExpression()), !dbg [[DBG246:![0-9]+]] +// DEBUG2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META245:![0-9]+]], !DIExpression(), [[META246:![0-9]+]]) // DEBUG2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // DEBUG2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG247:![0-9]+]] // DEBUG2-NEXT: call void @_ZZ4mainEN5SmainC2Ei(ptr noundef nonnull align 8 dereferenceable(24) [[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG247]] @@ -7636,29 +7636,29 @@ int foobar() { // DEBUG2-NEXT: entry: // DEBUG2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // DEBUG2-NEXT: store ptr 
[[TMP0]], ptr [[DOTADDR]], align 8 -// DEBUG2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[DOTADDR]], metadata [[META250:![0-9]+]], metadata !DIExpression()), !dbg [[DBG251:![0-9]+]] -// DEBUG2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !dbg [[DBG251]] -// DEBUG2-NEXT: call void @_ZZ4mainEN5SmainD1Ev(ptr noundef nonnull align 8 dereferenceable(24) [[TMP1]]) #[[ATTR4]], !dbg [[DBG251]] +// DEBUG2-NEXT: #dbg_declare(ptr [[DOTADDR]], [[META250:![0-9]+]], !DIExpression(), [[META251:![0-9]+]]) +// DEBUG2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !dbg [[META251]] +// DEBUG2-NEXT: call void @_ZZ4mainEN5SmainD1Ev(ptr noundef nonnull align 8 dereferenceable(24) [[TMP1]]) #[[ATTR3]], !dbg [[META251]] // DEBUG2-NEXT: ret void, !dbg [[DBG252:![0-9]+]] // // // DEBUG2-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD1Ev -// DEBUG2-SAME: (ptr noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] align 2 !dbg [[DBG253:![0-9]+]] { +// DEBUG2-SAME: (ptr noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG253:![0-9]+]] { // DEBUG2-NEXT: entry: // DEBUG2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // DEBUG2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// DEBUG2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META254:![0-9]+]], metadata !DIExpression()), !dbg [[DBG255:![0-9]+]] +// DEBUG2-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META254:![0-9]+]], !DIExpression(), [[META255:![0-9]+]]) // DEBUG2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// DEBUG2-NEXT: call void @_ZZ4mainEN5SmainD2Ev(ptr noundef nonnull align 8 dereferenceable(24) [[THIS1]]) #[[ATTR4]], !dbg [[DBG256:![0-9]+]] +// DEBUG2-NEXT: call void @_ZZ4mainEN5SmainD2Ev(ptr noundef nonnull align 8 dereferenceable(24) [[THIS1]]) #[[ATTR3]], !dbg [[DBG256:![0-9]+]] // DEBUG2-NEXT: ret void, !dbg [[DBG257:![0-9]+]] // // // DEBUG2-LABEL: define {{[^@]+}}@_Z6foobarv -// 
DEBUG2-SAME: () #[[ATTR3]] !dbg [[DBG258:![0-9]+]] { +// DEBUG2-SAME: () #[[ATTR2]] !dbg [[DBG258:![0-9]+]] { // DEBUG2-NEXT: entry: // DEBUG2-NEXT: [[RES:%.*]] = alloca i32, align 4 // DEBUG2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB27:[0-9]+]]), !dbg [[DBG259:![0-9]+]] -// DEBUG2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[RES]], metadata [[META260:![0-9]+]], metadata !DIExpression()), !dbg [[DBG261:![0-9]+]] +// DEBUG2-NEXT: #dbg_declare(ptr [[RES]], [[META260:![0-9]+]], !DIExpression(), [[META261:![0-9]+]]) // DEBUG2-NEXT: [[TMP1:%.*]] = call ptr @__kmpc_threadprivate_cached(ptr @[[GLOB27]], i32 [[TMP0]], ptr @_ZN6Static1sE, i64 8, ptr @_ZN6Static1sE.cache.), !dbg [[DBG259]] // DEBUG2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S3:%.*]], ptr [[TMP1]], i32 0, i32 0, !dbg [[DBG262:![0-9]+]] // DEBUG2-NEXT: [[TMP2:%.*]] = load i32, ptr [[A]], align 4, !dbg [[DBG262]] @@ -7719,7 +7719,7 @@ int foobar() { // DEBUG2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB41:[0-9]+]]), !dbg [[DBG285]] // DEBUG2-NEXT: call void @__kmpc_threadprivate_register(ptr @[[GLOB41]], ptr @_ZN2STI2S4E2stE, ptr @.__kmpc_global_ctor_..8, ptr null, ptr @.__kmpc_global_dtor_..9), !dbg [[DBG285]] // DEBUG2-NEXT: call void @_ZN2S4C1Ei(ptr noundef nonnull align 4 dereferenceable(8) @_ZN2STI2S4E2stE, i32 noundef 23), !dbg [[DBG286:![0-9]+]] -// DEBUG2-NEXT: [[TMP2:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2S4D1Ev, ptr @_ZN2STI2S4E2stE, ptr @__dso_handle) #[[ATTR4]], !dbg [[DBG285]] +// DEBUG2-NEXT: [[TMP2:%.*]] = call i32 @__cxa_atexit(ptr @_ZN2S4D1Ev, ptr @_ZN2STI2S4E2stE, ptr @__dso_handle) #[[ATTR3]], !dbg [[DBG285]] // DEBUG2-NEXT: br label [[INIT_END]], !dbg [[DBG285]] // DEBUG2: init.end: // DEBUG2-NEXT: ret void, !dbg [[DBG288:![0-9]+]] @@ -7730,7 +7730,7 @@ int foobar() { // DEBUG2-NEXT: entry: // DEBUG2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // DEBUG2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 -// DEBUG2-NEXT: tail 
call void @llvm.dbg.declare(metadata ptr [[DOTADDR]], metadata [[META290:![0-9]+]], metadata !DIExpression()), !dbg [[DBG291:![0-9]+]] +// DEBUG2-NEXT: #dbg_declare(ptr [[DOTADDR]], [[META290:![0-9]+]], !DIExpression(), [[META291:![0-9]+]]) // DEBUG2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !dbg [[DBG292:![0-9]+]] // DEBUG2-NEXT: call void @_ZN2S4C1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[TMP1]], i32 noundef 23), !dbg [[DBG293:![0-9]+]] // DEBUG2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !dbg [[DBG292]] @@ -7738,14 +7738,14 @@ int foobar() { // // // DEBUG2-LABEL: define {{[^@]+}}@_ZN2S4C1Ei -// DEBUG2-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG294:![0-9]+]] { +// DEBUG2-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR1]] comdat align 2 !dbg [[DBG294:![0-9]+]] { // DEBUG2-NEXT: entry: // DEBUG2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // DEBUG2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // DEBUG2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// DEBUG2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META295:![0-9]+]], metadata !DIExpression()), !dbg [[DBG297:![0-9]+]] +// DEBUG2-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META295:![0-9]+]], !DIExpression(), [[META297:![0-9]+]]) // DEBUG2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// DEBUG2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META298:![0-9]+]], metadata !DIExpression()), !dbg [[DBG299:![0-9]+]] +// DEBUG2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META298:![0-9]+]], !DIExpression(), [[META299:![0-9]+]]) // DEBUG2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // DEBUG2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG300:![0-9]+]] // DEBUG2-NEXT: call void @_ZN2S4C2Ei(ptr noundef nonnull align 4 dereferenceable(8) 
[[THIS1]], i32 noundef [[TMP0]]), !dbg [[DBG300]] @@ -7757,32 +7757,32 @@ int foobar() { // DEBUG2-NEXT: entry: // DEBUG2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 // DEBUG2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 -// DEBUG2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[DOTADDR]], metadata [[META303:![0-9]+]], metadata !DIExpression()), !dbg [[DBG304:![0-9]+]] -// DEBUG2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !dbg [[DBG304]] -// DEBUG2-NEXT: call void @_ZN2S4D1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[TMP1]]) #[[ATTR4]], !dbg [[DBG304]] +// DEBUG2-NEXT: #dbg_declare(ptr [[DOTADDR]], [[META303:![0-9]+]], !DIExpression(), [[META304:![0-9]+]]) +// DEBUG2-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DOTADDR]], align 8, !dbg [[META304]] +// DEBUG2-NEXT: call void @_ZN2S4D1Ev(ptr noundef nonnull align 4 dereferenceable(8) [[TMP1]]) #[[ATTR3]], !dbg [[META304]] // DEBUG2-NEXT: ret void, !dbg [[DBG305:![0-9]+]] // // // DEBUG2-LABEL: define {{[^@]+}}@_ZN2S4D1Ev -// DEBUG2-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG306:![0-9]+]] { +// DEBUG2-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG306:![0-9]+]] { // DEBUG2-NEXT: entry: // DEBUG2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // DEBUG2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// DEBUG2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META307:![0-9]+]], metadata !DIExpression()), !dbg [[DBG308:![0-9]+]] +// DEBUG2-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META307:![0-9]+]], !DIExpression(), [[META308:![0-9]+]]) // DEBUG2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 -// DEBUG2-NEXT: call void @_ZN2S4D2Ev(ptr noundef nonnull align 4 dereferenceable(8) [[THIS1]]) #[[ATTR4]], !dbg [[DBG309:![0-9]+]] +// DEBUG2-NEXT: call void @_ZN2S4D2Ev(ptr noundef nonnull align 4 dereferenceable(8) 
[[THIS1]]) #[[ATTR3]], !dbg [[DBG309:![0-9]+]] // DEBUG2-NEXT: ret void, !dbg [[DBG310:![0-9]+]] // // // DEBUG2-LABEL: define {{[^@]+}}@_ZN2S1C2Ei -// DEBUG2-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG311:![0-9]+]] { +// DEBUG2-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG311:![0-9]+]] { // DEBUG2-NEXT: entry: // DEBUG2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // DEBUG2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // DEBUG2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// DEBUG2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META312:![0-9]+]], metadata !DIExpression()), !dbg [[DBG313:![0-9]+]] +// DEBUG2-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META312:![0-9]+]], !DIExpression(), [[META313:![0-9]+]]) // DEBUG2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// DEBUG2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META314:![0-9]+]], metadata !DIExpression()), !dbg [[DBG315:![0-9]+]] +// DEBUG2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META314:![0-9]+]], !DIExpression(), [[META315:![0-9]+]]) // DEBUG2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // DEBUG2-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG316:![0-9]+]] // DEBUG2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG317:![0-9]+]] @@ -7791,11 +7791,11 @@ int foobar() { // // // DEBUG2-LABEL: define {{[^@]+}}@_ZN2S1D2Ev -// DEBUG2-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG319:![0-9]+]] { +// DEBUG2-SAME: (ptr noundef nonnull align 4 dereferenceable(4) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG319:![0-9]+]] { // DEBUG2-NEXT: entry: // DEBUG2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 
8 // DEBUG2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// DEBUG2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META320:![0-9]+]], metadata !DIExpression()), !dbg [[DBG321:![0-9]+]] +// DEBUG2-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META320:![0-9]+]], !DIExpression(), [[META321:![0-9]+]]) // DEBUG2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // DEBUG2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG322:![0-9]+]] // DEBUG2-NEXT: store i32 0, ptr [[A]], align 4, !dbg [[DBG324:![0-9]+]] @@ -7803,14 +7803,14 @@ int foobar() { // // // DEBUG2-LABEL: define {{[^@]+}}@_ZN2S2C2Ei -// DEBUG2-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG326:![0-9]+]] { +// DEBUG2-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG326:![0-9]+]] { // DEBUG2-NEXT: entry: // DEBUG2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // DEBUG2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // DEBUG2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// DEBUG2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META327:![0-9]+]], metadata !DIExpression()), !dbg [[DBG328:![0-9]+]] +// DEBUG2-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META327:![0-9]+]], !DIExpression(), [[META328:![0-9]+]]) // DEBUG2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// DEBUG2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META329:![0-9]+]], metadata !DIExpression()), !dbg [[DBG330:![0-9]+]] +// DEBUG2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META329:![0-9]+]], !DIExpression(), [[META330:![0-9]+]]) // DEBUG2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // DEBUG2-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG331:![0-9]+]] // 
DEBUG2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG332:![0-9]+]] @@ -7819,11 +7819,11 @@ int foobar() { // // // DEBUG2-LABEL: define {{[^@]+}}@_ZN2S2D2Ev -// DEBUG2-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG334:![0-9]+]] { +// DEBUG2-SAME: (ptr noundef nonnull align 8 dereferenceable(16) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG334:![0-9]+]] { // DEBUG2-NEXT: entry: // DEBUG2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // DEBUG2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// DEBUG2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META335:![0-9]+]], metadata !DIExpression()), !dbg [[DBG336:![0-9]+]] +// DEBUG2-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META335:![0-9]+]], !DIExpression(), [[META336:![0-9]+]]) // DEBUG2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // DEBUG2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S2:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG337:![0-9]+]] // DEBUG2-NEXT: store i32 0, ptr [[A]], align 8, !dbg [[DBG339:![0-9]+]] @@ -7831,14 +7831,14 @@ int foobar() { // // // DEBUG2-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainC2Ei -// DEBUG2-SAME: (ptr noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR3]] align 2 !dbg [[DBG341:![0-9]+]] { +// DEBUG2-SAME: (ptr noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG341:![0-9]+]] { // DEBUG2-NEXT: entry: // DEBUG2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // DEBUG2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // DEBUG2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// DEBUG2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META342:![0-9]+]], metadata !DIExpression()), !dbg [[DBG343:![0-9]+]] +// DEBUG2-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META342:![0-9]+]], 
!DIExpression(), [[META343:![0-9]+]]) // DEBUG2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// DEBUG2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META344:![0-9]+]], metadata !DIExpression()), !dbg [[DBG345:![0-9]+]] +// DEBUG2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META344:![0-9]+]], !DIExpression(), [[META345:![0-9]+]]) // DEBUG2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // DEBUG2-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG346:![0-9]+]] // DEBUG2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG347:![0-9]+]] @@ -7847,11 +7847,11 @@ int foobar() { // // // DEBUG2-LABEL: define {{[^@]+}}@_ZZ4mainEN5SmainD2Ev -// DEBUG2-SAME: (ptr noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] align 2 !dbg [[DBG349:![0-9]+]] { +// DEBUG2-SAME: (ptr noundef nonnull align 8 dereferenceable(24) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] align 2 !dbg [[DBG349:![0-9]+]] { // DEBUG2-NEXT: entry: // DEBUG2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // DEBUG2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// DEBUG2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META350:![0-9]+]], metadata !DIExpression()), !dbg [[DBG351:![0-9]+]] +// DEBUG2-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META350:![0-9]+]], !DIExpression(), [[META351:![0-9]+]]) // DEBUG2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // DEBUG2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SMAIN:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG352:![0-9]+]] // DEBUG2-NEXT: store i32 0, ptr [[A]], align 8, !dbg [[DBG354:![0-9]+]] @@ -7859,14 +7859,14 @@ int foobar() { // // // DEBUG2-LABEL: define {{[^@]+}}@_ZN2S4C2Ei -// DEBUG2-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG356:![0-9]+]] { +// DEBUG2-SAME: (ptr noundef nonnull align 4 
dereferenceable(8) [[THIS:%.*]], i32 noundef [[A:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG356:![0-9]+]] { // DEBUG2-NEXT: entry: // DEBUG2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // DEBUG2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // DEBUG2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// DEBUG2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META357:![0-9]+]], metadata !DIExpression()), !dbg [[DBG358:![0-9]+]] +// DEBUG2-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META357:![0-9]+]], !DIExpression(), [[META358:![0-9]+]]) // DEBUG2-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// DEBUG2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[A_ADDR]], metadata [[META359:![0-9]+]], metadata !DIExpression()), !dbg [[DBG360:![0-9]+]] +// DEBUG2-NEXT: #dbg_declare(ptr [[A_ADDR]], [[META359:![0-9]+]], !DIExpression(), [[META360:![0-9]+]]) // DEBUG2-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // DEBUG2-NEXT: [[A2:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG361:![0-9]+]] // DEBUG2-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4, !dbg [[DBG362:![0-9]+]] @@ -7875,11 +7875,11 @@ int foobar() { // // // DEBUG2-LABEL: define {{[^@]+}}@_ZN2S4D2Ev -// DEBUG2-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 !dbg [[DBG364:![0-9]+]] { +// DEBUG2-SAME: (ptr noundef nonnull align 4 dereferenceable(8) [[THIS:%.*]]) unnamed_addr #[[ATTR2]] comdat align 2 !dbg [[DBG364:![0-9]+]] { // DEBUG2-NEXT: entry: // DEBUG2-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8 // DEBUG2-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8 -// DEBUG2-NEXT: tail call void @llvm.dbg.declare(metadata ptr [[THIS_ADDR]], metadata [[META365:![0-9]+]], metadata !DIExpression()), !dbg [[DBG366:![0-9]+]] +// DEBUG2-NEXT: #dbg_declare(ptr [[THIS_ADDR]], [[META365:![0-9]+]], !DIExpression(), [[META366:![0-9]+]]) // DEBUG2-NEXT: 
[[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // DEBUG2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S4:%.*]], ptr [[THIS1]], i32 0, i32 0, !dbg [[DBG367:![0-9]+]] // DEBUG2-NEXT: store i32 0, ptr [[A]], align 4, !dbg [[DBG369:![0-9]+]] diff --git a/llvm/lib/IR/IRPrintingPasses.cpp b/llvm/lib/IR/IRPrintingPasses.cpp index 43252c57afca99..0dab0c93816358 100644 --- a/llvm/lib/IR/IRPrintingPasses.cpp +++ b/llvm/lib/IR/IRPrintingPasses.cpp @@ -27,7 +27,7 @@ cl::opt WriteNewDbgInfoFormat( "write-experimental-debuginfo", cl::desc("Write debug info in the new non-intrinsic format. Has no effect " "if --preserve-input-debuginfo-format=true."), - cl::init(false)); + cl::init(true)); namespace { diff --git a/llvm/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll b/llvm/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll index 6f4f9d387cb420..04927974a708b4 100644 --- a/llvm/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll +++ b/llvm/test/Assembler/2010-02-05-FunctionLocalMetadataBecomesNull.ll @@ -12,7 +12,7 @@ target triple = "x86_64-apple-darwin10.2" define i32 @main() nounwind readonly !dbg !1 { %diff1 = alloca i64 ; [#uses=2] -; CHECK: call void @llvm.dbg.value(metadata i64 72, +; CHECK: #dbg_value(i64 72, call void @llvm.dbg.declare(metadata ptr %diff1, metadata !0, metadata !DIExpression()), !dbg !DILocation(scope: !1) store i64 72, ptr %diff1, align 8 %v1 = load ptr, ptr @TestArrayPtr, align 8 ; [#uses=1] diff --git a/llvm/test/Assembler/debug-label-bitcode.ll b/llvm/test/Assembler/debug-label-bitcode.ll index a44b0853edd50d..f7ebeb0c7d0c07 100644 --- a/llvm/test/Assembler/debug-label-bitcode.ll +++ b/llvm/test/Assembler/debug-label-bitcode.ll @@ -3,7 +3,7 @@ ; RUN: verify-uselistorder %s ; ; CHECK: top: -; CHECK: call void @llvm.dbg.label(metadata [[LABEL_METADATA:![0-9]+]]) +; CHECK: #dbg_label([[LABEL_METADATA:![0-9]+]], ; CHECK: distinct !DISubprogram(name: "foo", {{.*}}, retainedNodes: [[ELEMENTS:![0-9]+]]) ; CHECK: 
[[ELEMENTS]] = !{[[LABEL_METADATA]]} ; CHECK: [[LABEL_METADATA]] = !DILabel({{.*}}, name: "top", {{.*}}, line: 4) diff --git a/llvm/test/Bitcode/DIExpression-aggresult.ll b/llvm/test/Bitcode/DIExpression-aggresult.ll index 017218277d02bf..309ca1f1d47b1c 100644 --- a/llvm/test/Bitcode/DIExpression-aggresult.ll +++ b/llvm/test/Bitcode/DIExpression-aggresult.ll @@ -3,7 +3,7 @@ %class.A = type { i32, i32, i32, i32 } define void @_Z3fooi(%class.A* sret(%class.A) %agg.result) #0 !dbg !3 { - ; CHECK: call void @llvm.dbg.declare({{.*}}, metadata !DIExpression()), !dbg + ; CHECK: #dbg_declare({{.*}}, !DIExpression(), call void @llvm.dbg.declare(metadata %class.A* %agg.result, metadata !13, metadata !16), !dbg !17 ret void, !dbg !17 } diff --git a/llvm/test/Bitcode/constexpr-to-instr-metadata-2.ll b/llvm/test/Bitcode/constexpr-to-instr-metadata-2.ll index 24938989e21b15..72f092adf5054a 100644 --- a/llvm/test/Bitcode/constexpr-to-instr-metadata-2.ll +++ b/llvm/test/Bitcode/constexpr-to-instr-metadata-2.ll @@ -1,7 +1,7 @@ ; RUN: llvm-dis -expand-constant-exprs < %S/Inputs/constexpr-to-instr-metadata-2.bc | FileCheck %s ; CHECK-LABEL: define void @_ZN4alsa3pcm3PCM17hw_params_current17hf1c237aece2f69c4E() { -; CHECK: call void @llvm.dbg.value(metadata ptr undef, metadata !4, metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)), !dbg !14 +; CHECK: #dbg_value(ptr undef, !4, !DIExpression(DW_OP_LLVM_fragment, 0, 64), !14 ; CHECK-LABEL: define void @_ZN4alsa3pcm8HwParams3any17h02a64cfc85ce8a66E() { -; CHECK: call void @llvm.dbg.value(metadata ptr undef, metadata !23, metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)), !dbg !28 +; CHECK: #dbg_value(ptr undef, !23, !DIExpression(DW_OP_LLVM_fragment, 0, 64), !28 diff --git a/llvm/test/Bitcode/constexpr-to-instr-metadata.ll b/llvm/test/Bitcode/constexpr-to-instr-metadata.ll index 39a1b2687ae86b..ecc39a86c63276 100644 --- a/llvm/test/Bitcode/constexpr-to-instr-metadata.ll +++ b/llvm/test/Bitcode/constexpr-to-instr-metadata.ll @@ -1,4 
+1,4 @@ ; RUN: llvm-dis -expand-constant-exprs < %S/Inputs/constexpr-to-instr-metadata.bc | FileCheck %s ; CHECK-LABEL: define void @test() { -; CHECK: call void @llvm.dbg.value(metadata i64 undef, metadata !4, metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)), !dbg !13 +; CHECK: #dbg_value(i64 undef, !4, !DIExpression(DW_OP_LLVM_fragment, 64, 64), !13 diff --git a/llvm/test/Bitcode/dbg-label-record-bc.ll b/llvm/test/Bitcode/dbg-label-record-bc.ll index e151f7f6cc157d..02142e5e8cf411 100644 --- a/llvm/test/Bitcode/dbg-label-record-bc.ll +++ b/llvm/test/Bitcode/dbg-label-record-bc.ll @@ -14,7 +14,7 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-LABEL: void @foo() ; CHECK: bar: -; INTRINSIC-NEXT: call void @llvm.dbg.label(metadata ![[LABEL:[0-9]+]]), !dbg ![[LOC:[0-9]+]] +; INTRINSIC-NEXT: #dbg_label(![[LABEL:[0-9]+]], ![[LOC:[0-9]+]] ; RECORD-NEXT: #dbg_label(![[LABEL:[0-9]+]], ![[LOC:[0-9]+]]) ; CHECK-DAG: ![[LABEL]] = !DILabel({{.*}}name: "bar" diff --git a/llvm/test/Bitcode/upgrade-dbg-addr.ll b/llvm/test/Bitcode/upgrade-dbg-addr.ll index de35609713f933..2c31fcd1e9c637 100644 --- a/llvm/test/Bitcode/upgrade-dbg-addr.ll +++ b/llvm/test/Bitcode/upgrade-dbg-addr.ll @@ -1,6 +1,6 @@ ; Test upgrade of dbg.addr intrinsics into dbg.value with DW_OP_deref appended ; -; RUN: llvm-dis < %s.bc | FileCheck %s +; RUN: llvm-dis < %s.bc --write-experimental-debuginfo=false | FileCheck %s ; RUN: llvm-dis < %s.bc --load-bitcode-into-experimental-debuginfo-iterators --write-experimental-debuginfo=false | FileCheck %s ; RUN: verify-uselistorder < %s.bc diff --git a/llvm/test/Bitcode/upgrade-dbg-value.ll b/llvm/test/Bitcode/upgrade-dbg-value.ll index 090f515bd3b62e..0e333e439fca96 100644 --- a/llvm/test/Bitcode/upgrade-dbg-value.ll +++ b/llvm/test/Bitcode/upgrade-dbg-value.ll @@ -5,15 +5,14 @@ define void @f() !dbg !3 { entry: - ; CHECK-NOT: call void @llvm.dbg.value - ; CHECK: call void @llvm.dbg.value(metadata i32 42, metadata !8, metadata !DIExpression()) + ; CHECK-NOT: 
#dbg_value + ; CHECK: #dbg_value(i32 42, !8, !DIExpression(), call void @llvm.dbg.value(metadata i32 42, i64 0, metadata !8, metadata !9), !dbg !10 - ; CHECK-NOT: call void @llvm.dbg.value + ; CHECK-NOT: #dbg_value call void @llvm.dbg.value(metadata i32 0, i64 1, metadata !8, metadata !9), !dbg !10 ret void } -; CHECK: declare void @llvm.dbg.value(metadata, metadata, metadata) declare void @llvm.dbg.value(metadata, i64, metadata, metadata) !llvm.dbg.cu = !{!0} diff --git a/llvm/test/CodeGen/AArch64/dbg-declare-swift-async.ll b/llvm/test/CodeGen/AArch64/dbg-declare-swift-async.ll index dfb142572bc626..15eb5f9acfabd8 100644 --- a/llvm/test/CodeGen/AArch64/dbg-declare-swift-async.ll +++ b/llvm/test/CodeGen/AArch64/dbg-declare-swift-async.ll @@ -3,7 +3,7 @@ ; RUN: llc -O0 -fast-isel=false -global-isel=false -stop-after=finalize-isel %s -o - | FileCheck %s ; CHECK: void @foo -; CHECK-NEXT: dbg.declare(metadata {{.*}}, metadata ![[VAR:.*]], metadata !DIExpression([[EXPR:.*]])), !dbg ![[LOC:.*]] +; CHECK-NEXT: #dbg_declare({{.*}}, ![[VAR:.*]], !DIExpression([[EXPR:.*]]), ![[LOC:[0-9]+]] ; CHECK: entry_values: ; CHECK-NEXT: entry-value-register: '$x22', debug-info-variable: '![[VAR]]', debug-info-expression: '!DIExpression([[EXPR]], DW_OP_deref)', ; CHECK-NEXT: debug-info-location: '![[LOC]] diff --git a/llvm/test/CodeGen/AArch64/stack-tagging-dbg-assign-tag-offset.ll b/llvm/test/CodeGen/AArch64/stack-tagging-dbg-assign-tag-offset.ll index 4845bd9936e6b8..a48a885a6c68e0 100644 --- a/llvm/test/CodeGen/AArch64/stack-tagging-dbg-assign-tag-offset.ll +++ b/llvm/test/CodeGen/AArch64/stack-tagging-dbg-assign-tag-offset.ll @@ -16,11 +16,11 @@ entry: %nodebug3 = alloca ptr, align 8 ; CHECK: %a = alloca{{.*}} !DIAssignID ![[ID1:[0-9]+]] %a = alloca ptr, align 8, !DIAssignID !13 - ; CHECK: @llvm.dbg.assign{{.*}} metadata ![[ID1]]{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 4) + ; CHECK: #dbg_assign{{.*}} ![[ID1]]{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 4) call void 
@llvm.dbg.assign(metadata i1 undef, metadata !14, metadata !DIExpression(), metadata !13, metadata ptr %a, metadata !DIExpression()), !dbg !15 ; CHECK: %b = alloca{{.*}} !DIAssignID ![[ID2:[0-9]+]] %b = alloca ptr, align 8, !DIAssignID !16 - ; CHECK: @llvm.dbg.assign{{.*}} metadata ![[ID2]]{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 5) + ; CHECK: #dbg_assign{{.*}} ![[ID2]]{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 5) call void @llvm.dbg.assign(metadata i1 undef, metadata !17, metadata !DIExpression(), metadata !16, metadata ptr %b, metadata !DIExpression()), !dbg !15 call void @g(ptr %nodebug0, ptr %nodebug1, ptr %nodebug2, ptr %nodebug3, ptr %a, ptr %b) ret void, !dbg !18 diff --git a/llvm/test/CodeGen/AArch64/stack-tagging-dbg-declare-tag-offset.ll b/llvm/test/CodeGen/AArch64/stack-tagging-dbg-declare-tag-offset.ll index 0655eaee34e91f..e0efd37aad2c00 100644 --- a/llvm/test/CodeGen/AArch64/stack-tagging-dbg-declare-tag-offset.ll +++ b/llvm/test/CodeGen/AArch64/stack-tagging-dbg-declare-tag-offset.ll @@ -17,14 +17,14 @@ entry: %nodebug2 = alloca ptr %nodebug3 = alloca ptr %a = alloca ptr - ; CHECK: @llvm.dbg.declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 4) + ; CHECK: #dbg_declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 4) call void @llvm.dbg.declare(metadata ptr %a, metadata !12, metadata !DIExpression()), !dbg !14 - ; CHECK: @llvm.dbg.declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 4) + ; CHECK: #dbg_declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 4) call void @llvm.dbg.declare(metadata ptr %a, metadata !12, metadata !DIExpression()), !dbg !14 %b = alloca ptr - ; CHECK: @llvm.dbg.declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 5) + ; CHECK: #dbg_declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 5) call void @llvm.dbg.declare(metadata ptr %b, metadata !13, metadata !DIExpression()), !dbg !14 - ; CHECK: @llvm.dbg.declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 5) + ; CHECK: #dbg_declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 5) call void 
@llvm.dbg.declare(metadata ptr %b, metadata !13, metadata !DIExpression()), !dbg !14 call void @g(ptr %nodebug0, ptr %nodebug1, ptr %nodebug2, ptr %nodebug3, ptr %a, ptr %b) ret void, !dbg !15 diff --git a/llvm/test/CodeGen/AArch64/stack-tagging-dbg-value-tag-offset-nopad.ll b/llvm/test/CodeGen/AArch64/stack-tagging-dbg-value-tag-offset-nopad.ll index 470018759af63f..093ddd3be3a56e 100644 --- a/llvm/test/CodeGen/AArch64/stack-tagging-dbg-value-tag-offset-nopad.ll +++ b/llvm/test/CodeGen/AArch64/stack-tagging-dbg-value-tag-offset-nopad.ll @@ -6,13 +6,13 @@ target triple = "aarch64-unknown-linux-android10000" define dso_local void @f() sanitize_memtag !dbg !14 { %a1 = alloca i128, align 4 %a2 = alloca i128, align 4 -; CHECK: call void @llvm.dbg.value(metadata i128 1, {{.*}}, metadata !DIExpression()) +; CHECK: #dbg_value(i128 1, {{.*}}, !DIExpression(), call void @llvm.dbg.value(metadata i128 1, metadata !20, metadata !DIExpression()), !dbg !22 store i128 1, ptr %a2, align 4, !dbg !23, !tbaa !24 -; CHECK: call void @llvm.dbg.value(metadata ptr %a1, {{.*}}, metadata !DIExpression(DW_OP_LLVM_tag_offset, 0, DW_OP_deref)) +; CHECK: #dbg_value(ptr %a1, {{.*}}, !DIExpression(DW_OP_LLVM_tag_offset, 0, DW_OP_deref), call void @llvm.dbg.value(metadata ptr %a1, metadata !18, metadata !DIExpression(DW_OP_deref)), !dbg !22 call void @use(ptr nonnull %a1), !dbg !28 -; CHECK: call void @llvm.dbg.value(metadata ptr %a2, {{.*}}, metadata !DIExpression(DW_OP_LLVM_tag_offset, 1, DW_OP_deref)) +; CHECK: #dbg_value(ptr %a2, {{.*}}, !DIExpression(DW_OP_LLVM_tag_offset, 1, DW_OP_deref), call void @llvm.dbg.value(metadata ptr %a2, metadata !20, metadata !DIExpression(DW_OP_deref)), !dbg !22 call void @use(ptr nonnull %a2), !dbg !29 ret void, !dbg !30 diff --git a/llvm/test/CodeGen/AArch64/stack-tagging-dbg-value-tag-offset.ll b/llvm/test/CodeGen/AArch64/stack-tagging-dbg-value-tag-offset.ll index 6b3e34c265a1ee..ac9b4eb88ad7d8 100644 --- 
a/llvm/test/CodeGen/AArch64/stack-tagging-dbg-value-tag-offset.ll +++ b/llvm/test/CodeGen/AArch64/stack-tagging-dbg-value-tag-offset.ll @@ -6,13 +6,13 @@ target triple = "aarch64-unknown-linux-android10000" define dso_local void @f() sanitize_memtag !dbg !14 { %a1 = alloca i32, align 4 %a2 = alloca i32, align 4 -; CHECK: call void @llvm.dbg.value(metadata i32 1, {{.*}}, metadata !DIExpression()) +; CHECK: #dbg_value(i32 1, {{.*}}, !DIExpression(), call void @llvm.dbg.value(metadata i32 1, metadata !20, metadata !DIExpression()), !dbg !22 store i32 1, ptr %a2, align 4, !dbg !23, !tbaa !24 -; CHECK: call void @llvm.dbg.value(metadata ptr %a1, {{.*}} metadata !DIExpression(DW_OP_LLVM_tag_offset, 0, DW_OP_deref)) +; CHECK: #dbg_value(ptr %a1, {{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 0, DW_OP_deref), call void @llvm.dbg.value(metadata ptr %a1, metadata !18, metadata !DIExpression(DW_OP_deref)), !dbg !22 call void @use(ptr nonnull %a1), !dbg !28 -; CHECK: call void @llvm.dbg.value(metadata ptr %a2, {{.*}} metadata !DIExpression(DW_OP_LLVM_tag_offset, 1, DW_OP_deref)) +; CHECK: #dbg_value(ptr %a2, {{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 1, DW_OP_deref), call void @llvm.dbg.value(metadata ptr %a2, metadata !20, metadata !DIExpression(DW_OP_deref)), !dbg !22 call void @use(ptr nonnull %a2), !dbg !29 ret void, !dbg !30 diff --git a/llvm/test/CodeGen/AArch64/stack-tagging-dbg.ll b/llvm/test/CodeGen/AArch64/stack-tagging-dbg.ll index ba8c76348b9b54..d81ae9dc8b5561 100644 --- a/llvm/test/CodeGen/AArch64/stack-tagging-dbg.ll +++ b/llvm/test/CodeGen/AArch64/stack-tagging-dbg.ll @@ -21,8 +21,8 @@ entry: ; CHECK-LABEL: define void @DbgIntrinsics( ; CHECK: [[X:%.*]] = alloca { i32, [12 x i8] }, align 16 -; CHECK: call void @llvm.dbg.declare(metadata ptr [[X]], -; CHECK: call void @llvm.dbg.value(metadata !DIArgList(ptr [[X]], ptr [[X]]) +; CHECK: #dbg_declare(ptr [[X]], +; CHECK: #dbg_value(!DIArgList(ptr [[X]], ptr [[X]]) !llvm.dbg.cu = !{!0} diff --git 
a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll index 9646d196da42f6..a35fbaadddf9ef 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll @@ -886,8 +886,8 @@ define float @sincos_f32_unused_result_cos(float %x) { ; CHECK-LABEL: define float @sincos_f32_unused_result_cos ; CHECK-SAME: (float [[X:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = tail call contract float @_Z3sinf(float [[X]]) -; CHECK-NEXT: ret float [[TMP0]] +; CHECK-NEXT: [[SIN:%.*]] = tail call contract float @_Z3sinf(float [[X]]) +; CHECK-NEXT: ret float [[SIN]] ; entry: %alloca0 = alloca i32, addrspace(5) @@ -901,8 +901,8 @@ define float @sincos_f32_unused_result_sin(float %x) { ; CHECK-LABEL: define float @sincos_f32_unused_result_sin ; CHECK-SAME: (float [[X:%.*]]) local_unnamed_addr #[[ATTR5]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP1:%.*]] = tail call contract float @_Z3cosf(float [[X]]) -; CHECK-NEXT: ret float [[TMP1]] +; CHECK-NEXT: [[COS:%.*]] = tail call contract float @_Z3cosf(float [[X]]) +; CHECK-NEXT: ret float [[COS]] ; entry: %alloca0 = alloca i32, addrspace(5) @@ -916,13 +916,13 @@ define void @sincos_f32_repeated_uses(float %x, ptr addrspace(1) %sin_out, ptr a ; CHECK-LABEL: define void @sincos_f32_repeated_uses ; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) [[SIN_OUT:%.*]], ptr addrspace(1) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR6:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[__SINCOS_3:%.*]] = alloca float, align 4, addrspace(5) -; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_3]]) -; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_3]], align 4 +; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5) +; CHECK-NEXT: [[TMP0:%.*]] = call contract float 
@_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]) +; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4 ; CHECK-NEXT: store volatile float [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 4 ; CHECK-NEXT: store volatile float [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 4 -; CHECK-NEXT: store volatile float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 -; CHECK-NEXT: store volatile float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: store volatile float [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 4 +; CHECK-NEXT: store volatile float [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 4 ; CHECK-NEXT: store volatile float [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 4 ; CHECK-NEXT: ret void ; @@ -1046,9 +1046,9 @@ define void @sincos_f32_debuginfo(float %x, ptr addrspace(1) nocapture writeonly ; CHECK-NEXT: [[__SINCOS_:%.*]] = alloca float, align 4, addrspace(5), !dbg [[DBG14:![0-9]+]] ; CHECK-NEXT: [[TMP0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[__SINCOS_]]), !dbg [[DBG14]] ; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr addrspace(5) [[__SINCOS_]], align 4, !dbg [[DBG14]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata float [[TMP0]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG15:![0-9]+]] +; CHECK-NEXT: #dbg_value(float [[TMP0]], [[META11:![0-9]+]], !DIExpression(), [[META15:![0-9]+]]) ; CHECK-NEXT: store float [[TMP0]], ptr addrspace(1) [[SIN_OUT]], align 4, !dbg [[DBG16:![0-9]+]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata float [[TMP1]], metadata [[META13:![0-9]+]], metadata !DIExpression()), !dbg [[DBG17:![0-9]+]] +; CHECK-NEXT: #dbg_value(float [[TMP1]], [[META13:![0-9]+]], !DIExpression(), [[META17:![0-9]+]]) ; CHECK-NEXT: store float [[TMP1]], ptr addrspace(1) [[COS_OUT]], align 4, !dbg [[DBG18:![0-9]+]] ; CHECK-NEXT: ret void, !dbg [[DBG19:![0-9]+]] ; diff --git 
a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-unoptimized-debug-data.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-unoptimized-debug-data.ll index 5828d5bf6f11cf..90fc3cf3d72ea3 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-unoptimized-debug-data.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-unoptimized-debug-data.ll @@ -9,15 +9,15 @@ define float @debug_stash_pointer(ptr addrspace(8) %buf, i32 %idx, ptr addrspace ; CHECK-LABEL: define float @debug_stash_pointer ; CHECK-SAME: (ptr addrspace(8) [[BUF:%.*]], i32 [[IDX:%.*]], ptr addrspace(8) [[AUX:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG5:![0-9]+]] { ; CHECK-NEXT: [[BUF_PTR_VAR:%.*]] = alloca i160, align 32, addrspace(5), !dbg [[DBG21:![0-9]+]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata ptr addrspace(5) [[BUF_PTR_VAR]], metadata [[META10:![0-9]+]], metadata !DIExpression()), !dbg [[DBG21]] +; CHECK-NEXT: #dbg_value(ptr addrspace(5) [[BUF_PTR_VAR]], [[META10:![0-9]+]], !DIExpression(), [[DBG21]]) ; CHECK-NEXT: [[AUX_PTR_VAR:%.*]] = alloca i160, align 32, addrspace(5), !dbg [[DBG22:![0-9]+]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata ptr addrspace(5) [[AUX_PTR_VAR]], metadata [[META12:![0-9]+]], metadata !DIExpression()), !dbg [[DBG22]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata { ptr addrspace(8), i32 } undef, metadata [[META13:![0-9]+]], metadata !DIExpression()), !dbg [[DBG23:![0-9]+]] +; CHECK-NEXT: #dbg_value(ptr addrspace(5) [[AUX_PTR_VAR]], [[META12:![0-9]+]], !DIExpression(), [[DBG22]]) +; CHECK-NEXT: #dbg_value({ ptr addrspace(8), i32 } undef, [[META13:![0-9]+]], !DIExpression(), [[META23:![0-9]+]]) ; CHECK-NEXT: [[BUF_PTR_INT_RSRC:%.*]] = ptrtoint ptr addrspace(8) [[BUF]] to i160, !dbg [[DBG24:![0-9]+]] ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i160 [[BUF_PTR_INT_RSRC]], 32, !dbg [[DBG24]] ; CHECK-NEXT: [[BUF_PTR_INT:%.*]] = or i160 [[TMP1]], 0, !dbg [[DBG24]] ; CHECK-NEXT: store i160 [[BUF_PTR_INT]], ptr addrspace(5) [[BUF_PTR_VAR]], 
align 32, !dbg [[DBG24]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata { ptr addrspace(8), i32 } undef, metadata [[META15:![0-9]+]], metadata !DIExpression()), !dbg [[DBG25:![0-9]+]] +; CHECK-NEXT: #dbg_value({ ptr addrspace(8), i32 } undef, [[META15:![0-9]+]], !DIExpression(), [[META25:![0-9]+]]) ; CHECK-NEXT: [[AUX_PTR_INT_RSRC:%.*]] = ptrtoint ptr addrspace(8) [[AUX]] to i160, !dbg [[DBG26:![0-9]+]] ; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i160 [[AUX_PTR_INT_RSRC]], 32, !dbg [[DBG26]] ; CHECK-NEXT: [[AUX_PTR_INT:%.*]] = or i160 [[TMP2]], 0, !dbg [[DBG26]] @@ -27,10 +27,10 @@ define float @debug_stash_pointer(ptr addrspace(8) %buf, i32 %idx, ptr addrspace ; CHECK-NEXT: [[TMP4:%.*]] = trunc i160 [[TMP3]] to i128, !dbg [[DBG27]] ; CHECK-NEXT: [[BUF_PTR_2_PTR_RSRC:%.*]] = inttoptr i128 [[TMP4]] to ptr addrspace(8), !dbg [[DBG27]] ; CHECK-NEXT: [[BUF_PTR_2_PTR_OFF:%.*]] = trunc i160 [[BUF_PTR_2]] to i32, !dbg [[DBG27]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata { ptr addrspace(8), i32 } undef, metadata [[META16:![0-9]+]], metadata !DIExpression()), !dbg [[DBG27]] +; CHECK-NEXT: #dbg_value({ ptr addrspace(8), i32 } undef, [[META16:![0-9]+]], !DIExpression(), [[DBG27]]) ; CHECK-NEXT: [[BUF_PTR_3_IDX:%.*]] = mul i32 [[IDX]], 4, !dbg [[DBG28:![0-9]+]] ; CHECK-NEXT: [[BUF_PTR_3:%.*]] = add i32 [[BUF_PTR_2_PTR_OFF]], [[BUF_PTR_3_IDX]], !dbg [[DBG28]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata { ptr addrspace(8), i32 } undef, metadata [[META17:![0-9]+]], metadata !DIExpression()), !dbg [[DBG28]] +; CHECK-NEXT: #dbg_value({ ptr addrspace(8), i32 } undef, [[META17:![0-9]+]], !DIExpression(), [[DBG28]]) ; CHECK-NEXT: [[BUF_PTR_3_INT_RSRC:%.*]] = ptrtoint ptr addrspace(8) [[BUF_PTR_2_PTR_RSRC]] to i160, !dbg [[DBG29:![0-9]+]] ; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i160 [[BUF_PTR_3_INT_RSRC]], 32, !dbg [[DBG29]] ; CHECK-NEXT: [[BUF_PTR_3_INT_OFF:%.*]] = zext i32 [[BUF_PTR_3]] to i160, !dbg [[DBG29]] @@ -41,15 +41,15 @@ define float 
@debug_stash_pointer(ptr addrspace(8) %buf, i32 %idx, ptr addrspace ; CHECK-NEXT: [[TMP7:%.*]] = trunc i160 [[TMP6]] to i128, !dbg [[DBG30]] ; CHECK-NEXT: [[BUF_PTR_4_PTR_RSRC:%.*]] = inttoptr i128 [[TMP7]] to ptr addrspace(8), !dbg [[DBG30]] ; CHECK-NEXT: [[BUF_PTR_4_PTR_OFF:%.*]] = trunc i160 [[BUF_PTR_4]] to i32, !dbg [[DBG30]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata { ptr addrspace(8), i32 } undef, metadata [[META18:![0-9]+]], metadata !DIExpression()), !dbg [[DBG30]] +; CHECK-NEXT: #dbg_value({ ptr addrspace(8), i32 } undef, [[META18:![0-9]+]], !DIExpression(), [[DBG30]]) ; CHECK-NEXT: [[RET:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF_PTR_4_PTR_RSRC]], i32 [[BUF_PTR_4_PTR_OFF]], i32 0, i32 0), !dbg [[DBG31:![0-9]+]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata float [[RET]], metadata [[META19:![0-9]+]], metadata !DIExpression()), !dbg [[DBG31]] +; CHECK-NEXT: #dbg_value(float [[RET]], [[META19:![0-9]+]], !DIExpression(), [[DBG31]]) ; CHECK-NEXT: [[AUX_PTR_2:%.*]] = load i160, ptr addrspace(5) [[AUX_PTR_VAR]], align 32, !dbg [[DBG32:![0-9]+]] ; CHECK-NEXT: [[TMP8:%.*]] = lshr i160 [[AUX_PTR_2]], 32, !dbg [[DBG32]] ; CHECK-NEXT: [[TMP9:%.*]] = trunc i160 [[TMP8]] to i128, !dbg [[DBG32]] ; CHECK-NEXT: [[AUX_PTR_2_PTR_RSRC:%.*]] = inttoptr i128 [[TMP9]] to ptr addrspace(8), !dbg [[DBG32]] ; CHECK-NEXT: [[AUX_PTR_2_PTR_OFF:%.*]] = trunc i160 [[AUX_PTR_2]] to i32, !dbg [[DBG32]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata { ptr addrspace(8), i32 } undef, metadata [[META20:![0-9]+]], metadata !DIExpression()), !dbg [[DBG32]] +; CHECK-NEXT: #dbg_value({ ptr addrspace(8), i32 } undef, [[META20:![0-9]+]], !DIExpression(), [[DBG32]]) ; CHECK-NEXT: [[BUF_PTR_4_PTR_INT_RSRC:%.*]] = ptrtoint ptr addrspace(8) [[BUF_PTR_4_PTR_RSRC]] to i160, !dbg [[DBG33:![0-9]+]] ; CHECK-NEXT: [[TMP10:%.*]] = shl nuw i160 [[BUF_PTR_4_PTR_INT_RSRC]], 32, !dbg [[DBG33]] ; CHECK-NEXT: [[BUF_PTR_4_PTR_INT_OFF:%.*]] = 
zext i32 [[BUF_PTR_4_PTR_OFF]] to i160, !dbg [[DBG33]] diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-dbg-info.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-dbg-info.ll index a7b4eee84cb9e4..15f6bb632f3113 100644 --- a/llvm/test/CodeGen/AMDGPU/si-annotate-dbg-info.ll +++ b/llvm/test/CodeGen/AMDGPU/si-annotate-dbg-info.ll @@ -6,7 +6,7 @@ define amdgpu_ps i32 @if_else(i32 %0) !dbg !5 { ; OPT-LABEL: define amdgpu_ps i32 @if_else( ; OPT-SAME: i32 [[TMP0:%.*]]) !dbg [[DBG5:![0-9]+]] { ; OPT-NEXT: [[C:%.*]] = icmp ne i32 [[TMP0]], 0, !dbg [[DBG13:![0-9]+]] -; OPT-NEXT: tail call void @llvm.dbg.value(metadata i1 [[C]], metadata [[META9:![0-9]+]], metadata !DIExpression()), !dbg [[DBG13]] +; OPT-NEXT: #dbg_value(i1 [[C]], [[META9:![0-9]+]], !DIExpression(), [[DBG13]]) ; OPT-NEXT: [[TMP2:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[C]]), !dbg [[DBG14:![0-9]+]] ; OPT-NEXT: [[TMP3:%.*]] = extractvalue { i1, i64 } [[TMP2]], 0, !dbg [[DBG14]] ; OPT-NEXT: [[TMP4:%.*]] = extractvalue { i1, i64 } [[TMP2]], 1, !dbg [[DBG14]] @@ -24,7 +24,7 @@ define amdgpu_ps i32 @if_else(i32 %0) !dbg !5 { ; OPT: exit: ; OPT-NEXT: [[RET:%.*]] = phi i32 [ [[TMP5]], [[FLOW]] ], [ 42, [[TRUE]] ], !dbg [[DBG17:![0-9]+]] ; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 [[TMP8]]) -; OPT-NEXT: tail call void @llvm.dbg.value(metadata i32 [[RET]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG17]] +; OPT-NEXT: #dbg_value(i32 [[RET]], [[META11:![0-9]+]], !DIExpression(), [[DBG17]]) ; OPT-NEXT: ret i32 [[RET]], !dbg [[DBG18:![0-9]+]] ; %c = icmp eq i32 %0, 0, !dbg !13 @@ -51,16 +51,16 @@ define amdgpu_ps void @loop_if_break(i32 %n) !dbg !19 { ; OPT: loop: ; OPT-NEXT: [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP5:%.*]], [[FLOW:%.*]] ], [ 0, [[ENTRY:%.*]] ] ; OPT-NEXT: [[I:%.*]] = phi i32 [ [[N]], [[ENTRY]] ], [ [[TMP3:%.*]], [[FLOW]] ], !dbg [[DBG25:![0-9]+]] -; OPT-NEXT: tail call void @llvm.dbg.value(metadata i32 [[I]], metadata [[META21:![0-9]+]], metadata !DIExpression()), !dbg [[DBG25]] 
+; OPT-NEXT: #dbg_value(i32 [[I]], [[META21:![0-9]+]], !DIExpression(), [[DBG25]]) ; OPT-NEXT: [[C:%.*]] = icmp ugt i32 [[I]], 0, !dbg [[DBG26:![0-9]+]] -; OPT-NEXT: tail call void @llvm.dbg.value(metadata i1 [[C]], metadata [[META22:![0-9]+]], metadata !DIExpression()), !dbg [[DBG26]] +; OPT-NEXT: #dbg_value(i1 [[C]], [[META22:![0-9]+]], !DIExpression(), [[DBG26]]) ; OPT-NEXT: [[TMP0:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 [[C]]), !dbg [[DBG27:![0-9]+]] ; OPT-NEXT: [[TMP1:%.*]] = extractvalue { i1, i64 } [[TMP0]], 0, !dbg [[DBG27]] ; OPT-NEXT: [[TMP2:%.*]] = extractvalue { i1, i64 } [[TMP0]], 1, !dbg [[DBG27]] ; OPT-NEXT: br i1 [[TMP1]], label [[LOOP_BODY:%.*]], label [[FLOW]], !dbg [[DBG27]] ; OPT: loop_body: ; OPT-NEXT: [[I_NEXT:%.*]] = sub i32 [[I]], 1, !dbg [[DBG28:![0-9]+]] -; OPT-NEXT: tail call void @llvm.dbg.value(metadata i32 [[I_NEXT]], metadata [[META23:![0-9]+]], metadata !DIExpression()), !dbg [[DBG28]] +; OPT-NEXT: #dbg_value(i32 [[I_NEXT]], [[META23:![0-9]+]], !DIExpression(), [[DBG28]]) ; OPT-NEXT: br label [[FLOW]], !dbg [[DBG29:![0-9]+]] ; OPT: Flow: ; OPT-NEXT: [[TMP3]] = phi i32 [ [[I_NEXT]], [[LOOP_BODY]] ], [ undef, [[LOOP]] ] diff --git a/llvm/test/CodeGen/BPF/preserve-static-offset/load-arr-pai.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/load-arr-pai.ll index d6b1f30fa38674..d65a46a03eb0bb 100644 --- a/llvm/test/CodeGen/BPF/preserve-static-offset/load-arr-pai.ll +++ b/llvm/test/CodeGen/BPF/preserve-static-offset/load-arr-pai.ll @@ -31,7 +31,7 @@ entry: ; CHECK: define dso_local i32 @buz(ptr noundef %[[p:.*]]) {{.*}} { ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @llvm.dbg.value +; CHECK-NEXT: #dbg_value ; CHECK-NEXT: %[[v5:.*]] = call i32 (ptr, i1, i8, i8, i8, i1, ...) 
; CHECK-SAME: @llvm.bpf.getelementptr.and.load.i32 ; CHECK-SAME: (ptr readonly elementtype(%struct.bar) %[[p]], diff --git a/llvm/test/CodeGen/BPF/preserve-static-offset/load-ptr-pai.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/load-ptr-pai.ll index 6ec59c6b2c0247..eb8d22c7d16d58 100644 --- a/llvm/test/CodeGen/BPF/preserve-static-offset/load-ptr-pai.ll +++ b/llvm/test/CodeGen/BPF/preserve-static-offset/load-ptr-pai.ll @@ -41,7 +41,7 @@ entry: ; CHECK: define dso_local void @foo(ptr noundef %[[p:.*]]) {{.*}} { ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @llvm.dbg.value +; CHECK-NEXT: #dbg_value ; CHECK-NEXT: %[[v5:.*]] = call ptr (ptr, i1, i8, i8, i8, i1, ...) ; CHECK-SAME: @llvm.bpf.getelementptr.and.load.p0 ; CHECK-SAME: (ptr readonly elementtype(%struct.buz) %[[p]], diff --git a/llvm/test/CodeGen/BPF/preserve-static-offset/load-struct-pai.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/load-struct-pai.ll index 5baa7ad0242cfe..4c4151d5e4cb84 100644 --- a/llvm/test/CodeGen/BPF/preserve-static-offset/load-struct-pai.ll +++ b/llvm/test/CodeGen/BPF/preserve-static-offset/load-struct-pai.ll @@ -40,7 +40,7 @@ entry: ; CHECK: define dso_local i32 @buz(ptr noundef %[[p:.*]]) {{.*}} { ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @llvm.dbg.value +; CHECK-NEXT: #dbg_value ; CHECK-NEXT: %[[b1:.*]] = call i32 (ptr, i1, i8, i8, i8, i1, ...) 
; CHECK-SAME: @llvm.bpf.getelementptr.and.load.i32 ; CHECK-SAME: (ptr readonly elementtype(%struct.bar) %[[p]], diff --git a/llvm/test/CodeGen/BPF/preserve-static-offset/load-union-pai.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/load-union-pai.ll index f90e3c54b07277..40a2d4fa12284b 100644 --- a/llvm/test/CodeGen/BPF/preserve-static-offset/load-union-pai.ll +++ b/llvm/test/CodeGen/BPF/preserve-static-offset/load-union-pai.ll @@ -41,7 +41,7 @@ entry: ; CHECK: define dso_local i32 @quux(ptr noundef %[[p:.*]]) {{.*}} { ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @llvm.dbg.value +; CHECK-NEXT: #dbg_value ; CHECK-NEXT: %[[v5:.*]] = call i32 (ptr, i1, i8, i8, i8, i1, ...) ; CHECK-SAME: @llvm.bpf.getelementptr.and.load.i32 ; CHECK-SAME: (ptr readonly elementtype(%struct.bar) %[[p]], diff --git a/llvm/test/CodeGen/BPF/preserve-static-offset/store-pai.ll b/llvm/test/CodeGen/BPF/preserve-static-offset/store-pai.ll index b22b2683682606..65fd2814977fb2 100644 --- a/llvm/test/CodeGen/BPF/preserve-static-offset/store-pai.ll +++ b/llvm/test/CodeGen/BPF/preserve-static-offset/store-pai.ll @@ -51,7 +51,7 @@ entry: ; CHECK: define dso_local void @quux(ptr noundef %[[p:.*]]) {{.*}} { ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @llvm.dbg.value +; CHECK-NEXT: #dbg_value ; CHECK-NEXT: call void (i32, ptr, i1, i8, i8, i8, i1, ...) 
; CHECK-SAME: @llvm.bpf.getelementptr.and.store.i32 ; CHECK-SAME: (i32 42, diff --git a/llvm/test/CodeGen/Generic/MIRDebugify/locations-and-values.mir b/llvm/test/CodeGen/Generic/MIRDebugify/locations-and-values.mir index 3035fb8eab3f82..77a4d65cb66b57 100644 --- a/llvm/test/CodeGen/Generic/MIRDebugify/locations-and-values.mir +++ b/llvm/test/CodeGen/Generic/MIRDebugify/locations-and-values.mir @@ -15,10 +15,10 @@ define i32 @test(i32 %a, i32 %b) { %add = add i32 %a, 2 ; ALL-NEXT: %add = add i32 %a, 2, !dbg [[L1:![0-9]+]] - ; VALUE-NEXT: call{{( addrspace\([0-9]+\))?}} void @llvm.dbg.value(metadata i32 %add, metadata [[add:![0-9]+]], metadata !DIExpression()), !dbg [[L1]] + ; VALUE-NEXT: #dbg_value(i32 %add, [[add:![0-9]+]], !DIExpression(), [[L1]] %sub = sub i32 %add, %b ; ALL-NEXT: %sub = sub i32 %add, %b, !dbg [[L2:![0-9]+]] - ; VALUE-NEXT: call{{( addrspace\([0-9]+\))?}} void @llvm.dbg.value(metadata i32 %sub, metadata [[sub:![0-9]+]], metadata !DIExpression()), !dbg [[L2]] + ; VALUE-NEXT: #dbg_value(i32 %sub, [[sub:![0-9]+]], !DIExpression(), [[L2]] ; ALL-NEXT: ret i32 %sub, !dbg [[L3:![0-9]+]] ret i32 %sub } diff --git a/llvm/test/CodeGen/Generic/MIRStripDebug/dont-strip-real-debug-info.mir b/llvm/test/CodeGen/Generic/MIRStripDebug/dont-strip-real-debug-info.mir index 01a3b2faeb7ded..a085462b8e8f9a 100644 --- a/llvm/test/CodeGen/Generic/MIRStripDebug/dont-strip-real-debug-info.mir +++ b/llvm/test/CodeGen/Generic/MIRStripDebug/dont-strip-real-debug-info.mir @@ -13,9 +13,9 @@ } ; CHECK-LABEL: define i32 @test(i32 %a, i32 %b) {{(addrspace\([0-9]+\) )?}}!dbg !4 { ; CHECK-NEXT: %add = add i32 %a, 2, !dbg !10 - ; CHECK-NEXT: call{{( addrspace\([0-9]+\))?}} void @llvm.dbg.value(metadata i32 %add, metadata !7, metadata !DIExpression()), !dbg !10 + ; CHECK-NEXT: #dbg_value(i32 %add, !7, !DIExpression(), !10 ; CHECK-NEXT: %sub = sub i32 %add, %b, !dbg !11 - ; CHECK-NEXT: call{{( addrspace\([0-9]+\))?}} void @llvm.dbg.value(metadata i32 %sub, metadata !9, metadata 
!DIExpression()), !dbg !11 + ; CHECK-NEXT: #dbg_value(i32 %sub, !9, !DIExpression(), !11 ; CHECK-NEXT: ret i32 %sub, !dbg !12 ; CHECK-NEXT: } diff --git a/llvm/test/CodeGen/X86/fast-isel-dbg-value-alloca.ll b/llvm/test/CodeGen/X86/fast-isel-dbg-value-alloca.ll index bdb5239d289ea4..e1e90f1f458fc8 100644 --- a/llvm/test/CodeGen/X86/fast-isel-dbg-value-alloca.ll +++ b/llvm/test/CodeGen/X86/fast-isel-dbg-value-alloca.ll @@ -7,7 +7,7 @@ define void @foo(ptr noalias nocapture %arg) !dbg !38 { %k.debug = alloca ptr, align 8 store ptr %arg, ptr %k.debug, align 8, !dbg !70 call void @llvm.dbg.value(metadata ptr %k.debug, metadata !55, metadata !DIExpression(DW_OP_deref)), !dbg !70 -; CHECK: call void @llvm.dbg.value(metadata ptr %{{.*}}, metadata ![[VAR:.*]], metadata ![[EXPR:.*]]) +; CHECK: #dbg_value(ptr %{{.*}}, ![[VAR:.*]], ![[EXPR:.*]], ; CHECK: DBG_VALUE %stack.0{{.*}}, $noreg, ![[VAR]], ![[EXPR]] ret void, !dbg !70 } diff --git a/llvm/test/CodeGen/X86/pr38763.ll b/llvm/test/CodeGen/X86/pr38763.ll index 41b0344b8a92ae..31f97ef7e8e5c0 100644 --- a/llvm/test/CodeGen/X86/pr38763.ll +++ b/llvm/test/CodeGen/X86/pr38763.ll @@ -32,11 +32,11 @@ ; CHECK-LABEL: entry ; CHECK: %cmp = icmp eq i32 %foo.0., 4, !dbg !14 ; CHECK: %add = add nsw i32 %foo.0.4, 2, !dbg !16 -; CHECK-NOT: @llvm.dbg.value(metadata i32 %add +; CHECK-NOT: #dbg_value(i32 %add ; CHECK: %sub = add nsw i32 %foo.0.4, -2, !dbg !16 -; CHECK-NOT: @llvm.dbg.value(metadata i32 %sub +; CHECK-NOT: #dbg_value(i32 %sub ; CHECK: %result.0 = select i1 %cmp, i32 %add, i32 %sub -; CHECK: call void @llvm.dbg.value(metadata i32 %result.0, metadata !12, metadata !DIExpression()), !dbg !13 +; CHECK: #dbg_value(i32 %result.0, !12, !DIExpression(), !13 ; ModuleID = 'pr38763.cpp' source_filename = "pr38763.cpp" diff --git a/llvm/test/CodeGen/X86/select-optimize.ll b/llvm/test/CodeGen/X86/select-optimize.ll index aa04db882f5d40..8b26542b27a18f 100644 --- a/llvm/test/CodeGen/X86/select-optimize.ll +++ 
b/llvm/test/CodeGen/X86/select-optimize.ll @@ -13,8 +13,8 @@ ; If a select is obviously predictable, turn it into a branch. define i32 @weighted_select1(i32 %a, i32 %b, i1 %cmp) { ; CHECK-LABEL: @weighted_select1( -; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] -; CHECK-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF16:![0-9]+]] +; CHECK-NEXT: [[CMP_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] +; CHECK-NEXT: br i1 [[CMP_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF16:![0-9]+]] ; CHECK: select.false: ; CHECK-NEXT: br label [[SELECT_END]] ; CHECK: select.end: @@ -29,8 +29,8 @@ define i32 @weighted_select1(i32 %a, i32 %b, i1 %cmp) { ; turn it into a branch. define i32 @weighted_select2(i32 %a, i32 %b, i1 %cmp) { ; CHECK-LABEL: @weighted_select2( -; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] -; CHECK-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF17:![0-9]+]] +; CHECK-NEXT: [[CMP_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] +; CHECK-NEXT: br i1 [[CMP_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF17:![0-9]+]] ; CHECK: select.false: ; CHECK-NEXT: br label [[SELECT_END]] ; CHECK: select.end: @@ -54,7 +54,7 @@ define i32 @weighted_select3(i32 %a, i32 %b, i1 %cmp) { ; Unpredictable select should not form a branch. 
define i32 @unpred_select(i32 %a, i32 %b, i1 %cmp) { ; CHECK-LABEL: @unpred_select( -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], !unpredictable !19 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], !unpredictable [[META19:![0-9]+]] ; CHECK-NEXT: ret i32 [[SEL]] ; %sel = select i1 %cmp, i32 %a, i32 %b, !unpredictable !20 @@ -84,15 +84,15 @@ define i32 @weighted_select_pgso(i32 %a, i32 %b, i1 %cmp) !prof !14 { define i32 @weighted_selects(i32 %a, i32 %b) !prof !19 { ; CHECK-LABEL: @weighted_selects( ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[A:%.*]], 0 -; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i1 [[CMP]] -; CHECK-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF16]] +; CHECK-NEXT: [[CMP_FROZEN:%.*]] = freeze i1 [[CMP]] +; CHECK-NEXT: br i1 [[CMP_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF16]] ; CHECK: select.false: ; CHECK-NEXT: br label [[SELECT_END]] ; CHECK: select.end: ; CHECK-NEXT: [[SEL:%.*]] = phi i32 [ [[A]], [[TMP0:%.*]] ], [ [[B:%.*]], [[SELECT_FALSE]] ] ; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 [[SEL]], 0 -; CHECK-NEXT: [[SEL1_FROZEN:%.*]] = freeze i1 [[CMP1]] -; CHECK-NEXT: br i1 [[SEL1_FROZEN]], label [[SELECT_END1:%.*]], label [[SELECT_FALSE2:%.*]], !prof [[PROF16]] +; CHECK-NEXT: [[CMP1_FROZEN:%.*]] = freeze i1 [[CMP1]] +; CHECK-NEXT: br i1 [[CMP1_FROZEN]], label [[SELECT_END1:%.*]], label [[SELECT_FALSE2:%.*]], !prof [[PROF16]] ; CHECK: select.false2: ; CHECK-NEXT: br label [[SELECT_END1]] ; CHECK: select.end1: @@ -110,8 +110,8 @@ define i32 @weighted_selects(i32 %a, i32 %b) !prof !19 { define i32 @weighted_select_group(i32 %a, i32 %b, i32 %c, i1 %cmp) !prof !19 { ; CHECK-LABEL: @weighted_select_group( ; CHECK-NEXT: [[A1:%.*]] = add i32 [[A:%.*]], 1 -; CHECK-NEXT: [[SEL1_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] -; CHECK-NEXT: br i1 [[SEL1_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_FALSE_SINK:%.*]], 
!prof [[PROF16]] +; CHECK-NEXT: [[CMP_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] +; CHECK-NEXT: br i1 [[CMP_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_FALSE_SINK:%.*]], !prof [[PROF16]] ; CHECK: select.true.sink: ; CHECK-NEXT: [[C1:%.*]] = add i32 [[C:%.*]], 1 ; CHECK-NEXT: br label [[SELECT_END:%.*]] @@ -121,7 +121,7 @@ define i32 @weighted_select_group(i32 %a, i32 %b, i32 %c, i1 %cmp) !prof !19 { ; CHECK: select.end: ; CHECK-NEXT: [[SEL1:%.*]] = phi i32 [ [[A1]], [[SELECT_TRUE_SINK]] ], [ [[B1]], [[SELECT_FALSE_SINK]] ] ; CHECK-NEXT: [[SEL2:%.*]] = phi i32 [ [[C1]], [[SELECT_TRUE_SINK]] ], [ [[A1]], [[SELECT_FALSE_SINK]] ] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[SEL1]], metadata [[META22:![0-9]+]], metadata !DIExpression()), !dbg [[DBG26:![0-9]+]] +; CHECK-NEXT: #dbg_value(i32 [[SEL1]], [[META22:![0-9]+]], !DIExpression(), [[META26:![0-9]+]]) ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SEL1]], [[SEL2]] ; CHECK-NEXT: ret i32 [[ADD]] ; @@ -138,8 +138,8 @@ define i32 @weighted_select_group(i32 %a, i32 %b, i32 %c, i1 %cmp) !prof !19 { ; Predictable select group with intra-group dependence converted to branch define i32 @select_group_intra_group(i32 %a, i32 %b, i32 %c, i1 %cmp) { ; CHECK-LABEL: @select_group_intra_group( -; CHECK-NEXT: [[SEL1_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] -; CHECK-NEXT: br i1 [[SEL1_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF16]] +; CHECK-NEXT: [[CMP_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] +; CHECK-NEXT: br i1 [[CMP_FROZEN]], label [[SELECT_END:%.*]], label [[SELECT_FALSE:%.*]], !prof [[PROF16]] ; CHECK: select.false: ; CHECK-NEXT: br label [[SELECT_END]] ; CHECK: select.end: @@ -163,8 +163,8 @@ define i32 @select_group_intra_group(i32 %a, i32 %b, i32 %c, i1 %cmp) { ; sink load define i32 @expensive_val_operand1(ptr nocapture %a, i32 %y, i1 %cmp) { ; CHECK-LABEL: @expensive_val_operand1( -; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] -; CHECK-NEXT: br i1 [[SEL_FROZEN]], label 
[[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !prof [[PROF18]] +; CHECK-NEXT: [[CMP_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] +; CHECK-NEXT: br i1 [[CMP_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !prof [[PROF18]] ; CHECK: select.true.sink: ; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[A:%.*]], align 8 ; CHECK-NEXT: br label [[SELECT_END]] @@ -193,8 +193,8 @@ define i32 @expensive_val_operand2(ptr nocapture %a, i32 %x, i1 %cmp) { ; into a branch with sinked dependence slice. define i32 @expensive_val_operand3(ptr nocapture %a, i32 %b, i32 %y, i1 %cmp) { ; CHECK-LABEL: @expensive_val_operand3( -; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] -; CHECK-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !prof [[PROF18]] +; CHECK-NEXT: [[CMP_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] +; CHECK-NEXT: br i1 [[CMP_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !prof [[PROF18]] ; CHECK: select.true.sink: ; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[A:%.*]], align 8 ; CHECK-NEXT: [[X:%.*]] = add i32 [[LOAD]], [[B:%.*]] @@ -214,8 +214,8 @@ define i32 @expensive_val_operand4(ptr nocapture %a, i32 %b, i32 %y, i1 %cmp) { ; CHECK-LABEL: @expensive_val_operand4( ; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[A:%.*]], align 8 ; CHECK-NEXT: call void @free(ptr [[A]]) -; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] -; CHECK-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !prof [[PROF18]] +; CHECK-NEXT: [[CMP_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] +; CHECK-NEXT: br i1 [[CMP_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !prof [[PROF18]] ; CHECK: select.true.sink: ; CHECK-NEXT: [[X:%.*]] = add i32 [[LOAD]], [[B:%.*]] ; CHECK-NEXT: br label [[SELECT_END]] @@ -235,8 +235,8 @@ define i32 @expensive_val_operand5(ptr nocapture %a, i32 %b, i32 %y, i1 %cmp) { ; CHECK-LABEL: @expensive_val_operand5( ; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr 
[[A:%.*]], align 8 ; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 2, ptr nonnull [[A]]) -; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] -; CHECK-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !prof [[PROF18]] +; CHECK-NEXT: [[CMP_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] +; CHECK-NEXT: br i1 [[CMP_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !prof [[PROF18]] ; CHECK: select.true.sink: ; CHECK-NEXT: [[X:%.*]] = add i32 [[LOAD]], [[B:%.*]] ; CHECK-NEXT: br label [[SELECT_END]] @@ -259,8 +259,8 @@ define i32 @expensive_val_operand6(ptr nocapture %a, i32 %b, i32 %y, i1 %cmp) { ; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[A:%.*]], align 8 ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: -; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] -; CHECK-NEXT: br i1 [[SEL_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !prof [[PROF18]] +; CHECK-NEXT: [[CMP_FROZEN:%.*]] = freeze i1 [[CMP:%.*]] +; CHECK-NEXT: br i1 [[CMP_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !prof [[PROF18]] ; CHECK: select.true.sink: ; CHECK-NEXT: [[X:%.*]] = add i32 [[LOAD]], [[B:%.*]] ; CHECK-NEXT: br label [[SELECT_END]] @@ -323,8 +323,8 @@ define double @cmov_on_critical_path(i32 %n, double %x, ptr nocapture %a) { ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[R:%.*]] = load double, ptr [[ARRAYIDX]], align 8 ; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt double [[X1]], [[R]] -; CHECK-NEXT: [[X2_FROZEN:%.*]] = freeze i1 [[CMP2]] -; CHECK-NEXT: br i1 [[X2_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END]], !prof [[PROF27:![0-9]+]] +; CHECK-NEXT: [[CMP2_FROZEN:%.*]] = freeze i1 [[CMP2]] +; CHECK-NEXT: br i1 [[CMP2_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END]], !prof [[PROF27:![0-9]+]] ; CHECK: select.true.sink: ; CHECK-NEXT: [[SUB:%.*]] = fsub double [[X1]], [[R]] ; CHECK-NEXT: br label [[SELECT_END]] 
@@ -465,8 +465,8 @@ define double @loop_select_groups(i32 %n, double %x, ptr nocapture %a, i32 %k) { ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX]], align 8 ; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt double [[X_ADDR_022]], [[TMP0]] -; CHECK-NEXT: [[SUB_FROZEN:%.*]] = freeze i1 [[CMP1]] -; CHECK-NEXT: br i1 [[SUB_FROZEN]], label [[SELECT_END]], label [[SELECT_FALSE:%.*]] +; CHECK-NEXT: [[CMP1_FROZEN:%.*]] = freeze i1 [[CMP1]] +; CHECK-NEXT: br i1 [[CMP1_FROZEN]], label [[SELECT_END]], label [[SELECT_FALSE:%.*]] ; CHECK: select.false: ; CHECK-NEXT: br label [[SELECT_END]] ; CHECK: select.end: diff --git a/llvm/test/DebugInfo/AArch64/ir-outliner.ll b/llvm/test/DebugInfo/AArch64/ir-outliner.ll index bcf76ab827d9fa..168232e2158714 100644 --- a/llvm/test/DebugInfo/AArch64/ir-outliner.ll +++ b/llvm/test/DebugInfo/AArch64/ir-outliner.ll @@ -125,11 +125,11 @@ attributes #0 = { nounwind readnone speculatable willreturn } ; IRDEBUG-LABEL: @outline_debug1( ; IRDEBUG-NEXT: entry: ; IRDEBUG-NEXT: [[A:%.*]] = alloca i32, align 4, !dbg [[DBG17:![0-9]+]] -; IRDEBUG-NEXT: call void @llvm.dbg.value(metadata ptr [[A]], metadata [[META9:![0-9]+]], metadata !DIExpression()), !dbg [[DBG17]] +; IRDEBUG-NEXT: #dbg_value(ptr [[A]], [[META9:![0-9]+]], !DIExpression(), [[DBG17]]) ; IRDEBUG-NEXT: [[B:%.*]] = alloca i32, align 4, !dbg [[DBG18:![0-9]+]] -; IRDEBUG-NEXT: call void @llvm.dbg.value(metadata ptr [[B]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG18]] +; IRDEBUG-NEXT: #dbg_value(ptr [[B]], [[META11:![0-9]+]], !DIExpression(), [[DBG18]]) ; IRDEBUG-NEXT: [[C:%.*]] = alloca i32, align 4, !dbg [[DBG19:![0-9]+]] -; IRDEBUG-NEXT: call void @llvm.dbg.value(metadata ptr [[C]], metadata [[META12:![0-9]+]], metadata !DIExpression()), !dbg [[DBG19]] +; IRDEBUG-NEXT: #dbg_value(ptr [[C]], [[META12:![0-9]+]], !DIExpression(), [[DBG19]]) ; IRDEBUG-NEXT: call void 
@outlined_ir_func_0(ptr [[A]], ptr [[B]], ptr [[C]]), !dbg [[DBG20:![0-9]+]] ; IRDEBUG-NEXT: ret void, !dbg [[DBG21:![0-9]+]] ; @@ -137,22 +137,26 @@ attributes #0 = { nounwind readnone speculatable willreturn } ; IRDEBUG-LABEL: @outline_debug2( ; IRDEBUG-NEXT: entry: ; IRDEBUG-NEXT: [[A:%.*]] = alloca i32, align 4, !dbg [[DBG30:![0-9]+]] -; IRDEBUG-NEXT: call void @llvm.dbg.value(metadata ptr [[A]], metadata [[META24:![0-9]+]], metadata !DIExpression()), !dbg [[DBG30]] +; IRDEBUG-NEXT: #dbg_value(ptr [[A]], [[META24:![0-9]+]], !DIExpression(), [[DBG30]]) ; IRDEBUG-NEXT: [[B:%.*]] = alloca i32, align 4, !dbg [[DBG31:![0-9]+]] -; IRDEBUG-NEXT: call void @llvm.dbg.value(metadata ptr [[B]], metadata [[META25:![0-9]+]], metadata !DIExpression()), !dbg [[DBG31]] +; IRDEBUG-NEXT: #dbg_value(ptr [[B]], [[META25:![0-9]+]], !DIExpression(), [[DBG31]]) ; IRDEBUG-NEXT: [[C:%.*]] = alloca i32, align 4, !dbg [[DBG32:![0-9]+]] -; IRDEBUG-NEXT: call void @llvm.dbg.value(metadata ptr [[C]], metadata [[META26:![0-9]+]], metadata !DIExpression()), !dbg [[DBG32]] +; IRDEBUG-NEXT: #dbg_value(ptr [[C]], [[META26:![0-9]+]], !DIExpression(), [[DBG32]]) ; IRDEBUG-NEXT: call void @outlined_ir_func_0(ptr [[A]], ptr [[B]], ptr [[C]]), !dbg [[DBG33:![0-9]+]] ; IRDEBUG-NEXT: ret void, !dbg [[DBG34:![0-9]+]] ; ; -; IRDEBUG: @outlined_ir_func_0(ptr [[TMP0:%.*]], ptr [[TMP1:%.*]], ptr [[TMP2:%.*]]) +; IRDEBUG-LABEL: define {{.+}} @outlined_ir_func_0( +; IRDEBUG-NEXT: newFuncRoot: +; IRDEBUG-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]] ; IRDEBUG: entry_to_outline: -; IRDEBUG-NEXT: store i32 2, ptr [[TMP0]], align 4 -; IRDEBUG-NEXT: store i32 3, ptr [[TMP1]], align 4 -; IRDEBUG-NEXT: store i32 4, ptr [[TMP2]], align 4 +; IRDEBUG-NEXT: store i32 2, ptr [[TMP0:%.*]], align 4 +; IRDEBUG-NEXT: store i32 3, ptr [[TMP1:%.*]], align 4 +; IRDEBUG-NEXT: store i32 4, ptr [[TMP2:%.*]], align 4 ; IRDEBUG-NEXT: [[AL:%.*]] = load i32, ptr [[TMP0]], align 4 ; IRDEBUG-NEXT: [[BL:%.*]] = load i32, ptr [[TMP1]], align 4 
; IRDEBUG-NEXT: [[CL:%.*]] = load i32, ptr [[TMP2]], align 4 ; IRDEBUG-NEXT: br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]] +; IRDEBUG: entry_after_outline.exitStub: +; IRDEBUG-NEXT: ret void ; diff --git a/llvm/test/DebugInfo/AArch64/select-optimize-trailing-dbg-records.ll b/llvm/test/DebugInfo/AArch64/select-optimize-trailing-dbg-records.ll index 4ae1fb4fc7bcc0..3083f390915aa3 100644 --- a/llvm/test/DebugInfo/AArch64/select-optimize-trailing-dbg-records.ll +++ b/llvm/test/DebugInfo/AArch64/select-optimize-trailing-dbg-records.ll @@ -8,7 +8,7 @@ ; CHECK: select.end: ; CHECK-NEXT: %[[PHI:.*]] = phi i32 -; CHECK-NEXT: dbg.value(metadata i32 %[[PHI]], +; CHECK-NEXT: #dbg_value(i32 %[[PHI]], source_filename = "test.ll" target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" diff --git a/llvm/test/DebugInfo/ARM/hardware-loop-phi-insertion.ll b/llvm/test/DebugInfo/ARM/hardware-loop-phi-insertion.ll index 9240bf25b6f63f..f384b989a50d66 100644 --- a/llvm/test/DebugInfo/ARM/hardware-loop-phi-insertion.ll +++ b/llvm/test/DebugInfo/ARM/hardware-loop-phi-insertion.ll @@ -6,7 +6,7 @@ ; CHECK-LABEL: for.body: ; CHECK-NEXT: = phi i32 ; CHECK-NEXT: = phi i32 -; CHECK-NEXT: call void @llvm.dbg.value +; CHECK-NEXT: #dbg_value source_filename = "repro.c" target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" diff --git a/llvm/test/DebugInfo/ARM/lowerbdgdeclare_vla.ll b/llvm/test/DebugInfo/ARM/lowerbdgdeclare_vla.ll index 2a0ee339e52e8d..1b19e8e11fa414 100644 --- a/llvm/test/DebugInfo/ARM/lowerbdgdeclare_vla.ll +++ b/llvm/test/DebugInfo/ARM/lowerbdgdeclare_vla.ll @@ -26,9 +26,9 @@ entry: %vla = alloca float, i32 %conv, align 4, !dbg !24 tail call void @llvm.dbg.declare(metadata ptr %vla, metadata !14, metadata !DIExpression(DW_OP_deref)), !dbg !24 ; The VLA alloca should be described by a dbg.declare: -; CHECK: call void @llvm.dbg.declare(metadata ptr %vla, metadata ![[VLA:.*]], metadata {{.*}}) +; CHECK: #dbg_declare(ptr %vla, ![[VLA:.*]], {{.*}}) 
; The VLA alloca and following store into the array should not be lowered to like this: -; CHECK-NOT: call void @llvm.dbg.value(metadata float %r, metadata ![[VLA]]) +; CHECK-NOT: #dbg_value(float %r, ![[VLA]]) ; the backend interprets this as "vla has the location of %r". store float %r, ptr %vla, align 4, !dbg !25, !tbaa !26 tail call void @llvm.dbg.value(metadata i32 0, metadata !18, metadata !DIExpression()), !dbg !30 diff --git a/llvm/test/DebugInfo/ARM/salvage-debug-info.ll b/llvm/test/DebugInfo/ARM/salvage-debug-info.ll index 1564a80ded0e80..43d3412762c09b 100644 --- a/llvm/test/DebugInfo/ARM/salvage-debug-info.ll +++ b/llvm/test/DebugInfo/ARM/salvage-debug-info.ll @@ -31,7 +31,7 @@ entry: if.then: ; preds = %entry %1 = inttoptr i32 %0 to ptr, !dbg !27 tail call void @llvm.dbg.value(metadata ptr %1, metadata !22, metadata !DIExpression()), !dbg !28 - ; CHECK: call void @llvm.dbg.value(metadata i32 %0, metadata !22, metadata !DIExpression()) + ; CHECK: #dbg_value(i32 %0, !22, !DIExpression(), tail call void @llvm.dbg.value(metadata i32 0, metadata !20, metadata !DIExpression()), !dbg !29 %2 = load i32, ptr @n, align 4, !dbg !30 %cmp5 = icmp eq i32 %2, 0, !dbg !33 diff --git a/llvm/test/DebugInfo/ARM/sroa-complex.ll b/llvm/test/DebugInfo/ARM/sroa-complex.ll index f85e4cd29ba9e3..6948a411d1d052 100644 --- a/llvm/test/DebugInfo/ARM/sroa-complex.ll +++ b/llvm/test/DebugInfo/ARM/sroa-complex.ll @@ -17,13 +17,13 @@ entry: store double 0.000000e+00, ptr %c.realp, align 8, !dbg !17 ; SROA will split the complex double into two double values. 
; Test that debug info for both values survives: - ; CHECK: call void @llvm.dbg.value(metadata double 0.000000e+00, - ; CHECK-SAME: metadata ![[C:[^,]*]], - ; CHECK-SAME: metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)) + ; CHECK: #dbg_value(double 0.000000e+00, + ; CHECK-SAME: ![[C:[^,]*]], + ; CHECK-SAME: !DIExpression(DW_OP_LLVM_fragment, 0, 64), store double 0.000000e+00, ptr %c.imagp, align 8, !dbg !17 - ; CHECK: call void @llvm.dbg.value(metadata double 0.000000e+00, - ; CHECK-SAME: metadata ![[C]], - ; CHECK-SAME: metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)) + ; CHECK: #dbg_value(double 0.000000e+00, + ; CHECK-SAME: ![[C]], + ; CHECK-SAME: !DIExpression(DW_OP_LLVM_fragment, 64, 64), ret void, !dbg !18 } diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/adce/no-delete.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/adce/no-delete.ll index 93931c6f68e340..16e6585c1a1501 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/adce/no-delete.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/adce/no-delete.ll @@ -12,8 +12,8 @@ ;; attachments have been deleted) but still linked to an instruction are not ;; deleted by ADCE. -; CHECK: llvm.dbg.assign -; CHECK: llvm.dbg.assign +; CHECK: #dbg_assign +; CHECK: #dbg_assign define dso_local void @fun(i32 noundef %local) #0 !dbg !7 { entry: diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/codegenprepare/sunk-addr.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/codegenprepare/sunk-addr.ll index 8b226aa6633060..8f0350c0e34afe 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/codegenprepare/sunk-addr.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/codegenprepare/sunk-addr.ll @@ -26,10 +26,10 @@ next: ; updated, and the other should not. 
; CHECK-LABEL: next: ; CHECK: %[[CASTVAR:[0-9a-zA-Z]+]] = bitcast ptr %p to ptr -; CHECK-NEXT: dbg.assign(metadata ptr %arith, metadata ![[DIVAR:[0-9]+]], +; CHECK-NEXT: #dbg_assign(ptr %arith, ![[DIVAR:[0-9]+]], ; CHECK-NEXT: %[[GEPVAR:[0-9a-zA-Z]+]] = getelementptr i8, ptr %[[CASTVAR]], i64 3 ; CHECK-NEXT: %loaded = load i8, ptr %[[GEPVAR]] -; CHECK-NEXT: dbg.assign(metadata ptr %[[GEPVAR]], metadata ![[DIVAR]], +; CHECK-NEXT: #dbg_assign(ptr %[[GEPVAR]], ![[DIVAR]], call void @llvm.dbg.assign(metadata ptr %arith, metadata !12, metadata !DIExpression(), metadata !21, metadata ptr undef, metadata !DIExpression()), !dbg !14 %loaded = load i8, ptr %arith call void @llvm.dbg.assign(metadata ptr %arith, metadata !12, metadata !DIExpression(), metadata !21, metadata ptr undef, metadata !DIExpression()), !dbg !14 diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/declare-to-assign/hwasan.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/declare-to-assign/hwasan.ll index f7f126cf3c514e..afbc59b4c66056 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/declare-to-assign/hwasan.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/declare-to-assign/hwasan.ll @@ -1,7 +1,7 @@ ; RUN: opt %s -S -passes=declare-to-assign -o - | FileCheck %s ; RUN: opt --try-experimental-debuginfo-iterators %s -S -passes=declare-to-assign -o - | FileCheck %s -; CHECK: call void @llvm.dbg.assign +; CHECK: #dbg_assign define dso_local void @f() sanitize_hwaddress !dbg !9 { entry: diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/declare-to-assign/long-double-x87.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/declare-to-assign/long-double-x87.ll index 3149dcb6ebc31c..1ee2d99fcfdf8f 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/declare-to-assign/long-double-x87.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/declare-to-assign/long-double-x87.ll @@ -11,7 +11,7 @@ ;; alloca (80 bits) can be represented with assignment tracking. 
Create a ;; fragment for the dbg.assign for bits 0-80. -; CHECK: llvm.dbg.assign(metadata i1 undef, metadata ![[#]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 80), metadata ![[#]], metadata ptr %f, metadata !DIExpression()) +; CHECK: #dbg_assign(i1 undef, ![[#]], !DIExpression(DW_OP_LLVM_fragment, 0, 80), ![[#]], ptr %f, !DIExpression(), define dso_local void @_Z3funv() #0 !dbg !10 { entry: diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/declare-to-assign/nullptr-declare.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/declare-to-assign/nullptr-declare.ll index a795cc4c2dae3a..658ec9d3d71073 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/declare-to-assign/nullptr-declare.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/declare-to-assign/nullptr-declare.ll @@ -4,8 +4,8 @@ ;; Check AssignmentTrackingPass ignores a dbg.declare with an empty metadata ;; location operand. -; CHECK: call void @llvm.dbg.declare -; CHECK-NOT: call void @llvm.dbg.assign +; CHECK: #dbg_declare +; CHECK-NOT: #dbg_assign define dso_local void @_Z3funv() #0 !dbg !10 { entry: diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/declare-to-assign/scalable-vector.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/declare-to-assign/scalable-vector.ll index 2b9c9bf16c9a47..63ea6e15c887ba 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/declare-to-assign/scalable-vector.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/declare-to-assign/scalable-vector.ll @@ -4,7 +4,7 @@ ;; Check declare-to-assign skips scalable vectors for now. i.e. do not replace ;; the dbg.declare with a dbg.assign intrinsic. 
-; CHECK: call void @llvm.dbg.declare(metadata ptr %c +; CHECK: #dbg_declare(ptr %c define dso_local void @b() !dbg !9 { entry: diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/declare-to-assign/structured-bindings.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/declare-to-assign/structured-bindings.ll index 892e8501ebf357..fcd8498391c7f9 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/declare-to-assign/structured-bindings.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/declare-to-assign/structured-bindings.ll @@ -13,13 +13,13 @@ ;; } ; CHECK: %0 = alloca %struct.two, align 4, !DIAssignID ![[ID1:[0-9]+]] -; CHECK-NEXT: llvm.dbg.assign(metadata i1 undef, metadata ![[AGGR:[0-9]+]], metadata !DIExpression(), metadata ![[ID1]], metadata ptr %0, metadata !DIExpression()) -; CHECK-NEXT: llvm.dbg.assign(metadata i1 undef, metadata ![[A:[0-9]+]], metadata !DIExpression(), metadata ![[ID1]], metadata ptr %0, metadata !DIExpression()) -; CHECK-NEXT: llvm.dbg.declare(metadata ptr %0, metadata ![[B:[0-9]+]], metadata !DIExpression(DW_OP_plus_uconst, 4)) +; CHECK-NEXT: #dbg_assign(i1 undef, ![[AGGR:[0-9]+]], !DIExpression(), ![[ID1]], ptr %0, !DIExpression(), +; CHECK-NEXT: #dbg_assign(i1 undef, ![[A:[0-9]+]], !DIExpression(), ![[ID1]], ptr %0, !DIExpression(), +; CHECK-NEXT: #dbg_declare(ptr %0, ![[B:[0-9]+]], !DIExpression(DW_OP_plus_uconst, 4), ; CHECK: store i64 %call, ptr %0, align 4,{{.*}}, !DIAssignID ![[ID2:[0-9]+]] -; CHECK-NEXT: llvm.dbg.assign(metadata i64 %call, metadata ![[AGGR]], metadata !DIExpression(), metadata ![[ID2]], metadata ptr %0, metadata !DIExpression()) -; CHECK-NEXT: llvm.dbg.assign(metadata i64 %call, metadata ![[A]], metadata !DIExpression(), metadata ![[ID2]], metadata ptr %0, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i64 %call, ![[AGGR]], !DIExpression(), ![[ID2]], ptr %0, !DIExpression(), +; CHECK-NEXT: #dbg_assign(i64 %call, ![[A]], !DIExpression(), ![[ID2]], ptr %0, !DIExpression(), ; CHECK: 
![[AGGR]] = !DILocalVariable(scope: ; CHECK: ![[A]] = !DILocalVariable(name: "a", scope: diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/declare-to-assign/var-not-alloca-sized.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/declare-to-assign/var-not-alloca-sized.ll index c009fdcc238cb8..0fa9dd17726bd9 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/declare-to-assign/var-not-alloca-sized.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/declare-to-assign/var-not-alloca-sized.ll @@ -17,24 +17,24 @@ entry: %0 = alloca [4 x i16], align 4 call void @llvm.dbg.declare(metadata ptr %0, metadata !15, metadata !DIExpression()), !dbg !16 ; CHECK: %0 = alloca [4 x i16], align 4, !DIAssignID ![[ID1:[0-9]+]] -; CHECK-NEXT: llvm.dbg.assign(metadata i1 undef, metadata ![[#]], metadata !DIExpression(), metadata ![[ID1]], metadata ptr %0, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i1 undef, ![[#]], !DIExpression(), ![[ID1]], ptr %0, !DIExpression(), %a = getelementptr inbounds [4 x i16], ptr %0, i32 0, i32 0 %a.5 = getelementptr inbounds [4 x i16], ptr %0, i32 0, i32 1 %b = getelementptr inbounds [4 x i16], ptr %0, i32 0, i32 2 store i64 1, ptr %a, align 4 ; CHECK: store i64 1, ptr %a, align 4, !DIAssignID ![[ID2:[0-9]+]] -; CHECK-NEXT: llvm.dbg.assign(metadata i64 1, metadata ![[#]], metadata !DIExpression(), metadata ![[ID2]], metadata ptr %a, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i64 1, ![[#]], !DIExpression(), ![[ID2]], ptr %a, !DIExpression(), store i64 2, ptr %b, align 4 ;; %b is outside the variable bounds, no debug intrinsic needed. 
store i16 3, ptr %a.5, align 4 ; CHECK: store i16 3, ptr %a.5, align 4, !DIAssignID ![[ID3:[0-9]+]] -; CHECK-NEXT: llvm.dbg.assign(metadata i16 3, metadata ![[#]], metadata !DIExpression(DW_OP_LLVM_fragment, 16, 16), metadata ![[ID3]], metadata ptr %a.5, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i16 3, ![[#]], !DIExpression(DW_OP_LLVM_fragment, 16, 16), ![[ID3]], ptr %a.5, !DIExpression(), store i32 4, ptr %a.5, align 4 ; CHECK: store i32 4, ptr %a.5, align 4, !DIAssignID ![[ID4:[0-9]+]] -; CHECK-NEXT: llvm.dbg.assign(metadata i32 4, metadata ![[#]], metadata !DIExpression(DW_OP_LLVM_fragment, 16, 16), metadata ![[ID4]], metadata ptr %a.5, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i32 4, ![[#]], !DIExpression(DW_OP_LLVM_fragment, 16, 16), ![[ID4]], ptr %a.5, !DIExpression(), store i32 5, ptr %a, align 4 ; CHECK: store i32 5, ptr %a, align 4, !DIAssignID ![[ID5:[0-9]+]] -; CHECK-NEXT: llvm.dbg.assign(metadata i32 5, metadata ![[#]], metadata !DIExpression(), metadata ![[ID5]], metadata ptr %a, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i32 5, ![[#]], !DIExpression(), ![[ID5]], ptr %a, !DIExpression(), ret i32 0 } diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/declare-to-assign/vla.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/declare-to-assign/vla.ll index b4e619e0e62ee4..317bc919717de8 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/declare-to-assign/vla.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/declare-to-assign/vla.ll @@ -8,7 +8,7 @@ ;; int x[sz]; ;; } -; CHECK: llvm.dbg.declare(metadata ptr %vla, metadata ![[#]], metadata !DIExpression()) +; CHECK: #dbg_declare(ptr %vla, ![[#]], !DIExpression(), @sz = dso_local global i32 0, align 4 diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/dse/dse-after-memcpyopt-merge.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/dse/dse-after-memcpyopt-merge.ll index c78925f6a900f0..5e9c7b334ce30f 100644 --- 
a/llvm/test/DebugInfo/Generic/assignment-tracking/dse/dse-after-memcpyopt-merge.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/dse/dse-after-memcpyopt-merge.ll @@ -14,11 +14,11 @@ ;; Check that there's an unlinked dbg.assign inserted after each overlapping ;; fragment of the shortened store. ;; -; CHECK: llvm.dbg.assign({{.*}}, metadata ptr %g, metadata !DIExpression()) -; CHECK: llvm.dbg.assign(metadata float 0.000000e+00, metadata ![[#]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 32), metadata ![[ID:[0-9]+]], metadata ptr %arrayidx.i, metadata !DIExpression()) -; CHECK: llvm.dbg.assign(metadata float 0.000000e+00, metadata ![[#]], metadata !DIExpression(DW_OP_LLVM_fragment, 32, 32), metadata ![[ID]], metadata ptr %arrayidx3.i, metadata !DIExpression()) -; CHECK: llvm.dbg.assign(metadata float 0.000000e+00, metadata ![[#]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32), metadata ![[UniqueID1:[0-9]+]], metadata ptr undef, metadata !DIExpression()) -; CHECK: llvm.dbg.assign(metadata float 0.000000e+00, metadata ![[#]], metadata !DIExpression(DW_OP_LLVM_fragment, 96, 32), metadata ![[UniqueID2:[0-9]+]], metadata ptr undef, metadata !DIExpression()) +; CHECK: #dbg_assign({{.*}}, ptr %g, !DIExpression(), +; CHECK: #dbg_assign(float 0.000000e+00, ![[#]], !DIExpression(DW_OP_LLVM_fragment, 64, 32), ![[ID:[0-9]+]], ptr %arrayidx.i, !DIExpression(), +; CHECK: #dbg_assign(float 0.000000e+00, ![[#]], !DIExpression(DW_OP_LLVM_fragment, 32, 32), ![[ID]], ptr %arrayidx3.i, !DIExpression(), +; CHECK: #dbg_assign(float 0.000000e+00, ![[#]], !DIExpression(DW_OP_LLVM_fragment, 0, 32), ![[UniqueID1:[0-9]+]], ptr undef, !DIExpression(), +; CHECK: #dbg_assign(float 0.000000e+00, ![[#]], !DIExpression(DW_OP_LLVM_fragment, 96, 32), ![[UniqueID2:[0-9]+]], ptr undef, !DIExpression(), ; CHECK: call void @llvm.memset{{.*}}, !DIAssignID ![[ID]] ; CHECK-DAG: ![[ID]] = distinct !DIAssignID() diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/dse/shorten-offset.ll 
b/llvm/test/DebugInfo/Generic/assignment-tracking/dse/shorten-offset.ll index b1f1e242ce038e..ad83b6fd9ff627 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/dse/shorten-offset.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/dse/shorten-offset.ll @@ -29,10 +29,10 @@ ;; bits that overlap the dbg.assign's fagment: [128, 160) (offset=128 size=32). ; CHECK: @_Z10shortenEndv -; CHECK: call void @llvm.dbg.assign({{.*}}, metadata ptr %local, metadata !DIExpression()) +; CHECK: #dbg_assign({{.*}}, ptr %local, !DIExpression(), ; CHECK: call void @llvm.memset{{.*}}, !DIAssignID ![[ID:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i8 0, metadata ![[VAR:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 96), metadata ![[ID:[0-9]+]], metadata ptr %offset_4_bytes, metadata !DIExpression(DW_OP_plus_uconst, 4)) -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i8 0, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 32), metadata ![[UniqueID1:[0-9]+]], metadata ptr undef, metadata !DIExpression({{.*}})) +; CHECK-NEXT: #dbg_assign(i8 0, ![[VAR:[0-9]+]], !DIExpression(DW_OP_LLVM_fragment, 64, 96), ![[ID:[0-9]+]], ptr %offset_4_bytes, !DIExpression(DW_OP_plus_uconst, 4), +; CHECK-NEXT: #dbg_assign(i8 0, ![[VAR]], !DIExpression(DW_OP_LLVM_fragment, 128, 32), ![[UniqueID1:[0-9]+]], ptr undef, !DIExpression({{.*}}), ;; DSE will shorten the first store in shortenStart from [0, 160) bits to [128, ;; 160) bits. Variable 'local2' has been adjusted to be 160 bits. Check we get @@ -41,10 +41,10 @@ ;; [0, 128) (offset=0, size=128). 
; CHECK: @_Z12shortenStartv -; CHECK: call void @llvm.dbg.assign({{.*}}, metadata ptr %local2, metadata !DIExpression()) +; CHECK: #dbg_assign({{.*}}, ptr %local2, !DIExpression(), ; CHECK: call void @llvm.memset{{.*}}, !DIAssignID ![[ID2:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i8 0, metadata ![[VAR2:[0-9]+]], metadata !DIExpression(), metadata ![[ID2]], metadata ptr %local2, metadata !DIExpression()) -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i8 0, metadata ![[VAR2]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 128), metadata ![[UniqueID2:[0-9]+]], metadata ptr undef, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i8 0, ![[VAR2:[0-9]+]], !DIExpression(), ![[ID2]], ptr %local2, !DIExpression(), +; CHECK-NEXT: #dbg_assign(i8 0, ![[VAR2]], !DIExpression(DW_OP_LLVM_fragment, 0, 128), ![[UniqueID2:[0-9]+]], ptr undef, !DIExpression(), ; CHECK-DAG: ![[ID]] = distinct !DIAssignID() ; CHECK-DAG: ![[UniqueID1]] = distinct !DIAssignID() diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/dse/shorten.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/dse/shorten.ll index 9ba999b96723b4..0770629aef5b50 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/dse/shorten.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/dse/shorten.ll @@ -27,13 +27,13 @@ ; CHECK: @_Z10shortenEndv ; CHECK: call void @llvm.memset{{.*}}, !DIAssignID ![[ID:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i8 0, metadata ![[VAR:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 192), metadata ![[ID:[0-9]+]], metadata ptr %local, metadata !DIExpression()) -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i8 0, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 64), metadata ![[UniqueID1:[0-9]+]], metadata ptr undef, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i8 0, ![[VAR:[0-9]+]], !DIExpression(DW_OP_LLVM_fragment, 0, 192), ![[ID:[0-9]+]], ptr %local, !DIExpression(), +; CHECK-NEXT: #dbg_assign(i8 
0, ![[VAR]], !DIExpression(DW_OP_LLVM_fragment, 128, 64), ![[UniqueID1:[0-9]+]], ptr undef, !DIExpression(), ; CHECK: @_Z12shortenStartv ; CHECK: call void @llvm.memset{{.*}}, !DIAssignID ![[ID2:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i8 0, metadata ![[VAR2:[0-9]+]], metadata !DIExpression(), metadata ![[ID2]], metadata ptr %local2, metadata !DIExpression()) -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i8 0, metadata ![[VAR2]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 128), metadata ![[UniqueID2:[0-9]+]], metadata ptr undef, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i8 0, ![[VAR2:[0-9]+]], !DIExpression(), ![[ID2]], ptr %local2, !DIExpression(), +; CHECK-NEXT: #dbg_assign(i8 0, ![[VAR2]], !DIExpression(DW_OP_LLVM_fragment, 0, 128), ![[UniqueID2:[0-9]+]], ptr undef, !DIExpression(), ; CHECK-DAG: ![[ID]] = distinct !DIAssignID() ; CHECK-DAG: ![[UniqueID1]] = distinct !DIAssignID() diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/inline/id.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/inline/id.ll index 3c684e1d1b8961..19feaf6623ba52 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/inline/id.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/inline/id.ll @@ -16,10 +16,10 @@ ; CHECK-LABEL: _Z3funv ; ; CHECK: store i32 5, ptr %val.i, align 4{{.*}}, !DIAssignID [[ID_0:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 5, metadata [[val:![0-9]+]], metadata !DIExpression(), metadata [[ID_0]], metadata ptr %val.i, metadata !DIExpression()), !dbg [[dl_inline_0:![0-9]+]] +; CHECK-NEXT: #dbg_assign(i32 5, [[val:![0-9]+]], !DIExpression(), [[ID_0]], ptr %val.i, !DIExpression(), [[dl_inline_0:![0-9]+]] ; ; CHECK: store i32 5, ptr %val.i1, align 4{{.*}}, !DIAssignID [[ID_1:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 5, metadata [[val]], metadata !DIExpression(), metadata [[ID_1]], metadata ptr %val.i1, metadata !DIExpression()), !dbg [[dl_inline_1:![0-9]+]] +; 
CHECK-NEXT: #dbg_assign(i32 5, [[val]], !DIExpression(), [[ID_1]], ptr %val.i1, !DIExpression(), [[dl_inline_1:![0-9]+]] ; ; CHECK-DAG: [[val]] = !DILocalVariable(name: "val", ; CHECK-DAG: [[dl_inline_0]] = !DILocation({{.*}}inlinedAt diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/inline/inline-stores.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/inline/inline-stores.ll index 25f9db7dcc4315..8520aa0ae9cb62 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/inline/inline-stores.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/inline/inline-stores.ll @@ -84,7 +84,7 @@ entry: ;; ; CHECK-LABEL: define dso_local i32 @_Z2f1v() ; CHECK: store i32 1, ptr %f1_local, align 4,{{.*}} !DIAssignID ![[ID_1:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 1, metadata ![[f1_local:[0-9]+]], metadata !DIExpression(), metadata ![[ID_1]], metadata ptr %f1_local, metadata !DIExpression()), !dbg ![[f1_dbg:[0-9]+]] +; CHECK-NEXT: #dbg_assign(i32 1, ![[f1_local:[0-9]+]], !DIExpression(), ![[ID_1]], ptr %f1_local, !DIExpression(), ![[f1_dbg:[0-9]+]] define dso_local i32 @_Z2f1v() #3 !dbg !37 { entry: %f1_local = alloca i32, align 4, !DIAssignID !42 @@ -107,7 +107,7 @@ entry: ;; ; CHECK-LABEL: define dso_local i32 @_Z2f2v() ; CHECK: store i32 2, ptr %arraydecay, align 4,{{.*}} !DIAssignID ![[ID_2:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 2, metadata ![[f2_local:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32), metadata ![[ID_2]], metadata ptr %arraydecay, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i32 2, ![[f2_local:[0-9]+]], !DIExpression(DW_OP_LLVM_fragment, 0, 32), ![[ID_2]], ptr %arraydecay, !DIExpression(), define dso_local i32 @_Z2f2v() #3 !dbg !49 { entry: %f2_local = alloca [2 x i32], align 4, !DIAssignID !55 @@ -131,7 +131,7 @@ entry: ;; } ; CHECK-LABEL: define dso_local i32 @_Z2f3v() ; CHECK: store i32 3, ptr %add.ptr, align 4,{{.*}} !DIAssignID ![[ID_3:[0-9]+]] -; CHECK-NEXT: call 
void @llvm.dbg.assign(metadata i32 3, metadata ![[f3_local:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 32, 32), metadata ![[ID_3]], metadata ptr %add.ptr, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i32 3, ![[f3_local:[0-9]+]], !DIExpression(DW_OP_LLVM_fragment, 32, 32), ![[ID_3]], ptr %add.ptr, !DIExpression(), define dso_local i32 @_Z2f3v() #3 !dbg !63 { entry: %f3_local = alloca [2 x i32], align 4, !DIAssignID !66 @@ -155,7 +155,7 @@ entry: ;; } ; CHECK-LABEL: define dso_local i32 @_Z2f4i(i32 %f4_param) ; CHECK: store i32 4, ptr %f4_param.addr, align 4,{{.*}} !DIAssignID ![[ID_4:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 4, metadata ![[f4_param:[0-9]+]], metadata !DIExpression(), metadata ![[ID_4]], metadata ptr %f4_param.addr, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i32 4, ![[f4_param:[0-9]+]], !DIExpression(), ![[ID_4]], ptr %f4_param.addr, !DIExpression(), define dso_local i32 @_Z2f4i(i32 %f4_param) #3 !dbg !75 { entry: %f4_param.addr = alloca i32, align 4, !DIAssignID !80 @@ -175,7 +175,7 @@ entry: ;; } ; CHECK-LABEL: define dso_local i32 @_Z2f5i(i32 %f5_param) ; CHECK: store i32 5, ptr %f5_param.addr, align 4,{{.*}}!DIAssignID ![[ID_5:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 5, metadata ![[f5_param:[0-9]+]], metadata !DIExpression(), metadata ![[ID_5]], metadata ptr %f5_param.addr, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i32 5, ![[f5_param:[0-9]+]], !DIExpression(), ![[ID_5]], ptr %f5_param.addr, !DIExpression(), define dso_local i32 @_Z2f5i(i32 %f5_param) #3 !dbg !86 { entry: %f5_param.addr = alloca i32, align 4, !DIAssignID !91 @@ -196,7 +196,7 @@ entry: ;; } ; CHECK-LABEL: define dso_local i32 @_Z2f6v() ; CHECK: store i32 6, ptr %f6_local, align 4,{{.*}} !DIAssignID ![[ID_6:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 6, metadata ![[f6_local:[0-9]+]], metadata !DIExpression(), metadata ![[ID_6]], metadata ptr %f6_local, metadata !DIExpression()) +; 
CHECK-NEXT: #dbg_assign(i32 6, ![[f6_local:[0-9]+]], !DIExpression(), ![[ID_6]], ptr %f6_local, !DIExpression(), define dso_local i32 @_Z2f6v() #3 !dbg !99 { entry: %f6_local = alloca i32, align 4, !DIAssignID !102 diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/inline/shared-alloca.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/inline/shared-alloca.ll index 33bc2f196872f0..1f45a0ddb91887 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/inline/shared-alloca.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/inline/shared-alloca.ll @@ -14,15 +14,15 @@ ;; Test A: ; CHECK: %0 = alloca %"struct.llvm::detail::DenseMapPair", i32 0, align 8, !DIAssignID ![[ID1:[0-9]+]] -; CHECK: call void @llvm.dbg.assign(metadata i1 undef, metadata ![[#]], metadata !DIExpression(), metadata ![[ID1]], metadata ptr %0, metadata !DIExpression()) +; CHECK: #dbg_assign(i1 undef, ![[#]], !DIExpression(), ![[ID1]], ptr %0, !DIExpression(), ;; Test B: ;; CHECK: store i64 1, ptr %0, align 4, !DIAssignID ![[ID2:[0-9]+]] -;; CHECK: call void @llvm.dbg.assign(metadata i64 1, metadata ![[#]], metadata !DIExpression(), metadata ![[ID2]], metadata ptr %0, metadata !DIExpression()) +;; CHECK: #dbg_assign(i64 1, ![[#]], !DIExpression(), ![[ID2]], ptr %0, !DIExpression(), ;; Test C: ;; CHECK: store i32 2, ptr %0, align 4, !DIAssignID ![[ID3:[0-9]+]] -;; CHECK: call void @llvm.dbg.assign(metadata i32 2, metadata ![[#]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32), metadata ![[ID3]], metadata ptr %0, metadata !DIExpression()) +;; CHECK: #dbg_assign(i32 2, ![[#]], !DIExpression(DW_OP_LLVM_fragment, 0, 32), ![[ID3]], ptr %0, !DIExpression(), %"struct.llvm::detail::DenseMapPair" = type { %"struct.std::pair" } %"struct.std::pair" = type { ptr, %"class.llvm::SmallVector" } diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/inline/use-before-def.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/inline/use-before-def.ll index ff7e5b18a9439c..627335aed9b676 
100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/inline/use-before-def.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/inline/use-before-def.ll @@ -32,7 +32,7 @@ ; CHECK: define dso_local i32 @fun() ; CHECK-NEXT: entry -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %0 +; CHECK-NEXT: #dbg_value(i32 %0 @g = dso_local local_unnamed_addr global i32 5, align 4, !dbg !0 diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/do-not-remove-redundant-dbg.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/do-not-remove-redundant-dbg.ll index 9763064dac2b05..2e100ed88fc71b 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/do-not-remove-redundant-dbg.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/do-not-remove-redundant-dbg.ll @@ -10,9 +10,9 @@ ;; behaviour is maintained. If it is discovered that it is profitable to remove ;; these intrinsics in instcombine then it's okay to remove this test. -; CHECK: @llvm.dbg.value(metadata i32 undef -; CHECK: @llvm.dbg.value(metadata i32 0 -; CHECK: @llvm.dbg.value(metadata i32 1 +; CHECK: #dbg_value(i32 undef +; CHECK: #dbg_value(i32 0 +; CHECK: #dbg_value(i32 1 define dso_local void @_Z3funv() local_unnamed_addr !dbg !7 { entry: diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/memset.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/memset.ll index 020f8aba0144c9..db16667684be47 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/memset.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/memset.ll @@ -19,7 +19,7 @@ ;; component is correct. 
; CHECK: store i64 0, ptr %local, align 16{{.*}}, !DIAssignID ![[ID:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i64 0, metadata !{{.*}}, metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64), metadata ![[ID]], metadata ptr %local, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i64 0, !{{.*}}, !DIExpression(DW_OP_LLVM_fragment, 0, 64), ![[ID]], ptr %local, !DIExpression(), define dso_local void @_Z3funv() local_unnamed_addr #0 !dbg !7 { entry: diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/remove-redundant-dbg.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/remove-redundant-dbg.ll index cffac06f8e5451..cb4250430106b9 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/remove-redundant-dbg.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/remove-redundant-dbg.ll @@ -7,7 +7,7 @@ ;; change. This has a significant positive impact on peak memory and compiler ;; run time. -; CHECK: @llvm.dbg.assign(metadata i32 1 +; CHECK: #dbg_assign(i32 1 define dso_local void @_Z3funv() local_unnamed_addr !dbg !7 { entry: diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/sink-store.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/sink-store.ll index 6ee567421b0cb8..43279bbfef5400 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/sink-store.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/sink-store.ll @@ -21,10 +21,10 @@ ; CHECK: if.then: ; CHECK-NEXT: %call = call -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 2, metadata ![[LOCAL:[0-9]+]], metadata !DIExpression(), metadata ![[MERGED_ID:[0-9]+]], metadata ptr %local, metadata !DIExpression()), !dbg +; CHECK-NEXT: #dbg_assign(i32 2, ![[LOCAL:[0-9]+]], !DIExpression(), ![[MERGED_ID:[0-9]+]], ptr %local, !DIExpression(), ; CHECK: if.else: -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 2, metadata ![[LOCAL]], metadata !DIExpression(), 
metadata ![[MERGED_ID]], metadata ptr %local, metadata !DIExpression()), !dbg +; CHECK-NEXT: #dbg_assign(i32 2, ![[LOCAL]], !DIExpression(), ![[MERGED_ID]], ptr %local, !DIExpression(), ; CHECK: if.end: ; CHECK-NEXT: store i32 2, ptr %local{{.*}}!DIAssignID ![[MERGED_ID]] diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/sink.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/sink.ll index 0da61f8b4c6aaa..f2aa1c5b198337 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/sink.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/sink.ll @@ -32,7 +32,7 @@ ; CHECK: f.exit: ; CHECK-NEXT: store ptr null, ptr %i, align 8,{{.+}}, !DIAssignID ![[ID:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign({{.+}}, {{.+}}, {{.+}}, metadata ![[ID]], metadata ptr %i, {{.+}}), !dbg +; CHECK-NEXT: #dbg_assign({{.+}}, {{.+}}, {{.+}}, ![[ID]], ptr %i, {{.+}}, %struct.a = type { ptr } diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/store-new-type.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/store-new-type.ll index cf427323dc2e57..0b142aa0ed6007 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/store-new-type.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/store-new-type.ll @@ -9,7 +9,7 @@ define <2 x i4> @shuf_bitcast_insert_use2(<2 x i8> %v, i8 %x, ptr %p) { ; CHECK-LABEL: @shuf_bitcast_insert_use2( ; CHECK-NEXT: [[I:%.*]] = insertelement <2 x i8> [[V:%.*]], i8 [[X:%.*]], i64 0 ; CHECK-NEXT: store <2 x i8> [[I]], ptr [[P:%.*]], align 2, !DIAssignID ![[ID:[0-9]+]] -; CHECK-NEXT: dbg.assign(metadata <2 x i8> %i, {{.+}}, {{.+}}, metadata ![[ID]], metadata ptr %p,{{.+}}) +; CHECK-NEXT: #dbg_assign(<2 x i8> %i, {{.+}}, {{.+}}, ![[ID]], ptr %p,{{.+}}) ; CHECK-NEXT: [[R:%.*]] = bitcast i8 [[X]] to <2 x i4> ; CHECK-NEXT: ret <2 x i4> [[R]] ; diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/storemerge.ll 
b/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/storemerge.ll index 41eef16ce2c350..4a5db0880051fb 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/storemerge.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/instcombine/storemerge.ll @@ -48,11 +48,11 @@ ;; with the same DIAssignID attachments too. ; CHECK: if.then: -; CHECK: call void @llvm.dbg.assign(metadata float %call2, metadata ![[var:[0-9]+]], metadata !DIExpression(), metadata ![[id:[0-9]+]], metadata ptr %p, metadata !DIExpression()), !dbg +; CHECK: #dbg_assign(float %call2, ![[var:[0-9]+]], !DIExpression(), ![[id:[0-9]+]], ptr %p, !DIExpression(), ; CHECK: br label %for.inc ; CHECK: if.else: -; CHECK: call void @llvm.dbg.assign(metadata float %call5, metadata ![[var]], metadata !DIExpression(), metadata ![[id]], metadata ptr %p, metadata !DIExpression()), !dbg +; CHECK: #dbg_assign(float %call5, ![[var]], !DIExpression(), ![[id]], ptr %p, !DIExpression(), ; CHECK: br label %for.inc ; CHECK: for.inc: @@ -60,11 +60,11 @@ ; CHECK-NEXT: store float %storemerge, ptr %p, align 4{{.+}}!DIAssignID ![[id]] ; CHECK: if.then.1: -; CHECK: call void @llvm.dbg.assign(metadata float %call2.1, metadata ![[var]], metadata !DIExpression(), metadata ![[id]], metadata ptr %p, metadata !DIExpression()), !dbg +; CHECK: #dbg_assign(float %call2.1, ![[var]], !DIExpression(), ![[id]], ptr %p, !DIExpression(), ; CHECK: br label %for.inc.1 ; CHECK: if.else.1: -; CHECK: call void @llvm.dbg.assign(metadata float %call5.1, metadata ![[var]], metadata !DIExpression(), metadata ![[id]], metadata ptr %p, metadata !DIExpression()), !dbg +; CHECK: #dbg_assign(float %call5.1, ![[var]], !DIExpression(), ![[id]], ptr %p, !DIExpression(), ; CHECK: br label %for.inc.1 ; CHECK: for.inc.1: @@ -72,11 +72,11 @@ ; CHECK-NEXT: store float %storemerge1, ptr %p, align 4{{.+}}!DIAssignID ![[id]] ; CHECK: if.then.2: -; CHECK: call void @llvm.dbg.assign(metadata float %call2.2, metadata ![[var]], metadata 
!DIExpression(), metadata ![[id]], metadata ptr %p, metadata !DIExpression()), !dbg +; CHECK: #dbg_assign(float %call2.2, ![[var]], !DIExpression(), ![[id]], ptr %p, !DIExpression(), ; CHECK: br label %for.inc.2 ; CHECK: if.else.2: -; CHECK: call void @llvm.dbg.assign(metadata float %call5.2, metadata ![[var]], metadata !DIExpression(), metadata ![[id]], metadata ptr %p, metadata !DIExpression()), !dbg +; CHECK: #dbg_assign(float %call5.2, ![[var]], !DIExpression(), ![[id]], ptr %p, !DIExpression(), ; CHECK: br label %for.inc.2 ; CHECK: for.inc.2: diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/licm/merge.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/licm/merge.ll index 187194dd932618..4c46c487f3a1d5 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/licm/merge.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/licm/merge.ll @@ -20,9 +20,9 @@ ;; this test is useless. ; CHECK-NOT: store i32 %inc, ptr %c.addr ;; Check that the two dbg.assigns now have the same (merged) !DIAssingID ID. 
-; CHECK: call void @llvm.dbg.assign(metadata i32 %inc, metadata ![[VAR_C:[0-9]+]], metadata !DIExpression(), metadata ![[ID:[0-9]+]], metadata ptr %c.addr, metadata !DIExpression()), !dbg +; CHECK: #dbg_assign(i32 %inc, ![[VAR_C:[0-9]+]], !DIExpression(), ![[ID:[0-9]+]], ptr %c.addr, !DIExpression(), ; CHECK-NOT: store i32 %inc, ptr %c.addr -; CHECK: call void @llvm.dbg.assign(metadata i32 %inc, metadata ![[VAR_C]], metadata !DIExpression(), metadata ![[ID]], metadata ptr %c.addr, metadata !DIExpression()), !dbg +; CHECK: #dbg_assign(i32 %inc, ![[VAR_C]], !DIExpression(), ![[ID]], ptr %c.addr, !DIExpression(), ; CHECK-LABEL: for.cond.for.end_crit_edge: ; CHECK-NEXT: %[[PHI:.*]] = phi i32 [ %inc, %for.inc ] diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/licm/multi-exit.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/licm/multi-exit.ll index 29fbd164374f62..183d9ed83c7a43 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/licm/multi-exit.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/licm/multi-exit.ll @@ -44,7 +44,7 @@ ;; and that the new stores share the same DIAssignID. ; CHECK-LABEL: for.cond: -; CHECK: call void @llvm.dbg.assign(metadata i32 1, metadata ![[var:[0-9]+]], metadata !DIExpression(), metadata ![[id:[0-9]+]], metadata ptr %a, metadata !DIExpression()), !dbg +; CHECK: #dbg_assign(i32 1, ![[var:[0-9]+]], !DIExpression(), ![[id:[0-9]+]], ptr %a, !DIExpression(), ; CHECK-LABEL: if.end: ; CHECK-NEXT: store i32 1, ptr %a, align 1,{{.*}}!DIAssignID ![[id]] diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/loop-deletion/dead-loop.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/loop-deletion/dead-loop.ll index 078d7b97b2a971..92b8757547e60b 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/loop-deletion/dead-loop.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/loop-deletion/dead-loop.ll @@ -20,7 +20,7 @@ ;; mistake. 
; CHECK: for.end: -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 undef,{{.+}}, metadata !DIExpression({{.+}}), metadata ![[ID:[0-9]+]], metadata ptr %Counter, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i32 undef,{{.+}}, !DIExpression({{.+}}), ![[ID:[0-9]+]], ptr %Counter, !DIExpression(), ; CHECK-NEXT: store i32 2, ptr %Counter, align 4,{{.*}}!DIAssignID ![[ID]] define dso_local void @_Z3funv() local_unnamed_addr #0 !dbg !7 { diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/loop-vectorize/remove-redundant-dbg.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/loop-vectorize/remove-redundant-dbg.ll index 5c897187086d29..beb45b61b418c6 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/loop-vectorize/remove-redundant-dbg.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/loop-vectorize/remove-redundant-dbg.ll @@ -8,7 +8,7 @@ ;; run time. ;; Check there is only one dbg.assign. -; CHECK: call void @llvm.dbg.assign +; CHECK: #dbg_assign ;; Check that the loop was actually modified. ; CHECK: extractelement diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/mem2reg/phi.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/mem2reg/phi.ll index 8a72377c6375f3..ede967c844c3e6 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/mem2reg/phi.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/mem2reg/phi.ll @@ -9,19 +9,19 @@ ;; dbg.assgin for another variable "b" to the alloca). 
; CHECK: entry: -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %a, metadata ![[B:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %a, metadata ![[A:[0-9]+]] +; CHECK-NEXT: #dbg_value(i32 %a, ![[B:[0-9]+]] +; CHECK-NEXT: #dbg_value(i32 %a, ![[A:[0-9]+]] ; CHECK: if.then: ; CHECK-NEXT: %add = -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %add, metadata ![[B]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %add, metadata ![[A]] +; CHECK-NEXT: #dbg_value(i32 %add, ![[B]] +; CHECK-NEXT: #dbg_value(i32 %add, ![[A]] ; CHECK: if.else: -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 -1, metadata ![[B]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 -1, metadata ![[A]] +; CHECK-NEXT: #dbg_value(i32 -1, ![[B]] +; CHECK-NEXT: #dbg_value(i32 -1, ![[A]] ; CHECK: if.end: ; CHECK-NEXT: %a.addr.0 = phi i32 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %a.addr.0, metadata ![[A]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %a.addr.0, metadata ![[B]] +; CHECK-NEXT: #dbg_value(i32 %a.addr.0, ![[A]] +; CHECK-NEXT: #dbg_value(i32 %a.addr.0, ![[B]] ; CHECK-DAG: ![[A]] = !DILocalVariable(name: "a", ; CHECK-DAG: ![[B]] = !DILocalVariable(name: "b", diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/mem2reg/single-block-alloca.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/mem2reg/single-block-alloca.ll index e3cbe89dceecb4..9b76b12055ba03 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/mem2reg/single-block-alloca.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/mem2reg/single-block-alloca.ll @@ -9,11 +9,11 @@ ;; "b" to the alloca). 
; CHECK: entry: -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %a, metadata ![[B:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %a, metadata ![[A:[0-9]+]] +; CHECK-NEXT: #dbg_value(i32 %a, ![[B:[0-9]+]] +; CHECK-NEXT: #dbg_value(i32 %a, ![[A:[0-9]+]] ; CHECK-NEXT: %add = -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %add, metadata ![[B]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %add, metadata ![[A]] +; CHECK-NEXT: #dbg_value(i32 %add, ![[B]] +; CHECK-NEXT: #dbg_value(i32 %add, ![[A]] ; CHECK-DAG: ![[A]] = !DILocalVariable(name: "a", ; CHECK-DAG: ![[B]] = !DILocalVariable(name: "b", diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/mem2reg/single-store-alloca.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/mem2reg/single-store-alloca.ll index 1753ca5644e3e1..16e9406f2c2930 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/mem2reg/single-store-alloca.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/mem2reg/single-store-alloca.ll @@ -8,8 +8,8 @@ ;; are cleaned up, including duplciates. 
; CHECK: entry: -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %a, metadata ![[B:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %a, metadata ![[A:[0-9]+]] +; CHECK-NEXT: #dbg_value(i32 %a, ![[B:[0-9]+]] +; CHECK-NEXT: #dbg_value(i32 %a, ![[A:[0-9]+]] ; CHECK-NEXT: ret ; CHECK-DAG: ![[A]] = !DILocalVariable(name: "a", diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/mem2reg/store-to-part-of-alloca.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/mem2reg/store-to-part-of-alloca.ll index 7242c4eafafce7..139fe5e3efe20f 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/mem2reg/store-to-part-of-alloca.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/mem2reg/store-to-part-of-alloca.ll @@ -1,7 +1,7 @@ ; RUN: opt -passes=mem2reg -S %s -o - | FileCheck %s --implicit-check-not="call void @llvm.dbg" ; RUN: opt --try-experimental-debuginfo-iterators -passes=mem2reg -S %s -o - | FileCheck %s --implicit-check-not="call void @llvm.dbg" -; CHECK: llvm.dbg.value(metadata i64 0, metadata ![[#]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32)) +; CHECK: #dbg_value(i64 0, ![[#]], !DIExpression(DW_OP_LLVM_fragment, 0, 32), ;; The store has a debug intrinsic attached to it with a fragment size ;; different to the base alloca debug intrinsic fragment size. Check that diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/memcpyopt/merge-stores.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/memcpyopt/merge-stores.ll index 8738e5db3acc14..e04ecb6c66f017 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/memcpyopt/merge-stores.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/memcpyopt/merge-stores.ll @@ -23,10 +23,10 @@ ;; Check that the memset that memcpyopt creates to merge 4 stores merges the ;; DIASsignIDs from the stores. 
-; CHECK: call void @llvm.dbg.assign(metadata float 0.000000e+00, metadata ![[VAR:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 32), metadata ![[ID:[0-9]+]], metadata ptr %arrayidx.i, metadata !DIExpression()) -; CHECK: call void @llvm.dbg.assign(metadata float 0.000000e+00, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 32, 32), metadata ![[ID]], metadata ptr %arrayidx3.i, metadata !DIExpression()) -; CHECK: call void @llvm.dbg.assign(metadata float 0.000000e+00, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32), metadata ![[ID]], metadata ptr %arrayidx5.i, metadata !DIExpression()) -; CHECK: call void @llvm.dbg.assign(metadata float 0.000000e+00, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 96, 32), metadata ![[ID]], metadata ptr %arrayidx7.i, metadata !DIExpression()) +; CHECK: #dbg_assign(float 0.000000e+00, ![[VAR:[0-9]+]], !DIExpression(DW_OP_LLVM_fragment, 64, 32), ![[ID:[0-9]+]], ptr %arrayidx.i, !DIExpression(), +; CHECK: #dbg_assign(float 0.000000e+00, ![[VAR]], !DIExpression(DW_OP_LLVM_fragment, 32, 32), ![[ID]], ptr %arrayidx3.i, !DIExpression(), +; CHECK: #dbg_assign(float 0.000000e+00, ![[VAR]], !DIExpression(DW_OP_LLVM_fragment, 0, 32), ![[ID]], ptr %arrayidx5.i, !DIExpression(), +; CHECK: #dbg_assign(float 0.000000e+00, ![[VAR]], !DIExpression(DW_OP_LLVM_fragment, 96, 32), ![[ID]], ptr %arrayidx7.i, !DIExpression(), ; CHECK: call void @llvm.memset{{.*}}, !DIAssignID ![[ID]] %struct.v = type { [4 x float] } diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/mldst-motion/diamond.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/mldst-motion/diamond.ll index 861241b8552af6..3a8435543fe2b4 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/mldst-motion/diamond.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/mldst-motion/diamond.ll @@ -21,10 +21,10 @@ ;; dbg.assign intrinsics which have been left in their original blocks. 
; CHECK: if.then: -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 1,{{.+}}, metadata !DIExpression(DW_OP_LLVM_fragment, 32, 32), metadata ![[ID:[0-9]+]], metadata ptr %1, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i32 1,{{.+}}, !DIExpression(DW_OP_LLVM_fragment, 32, 32), ![[ID:[0-9]+]], ptr %1, !DIExpression(), ; CHECK: if.else: -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 2,{{.+}}, metadata !DIExpression(DW_OP_LLVM_fragment, 32, 32), metadata ![[ID]], metadata ptr %1, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i32 2,{{.+}}, !DIExpression(DW_OP_LLVM_fragment, 32, 32), ![[ID]], ptr %1, !DIExpression(), ; CHECK: if.end: ; CHECK: store i32 %.sink, ptr %1, align 4{{.+}}, !DIAssignID ![[ID]] diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/optnone.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/optnone.ll index 502d46640713af..fe79caf3b4caf7 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/optnone.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/optnone.ll @@ -7,10 +7,10 @@ ;; Check it doesn't get applied to functions marked optnone. ; CHECK: @_Z3funv -; CHECK: dbg.assign +; CHECK: #dbg_assign ; CHECK: @_Z3funv2 -; CHECK: dbg.declare +; CHECK: #dbg_declare define dso_local void @_Z3funv() local_unnamed_addr !dbg !16 { entry: diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/roundtrip.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/roundtrip.ll index c8fc014fcadf1f..aaf33dff1cc29c 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/roundtrip.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/parse-and-verify/roundtrip.ll @@ -19,7 +19,7 @@ entry: ;; Unlinked llvm.dbg.assign. 
; CHECK-DAG: @fun2() -; CHECK: llvm.dbg.assign(metadata i32 undef, metadata ![[VAR2:[0-9]+]], metadata !DIExpression(), metadata ![[ID2:[0-9]+]], metadata i32 undef, metadata !DIExpression()), !dbg ![[DBG2:[0-9]+]] +; CHECK: #dbg_assign(i32 undef, ![[VAR2:[0-9]+]], !DIExpression(), ![[ID2:[0-9]+]], i32 undef, !DIExpression(), ![[DBG2:[0-9]+]] define dso_local void @fun2() !dbg !15 { entry: %local = alloca i32, align 4 @@ -30,7 +30,7 @@ entry: ;; An llvm.dbg.assign linked to an alloca. ; CHECK-LABEL: @fun3() ; CHECK: %local = alloca i32, align 4, !DIAssignID ![[ID3:[0-9]+]] -; CHECK-NEXT: llvm.dbg.assign(metadata i32 undef, metadata ![[VAR3:[0-9]+]], metadata !DIExpression(), metadata ![[ID3]], metadata i32 undef, metadata !DIExpression()), !dbg ![[DBG3:[0-9]+]] +; CHECK-NEXT: #dbg_assign(i32 undef, ![[VAR3:[0-9]+]], !DIExpression(), ![[ID3]], i32 undef, !DIExpression(), ![[DBG3:[0-9]+]] define dso_local void @fun3() !dbg !19 { entry: %local = alloca i32, align 4, !DIAssignID !22 @@ -41,7 +41,7 @@ entry: ;; Check that using a DIAssignID as an operand before using it as an attachment ;; works (the order of the alloca and dbg.assign has been swapped). ; CHECK-LABEL: @fun4() -; CHECK: llvm.dbg.assign(metadata i32 undef, metadata ![[VAR4:[0-9]+]], metadata !DIExpression(), metadata ![[ID4:[0-9]+]], metadata i32 undef, metadata !DIExpression()), !dbg ![[DBG4:[0-9]+]] +; CHECK: #dbg_assign(i32 undef, ![[VAR4:[0-9]+]], !DIExpression(), ![[ID4:[0-9]+]], i32 undef, !DIExpression(), ![[DBG4:[0-9]+]] ; CHECK-NEXT: %local = alloca i32, align 4, !DIAssignID ![[ID4]] define dso_local void @fun4() !dbg !23 { entry: @@ -54,8 +54,8 @@ entry: ;; There are currently no plans to support DIArgLists for the address component. 
; CHECK-LABEL: @fun5 ; CHECK: %local = alloca i32, align 4, !DIAssignID ![[ID5:[0-9]+]] -; CHECK-NEXT: llvm.dbg.assign(metadata i32 %v, metadata ![[VAR5:[0-9]+]], metadata !DIExpression(), metadata ![[ID5]], metadata ptr %local, metadata !DIExpression()), !dbg ![[DBG5:[0-9]+]] -; CHECK-NEXT: llvm.dbg.assign(metadata !DIArgList(i32 %v, i32 1), metadata ![[VAR5]], metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_minus, DW_OP_stack_value), metadata ![[ID5]], metadata ptr %local, metadata !DIExpression()), !dbg ![[DBG5]] +; CHECK-NEXT: #dbg_assign(i32 %v, ![[VAR5:[0-9]+]], !DIExpression(), ![[ID5]], ptr %local, !DIExpression(), ![[DBG5:[0-9]+]] +; CHECK-NEXT: #dbg_assign(!DIArgList(i32 %v, i32 1), ![[VAR5]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_minus, DW_OP_stack_value), ![[ID5]], ptr %local, !DIExpression(), ![[DBG5]] define dso_local void @fun5(i32 %v) !dbg !27 { entry: %local = alloca i32, align 4, !DIAssignID !30 diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/remove-redundant-fwd-scan-linked.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/remove-redundant-fwd-scan-linked.ll index d472ac7e9cb669..de93dac413f09f 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/remove-redundant-fwd-scan-linked.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/remove-redundant-fwd-scan-linked.ll @@ -33,21 +33,21 @@ ;; Check we don't delete that inserted unlinked dbg.assign. ; CHECK: %a = alloca %struct.e, align 8, !DIAssignID ![[ID_0:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign({{.*}}, metadata ![[ID_0]],{{.*}}) +; CHECK-NEXT: #dbg_assign({{.*}}, ![[ID_0]],{{.*}}) ;; This dbg.assign is linked to the memset. 
-; CHECK: call void @llvm.dbg.assign(metadata ptr null,{{.*}}, metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64), metadata ![[ID_1:[0-9]+]], metadata ptr %b, metadata !DIExpression()) +; CHECK: #dbg_assign(ptr null,{{.*}}, !DIExpression(DW_OP_LLVM_fragment, 64, 64), ![[ID_1:[0-9]+]], ptr %b, !DIExpression(), ;; Importantly, check this unlinked dbg.assign which is shadowed by the ;; dbg.assign above isn't deleted. -; CHECK-NEXT: call void @llvm.dbg.assign(metadata ptr null,{{.*}}, metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64), metadata ![[ID_2:[0-9]+]], metadata ptr undef, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(ptr null,{{.*}}, !DIExpression(DW_OP_LLVM_fragment, 64, 64), ![[ID_2:[0-9]+]], ptr undef, !DIExpression(), -; CHECK: call void @llvm.dbg.assign(metadata ptr null,{{.*}}, metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64), metadata ![[ID_1]], metadata ptr %a2, metadata !DIExpression()) +; CHECK: #dbg_assign(ptr null,{{.*}}, !DIExpression(DW_OP_LLVM_fragment, 0, 64), ![[ID_1]], ptr %a2, !DIExpression(), ; CHECK: call void @llvm.memset{{.*}}, !DIAssignID ![[ID_1]] ; CHECK: store ptr @d, ptr %b, align 8,{{.*}}!DIAssignID ![[ID_3:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata ptr @d,{{.*}}, metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64), metadata ![[ID_3]], metadata ptr %b, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(ptr @d,{{.*}}, !DIExpression(DW_OP_LLVM_fragment, 64, 64), ![[ID_3]], ptr %b, !DIExpression(), ; CHECK-DAG: ![[ID_0]] = distinct !DIAssignID() ; CHECK-DAG: ![[ID_1]] = distinct !DIAssignID() diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/remove-redundant.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/remove-redundant.ll index 24ec3e94ed2753..c5f421d1c5f33f 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/remove-redundant.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/remove-redundant.ll @@ -13,7 +13,7 @@ entry: ;; Forward scan: This dbg.assign for Local2 
contains an undef value component ;; in the entry block and is the first debug intrinsic for the variable, but is ;; linked to an instruction so should not be deleted. -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata ![[Local2:[0-9]+]] +; CHECK-NEXT: #dbg_assign(i1 undef, ![[Local2:[0-9]+]] call void @llvm.dbg.assign(metadata i1 undef, metadata !19, metadata !DIExpression(), metadata !20, metadata ptr %test, metadata !DIExpression()), !dbg !14 ;; Forward scan: dbg.assign for Local unlinked with undef value component, in @@ -29,7 +29,7 @@ entry: ;; Backward scan: Check that a dbg.value made redundant by a dbg.assign is ;; removed. -;; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 1, metadata ![[Local:[0-9]+]] +;; CHECK-NEXT: #dbg_assign(i32 1, ![[Local:[0-9]+]] ;; CHECK-NEXT: @step() call void @llvm.dbg.value(metadata i32 0, metadata !11, metadata !DIExpression()), !dbg !14 call void @llvm.dbg.assign(metadata i32 1, metadata !11, metadata !DIExpression(), metadata !15, metadata ptr undef, metadata !DIExpression()), !dbg !14 @@ -37,7 +37,7 @@ entry: ;; Backward scan: Check that a dbg.assign made redundant by a dbg.value is ;; removed. -;; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 3, metadata ![[Local:[0-9]+]] +;; CHECK-NEXT: #dbg_value(i32 3, ![[Local:[0-9]+]] ;; CHECK-NEXT: @step() call void @llvm.dbg.assign(metadata i32 2, metadata !11, metadata !DIExpression(), metadata !15, metadata ptr undef, metadata !DIExpression()), !dbg !14 call void @llvm.dbg.value(metadata i32 3, metadata !11, metadata !DIExpression()), !dbg !14 @@ -57,13 +57,13 @@ entry: ;; Forward scan: We've seen non-undef dbg intrinsics for Local in the entry ;; block so we shouldn't delete this undef. 
-; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 undef, metadata ![[Local]] +; CHECK-NEXT: #dbg_assign(i32 undef, ![[Local]] call void @llvm.dbg.assign(metadata i32 undef, metadata !11, metadata !DIExpression(), metadata !15, metadata ptr undef, metadata !DIExpression()), !dbg !14 br label %next next: ;; Forward scan: Do not delete undef dbg.assigns from non-entry blocks. -; CHECK: call void @llvm.dbg.assign(metadata i32 undef, metadata ![[Local2]] +; CHECK: #dbg_assign(i32 undef, ![[Local2]] ; CHECK-NEXT: @step() call void @llvm.dbg.assign(metadata i32 undef, metadata !19, metadata !DIExpression(), metadata !21, metadata ptr %test, metadata !DIExpression()), !dbg !14 call void @step() @@ -73,11 +73,11 @@ next: ;; isn't removed. ;; Backward scan: It (the next dbg.assign) is also followed by another for the ;; same variable - check it isn't remove (because it's linked). -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 0, metadata ![[Local2]] +; CHECK-NEXT: #dbg_value(i32 0, ![[Local2]] ; CHECK-NEXT: store ; CHECK-NEXT: store -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 0, metadata ![[Local2]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 1, metadata ![[Local2]] +; CHECK-NEXT: #dbg_assign(i32 0, ![[Local2]] +; CHECK-NEXT: #dbg_assign(i32 1, ![[Local2]] call void @llvm.dbg.value(metadata i32 0, metadata !19, metadata !DIExpression()), !dbg !14 store i32 0, ptr %test, !DIAssignID !17 store i32 1, ptr %test, !DIAssignID !16 diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/salvage-value.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/salvage-value.ll index 3c1ef0791945ff..e3eb0396ac396d 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/salvage-value.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/salvage-value.ll @@ -12,28 +12,28 @@ entry: %add = add nsw i32 %x, 1, !dbg !22 call void @llvm.dbg.assign(metadata i32 %add, metadata !14, metadata !DIExpression(), metadata !28, metadata ptr %p, metadata 
!DIExpression()), !dbg !16 ;; %add is salvaged. -; CHECK: call void @llvm.dbg.assign(metadata i32 %x,{{.+}}metadata !DIExpression(DW_OP_plus_uconst, 1, DW_OP_stack_value),{{.+}}, metadata ptr %p, metadata !DIExpression()) +; CHECK: #dbg_assign(i32 %x,{{.+}}!DIExpression(DW_OP_plus_uconst, 1, DW_OP_stack_value),{{.+}}, ptr %p, !DIExpression(), %add1 = add nsw i32 %x, %y, !dbg !29 call void @llvm.dbg.assign(metadata i32 %add1, metadata !32, metadata !DIExpression(), metadata !31, metadata ptr %p, metadata !DIExpression()), !dbg !16 ;; %add1 is salvaged using a variadic expression. -; CHECK-NEXT: call void @llvm.dbg.assign(metadata !DIArgList(i32 %x, i32 %y), metadata ![[#]], metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus, DW_OP_stack_value), metadata ![[#]], metadata ptr %p, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(!DIArgList(i32 %x, i32 %y), ![[#]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus, DW_OP_stack_value), ![[#]], ptr %p, !DIExpression(), %arrayidx0 = getelementptr inbounds i32, ptr %p, i32 0 call void @llvm.dbg.assign(metadata i32 %x, metadata !14, metadata !DIExpression(), metadata !17, metadata ptr %arrayidx0, metadata !DIExpression()), !dbg !16 ;; %arrayidx0 is salvaged (zero offset, so the gep is just replaced with %p). -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 %x,{{.+}}, metadata !DIExpression(),{{.+}}, metadata ptr %p, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i32 %x,{{.+}}, !DIExpression(),{{.+}}, ptr %p, !DIExpression(), %arrayidx1 = getelementptr inbounds i32, ptr %p, i32 1 call void @llvm.dbg.assign(metadata i32 %x, metadata !33, metadata !DIExpression(), metadata !18, metadata ptr %arrayidx1, metadata !DIExpression()), !dbg !16 ;; %arrayidx1 is salvaged. 
-; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 %x,{{.+}}, metadata !DIExpression(),{{.+}}, metadata ptr %p, metadata !DIExpression(DW_OP_plus_uconst, 4)) +; CHECK-NEXT: #dbg_assign(i32 %x,{{.+}}, !DIExpression(),{{.+}}, ptr %p, !DIExpression(DW_OP_plus_uconst, 4), %arrayidx2 = getelementptr inbounds i32, ptr %p, i32 %x call void @llvm.dbg.assign(metadata i32 %x, metadata !34, metadata !DIExpression(), metadata !19, metadata ptr %arrayidx2, metadata !DIExpression()), !dbg !16 ;; Variadic DIExpressions for dbg.assign address component is not supported - ;; set undef. -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 %x,{{.+}}, metadata !DIExpression(),{{.+}}, metadata ptr undef, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i32 %x,{{.+}}, !DIExpression(),{{.+}}, ptr undef, !DIExpression(), ret void } diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/simplifycfg/empty-block.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/simplifycfg/empty-block.ll index 304d873ba7b7f5..a525b1967afe69 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/simplifycfg/empty-block.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/simplifycfg/empty-block.ll @@ -33,10 +33,10 @@ ; CHECK: entry: ;; -- alloca dbg.assign -; CHECK: call void @llvm.dbg.assign(metadata i1 undef +; CHECK: #dbg_assign(i1 undef ;; -- sunk dbg.assigns -; CHECK: call void @llvm.dbg.assign(metadata float undef, metadata ![[var:[0-9]+]], metadata !DIExpression(), metadata ![[id:[0-9]+]], metadata ptr %h, metadata !DIExpression()), !dbg -; CHECK-NEXT: call void @llvm.dbg.assign(metadata float undef, metadata ![[var]], metadata !DIExpression(), metadata ![[id]], metadata ptr %h, metadata !DIExpression()), !dbg +; CHECK: #dbg_assign(float undef, ![[var:[0-9]+]], !DIExpression(), ![[id:[0-9]+]], ptr %h, !DIExpression(), +; CHECK-NEXT: #dbg_assign(float undef, ![[var]], !DIExpression(), ![[id]], ptr %h, !DIExpression(), ; CHECK-NEXT: %storemerge.in = getelementptr ; 
CHECK-NEXT: %storemerge = load float ; CHECK-NEXT: store float %storemerge, ptr %h, align 4{{.+}}!DIAssignID ![[id]] diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/simplifycfg/speculated-store.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/simplifycfg/speculated-store.ll index 6e46b022649248..b17ab8c113f3ec 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/simplifycfg/speculated-store.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/simplifycfg/speculated-store.ll @@ -25,7 +25,7 @@ ; CHECK: %[[SELECT:.*]] = select i1 %tobool ; CHECK-NEXT: store i32 %[[SELECT]], ptr %c{{.*}}, !DIAssignID ![[ID:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 %[[SELECT]], metadata ![[VAR_C:[0-9]+]], metadata !DIExpression(), metadata ![[ID]], metadata ptr %c, metadata !DIExpression()), !dbg +; CHECK-NEXT: #dbg_assign(i32 %[[SELECT]], ![[VAR_C:[0-9]+]], !DIExpression(), ![[ID]], ptr %c, !DIExpression(), ; CHECK: ![[VAR_C]] = !DILocalVariable(name: "c", @a = dso_local global i32 0, align 4, !dbg !0 diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/slp-vectorizer/merge-scalars.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/slp-vectorizer/merge-scalars.ll index 4c47ecce1a0a38..daa534fcd0c227 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/slp-vectorizer/merge-scalars.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/slp-vectorizer/merge-scalars.ll @@ -24,11 +24,11 @@ ;; Test that dbg.assigns linked to the the scalar stores to quad get linked to ;; the vector store that replaces them. 
-; CHECK: call void @llvm.dbg.assign(metadata float undef, metadata ![[VAR:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32), metadata ![[ID:[0-9]+]], metadata ptr %arrayidx, metadata !DIExpression()) -; CHECK: call void @llvm.dbg.assign(metadata float undef, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 32, 32), metadata ![[ID]], metadata ptr %quad, metadata !DIExpression(DW_OP_plus_uconst, 4)) -; CHECK: call void @llvm.dbg.assign(metadata float undef, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 32), metadata ![[ID]], metadata ptr %quad, metadata !DIExpression(DW_OP_plus_uconst, 8)) +; CHECK: #dbg_assign(float undef, ![[VAR:[0-9]+]], !DIExpression(DW_OP_LLVM_fragment, 0, 32), ![[ID:[0-9]+]], ptr %arrayidx, !DIExpression(), +; CHECK: #dbg_assign(float undef, ![[VAR]], !DIExpression(DW_OP_LLVM_fragment, 32, 32), ![[ID]], ptr %quad, !DIExpression(DW_OP_plus_uconst, 4), +; CHECK: #dbg_assign(float undef, ![[VAR]], !DIExpression(DW_OP_LLVM_fragment, 64, 32), ![[ID]], ptr %quad, !DIExpression(DW_OP_plus_uconst, 8), ; CHECK: store <4 x float> {{.*}} !DIAssignID ![[ID]] -; CHECK: call void @llvm.dbg.assign(metadata float undef, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 96, 32), metadata ![[ID]], metadata ptr %quad, metadata !DIExpression(DW_OP_plus_uconst, 12)) +; CHECK: #dbg_assign(float undef, ![[VAR]], !DIExpression(DW_OP_LLVM_fragment, 96, 32), ![[ID]], ptr %quad, !DIExpression(DW_OP_plus_uconst, 12), target triple = "x86_64-unknown-unknown" diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/after-inlining.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/after-inlining.ll index b39337247e3659..03d3b4bb17e2e1 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/after-inlining.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/after-inlining.ll @@ -29,7 +29,7 @@ ;; ;; $ clang test.c -Xclang -fexperimental-assignment-tracking -O2 -g -; CHECK: call void 
@llvm.dbg.assign(metadata i1 false, metadata !{{.+}}, metadata !DIExpression(), metadata !{{.+}}, metadata ptr undef, metadata !DIExpression()), !dbg ![[DBG:[0-9]+]] +; CHECK: #dbg_assign(i1 false, !{{.+}}, !DIExpression(), !{{.+}}, ptr undef, !DIExpression(), ![[DBG:[0-9]+]] ; CHECK-DAG: ![[DBG]] = !DILocation(line: 0, scope: ![[INL_SC:[0-9]+]], inlinedAt: ![[IA:[0-9]+]]) ; CHECK-DAG: ![[IA]] = distinct !DILocation(line: 21, column: 12, scope: ![[SC:[0-9]+]]) diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/alloca-single-slice.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/alloca-single-slice.ll index deee04066fc2ba..e2e2c14678a4f3 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/alloca-single-slice.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/alloca-single-slice.ll @@ -19,7 +19,7 @@ ; CHECK: entry: ; CHECK-NEXT: %a.sroa.0 = alloca i64, align 8, !DIAssignID ![[ID_1:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata ![[VAR:[0-9]+]], metadata !DIExpression(), metadata ![[ID_1]], metadata ptr %a.sroa.0, metadata !DIExpression()), !dbg +; CHECK-NEXT: #dbg_assign(i1 undef, ![[VAR:[0-9]+]], !DIExpression(), ![[ID_1]], ptr %a.sroa.0, !DIExpression(), target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/arglist.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/arglist.ll index 83f257e77b0bee..c6e0b9f711e310 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/arglist.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/arglist.ll @@ -5,10 +5,10 @@ ;; it used an arglist. 
; CHECK: if.then: -; CHECK-NEXT: dbg.value(metadata i32 poison, +; CHECK-NEXT: #dbg_value(i32 poison, ; CHECK: if.else: -; CHECK-NEXT: dbg.value(metadata i32 2, +; CHECK-NEXT: #dbg_value(i32 2, declare i8 @get_i8() diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/complex.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/complex.ll index 38d4e3fb0d7f85..a1b28a4e2bd408 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/complex.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/complex.ll @@ -19,8 +19,8 @@ ;; dbg.assigns for the split (then promoted) stores. ; CHECK: %c.coerce.fca.0.extract = extractvalue [2 x i64] %c.coerce, 0 ; CHECK: %c.coerce.fca.1.extract = extractvalue [2 x i64] %c.coerce, 1 -; CHECK: call void @llvm.dbg.value(metadata i64 %c.coerce.fca.0.extract,{{.+}}, metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)) -; CHECK: call void @llvm.dbg.value(metadata i64 %c.coerce.fca.1.extract,{{.+}}, metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)) +; CHECK: #dbg_value(i64 %c.coerce.fca.0.extract,{{.+}}, !DIExpression(DW_OP_LLVM_fragment, 0, 64), +; CHECK: #dbg_value(i64 %c.coerce.fca.1.extract,{{.+}}, !DIExpression(DW_OP_LLVM_fragment, 64, 64), target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "armv7-apple-unknown" diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/fail-fragment.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/fail-fragment.ll index 234504d14442e9..39b04b8466679b 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/fail-fragment.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/fail-fragment.ll @@ -13,19 +13,19 @@ ;; the expression uses more than one location operand (DW_OP_arg n). 
; CHECK: if.then: -; CHECK: dbg.value(metadata i32 poison, metadata ![[#]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32)) +; CHECK: #dbg_value(i32 poison, ![[#]], !DIExpression(DW_OP_LLVM_fragment, 0, 32), ;; FIXME: The value below should be poison. See https://reviews.llvm.org/D147431#4245260. -; CHECK: dbg.value(metadata i32 %{{.*}}, metadata ![[#]], metadata !DIExpression(DW_OP_LLVM_fragment, 32, 32)) +; CHECK: #dbg_value(i32 %{{.*}}, ![[#]], !DIExpression(DW_OP_LLVM_fragment, 32, 32), ; CHECK: if.else: -; CHECK: dbg.value(metadata i32 2, metadata ![[#]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32)) -; CHECK: dbg.value(metadata i32 0, metadata ![[#]], metadata !DIExpression(DW_OP_LLVM_fragment, 32, 32)) +; CHECK: #dbg_value(i32 2, ![[#]], !DIExpression(DW_OP_LLVM_fragment, 0, 32), +; CHECK: #dbg_value(i32 0, ![[#]], !DIExpression(DW_OP_LLVM_fragment, 32, 32), ; CHECK: if.inner: -; CHECK: call void @llvm.dbg.value(metadata i32 poison, metadata ![[#]], metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus, DW_OP_stack_value)) +; CHECK: #dbg_value(i32 poison, ![[#]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus, DW_OP_stack_value), ; CHECK: end: -; CHECK: dbg.value(metadata i32 %{{.*}}, metadata ![[#]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32)) +; CHECK: #dbg_value(i32 %{{.*}}, ![[#]], !DIExpression(DW_OP_LLVM_fragment, 0, 32), declare i64 @get_i64() diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/frag-2.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/frag-2.ll index 0ac5335f51fec0..1a145bb7a5b77e 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/frag-2.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/frag-2.ll @@ -40,8 +40,8 @@ ; CHECK: store <2 x float> %agg.tmp.sroa.0.0.copyload.i, ptr %4, align 4,{{.+}}!DIAssignID ![[id1:[0-9]+]] ; CHECK: store <2 x float> %agg.tmp.sroa.2.0.copyload.i, ptr %n.sroa.2.4..sroa_idx, align 4,{{.+}}!DIAssignID 
![[id2:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata <2 x float> %agg.tmp.sroa.0.0.copyload.i, metadata ![[var:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64), metadata ![[id1]], metadata ptr %4, metadata !DIExpression()), !dbg -; CHECK-NEXT: call void @llvm.dbg.assign(metadata <2 x float> %agg.tmp.sroa.2.0.copyload.i, metadata ![[var]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64), metadata ![[id2]], metadata ptr %n.sroa.2.4..sroa_idx, metadata !DIExpression()), !dbg +; CHECK-NEXT: #dbg_assign(<2 x float> %agg.tmp.sroa.0.0.copyload.i, ![[var:[0-9]+]], !DIExpression(DW_OP_LLVM_fragment, 0, 64), ![[id1]], ptr %4, !DIExpression(), +; CHECK-NEXT: #dbg_assign(<2 x float> %agg.tmp.sroa.2.0.copyload.i, ![[var]], !DIExpression(DW_OP_LLVM_fragment, 64, 64), ![[id2]], ptr %n.sroa.2.4..sroa_idx, !DIExpression(), ; CHECK: ret diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/frag.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/frag.ll index 799bd4eeea2593..159fceb174f952 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/frag.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/frag.ll @@ -22,8 +22,8 @@ ; CHECK: %call = call ; CHECK-NEXT: %0 = extractvalue { <2 x float>, <2 x float> } %call, 0 ; CHECK-NEXT: %1 = extractvalue { <2 x float>, <2 x float> } %call, 1 -; CHECK-NEXT: call void @llvm.dbg.value(metadata <2 x float> %0, metadata ![[var:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 256, 64)) -; CHECK-NEXT: call void @llvm.dbg.value(metadata <2 x float> %1, metadata ![[var]], metadata !DIExpression(DW_OP_LLVM_fragment, 320, 64)) +; CHECK-NEXT: #dbg_value(<2 x float> %0, ![[var:[0-9]+]], !DIExpression(DW_OP_LLVM_fragment, 256, 64), +; CHECK-NEXT: #dbg_value(<2 x float> %1, ![[var]], !DIExpression(DW_OP_LLVM_fragment, 320, 64), %class.c = type { [4 x float] } diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/id.ll 
b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/id.ll index fe7152ff72c123..d7b3288bd8fc75 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/id.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/id.ll @@ -29,13 +29,13 @@ ; CHECK: if.then: ; CHECK-NEXT: %1 = load float -; CHECK-NEXT: call void @llvm.dbg.value(metadata float %storemerge, metadata ![[var:[0-9]+]], metadata !DIExpression()) +; CHECK-NEXT: #dbg_value(float %storemerge, ![[var:[0-9]+]], !DIExpression(), ; CHECK: if.else: ; CHECK-NEXT: %2 = load float ; CHECK-NEXT: %3 = load float ; CHECK-NEXT: %div = fdiv float -; CHECK: call void @llvm.dbg.value(metadata float %storemerge, metadata ![[var]], metadata !DIExpression()) +; CHECK: #dbg_value(float %storemerge, ![[var]], !DIExpression(), %class.a = type { i8 } diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/memcpy.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/memcpy.ll index 5f3491655ae9d0..ff8affbdc22517 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/memcpy.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/memcpy.ll @@ -28,10 +28,10 @@ ;; Split alloca. 
; CHECK: entry: ; CHECK-NEXT: %To.sroa.0 = alloca { i32, i32, i32 }, align 8, !DIAssignID ![[ID_1:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata {{.+}} undef, metadata ![[TO:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 96), metadata ![[ID_1]], metadata ptr %To.sroa.0, metadata !DIExpression()), !dbg +; CHECK-NEXT: #dbg_assign({{.+}} undef, ![[TO:[0-9]+]], !DIExpression(DW_OP_LLVM_fragment, 0, 96), ![[ID_1]], ptr %To.sroa.0, !DIExpression(), ; CHECK-NEXT: %To.sroa.4 = alloca { i32, i32, i32 }, align 8, !DIAssignID ![[ID_3:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata {{.+}} undef, metadata ![[TO]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 96), metadata ![[ID_3]], metadata ptr %To.sroa.4, metadata !DIExpression()), !dbg +; CHECK-NEXT: #dbg_assign({{.+}} undef, ![[TO]], !DIExpression(DW_OP_LLVM_fragment, 128, 96), ![[ID_3]], ptr %To.sroa.4, !DIExpression(), ;; Split memcpy. ; CHECK: call void @llvm.memcpy{{.*}}(ptr align 8 %To.sroa.0, ptr align 4 @From, i64 12, i1 false),{{.*}}!DIAssignID ![[ID_4:[0-9]+]] @@ -40,9 +40,9 @@ ; CHECK: call void @llvm.memcpy{{.*}}(ptr align 8 %To.sroa.4, ptr align 4 getelementptr inbounds (i8, ptr @From, i64 16), i64 12, i1 false){{.*}}!DIAssignID ![[ID_6:[0-9]+]] ;; Intrinsics for the splits above. 
-; CHECK-NEXT: call void @llvm.dbg.assign(metadata {{.+}} undef, metadata ![[TO]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 96), metadata ![[ID_4]], metadata ptr %To.sroa.0, metadata !DIExpression()), !dbg -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %To.sroa.3.0.copyload, metadata ![[TO]], metadata !DIExpression(DW_OP_LLVM_fragment, 96, 32)) -; CHECK-NEXT: call void @llvm.dbg.assign(metadata {{.+}} undef, metadata ![[TO]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 96), metadata ![[ID_6]], metadata ptr %To.sroa.4, metadata !DIExpression()), !dbg +; CHECK-NEXT: #dbg_assign({{.+}} undef, ![[TO]], !DIExpression(DW_OP_LLVM_fragment, 0, 96), ![[ID_4]], ptr %To.sroa.0, !DIExpression(), +; CHECK-NEXT: #dbg_value(i32 %To.sroa.3.0.copyload, ![[TO]], !DIExpression(DW_OP_LLVM_fragment, 96, 32), +; CHECK-NEXT: #dbg_assign({{.+}} undef, ![[TO]], !DIExpression(DW_OP_LLVM_fragment, 128, 96), ![[ID_6]], ptr %To.sroa.4, !DIExpression(), target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/memmove-to-from-same-alloca.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/memmove-to-from-same-alloca.ll index 07b29ab795b72d..4a84df67a998a1 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/memmove-to-from-same-alloca.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/memmove-to-from-same-alloca.ll @@ -43,9 +43,9 @@ ; CHECK: %A.sroa.0.sroa.5 = alloca [5 x i32] ; CHECK: llvm.memcpy{{.*}}(ptr align 4 %A.sroa.0.sroa.5, ptr align 4 getelementptr inbounds (i8, ptr @Glob, i64 4), i64 20, i1 false){{.*}}!DIAssignID ![[ID:[0-9]+]] ;; Here's the dbg.assign for element 0 - it's not important for the test. 
-; CHECK-NEXT: llvm.dbg.value({{.*}}!DIExpression(DW_OP_LLVM_fragment, 0, 32){{.*}}) +; CHECK-NEXT: #dbg_value({{.*}}!DIExpression(DW_OP_LLVM_fragment, 0, 32){{.*}}) ;; This is the dbg.assign we care about: -; CHECK-NEXT: llvm.dbg.assign(metadata i1 undef, metadata ![[VAR:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 32, 160), metadata ![[ID]], metadata ptr %A.sroa.0.sroa.5, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i1 undef, ![[VAR:[0-9]+]], !DIExpression(DW_OP_LLVM_fragment, 32, 160), ![[ID]], ptr %A.sroa.0.sroa.5, !DIExpression(), ; CHECK: ![[VAR]] = !DILocalVariable(name: "A" diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/remove-redundant-dbg.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/remove-redundant-dbg.ll index cffac06f8e5451..cb4250430106b9 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/remove-redundant-dbg.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/remove-redundant-dbg.ll @@ -7,7 +7,7 @@ ;; change. This has a significant positive impact on peak memory and compiler ;; run time. 
-; CHECK: @llvm.dbg.assign(metadata i32 1 +; CHECK: #dbg_assign(i32 1 define dso_local void @_Z3funv() local_unnamed_addr !dbg !7 { entry: diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/rewrite.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/rewrite.ll index 6c4f22f774337d..24199c6f86885d 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/rewrite.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/rewrite.ll @@ -26,25 +26,25 @@ ; CHECK: entry: ; CHECK-NEXT: %S.sroa.0 = alloca { i32, i32, i32 }, align 8, !DIAssignID ![[ID_1:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata ![[VAR:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 96), metadata ![[ID_1]], metadata ptr %S.sroa.0, metadata !DIExpression()), !dbg +; CHECK-NEXT: #dbg_assign(i1 undef, ![[VAR:[0-9]+]], !DIExpression(DW_OP_LLVM_fragment, 0, 96), ![[ID_1]], ptr %S.sroa.0, !DIExpression(), ;; The middle slice has been promoted, so the alloca has gone away. ; CHECK-NEXT: %S.sroa.5 = alloca { i32, i32, i32 }, align 8, !DIAssignID ![[ID_3:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 96), metadata ![[ID_3]], metadata ptr %S.sroa.5, metadata !DIExpression()), !dbg +; CHECK-NEXT: #dbg_assign(i1 undef, ![[VAR]], !DIExpression(DW_OP_LLVM_fragment, 128, 96), ![[ID_3]], ptr %S.sroa.5, !DIExpression(), ;; The memset has been sliced up (middle slice removed). 
; CHECK: call void @llvm.memset{{.*}}(ptr align 8 %S.sroa.0, i8 0, i64 12, i1 false), !dbg !{{.+}}, !DIAssignID ![[ID_5:[0-9]+]] ; CHECK: call void @llvm.memset{{.*}}(ptr align 8 %S.sroa.5, i8 0, i64 12, i1 false), !dbg !{{.+}}, !DIAssignID ![[ID_6:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i8 0, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 96), metadata ![[ID_5]], metadata ptr %S.sroa.0, metadata !DIExpression()), !dbg +; CHECK-NEXT: #dbg_assign(i8 0, ![[VAR]], !DIExpression(DW_OP_LLVM_fragment, 0, 96), ![[ID_5]], ptr %S.sroa.0, !DIExpression(), ;; Check the middle slice (no memset) gets a correct dbg.assign. -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 0, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 96, 32)) -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i8 0, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 96), metadata ![[ID_6]], metadata ptr %S.sroa.5, metadata !DIExpression()), !dbg +; CHECK-NEXT: #dbg_value(i32 0, ![[VAR]], !DIExpression(DW_OP_LLVM_fragment, 96, 32), +; CHECK-NEXT: #dbg_assign(i8 0, ![[VAR]], !DIExpression(DW_OP_LLVM_fragment, 128, 96), ![[ID_6]], ptr %S.sroa.5, !DIExpression(), ;; mem2reg promotes the load/store to the middle slice created by SROA: ; CHECK-NEXT: %0 = load i32, ptr @Glob, align 4, !dbg !{{.+}} -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %0, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 96, 32)) +; CHECK-NEXT: #dbg_value(i32 %0, ![[VAR]], !DIExpression(DW_OP_LLVM_fragment, 96, 32), target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/split-pre-fragmented-store-2.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/split-pre-fragmented-store-2.ll index 1f126f6aa6b4fb..b0b134b7066f18 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/split-pre-fragmented-store-2.ll +++ 
b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/split-pre-fragmented-store-2.ll @@ -26,18 +26,18 @@ ;; Alloca for var.a and associated dbg.assign: ; CHECK: %var.sroa.0 = alloca i32, align 4, !DIAssignID ![[id_1:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata ![[var:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 32, 32), metadata ![[id_1]], metadata ptr %var.sroa.0, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i1 undef, ![[var:[0-9]+]], !DIExpression(DW_OP_LLVM_fragment, 32, 32), ![[id_1]], ptr %var.sroa.0, !DIExpression(), ;; Alloca for var.b and associated dbg.assign: ; CHECK-NEXT: %var.sroa.1 = alloca i32, align 4, !DIAssignID ![[id_2:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata ![[var]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 32), metadata ![[id_2]], metadata ptr %var.sroa.1, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i1 undef, ![[var]], !DIExpression(DW_OP_LLVM_fragment, 64, 32), ![[id_2]], ptr %var.sroa.1, !DIExpression(), ;; Store to var.b (split from store to var) and associated dbg.assigns. The ;; dbg.assign for the fragment covering the (pre-split) assignment to var.a ;; should not be linked to the store. 
; CHECK: store i32 %[[v:.*]], ptr %var.sroa.1,{{.*}}!DIAssignID ![[id_3:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 %{{.*var\.sroa\.0.*}}, metadata ![[var]], metadata !DIExpression(DW_OP_LLVM_fragment, 32, 32), metadata ![[id_4:[0-9]+]], metadata ptr %var.sroa.0, metadata !DIExpression()) -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 %[[v]], metadata ![[var]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 32), metadata ![[id_3]], metadata ptr %var.sroa.1, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i32 %{{.*var\.sroa\.0.*}}, ![[var]], !DIExpression(DW_OP_LLVM_fragment, 32, 32), ![[id_4:[0-9]+]], ptr %var.sroa.0, !DIExpression(), +; CHECK-NEXT: #dbg_assign(i32 %[[v]], ![[var]], !DIExpression(DW_OP_LLVM_fragment, 64, 32), ![[id_3]], ptr %var.sroa.1, !DIExpression(), ; CHECK-DAG: ![[id_1]] = distinct !DIAssignID() ; CHECK-DAG: ![[id_2]] = distinct !DIAssignID() diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/split-pre-fragmented-store.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/split-pre-fragmented-store.ll index 458c3298b813d4..c151bd42d346dc 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/split-pre-fragmented-store.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/split-pre-fragmented-store.ll @@ -20,18 +20,18 @@ ;; Alloca for var.a and associated dbg.assign: ; CHECK: %var.sroa.0 = alloca i32, align 4, !DIAssignID ![[id_1:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata ![[var:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32), metadata ![[id_1]], metadata ptr %var.sroa.0, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i1 undef, ![[var:[0-9]+]], !DIExpression(DW_OP_LLVM_fragment, 0, 32), ![[id_1]], ptr %var.sroa.0, !DIExpression(), ;; Alloca for var.b and associated dbg.assign: ; CHECK-NEXT: %var.sroa.1 = alloca i32, align 4, !DIAssignID ![[id_2:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, 
metadata ![[var]], metadata !DIExpression(DW_OP_LLVM_fragment, 32, 32), metadata ![[id_2]], metadata ptr %var.sroa.1, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i1 undef, ![[var]], !DIExpression(DW_OP_LLVM_fragment, 32, 32), ![[id_2]], ptr %var.sroa.1, !DIExpression(), ;; Store to var.b (split from store to var) and associated dbg.assigns. The ;; dbg.assign for the fragment covering the (pre-split) assignment to var.a ;; should not be linked to the store. ; CHECK: store i32 %[[v:.*]], ptr %var.sroa.1,{{.*}}!DIAssignID ![[id_3:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 %{{.*var\.sroa\.0.*}}, metadata ![[var]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32), metadata ![[id_4:[0-9]+]], metadata ptr %var.sroa.0, metadata !DIExpression()) -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 %[[v]], metadata ![[var]], metadata !DIExpression(DW_OP_LLVM_fragment, 32, 32), metadata ![[id_3]], metadata ptr %var.sroa.1, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i32 %{{.*var\.sroa\.0.*}}, ![[var]], !DIExpression(DW_OP_LLVM_fragment, 0, 32), ![[id_4:[0-9]+]], ptr %var.sroa.0, !DIExpression(), +; CHECK-NEXT: #dbg_assign(i32 %[[v]], ![[var]], !DIExpression(DW_OP_LLVM_fragment, 32, 32), ![[id_3]], ptr %var.sroa.1, !DIExpression(), ; CHECK-DAG: ![[id_1]] = distinct !DIAssignID() ; CHECK-DAG: ![[id_2]] = distinct !DIAssignID() diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/store.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/store.ll index 52b9d6b4950b99..2ece3d9bd01741 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/store.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/store.ll @@ -29,26 +29,26 @@ ; CHECK: entry: ; CHECK-NEXT: %S.sroa.0 = alloca { i32, i32, i32 }, align 8, !DIAssignID ![[ID_1:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata ![[VAR:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 96), metadata ![[ID_1]], metadata ptr 
%S.sroa.0, metadata !DIExpression()), !dbg +; CHECK-NEXT: #dbg_assign(i1 undef, ![[VAR:[0-9]+]], !DIExpression(DW_OP_LLVM_fragment, 0, 96), ![[ID_1]], ptr %S.sroa.0, !DIExpression(), ; CHECK-NEXT: %S.sroa.4 = alloca { i32, i32, i32 }, align 8, !DIAssignID ![[ID_3:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 96), metadata ![[ID_3]], metadata ptr %S.sroa.4, metadata !DIExpression()), !dbg +; CHECK-NEXT: #dbg_assign(i1 undef, ![[VAR]], !DIExpression(DW_OP_LLVM_fragment, 128, 96), ![[ID_3]], ptr %S.sroa.4, !DIExpression(), ;; The memset has been split into [0, 96)[96, 128)[128, 224) bit slices. The ;; memset for the middle slice has been removed. ; CHECK: call void @llvm.memset{{.*}}(ptr align 8 %S.sroa.0, i8 0, i64 12, i1 false), !dbg !{{.+}}, !DIAssignID ![[ID_4:[0-9]+]] ; CHECK-NEXT: call void @llvm.memset{{.*}}(ptr align 8 %S.sroa.4, i8 0, i64 12, i1 false), !dbg !{{.+}}, !DIAssignID ![[ID_5:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i8 0, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 96), metadata ![[ID_4]], metadata ptr %S.sroa.0, metadata !DIExpression()), !dbg +; CHECK-NEXT: #dbg_assign(i8 0, ![[VAR]], !DIExpression(DW_OP_LLVM_fragment, 0, 96), ![[ID_4]], ptr %S.sroa.0, !DIExpression(), ;; This is the one we care about most in this test: check that a memset->store ;; gets a correct dbg.assign. 
-; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 0, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 96, 32)) -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i8 0, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 96), metadata ![[ID_5]], metadata ptr %S.sroa.4, metadata !DIExpression()), !dbg +; CHECK-NEXT: #dbg_value(i32 0, ![[VAR]], !DIExpression(DW_OP_LLVM_fragment, 96, 32), +; CHECK-NEXT: #dbg_assign(i8 0, ![[VAR]], !DIExpression(DW_OP_LLVM_fragment, 128, 96), ![[ID_5]], ptr %S.sroa.4, !DIExpression(), ;; The load from global+store becomes a load. ;; FIXME: In reality it is actually stored again later on. ; CHECK-NEXT: %0 = load i32, ptr @Glob, align 4, !dbg !{{.+}} -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %0, metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 96, 32)) +; CHECK-NEXT: #dbg_value(i32 %0, ![[VAR]], !DIExpression(DW_OP_LLVM_fragment, 96, 32), target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/unspecified-var-size.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/unspecified-var-size.ll index b00986c900bee4..fb11686994a50a 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/unspecified-var-size.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/unspecified-var-size.ll @@ -8,7 +8,7 @@ ;; Check that migrateDebugInfo doesn't crash when encountering an alloca for a ;; variable with a type of unspecified size (e.g. DW_TAG_unspecified_type). -; CHECK: @llvm.dbg.value(metadata ptr %0,{{.+}}, metadata !DIExpression()) +; CHECK: #dbg_value(ptr %0,{{.+}}, !DIExpression(), ;; There should be no new fragment and the value component should remain as %0. 
define dso_local void @_Z3funDn(ptr %0) #0 !dbg !14 { diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/user-memcpy.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/user-memcpy.ll index ff7779bd2af12d..c4f0c49f1a1f53 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/user-memcpy.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/user-memcpy.ll @@ -23,11 +23,11 @@ ;; Allocas have been promoted - the linked dbg.assigns have been removed. ;; | V3i point = {0, 0, 0}; -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 0, metadata ![[point:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)) -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 0, metadata ![[point]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)) +; CHECK-NEXT: #dbg_value(i64 0, ![[point:[0-9]+]], !DIExpression(DW_OP_LLVM_fragment, 0, 64), +; CHECK-NEXT: #dbg_value(i64 0, ![[point]], !DIExpression(DW_OP_LLVM_fragment, 64, 64), ;; point.z = 5000; -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 5000, metadata ![[point]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 64)) +; CHECK-NEXT: #dbg_value(i64 5000, ![[point]], !DIExpression(DW_OP_LLVM_fragment, 128, 64), ;; | V3i other = {10, 9, 8}; ;; other is global const: @@ -37,17 +37,17 @@ ; CHECK-NEXT: %other.sroa.0.0.copyload = load i64, ptr @__const._Z3funv.other ; CHECK-NEXT: %other.sroa.2.0.copyload = load i64, ptr getelementptr inbounds (i8, ptr @__const._Z3funv.other, i64 8) ; CHECK-NEXT: %other.sroa.3.0.copyload = load i64, ptr getelementptr inbounds (i8, ptr @__const._Z3funv.other, i64 16) -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 %other.sroa.0.0.copyload, metadata ![[other:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)) -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 %other.sroa.2.0.copyload, metadata ![[other]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)) -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 
%other.sroa.3.0.copyload, metadata ![[other]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 64)) +; CHECK-NEXT: #dbg_value(i64 %other.sroa.0.0.copyload, ![[other:[0-9]+]], !DIExpression(DW_OP_LLVM_fragment, 0, 64), +; CHECK-NEXT: #dbg_value(i64 %other.sroa.2.0.copyload, ![[other]], !DIExpression(DW_OP_LLVM_fragment, 64, 64), +; CHECK-NEXT: #dbg_value(i64 %other.sroa.3.0.copyload, ![[other]], !DIExpression(DW_OP_LLVM_fragment, 128, 64), ;; | std::memcpy(&point.y, &other.x, sizeof(long) * 2); ;; other is now 3 scalars: ;; point.y = other.x -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 %other.sroa.0.0.copyload, metadata ![[point]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)) +; CHECK-NEXT: #dbg_value(i64 %other.sroa.0.0.copyload, ![[point]], !DIExpression(DW_OP_LLVM_fragment, 64, 64), ;; ;; point.z = other.y -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 %other.sroa.2.0.copyload, metadata ![[point]], metadata !DIExpression(DW_OP_LLVM_fragment, 128, 64)) +; CHECK-NEXT: #dbg_value(i64 %other.sroa.2.0.copyload, ![[point]], !DIExpression(DW_OP_LLVM_fragment, 128, 64), ; CHECK: ![[point]] = !DILocalVariable(name: "point", ; CHECK: ![[other]] = !DILocalVariable(name: "other", diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/var-sized-fragment.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/var-sized-fragment.ll index 97bab2febcae94..55119114bd6023 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/var-sized-fragment.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/var-sized-fragment.ll @@ -22,8 +22,8 @@ ;; FIXME: Variable 'b' gets an incorrect location (value and expression) - see ;; llvm.org/PR61981. This check just ensures that no fragment info is added to ;; the dbg.value. 
-; CHECK: dbg.value(metadata i32 %.sroa.0.0.extract.trunc, metadata ![[B:[0-9]+]], metadata !DIExpression(DW_OP_plus_uconst, 4)) -; CHECK: dbg.value(metadata i32 %.sroa.0.0.extract.trunc, metadata ![[A:[0-9]+]], metadata !DIExpression()) +; CHECK: #dbg_value(i32 %.sroa.0.0.extract.trunc, ![[B:[0-9]+]], !DIExpression(DW_OP_plus_uconst, 4), +; CHECK: #dbg_value(i32 %.sroa.0.0.extract.trunc, ![[A:[0-9]+]], !DIExpression(), ; CHECK: ![[A]] = !DILocalVariable(name: "a", ; CHECK: ![[B]] = !DILocalVariable(name: "b", diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/vec-1.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/vec-1.ll index 69b884431a3573..b60c3fcd47e741 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/vec-1.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/vec-1.ll @@ -19,9 +19,9 @@ ; CHECK: %call = call ; CHECK-NEXT: %0 = extractvalue { <2 x float>, <2 x float> } %call, 0 -; CHECK-NEXT: call void @llvm.dbg.value(metadata <2 x float> %0, metadata ![[var:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)) +; CHECK-NEXT: #dbg_value(<2 x float> %0, ![[var:[0-9]+]], !DIExpression(DW_OP_LLVM_fragment, 0, 64), ; CHECK-NEXT: %1 = extractvalue { <2 x float>, <2 x float> } %call, 1 -; CHECK-NEXT: call void @llvm.dbg.value(metadata <2 x float> %1, metadata ![[var]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)) +; CHECK-NEXT: #dbg_value(<2 x float> %1, ![[var]], !DIExpression(DW_OP_LLVM_fragment, 64, 64), %class.c = type { i8 } %class.a = type { [4 x float] } diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/vec-2.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/vec-2.ll index c2446812d0352b..38f220aa61e80a 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/vec-2.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/sroa/vec-2.ll @@ -29,9 +29,9 @@ ; CHECK: %i.sroa.2.12.vec.insert = insertelement <2 x float> %i.sroa.2.0.vec.insert, float %2, i32 1, !dbg ;; 
There's a few dbg intrinsics we're not interested in testing wedged in here. -; CHECK-NEXT: dbg.value -; CHECK-NEXT: dbg.value -; CHECK-NEXT: call void @llvm.dbg.value(metadata float %2,{{.+}}, metadata !DIExpression(DW_OP_LLVM_fragment, 96, 32)) +; CHECK-NEXT: #dbg_value +; CHECK-NEXT: #dbg_value +; CHECK-NEXT: #dbg_value(float %2,{{.+}}, !DIExpression(DW_OP_LLVM_fragment, 96, 32), %class.d = type { %class.a } %class.a = type { [4 x float] } diff --git a/llvm/test/DebugInfo/Generic/assignment-tracking/track-assignments.ll b/llvm/test/DebugInfo/Generic/assignment-tracking/track-assignments.ll index 714a9b3a8c9411..6215c9c75147fa 100644 --- a/llvm/test/DebugInfo/Generic/assignment-tracking/track-assignments.ll +++ b/llvm/test/DebugInfo/Generic/assignment-tracking/track-assignments.ll @@ -92,14 +92,14 @@ define dso_local void @_Z8zeroInitv() #0 !dbg !31 { entry: %Z = alloca [3 x i32], align 4 ; CHECK: %Z = alloca [3 x i32], align 4, !DIAssignID ![[ID_0:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata ![[VAR_0:[0-9]+]], metadata !DIExpression(), metadata ![[ID_0]], metadata ptr %Z, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i1 undef, ![[VAR_0:[0-9]+]], !DIExpression(), ![[ID_0]], ptr %Z, !DIExpression(), %0 = bitcast ptr %Z to ptr, !dbg !39 call void @llvm.lifetime.start.p0(i64 12, ptr %0) #5, !dbg !39 call void @llvm.dbg.declare(metadata ptr %Z, metadata !35, metadata !DIExpression()), !dbg !40 %1 = bitcast ptr %Z to ptr, !dbg !40 call void @llvm.memset.p0.i64(ptr align 4 %1, i8 0, i64 12, i1 false), !dbg !40 ; CHECK: @llvm.memset{{.*}}, !DIAssignID ![[ID_1:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i8 0, metadata ![[VAR_0]], metadata !DIExpression(), metadata ![[ID_1]], metadata ptr %1, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i8 0, ![[VAR_0]], !DIExpression(), ![[ID_1]], ptr %1, !DIExpression(), %2 = bitcast ptr %Z to ptr, !dbg !41 call void @llvm.lifetime.end.p0(i64 12, ptr %2) #5, !dbg !41 ret 
void, !dbg !41 @@ -117,14 +117,14 @@ define dso_local void @_Z10memcpyInitv() #0 !dbg !42 { entry: %A = alloca [4 x i32], align 16 ; CHECK: %A = alloca [4 x i32], align 16, !DIAssignID ![[ID_2:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata ![[VAR_1:[0-9]+]], metadata !DIExpression(), metadata ![[ID_2]], metadata ptr %A, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i1 undef, ![[VAR_1:[0-9]+]], !DIExpression(), ![[ID_2]], ptr %A, !DIExpression(), %0 = bitcast ptr %A to ptr, !dbg !48 call void @llvm.lifetime.start.p0(i64 16, ptr %0) #5, !dbg !48 call void @llvm.dbg.declare(metadata ptr %A, metadata !44, metadata !DIExpression()), !dbg !49 %1 = bitcast ptr %A to ptr, !dbg !49 call void @llvm.memcpy.p0.p0.i64(ptr align 16 %1, ptr align 16 @__const._Z10memcpyInitv.A, i64 16, i1 false), !dbg !49 ; CHECK: @llvm.memcpy{{.*}}, !DIAssignID ![[ID_3:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata ![[VAR_1]], metadata !DIExpression(), metadata ![[ID_3]], metadata ptr %1, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i1 undef, ![[VAR_1]], !DIExpression(), ![[ID_3]], ptr %1, !DIExpression(), %2 = bitcast ptr %A to ptr, !dbg !50 call void @llvm.lifetime.end.p0(i64 16, ptr %2) #5, !dbg !50 ret void, !dbg !50 @@ -146,7 +146,7 @@ define dso_local void @_Z8setFieldv() #0 !dbg !51 { entry: %O = alloca %struct.Outer, align 4 ; CHECK: %O = alloca %struct.Outer, align 4, !DIAssignID ![[ID_4:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata ![[VAR_2:[0-9]+]], metadata !DIExpression(), metadata ![[ID_4]], metadata ptr %O, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i1 undef, ![[VAR_2:[0-9]+]], !DIExpression(), ![[ID_4]], ptr %O, !DIExpression(), %0 = bitcast ptr %O to ptr, !dbg !58 call void @llvm.lifetime.start.p0(i64 16, ptr %0) #5, !dbg !58 call void @llvm.dbg.declare(metadata ptr %O, metadata !53, metadata !DIExpression()), !dbg !59 @@ -155,7 +155,7 @@ entry: %B = 
getelementptr inbounds %struct.Inner, ptr %A, i32 0, i32 1, !dbg !66 store i32 %1, ptr %B, align 4, !dbg !67, !tbaa !68 ; CHECK: store i32 %1, ptr %B, align 4,{{.*}}!DIAssignID ![[ID_5:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 %1, metadata ![[VAR_2]], metadata !DIExpression(DW_OP_LLVM_fragment, 32, 32), metadata ![[ID_5]], metadata ptr %B, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i32 %1, ![[VAR_2]], !DIExpression(DW_OP_LLVM_fragment, 32, 32), ![[ID_5]], ptr %B, !DIExpression(), %2 = bitcast ptr %O to ptr, !dbg !71 call void @llvm.lifetime.end.p0(i64 16, ptr %2) #5, !dbg !71 ret void, !dbg !71 @@ -178,7 +178,7 @@ define dso_local void @_Z13unknownOffsetv() #0 !dbg !72 { entry: %A = alloca [2 x i32], align 4 ; CHECK: %A = alloca [2 x i32], align 4, !DIAssignID ![[ID_6:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata ![[VAR_3:[0-9]+]], metadata !DIExpression(), metadata ![[ID_6]], metadata ptr %A, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i1 undef, ![[VAR_3:[0-9]+]], !DIExpression(), ![[ID_6]], ptr %A, !DIExpression(), %0 = bitcast ptr %A to ptr, !dbg !78 call void @llvm.lifetime.start.p0(i64 8, ptr %0) #5, !dbg !78 call void @llvm.dbg.declare(metadata ptr %A, metadata !74, metadata !DIExpression()), !dbg !79 @@ -209,8 +209,8 @@ define dso_local i64 @_Z12sharedAllocav() #0 !dbg !85 { entry: %retval = alloca %struct.Inner, align 4 ; CHECK: %retval = alloca %struct.Inner, align 4, !DIAssignID ![[ID_7:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata ![[VAR_4:[0-9]+]], metadata !DIExpression(), metadata ![[ID_7]], metadata ptr %retval, metadata !DIExpression()) -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata ![[VAR_5:[0-9]+]], metadata !DIExpression(), metadata ![[ID_7]], metadata ptr %retval, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i1 undef, ![[VAR_4:[0-9]+]], !DIExpression(), ![[ID_7]], ptr %retval, !DIExpression(), +; CHECK-NEXT: 
#dbg_assign(i1 undef, ![[VAR_5:[0-9]+]], !DIExpression(), ![[ID_7]], ptr %retval, !DIExpression(), %0 = load i32, ptr @Cond, align 4, !dbg !94, !tbaa !61 %tobool = icmp ne i32 %0, 0, !dbg !94 br i1 %tobool, label %if.then, label %if.else, !dbg !95 @@ -221,8 +221,8 @@ if.then: ; preds = %entry %1 = bitcast ptr %retval to ptr, !dbg !97 call void @llvm.memcpy.p0.p0.i64(ptr align 4 %1, ptr align 4 @InnerA, i64 8, i1 false), !dbg !97, !tbaa.struct !98 ; CHECK: call void @llvm.memcpy{{.*}}, !DIAssignID ![[ID_8:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata ![[VAR_4]], metadata !DIExpression(), metadata ![[ID_8]], metadata ptr %1, metadata !DIExpression()) -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata ![[VAR_5]], metadata !DIExpression(), metadata ![[ID_8]], metadata ptr %1, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i1 undef, ![[VAR_4]], !DIExpression(), ![[ID_8]], ptr %1, !DIExpression(), +; CHECK-NEXT: #dbg_assign(i1 undef, ![[VAR_5]], !DIExpression(), ![[ID_8]], ptr %1, !DIExpression(), br label %return, !dbg !99 if.else: ; preds = %entry @@ -231,8 +231,8 @@ if.else: ; preds = %entry %2 = bitcast ptr %retval to ptr, !dbg !101 call void @llvm.memcpy.p0.p0.i64(ptr align 4 %2, ptr align 4 @InnerB, i64 8, i1 false), !dbg !101, !tbaa.struct !98 ; CHECK: call void @llvm.memcpy{{.*}}, !DIAssignID ![[ID_9:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata ![[VAR_4]], metadata !DIExpression(), metadata ![[ID_9]], metadata ptr %2, metadata !DIExpression()) -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata ![[VAR_5]], metadata !DIExpression(), metadata ![[ID_9]], metadata ptr %2, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i1 undef, ![[VAR_4]], !DIExpression(), ![[ID_9]], ptr %2, !DIExpression(), +; CHECK-NEXT: #dbg_assign(i1 undef, ![[VAR_5]], !DIExpression(), ![[ID_9]], ptr %2, !DIExpression(), br label %return, !dbg !102 return: ; preds = %if.else, 
%if.then @@ -255,7 +255,7 @@ return: ; preds = %if.else, %if.then define dso_local void @_Z4sretv(ptr noalias sret(%struct.Large) align 4 %agg.result) #0 !dbg !104 { ; CHECK-LABEL: define dso_local void @_Z4sretv entry: -; CHECK: call void @llvm.dbg.declare +; CHECK: #dbg_declare call void @llvm.dbg.declare(metadata ptr %agg.result, metadata !108, metadata !DIExpression()), !dbg !109 %0 = bitcast ptr %agg.result to ptr, !dbg !110 call void @llvm.memcpy.p0.p0.i64(ptr align 4 %0, ptr align 4 @L, i64 40, i1 false), !dbg !110, !tbaa.struct !111 @@ -270,7 +270,7 @@ entry: define dso_local void @_Z5byval5Large(ptr noundef byval(%struct.Large) align 8 %X) #0 !dbg !114 { ; CHECK-LABEL: define dso_local void @_Z5byval5Large entry: -; CHECK: llvm.dbg.declare +; CHECK: #dbg_declare call void @llvm.dbg.declare(metadata ptr %X, metadata !118, metadata !DIExpression()), !dbg !119 ret void, !dbg !120 } @@ -294,7 +294,7 @@ entry: %0 = bitcast ptr %agg.result to ptr store ptr %0, ptr %result.ptr, align 8 call void @llvm.dbg.declare(metadata ptr %result.ptr, metadata !126, metadata !DIExpression(DW_OP_deref)), !dbg !127 -; CHECK: call void @llvm.dbg.declare +; CHECK: #dbg_declare call void @_ZN9LCopyCtorC1Ev(ptr noundef nonnull align 4 dereferenceable(16) %agg.result), !dbg !127 ret void, !dbg !128 } @@ -312,18 +312,18 @@ define dso_local noundef i32 @_Z3funi(i32 noundef %X) !dbg !139 { entry: %Y.addr.i = alloca i32, align 4 ; CHECK: %Y.addr.i = alloca i32, align 4, !DIAssignID ![[ID_10:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i1 undef, metadata ![[VAR_6:[0-9]+]], metadata !DIExpression(), metadata ![[ID_10]], metadata ptr %Y.addr.i, metadata !DIExpression()), !dbg ![[DBG_0:[0-9]+]] +; CHECK-NEXT: #dbg_assign(i1 undef, ![[VAR_6:[0-9]+]], !DIExpression(), ![[ID_10]], ptr %Y.addr.i, !DIExpression(), ![[DBG_0:[0-9]+]] %X.addr = alloca i32, align 4 ; CHECK-NEXT: %X.addr = alloca i32, align 4, !DIAssignID ![[ID_11:[0-9]+]] -; CHECK-NEXT: call void 
@llvm.dbg.assign(metadata i1 undef, metadata ![[VAR_7:[0-9]+]], metadata !DIExpression(), metadata ![[ID_11]], metadata ptr %X.addr, metadata !DIExpression()), !dbg ![[DBG_1:[0-9]+]] +; CHECK-NEXT: #dbg_assign(i1 undef, ![[VAR_7:[0-9]+]], !DIExpression(), ![[ID_11]], ptr %X.addr, !DIExpression(), ![[DBG_1:[0-9]+]] store i32 %X, ptr %X.addr, align 4 ; CHECK-NEXT: store i32 %X, ptr %X.addr, align 4, !DIAssignID ![[ID_12:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 %X, metadata ![[VAR_7]], metadata !DIExpression(), metadata ![[ID_12]], metadata ptr %X.addr, metadata !DIExpression()), !dbg ![[DBG_1]] +; CHECK-NEXT: #dbg_assign(i32 %X, ![[VAR_7]], !DIExpression(), ![[ID_12]], ptr %X.addr, !DIExpression(), ![[DBG_1]] call void @llvm.dbg.declare(metadata ptr %X.addr, metadata !140, metadata !DIExpression()), !dbg !141 %0 = load i32, ptr %X.addr, align 4, !dbg !142 store i32 %0, ptr %Y.addr.i, align 4 ; CHECK: store i32 %0, ptr %Y.addr.i, align 4, !DIAssignID ![[ID_13:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.assign(metadata i32 %0, metadata ![[VAR_6]], metadata !DIExpression(), metadata ![[ID_13]], metadata ptr %Y.addr.i, metadata !DIExpression()), !dbg ![[DBG_0]] +; CHECK-NEXT: #dbg_assign(i32 %0, ![[VAR_6]], !DIExpression(), ![[ID_13]], ptr %Y.addr.i, !DIExpression(), ![[DBG_0]] call void @llvm.dbg.declare(metadata ptr %Y.addr.i, metadata !133, metadata !DIExpression()), !dbg !143 %1 = load i32, ptr %Y.addr.i, align 4, !dbg !145 %2 = load i32, ptr %Y.addr.i, align 4, !dbg !146 diff --git a/llvm/test/DebugInfo/Generic/dbg-value-lower-linenos.ll b/llvm/test/DebugInfo/Generic/dbg-value-lower-linenos.ll index 7fffa93008f6cb..f5f7b55ab3f59b 100644 --- a/llvm/test/DebugInfo/Generic/dbg-value-lower-linenos.ll +++ b/llvm/test/DebugInfo/Generic/dbg-value-lower-linenos.ll @@ -15,16 +15,16 @@ ; CHECK-LABEL: bb1: ; CHECK-NEXT: %bar.0 = phi i32 -; CHECK-NEXT: dbg.value(metadata i32 %bar.0,{{.*}}), !dbg ![[UNKNOWN:[0-9]+]] +; CHECK-NEXT: #dbg_value(i32 
%bar.0,{{.*}}, ![[UNKNOWN:[0-9]+]] ; CHECK-NEXT: %totest = load ; CHECK-NEXT: %add = add i32 %bar.0 -; CHECK-NEXT: dbg.value(metadata i32 %add, {{.*}}), !dbg ![[UNKNOWN]] +; CHECK-NEXT: #dbg_value(i32 %add, {{.*}}, ![[UNKNOWN]] ; CHECK-NEXT: %cond = icmp ult ; CHECK-NEXT: br i1 %cond, label %bb1, label %bb2 ; ; CHECK-LABEL: bb2: ; CHECK-NEXT: %toret = add i32 %bar.0, 3 -; CHECK-NEXT: dbg.value(metadata i32 %toret, {{.*}}), !dbg ![[UNKNOWN]] +; CHECK-NEXT: #dbg_value(i32 %toret, {{.*}}, ![[UNKNOWN]] ; CHECK-NEXT: ret i32 %toret define i32 @foo(ptr %bees, ptr %output) { @@ -52,16 +52,16 @@ bb2: ; line number, the other dbg.values should be unknown. ; CHECK-LABEL: define void @bar ; -; CHECK: dbg.value(metadata i32 %map, metadata ![[MAPVAR:[0-9]+]],{{.*}}), -; CHECK-SAME: !dbg ![[UNKNOWN2:[0-9]+]] +; CHECK: #dbg_value(i32 %map, ![[MAPVAR:[0-9]+]],{{.*}}), +; CHECK-SAME: ![[UNKNOWN2:[0-9]+]] ; CHECK-NEXT: store -; CHECK-NEXT: dbg.value(metadata ptr %map.addr, metadata ![[MAPVAR]], -; CHECK-SAME: metadata !DIExpression(DW_OP_deref)), -; CHECK-SAME: !dbg ![[UNKNOWN2]] +; CHECK-NEXT: #dbg_value(ptr %map.addr, ![[MAPVAR]], +; CHECK-SAME: !DIExpression(DW_OP_deref), +; CHECK-SAME: ![[UNKNOWN2]] ; CHECK-NEXT: call ; CHECK-NEXT: load -; CHECK-NEXT: dbg.value(metadata i32 %{{[0-9]+}}, metadata ![[MAPVAR]], -; CHECK-SAME: !dbg ![[UNKNOWN2]] +; CHECK-NEXT: #dbg_value(i32 %{{[0-9]+}}, ![[MAPVAR]], +; CHECK-SAME: ![[UNKNOWN2]] define void @bar(i32 %map) !dbg !20 { entry: diff --git a/llvm/test/DebugInfo/Generic/debug_value_list.ll b/llvm/test/DebugInfo/Generic/debug_value_list.ll index 10c8ae2ef0809b..e1bf355f335d89 100644 --- a/llvm/test/DebugInfo/Generic/debug_value_list.ll +++ b/llvm/test/DebugInfo/Generic/debug_value_list.ll @@ -7,10 +7,10 @@ source_filename = ".\\debug_value_list.cpp" target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-windows-msvc19.16.27034" -; CHECK-COUNT-3: llvm.dbg.value( -; CHECK-SAME: 
metadata !DIArgList(i32 %a, i32 %b, i32 5) -; CHECK-SAME: metadata !16, -; CHECK-SAME: metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus, DW_OP_LLVM_arg, 2, DW_OP_plus) +; CHECK-COUNT-3: #dbg_value( +; CHECK-SAME: !DIArgList(i32 %a, i32 %b, i32 5) +; CHECK-SAME: !16, +; CHECK-SAME: !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus, DW_OP_LLVM_arg, 2, DW_OP_plus) define dso_local i32 @"?foo@@YAHHH@Z"(i32 %a, i32 %b) local_unnamed_addr !dbg !8 { entry: call void @llvm.dbg.value(metadata !DIArgList(i32 %b), metadata !14, metadata !DIExpression(DW_OP_LLVM_arg, 0)), !dbg !17 diff --git a/llvm/test/DebugInfo/Generic/empty-metadata.ll b/llvm/test/DebugInfo/Generic/empty-metadata.ll index 24af10c8e779be..1f73121c6263ed 100644 --- a/llvm/test/DebugInfo/Generic/empty-metadata.ll +++ b/llvm/test/DebugInfo/Generic/empty-metadata.ll @@ -2,7 +2,7 @@ ;; Check that DCE doesn't remove a dbg intrinsic that has an empty metadata ;; operand. -; CHECK: call void @llvm.dbg.declare(metadata ![[EMPTY:[0-9]+]], +; CHECK: #dbg_declare(![[EMPTY:[0-9]+]], ; CHECK: ![[EMPTY]] = !{} define dso_local void @fun() local_unnamed_addr #0 !dbg !9 { diff --git a/llvm/test/DebugInfo/Generic/inline-alloca-ordering.ll b/llvm/test/DebugInfo/Generic/inline-alloca-ordering.ll index 9f401ceb5b6f4d..8644611a68c272 100644 --- a/llvm/test/DebugInfo/Generic/inline-alloca-ordering.ll +++ b/llvm/test/DebugInfo/Generic/inline-alloca-ordering.ll @@ -19,10 +19,9 @@ ; CHECK-NEXT: %1 = alloca [65 x i32], align 16 ; CHECK-NEXT: call void @ext() ; CHECK-NEXT: call void @llvm.lifetime.start.p0( -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 0, metadata !10, metadata !DIExpression()), !dbg !12 +; CHECK-NEXT: #dbg_value(i32 0, !10, !DIExpression(), !12 ; CHECK-NEXT: call void @init(ptr %1) -; CHECK: declare void @llvm.dbg.value(metadata, declare void @ext() declare void @init(ptr) diff --git a/llvm/test/DebugInfo/Generic/inline-dbg-values.ll 
b/llvm/test/DebugInfo/Generic/inline-dbg-values.ll index b0390b9f78f418..f0328bc02d3f30 100644 --- a/llvm/test/DebugInfo/Generic/inline-dbg-values.ll +++ b/llvm/test/DebugInfo/Generic/inline-dbg-values.ll @@ -13,8 +13,8 @@ ; CHECK: %k2.i = alloca i32, align 4 ; CHECK: %0 = load i32, ptr @global_var, align 4, !dbg !9 ; CHECK: store i32 %0, ptr %k.addr.i, align 4 -; CHECK-NEXT: call void @llvm.dbg.value(metadata ptr %k.addr.i, metadata ![[KVAR:[0-9]+]], metadata !DIExpression()), !dbg ![[KLINE:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata ptr %k2.i, metadata ![[K2VAR:[0-9]+]], metadata !DIExpression()), !dbg ![[GLINE:[0-9]+]] +; CHECK-NEXT: #dbg_value(ptr %k.addr.i, ![[KVAR:[0-9]+]], !DIExpression(), ![[KLINE:[0-9]+]] +; CHECK-NEXT: #dbg_value(ptr %k2.i, ![[K2VAR:[0-9]+]], !DIExpression(), ![[GLINE:[0-9]+]] ; CHECK-NEXT: %1 = load i32, ptr %k.addr.i, align 4, ;; ;; dbg.values in this block should be remapped to the local load, but also @@ -22,8 +22,8 @@ ;; ; CHECK: if.then.i: ; CHECK-NEXT: %3 = load i32, ptr %k2.i, -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %3, metadata ![[KVAR]], metadata !DIExpression()), !dbg ![[KLINE]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %foo, metadata ![[K2VAR]], metadata !DIExpression()), !dbg ![[GLINE]] +; CHECK-NEXT: #dbg_value(i32 %3, ![[KVAR]], !DIExpression(), ![[KLINE]] +; CHECK-NEXT: #dbg_value(i32 %foo, ![[K2VAR]], !DIExpression(), ![[GLINE]] ; ;; Similarly, the end block should retain remapped dbg.values, with the second ;; referring to the @global_var load in the entry block. 
Check that we clone @@ -31,21 +31,20 @@ ; ; CHECK: if.end.i: ; CHECK-NEXT: store i32 0, ptr %retval.i, align 4, -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 0, metadata ![[KVAR]], metadata !DIExpression()), !dbg ![[KLINE]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %0, metadata ![[K2VAR]], metadata !DIExpression()), !dbg ![[GLINE]] +; CHECK-NEXT: #dbg_value(i32 0, ![[KVAR]], !DIExpression(), ![[KLINE]] +; CHECK-NEXT: #dbg_value(i32 %0, ![[K2VAR]], !DIExpression(), ![[GLINE]] ; CHECK-NEXT: br label %test.exit, ; ;; More or less the same checks again in the exit block, this time at the head ;; of the block, and on a terminator that gets elided. ; ; CHECK: test.exit: -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %0, metadata ![[KVAR]], metadata !DIExpression()), !dbg ![[KLINE]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %bar, metadata ![[K2VAR]], metadata !DIExpression()), !dbg ![[GLINE]] +; CHECK-NEXT: #dbg_value(i32 %0, ![[KVAR]], !DIExpression(), ![[KLINE]] +; CHECK-NEXT: #dbg_value(i32 %bar, ![[K2VAR]], !DIExpression(), ![[GLINE]] ; CHECK-NEXT: %4 = load i32, ptr %retval.i, align 4, -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 1, metadata ![[KVAR]], metadata !DIExpression()), !dbg ![[KLINE]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 1, metadata ![[K2VAR]], metadata !DIExpression()), !dbg ![[GLINE]] +; CHECK-NEXT: #dbg_value(i32 1, ![[KVAR]], !DIExpression(), ![[KLINE]] +; CHECK-NEXT: #dbg_value(i32 1, ![[K2VAR]], !DIExpression(), ![[GLINE]] ; -; CHECK: declare void @llvm.dbg.value(metadata, ; ;; Test that the metadata maps onto the correct things, and that the DILocations ;; attached to the intrinsics have been inlined. 
diff --git a/llvm/test/DebugInfo/Generic/instcombine-replaced-select-with-operand.ll b/llvm/test/DebugInfo/Generic/instcombine-replaced-select-with-operand.ll index a31ecb6fd2fbf6..4c21c760e6da44 100644 --- a/llvm/test/DebugInfo/Generic/instcombine-replaced-select-with-operand.ll +++ b/llvm/test/DebugInfo/Generic/instcombine-replaced-select-with-operand.ll @@ -29,7 +29,7 @@ ;; of %retval.0.i is also replaced with %0. ; CHECK: if.then: -; CHECK-NEXT: call void @llvm.dbg.value(metadata ptr %0, metadata ![[THIS:[0-9]+]], metadata !DIExpression()) +; CHECK-NEXT: #dbg_value(ptr %0, ![[THIS:[0-9]+]], !DIExpression(), ; CHECK: ![[THIS]] = !DILocalVariable(name: "this", target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/DebugInfo/Generic/ipsccp-remap-assign-id.ll b/llvm/test/DebugInfo/Generic/ipsccp-remap-assign-id.ll index 13ac88de5cded8..0e8f92cacf66d7 100644 --- a/llvm/test/DebugInfo/Generic/ipsccp-remap-assign-id.ll +++ b/llvm/test/DebugInfo/Generic/ipsccp-remap-assign-id.ll @@ -4,10 +4,10 @@ ;; Check the dbg.assign DIAssignID operand gets remapped after cloning. 
; CHECK: %tmp = alloca [4096 x i32], i32 0, align 16, !DIAssignID ![[ID1:[0-9]+]] -; CHECK-NEXT: dbg.assign(metadata i1 undef, metadata !{{.*}}, metadata !DIExpression(), metadata ![[ID1]], metadata ptr %tmp, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i1 undef, !{{.*}}, !DIExpression(), ![[ID1]], ptr %tmp, !DIExpression(), ; ; CHECK: %tmp = alloca [4096 x i32], i32 0, align 16, !DIAssignID ![[ID2:[0-9]+]] -; CHECK-NEXT: dbg.assign(metadata i1 undef, metadata !{{.*}}, metadata !DIExpression(), metadata ![[ID2]], metadata ptr %tmp, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i1 undef, !{{.*}}, !DIExpression(), ![[ID2]], ptr %tmp, !DIExpression(), ; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) declare void @llvm.dbg.declare(metadata, metadata, metadata) diff --git a/llvm/test/DebugInfo/Generic/loop-deletion-inline-var.ll b/llvm/test/DebugInfo/Generic/loop-deletion-inline-var.ll index 372fc316819438..4fa326582edb9a 100644 --- a/llvm/test/DebugInfo/Generic/loop-deletion-inline-var.ll +++ b/llvm/test/DebugInfo/Generic/loop-deletion-inline-var.ll @@ -14,8 +14,8 @@ ;; different inlined instances of a variable as the same variable. 
; CHECK-LABEL: for.cond.cleanup: ; preds = %entry -; CHECK-NEXT: @llvm.dbg.value({{.+}}, metadata ![[P:[0-9]+]],{{.+}}), !dbg ![[DBG1:[0-9]+]] -; CHECK-NEXT: @llvm.dbg.value({{.+}}, metadata ![[P]], {{.+}}), !dbg ![[DBG2:[0-9]+]] +; CHECK-NEXT: #dbg_value({{.+}}, ![[P:[0-9]+]],{{.+}}, ![[DBG1:[0-9]+]] +; CHECK-NEXT: #dbg_value({{.+}}, ![[P]], {{.+}}, ![[DBG2:[0-9]+]] ; CHECK-DAG: ![[P]] = !DILocalVariable(name: "p", ; CHECK-DAG: ![[DBG1]] = !DILocation({{.+}}, inlinedAt: ![[IA1:[0-9]+]]) diff --git a/llvm/test/DebugInfo/Generic/mem2reg-promote-alloca-1.ll b/llvm/test/DebugInfo/Generic/mem2reg-promote-alloca-1.ll index d471e24b2458ed..3d469965d1cfa2 100644 --- a/llvm/test/DebugInfo/Generic/mem2reg-promote-alloca-1.ll +++ b/llvm/test/DebugInfo/Generic/mem2reg-promote-alloca-1.ll @@ -20,8 +20,8 @@ ; CHECK: define dso_local void @fun(i32 %param) ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %param, metadata ![[PARAM:[0-9]+]], metadata !DIExpression()) -; CHECK-NOT: call void @llvm.dbg.value({{.*}}, metadata ![[PARAM]] +; CHECK-NEXT: #dbg_value(i32 %param, ![[PARAM:[0-9]+]], !DIExpression(), +; CHECK-NOT: #dbg_value({{.*}}, ![[PARAM]] ; CHECK: ![[PARAM]] = !DILocalVariable(name: "param", @g = dso_local global i32 0, align 4, !dbg !0 diff --git a/llvm/test/DebugInfo/Generic/mem2reg-promote-alloca-2.ll b/llvm/test/DebugInfo/Generic/mem2reg-promote-alloca-2.ll index 871fc1c6860171..145f122f87b860 100644 --- a/llvm/test/DebugInfo/Generic/mem2reg-promote-alloca-2.ll +++ b/llvm/test/DebugInfo/Generic/mem2reg-promote-alloca-2.ll @@ -28,8 +28,8 @@ ; CHECK: define dso_local void @fun() ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 0, metadata ![[LOCAL:[0-9]+]], metadata !DIExpression()) -; CHECK-NOT: call void @llvm.dbg.value({{.*}}, metadata ![[LOCAL]] +; CHECK-NEXT: #dbg_value(i64 0, ![[LOCAL:[0-9]+]], !DIExpression(), +; CHECK-NOT: #dbg_value({{.*}}, ![[LOCAL]] ; CHECK: ![[LOCAL]] = !DILocalVariable(name: "local", @a = 
dso_local global i64 0, align 8, !dbg !0 diff --git a/llvm/test/DebugInfo/Generic/mem2reg-promote-alloca-3.ll b/llvm/test/DebugInfo/Generic/mem2reg-promote-alloca-3.ll index 5e16376a153adc..1636871ff694b6 100644 --- a/llvm/test/DebugInfo/Generic/mem2reg-promote-alloca-3.ll +++ b/llvm/test/DebugInfo/Generic/mem2reg-promote-alloca-3.ll @@ -21,8 +21,8 @@ ; CHECK: define dso_local void @fun() ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @llvm.dbg.value(metadata ptr null, metadata ![[LOCAL:[0-9]+]], metadata !DIExpression()) -; CHECK-NOT: call void @llvm.dbg.value({{.*}}, metadata ![[LOCAL]] +; CHECK-NEXT: #dbg_value(ptr null, ![[LOCAL:[0-9]+]], !DIExpression(), +; CHECK-NOT: #dbg_value({{.*}}, ![[LOCAL]] ; CHECK: ![[LOCAL]] = !DILocalVariable(name: "local", define dso_local void @fun() !dbg !7 { diff --git a/llvm/test/DebugInfo/Generic/pr40628.ll b/llvm/test/DebugInfo/Generic/pr40628.ll index 0a919400f2f6d8..5a31ae94d21ed7 100644 --- a/llvm/test/DebugInfo/Generic/pr40628.ll +++ b/llvm/test/DebugInfo/Generic/pr40628.ll @@ -11,9 +11,9 @@ ; being assigned the 'undef' value. 
; CHECK: @foo -; CHECK-NEXT: dbg.value(metadata i32 poison, metadata ![[DEADVAR:[0-9]+]], +; CHECK-NEXT: #dbg_value(i32 poison, ![[DEADVAR:[0-9]+]], ; CHECK-NEXT: load -; CHECK-NEXT: dbg.value(metadata i32 %{{[0-9]+}}, metadata ![[LIVEVAR:[0-9]+]], +; CHECK-NEXT: #dbg_value(i32 %{{[0-9]+}}, ![[LIVEVAR:[0-9]+]], ; CHECK-NEXT: store ; CHECK-NEXT: ret diff --git a/llvm/test/DebugInfo/Generic/sroa-larger.ll b/llvm/test/DebugInfo/Generic/sroa-larger.ll index 2121b57c535b53..8c0a8a5a4062c1 100644 --- a/llvm/test/DebugInfo/Generic/sroa-larger.ll +++ b/llvm/test/DebugInfo/Generic/sroa-larger.ll @@ -41,7 +41,7 @@ entry: call void @llvm.lifetime.start.p0(i64 16, ptr %tmp) call void @llvm.dbg.declare(metadata ptr %tmp, metadata !10, metadata !DIExpression()), !dbg !14 ; CHECK-NOT: !DIExpression(DW_OP_LLVM_fragment, 32, 96) - ; CHECK: call void @llvm.dbg.value(metadata i32 0, metadata !10, metadata !DIExpression()) + ; CHECK: #dbg_value(i32 0, !10, !DIExpression(), store i32 0, ptr %tmp, align 8 ret void } diff --git a/llvm/test/DebugInfo/Generic/sroa-samesize.ll b/llvm/test/DebugInfo/Generic/sroa-samesize.ll index 1e70bad225c787..2cff7b5c4c8cc8 100644 --- a/llvm/test/DebugInfo/Generic/sroa-samesize.ll +++ b/llvm/test/DebugInfo/Generic/sroa-samesize.ll @@ -35,7 +35,7 @@ entry: call void @llvm.dbg.declare(metadata ptr %agg.tmp.ensured, metadata !11, metadata !DIExpression()), !dbg !24 %call.i.i = call i32 @"\01?x4@@YAHXZ"(), !dbg !46, !noalias !47 store i32 0, ptr %agg.tmp.ensured, align 4, !dbg !50, !tbaa !57, !alias.scope !47 - ; CHECK: call void @llvm.dbg.value(metadata i32 0, metadata ![[A:.*]], metadata !DIExpression()) + ; CHECK: #dbg_value(i32 0, ![[A:.*]], !DIExpression(), ; CHECK: ![[A]] = !DILocalVariable(name: "a", ret void, !dbg !62 } diff --git a/llvm/test/DebugInfo/Generic/volatile-alloca.ll b/llvm/test/DebugInfo/Generic/volatile-alloca.ll index 891906e3a30f22..a0460e163d8cdb 100644 --- a/llvm/test/DebugInfo/Generic/volatile-alloca.ll +++ 
b/llvm/test/DebugInfo/Generic/volatile-alloca.ll @@ -17,9 +17,9 @@ ; } ; CHECK: alloca i64 -; CHECK-NOT: call void @llvm.dbg.value -; CHECK: call void @llvm.dbg.declare -; CHECK-NOT: call void @llvm.dbg.value +; CHECK-NOT: #dbg_value +; CHECK: #dbg_declare +; CHECK-NOT: #dbg_value source_filename = "volatile.c" diff --git a/llvm/test/DebugInfo/X86/LLVM_implicit_pointer.ll b/llvm/test/DebugInfo/X86/LLVM_implicit_pointer.ll index 7adb047ce77ff6..8a54e402ad26e1 100644 --- a/llvm/test/DebugInfo/X86/LLVM_implicit_pointer.ll +++ b/llvm/test/DebugInfo/X86/LLVM_implicit_pointer.ll @@ -29,16 +29,16 @@ target triple = "x86_64-unknown-linux-gnu" ; Function Attrs: nofree norecurse nounwind uwtable define dso_local i32 @main() local_unnamed_addr !dbg !12 { entry: -; CHECK: call void @llvm.dbg.value(metadata i32 4, metadata [[VAR:![0-9]+]], metadata !DIExpression()) +; CHECK: #dbg_value(i32 4, [[VAR:![0-9]+]], !DIExpression(), call void @llvm.dbg.value(metadata i32 4, metadata !16, metadata !DIExpression()), !dbg !21 %0 = load volatile i32, ptr @v, align 4, !dbg !22, !tbaa !23 %inc = add nsw i32 %0, 1, !dbg !22 store volatile i32 %inc, ptr @v, align 4, !dbg !22, !tbaa !23 -; CHECK: call void @llvm.dbg.value(metadata i32 4, metadata [[PTR1:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_implicit_pointer)) +; CHECK: #dbg_value(i32 4, [[PTR1:![0-9]+]], !DIExpression(DW_OP_LLVM_implicit_pointer), call void @llvm.dbg.value(metadata i32 4, metadata !17, metadata !DIExpression(DW_OP_LLVM_implicit_pointer)), !dbg !21 -; CHECK: call void @llvm.dbg.value(metadata i32 4, metadata [[PTR2:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_implicit_pointer, DW_OP_LLVM_implicit_pointer)) +; CHECK: #dbg_value(i32 4, [[PTR2:![0-9]+]], !DIExpression(DW_OP_LLVM_implicit_pointer, DW_OP_LLVM_implicit_pointer), call void @llvm.dbg.value(metadata i32 4, metadata !19, metadata !DIExpression(DW_OP_LLVM_implicit_pointer, DW_OP_LLVM_implicit_pointer)), !dbg !21 %1 = load volatile i32, ptr @v, align 4, !dbg !27, 
!tbaa !23 %inc1 = add nsw i32 %1, 1, !dbg !27 diff --git a/llvm/test/DebugInfo/X86/array2.ll b/llvm/test/DebugInfo/X86/array2.ll index 4fe9c9feb86ed1..528202ddb936a1 100644 --- a/llvm/test/DebugInfo/X86/array2.ll +++ b/llvm/test/DebugInfo/X86/array2.ll @@ -17,7 +17,7 @@ ; Test that we correctly lower dbg.declares for arrays. ; ; CHECK: define noundef i32 @main -; CHECK: call void @llvm.dbg.value(metadata i32 42, metadata ![[ARRAY:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32)) +; CHECK: #dbg_value(i32 42, ![[ARRAY:[0-9]+]], !DIExpression(DW_OP_LLVM_fragment, 0, 32), ; CHECK: ![[ARRAY]] = !DILocalVariable(name: "array",{{.*}} line: 6 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.9.0" diff --git a/llvm/test/DebugInfo/X86/codegenprep-addrsink.ll b/llvm/test/DebugInfo/X86/codegenprep-addrsink.ll index 3fe4b085525afa..5fabab62ee053d 100644 --- a/llvm/test/DebugInfo/X86/codegenprep-addrsink.ll +++ b/llvm/test/DebugInfo/X86/codegenprep-addrsink.ll @@ -15,7 +15,7 @@ define dso_local i8 @foo(ptr %p, i32 %cond) !dbg !7 { entry: ; There should be no dbg.values in this block. ; CHECK-LABEL: entry: -; CHECK-NOT: dbg.value +; CHECK-NOT: #dbg_value %arith = getelementptr i8, ptr %p, i32 3 %load1 = load i8, ptr %arith %cmpresult = icmp eq i32 %cond, 0 @@ -25,15 +25,15 @@ next: ; Address calcs should be duplicated into this block. One dbg.value should be ; updated, and the other should not. 
; CHECK-LABEL: next: -; CHECK: dbg.value(metadata ptr %arith, metadata ![[DIVAR:[0-9]+]], -; CHECK-SAME: metadata !DIExpression() +; CHECK: #dbg_value(ptr %arith, ![[DIVAR:[0-9]+]], +; CHECK-SAME: !DIExpression() ; CHECK-NEXT: %[[GEPVAR:[0-9a-zA-Z]+]] = getelementptr i8, ptr %p, ; CHECK-SAME: i64 3 ; CHECK-NEXT: %loaded = load i8, ptr %[[GEPVAR]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata ptr %[[GEPVAR]], -; CHECK-SAME: metadata ![[DIVAR]], -; CHECK-NEXT: call void @llvm.dbg.value(metadata !DIArgList(ptr %[[GEPVAR]], -; CHECK-SAME: ptr %[[GEPVAR]]), metadata ![[DIVAR]], +; CHECK-NEXT: #dbg_value(ptr %[[GEPVAR]], +; CHECK-SAME: ![[DIVAR]], +; CHECK-NEXT: #dbg_value(!DIArgList(ptr %[[GEPVAR]], +; CHECK-SAME: ptr %[[GEPVAR]]), ![[DIVAR]], call void @llvm.dbg.value(metadata ptr %arith, metadata !12, metadata !DIExpression()), !dbg !14 %loaded = load i8, ptr %arith call void @llvm.dbg.value(metadata ptr %arith, metadata !12, metadata !DIExpression()), !dbg !14 diff --git a/llvm/test/DebugInfo/X86/codegenprep-value.ll b/llvm/test/DebugInfo/X86/codegenprep-value.ll index 52087b619dc898..e7b613f53214c1 100644 --- a/llvm/test/DebugInfo/X86/codegenprep-value.ll +++ b/llvm/test/DebugInfo/X86/codegenprep-value.ll @@ -19,8 +19,8 @@ ; sign-extended and used later. 
; ; CHECK: define{{.*}}test -; CHECK: call{{.*}}dbg.value(metadata i32 %p -; CHECK: call{{.*}}dbg.value(metadata i32 %add +; CHECK: #dbg_value(i32 %p +; CHECK: #dbg_value(i32 %add ; define dso_local void @_Z4testi(i32 %p) local_unnamed_addr !dbg !7 { entry: diff --git a/llvm/test/DebugInfo/X86/codegenprepare-rollback.ll b/llvm/test/DebugInfo/X86/codegenprepare-rollback.ll index e0cb22fbd57661..fba385381e2445 100644 --- a/llvm/test/DebugInfo/X86/codegenprepare-rollback.ll +++ b/llvm/test/DebugInfo/X86/codegenprepare-rollback.ll @@ -14,13 +14,13 @@ target triple = "x86_64-apple-macosx" ; DEBUGIFY-LABEL: @twoArgsNoPromotion ; DEBUGIFY-NEXT: %add = add -; DEBUGIFY-NEXT: call void @llvm.dbg.value(metadata i32 %add, +; DEBUGIFY-NEXT: #dbg_value(i32 %add, ; DEBUGIFY-NEXT: %sextadd = sext -; DEBUGIFY-NEXT: call void @llvm.dbg.value(metadata i64 %sextadd, +; DEBUGIFY-NEXT: #dbg_value(i64 %sextadd, ; DEBUGIFY-NEXT: %arrayidx = getelementptr -; DEBUGIFY-NEXT: call void @llvm.dbg.value(metadata ptr %arrayidx, +; DEBUGIFY-NEXT: #dbg_value(ptr %arrayidx, ; DEBUGIFY-NEXT: %res = load i8, -; DEBUGIFY-NEXT: call void @llvm.dbg.value(metadata i8 %res, +; DEBUGIFY-NEXT: #dbg_value(i8 %res, ; DEBUGIFY-NEXT: ret i8 %res, define i8 @twoArgsNoPromotion(i32 %arg1, i32 %arg2, ptr %base) { %add = add nsw i32 %arg1, %arg2 diff --git a/llvm/test/DebugInfo/X86/dbg-value-dropped-instcombine.ll b/llvm/test/DebugInfo/X86/dbg-value-dropped-instcombine.ll index afa55629dd0900..2b54d5c20b90c1 100644 --- a/llvm/test/DebugInfo/X86/dbg-value-dropped-instcombine.ll +++ b/llvm/test/DebugInfo/X86/dbg-value-dropped-instcombine.ll @@ -21,8 +21,8 @@ ; } ; CHECK: define dso_local i64 @foo -; CHECK: @llvm.dbg.value(metadata i64 {{.*}}, metadata ![[BEE:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32)), -; CHECK: @llvm.dbg.value(metadata i64 {{.*}}, metadata ![[BEE]], metadata !DIExpression({{.*}}, DW_OP_LLVM_fragment, 32, 32)), +; CHECK: #dbg_value(i64 {{.*}}, ![[BEE:[0-9]+]], 
!DIExpression(DW_OP_LLVM_fragment, 0, 32), +; CHECK: #dbg_value(i64 {{.*}}, ![[BEE]], !DIExpression({{.*}}, DW_OP_LLVM_fragment, 32, 32), target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-unknown" diff --git a/llvm/test/DebugInfo/X86/dead-store-elimination-marks-undef.ll b/llvm/test/DebugInfo/X86/dead-store-elimination-marks-undef.ll index 62d6ede7a47e22..4179b79b42c433 100644 --- a/llvm/test/DebugInfo/X86/dead-store-elimination-marks-undef.ll +++ b/llvm/test/DebugInfo/X86/dead-store-elimination-marks-undef.ll @@ -8,7 +8,7 @@ define dso_local i32 @main() local_unnamed_addr !dbg !7 { %1 = alloca i32, align 4 %2 = load i32, ptr @b, align 1, !dbg !13 - ; CHECK: call void @llvm.dbg.value(metadata i32 poison + ; CHECK: #dbg_value(i32 poison call void @llvm.dbg.value(metadata i32 %2, metadata !12, metadata !DIExpression()), !dbg !13 store i32 %2, ptr %1, align 4, !dbg !13 ret i32 0, !dbg !13 diff --git a/llvm/test/DebugInfo/X86/formal_parameter.ll b/llvm/test/DebugInfo/X86/formal_parameter.ll index 553b20ff09ad29..9ebcb8540bd315 100644 --- a/llvm/test/DebugInfo/X86/formal_parameter.ll +++ b/llvm/test/DebugInfo/X86/formal_parameter.ll @@ -34,9 +34,9 @@ entry: ; Ensure that all dbg intrinsics have the same scope after ; LowerDbgDeclare is finished with them. ; - ; LOWERING: call void @llvm.dbg.value{{.*}}, !dbg ![[LOC:.*]] - ; LOWERING: call void @llvm.dbg.value{{.*}}, !dbg ![[LOC]] - ; LOWERING: call void @llvm.dbg.value{{.*}}, !dbg ![[LOC]] + ; LOWERING: #dbg_value{{.*}}, ![[LOC:[0-9]+]] + ; LOWERING: #dbg_value{{.*}}, ![[LOC]] + ; LOWERING: #dbg_value{{.*}}, ![[LOC]] %0 = load i32, ptr %map.addr, align 4, !dbg !20, !tbaa !15 %call1 = call i32 (i32, ...) 
@verify(i32 %0) #3, !dbg !20 ret void, !dbg !22 diff --git a/llvm/test/DebugInfo/X86/instcombine-demanded-bits-salvage.ll b/llvm/test/DebugInfo/X86/instcombine-demanded-bits-salvage.ll index 198dd6d4756d9d..cb5cfd343ec16b 100644 --- a/llvm/test/DebugInfo/X86/instcombine-demanded-bits-salvage.ll +++ b/llvm/test/DebugInfo/X86/instcombine-demanded-bits-salvage.ll @@ -9,7 +9,7 @@ define dso_local i32 @p() local_unnamed_addr !dbg !11 { entry: %conv = load i32, ptr @a, align 4, !dbg !14 %0 = and i32 %conv, 65535, !dbg !14 - ; CHECK: metadata !DIExpression(DW_OP_constu, 65535, DW_OP_and, DW_OP_stack_value)) + ; CHECK: !DIExpression(DW_OP_constu, 65535, DW_OP_and, DW_OP_stack_value), call void @llvm.dbg.value(metadata i32 %0, metadata !15, metadata !DIExpression()), !dbg !14 %1 = lshr i32 %0, 12, !dbg !14 %2 = and i32 %1, 8, !dbg !14 diff --git a/llvm/test/DebugInfo/X86/instcombine-fold-cast-into-phi.ll b/llvm/test/DebugInfo/X86/instcombine-fold-cast-into-phi.ll index b8259dcfab85da..a91a983be49bc0 100644 --- a/llvm/test/DebugInfo/X86/instcombine-fold-cast-into-phi.ll +++ b/llvm/test/DebugInfo/X86/instcombine-fold-cast-into-phi.ll @@ -5,7 +5,7 @@ ;; deleted). Check the new phi inherits the DebugLoc. 
; CHECK: %[[phi:.*]] = phi i8 [ 1, %{{.*}} ], [ 0, %{{.*}} ], !dbg ![[dbg:[0-9]+]] -; CHECK: call void @llvm.dbg.value(metadata i8 %[[phi]], metadata ![[#]], metadata !DIExpression(DW_OP_LLVM_convert, 8, DW_ATE_signed, DW_OP_LLVM_convert, 32, DW_ATE_signed, DW_OP_stack_value) +; CHECK: #dbg_value(i8 %[[phi]], ![[#]], !DIExpression(DW_OP_LLVM_convert, 8, DW_ATE_signed, DW_OP_LLVM_convert, 32, DW_ATE_signed, DW_OP_stack_value) ; CHECK: ![[dbg]] = !DILocation(line: 123, target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/DebugInfo/X86/instcombine-instrinsics.ll b/llvm/test/DebugInfo/X86/instcombine-instrinsics.ll index ff5e07318d1e70..db14b58b2584a7 100644 --- a/llvm/test/DebugInfo/X86/instcombine-instrinsics.ll +++ b/llvm/test/DebugInfo/X86/instcombine-instrinsics.ll @@ -3,8 +3,8 @@ ; Verify that we emit the same intrinsic at most once. ; rdar://problem/13056109 ; -; CHECK: call void @llvm.dbg.value(metadata ptr %p -; CHECK-NOT: call void @llvm.dbg.value(metadata ptr %p +; CHECK: #dbg_value(ptr %p +; CHECK-NOT: #dbg_value(ptr %p ; CHECK-NEXT: call i32 @foo ; CHECK: ret ; diff --git a/llvm/test/DebugInfo/X86/licm-undef-dbg-value.ll b/llvm/test/DebugInfo/X86/licm-undef-dbg-value.ll index 7559d25da7bba8..ce455f508457a1 100644 --- a/llvm/test/DebugInfo/X86/licm-undef-dbg-value.ll +++ b/llvm/test/DebugInfo/X86/licm-undef-dbg-value.ll @@ -2,7 +2,7 @@ ; RUN: opt -passes=licm %s -S --try-experimental-debuginfo-iterators | FileCheck %s ; CHECK: for.body: -; CHECK-NEXT: llvm.dbg.value(metadata i8 poison +; CHECK-NEXT: #dbg_value(i8 poison ; The load is loop invariant. Check that we leave an undef dbg.value behind ; when licm sinks the instruction. 
diff --git a/llvm/test/DebugInfo/X86/mem2reg_fp80.ll b/llvm/test/DebugInfo/X86/mem2reg_fp80.ll index b00b1dea93f925..342ba5a274a018 100644 --- a/llvm/test/DebugInfo/X86/mem2reg_fp80.ll +++ b/llvm/test/DebugInfo/X86/mem2reg_fp80.ll @@ -13,7 +13,7 @@ entry: if.then: ; preds = %entry ; CHECK-LABEL: if.then: ; CHECK: %mul = fmul x86_fp80 -; CHECK: call void @llvm.dbg.value(metadata x86_fp80 %mul, metadata {{.*}}, metadata !DIExpression()) +; CHECK: #dbg_value(x86_fp80 %mul, {{.*}}, !DIExpression(), %mul = fmul x86_fp80 undef, undef, !dbg !18 store x86_fp80 %mul, ptr %r, align 16, !dbg !18 br label %if.end, !dbg !20 @@ -21,7 +21,7 @@ if.then: ; preds = %entry if.end: ; preds = %if.then, %entry ; CHECK-LABEL: if.end: ; CHECK: %r.0 = phi x86_fp80 -; CHECK: call void @llvm.dbg.value(metadata x86_fp80 %r.0, metadata {{.*}}, metadata !DIExpression()) +; CHECK: #dbg_value(x86_fp80 %r.0, {{.*}}, !DIExpression(), %out = load x86_fp80, ptr %r, align 16, !dbg !21 ret x86_fp80 %out, !dbg !22 } diff --git a/llvm/test/DebugInfo/X86/sroa-after-inlining.ll b/llvm/test/DebugInfo/X86/sroa-after-inlining.ll index d8f2c9a3ade0df..cff4e0171b9d59 100644 --- a/llvm/test/DebugInfo/X86/sroa-after-inlining.ll +++ b/llvm/test/DebugInfo/X86/sroa-after-inlining.ll @@ -34,7 +34,7 @@ ; CHECK: _Z3barv ; CHECK: %[[RESULT:.*]] = call i32 @_Z3foov -; CHECK: llvm.dbg.value(metadata i32 %[[RESULT]], metadata [[METADATA_IDX1:![0-9]+]] +; CHECK: #dbg_value(i32 %[[RESULT]], [[METADATA_IDX1:![0-9]+]] ; CHECK: ret ; CHECK: DICompileUnit ; CHECK: [[METADATA_IDX1]] = !DILocalVariable(name: "result" diff --git a/llvm/test/DebugInfo/X86/sroasplit-1.ll b/llvm/test/DebugInfo/X86/sroasplit-1.ll index 426f522410c7e4..8f739b0ea8fa00 100644 --- a/llvm/test/DebugInfo/X86/sroasplit-1.ll +++ b/llvm/test/DebugInfo/X86/sroasplit-1.ll @@ -22,8 +22,8 @@ ; Verify that SROA creates a variable piece when splitting i1. 
; CHECK: %[[I1:.*]] = alloca [12 x i8], align 4 -; CHECK: call void @llvm.dbg.declare(metadata ptr %[[I1]], metadata ![[VAR:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 32, 96)) -; CHECK: call void @llvm.dbg.value(metadata i32 %[[A:.*]], metadata ![[VAR]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32)) +; CHECK: #dbg_declare(ptr %[[I1]], ![[VAR:[0-9]+]], !DIExpression(DW_OP_LLVM_fragment, 32, 96), +; CHECK: #dbg_value(i32 %[[A:.*]], ![[VAR]], !DIExpression(DW_OP_LLVM_fragment, 0, 32), ; CHECK: ret i32 %[[A]] ; Read Var and Piece: ; CHECK: ![[VAR]] = !DILocalVariable(name: "i1",{{.*}} line: 11, diff --git a/llvm/test/DebugInfo/X86/sroasplit-2.ll b/llvm/test/DebugInfo/X86/sroasplit-2.ll index ddafc6b57f75b5..1b27650e26898c 100644 --- a/llvm/test/DebugInfo/X86/sroasplit-2.ll +++ b/llvm/test/DebugInfo/X86/sroasplit-2.ll @@ -21,10 +21,10 @@ ; ; Verify that SROA creates a variable piece when splitting i1. -; CHECK: call void @llvm.dbg.value(metadata i64 %outer.coerce0, metadata ![[O:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)), -; CHECK: call void @llvm.dbg.value(metadata i32 {{.*}}, metadata ![[O]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 32)), -; CHECK: call void @llvm.dbg.value(metadata i32 {{.*}}, metadata ![[O]], metadata !DIExpression(DW_OP_LLVM_fragment, 96, 32)), -; CHECK: call void @llvm.dbg.value({{.*}}, metadata ![[I1:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32)), +; CHECK: #dbg_value(i64 %outer.coerce0, ![[O:[0-9]+]], !DIExpression(DW_OP_LLVM_fragment, 0, 64), +; CHECK: #dbg_value(i32 {{.*}}, ![[O]], !DIExpression(DW_OP_LLVM_fragment, 64, 32), +; CHECK: #dbg_value(i32 {{.*}}, ![[O]], !DIExpression(DW_OP_LLVM_fragment, 96, 32), +; CHECK: #dbg_value({{.*}}, ![[I1:[0-9]+]], !DIExpression(DW_OP_LLVM_fragment, 0, 32), ; CHECK-DAG: ![[O]] = !DILocalVariable(name: "outer",{{.*}} line: 10 ; CHECK-DAG: ![[I1]] = !DILocalVariable(name: "i1",{{.*}} line: 11 diff --git a/llvm/test/DebugInfo/X86/sroasplit-3.ll 
b/llvm/test/DebugInfo/X86/sroasplit-3.ll index ea70a776cebe56..11ccf7c9f681e3 100644 --- a/llvm/test/DebugInfo/X86/sroasplit-3.ll +++ b/llvm/test/DebugInfo/X86/sroasplit-3.ll @@ -5,7 +5,7 @@ ; Test that SROA updates the debug info correctly if an alloca was rewritten but ; not partitioned into multiple allocas. ; -; CHECK: call void @llvm.dbg.value(metadata float %s.coerce, metadata ![[VAR:[0-9]+]], metadata !DIExpression()) +; CHECK: #dbg_value(float %s.coerce, ![[VAR:[0-9]+]], !DIExpression(), ; CHECK: ![[VAR]] = !DILocalVariable(name: "s",{{.*}} line: 3, ; diff --git a/llvm/test/DebugInfo/X86/sroasplit-4.ll b/llvm/test/DebugInfo/X86/sroasplit-4.ll index d3a6d147f2d3b6..554f80af6c4a96 100644 --- a/llvm/test/DebugInfo/X86/sroasplit-4.ll +++ b/llvm/test/DebugInfo/X86/sroasplit-4.ll @@ -3,11 +3,11 @@ ; ; Test that recursively splitting an alloca updates the debug info correctly. ; CHECK: %[[T:.*]] = load i64, ptr @t, align 8 -; CHECK: call void @llvm.dbg.value(metadata i64 %[[T]], metadata ![[Y:.*]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)) +; CHECK: #dbg_value(i64 %[[T]], ![[Y:.*]], !DIExpression(DW_OP_LLVM_fragment, 0, 64), ; CHECK: %[[T1:.*]] = load i64, ptr @t, align 8 -; CHECK: call void @llvm.dbg.value(metadata i64 %[[T1]], metadata ![[Y]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)) -; CHECK: call void @llvm.dbg.value(metadata i64 %[[T]], metadata ![[R:.*]], metadata !DIExpression(DW_OP_LLVM_fragment, 192, 64)) -; CHECK: call void @llvm.dbg.value(metadata i64 %[[T1]], metadata ![[R]], metadata !DIExpression(DW_OP_LLVM_fragment, 256, 64)) +; CHECK: #dbg_value(i64 %[[T1]], ![[Y]], !DIExpression(DW_OP_LLVM_fragment, 64, 64), +; CHECK: #dbg_value(i64 %[[T]], ![[R:.*]], !DIExpression(DW_OP_LLVM_fragment, 192, 64), +; CHECK: #dbg_value(i64 %[[T1]], ![[R]], !DIExpression(DW_OP_LLVM_fragment, 256, 64), ; ; struct p { ; __SIZE_TYPE__ s; diff --git a/llvm/test/DebugInfo/X86/sroasplit-dbg-declare.ll b/llvm/test/DebugInfo/X86/sroasplit-dbg-declare.ll 
index bebaea13470ede..e1b5812be35e8e 100644 --- a/llvm/test/DebugInfo/X86/sroasplit-dbg-declare.ll +++ b/llvm/test/DebugInfo/X86/sroasplit-dbg-declare.ll @@ -46,13 +46,13 @@ attributes #1 = { nounwind readnone speculatable } !17 = !DILocation(line: 3, column: 18, scope: !7) ; CHECK-NOT: = alloca [9 x i32] -; CHECK-NOT: call void @llvm.dbg.declare(metadata ptr +; CHECK-NOT: #dbg_declare(ptr ; CHECK: %[[VAR1:.*]] = alloca i32 ; CHECK-NEXT: %[[VAR2:.*]] = alloca [8 x i32] -; CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr %[[VAR1]] -; CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr %[[VAR2]] +; CHECK-NEXT: #dbg_declare(ptr %[[VAR1]] +; CHECK-NEXT: #dbg_declare(ptr %[[VAR2]] ; CHECK-NOT: = alloca [9 x i32] -; CHECK-NOT: call void @llvm.dbg.declare(metadata ptr +; CHECK-NOT: #dbg_declare(ptr diff --git a/llvm/test/DebugInfo/assignment-tracking/X86/hotcoldsplit.ll b/llvm/test/DebugInfo/assignment-tracking/X86/hotcoldsplit.ll index f3faba7122afcf..d6de5300a32fee 100644 --- a/llvm/test/DebugInfo/assignment-tracking/X86/hotcoldsplit.ll +++ b/llvm/test/DebugInfo/assignment-tracking/X86/hotcoldsplit.ll @@ -4,7 +4,7 @@ ; CHECK-LABEL: define void @_foo() ; CHECK: common.ret: -; CHECK-NEXT: dbg.assign(metadata i64 0, metadata ![[#]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64), metadata ![[ID1:[0-9]+]], {{.*}}, metadata !DIExpression()) +; CHECK-NEXT: #dbg_assign(i64 0, ![[#]], !DIExpression(DW_OP_LLVM_fragment, 0, 64), ![[ID1:[0-9]+]], {{.*}}, !DIExpression(), ; CHECK-LABEL: define internal void @_foo.cold.1() ; CHECK: store i64 0, ptr null, align 8, !DIAssignID ![[ID2:[0-9]+]] diff --git a/llvm/test/DebugInfo/duplicate_dbgvalue.ll b/llvm/test/DebugInfo/duplicate_dbgvalue.ll index 685e666b2ffb8a..6f5e3db7105627 100644 --- a/llvm/test/DebugInfo/duplicate_dbgvalue.ll +++ b/llvm/test/DebugInfo/duplicate_dbgvalue.ll @@ -2,7 +2,7 @@ ; RUN: opt --try-experimental-debuginfo-iterators -passes=instcombine -S -o - < %s | FileCheck %s ; CHECK-LABEL: %3 = load i32, ptr 
%i1_311 -; CHECK: call void @llvm.dbg.value(metadata i32 %3 +; CHECK: #dbg_value(i32 %3 ; Next instruction should not be duplicate dbg.value intrinsic. ; CHECK-NEXT: @f90io_sc_i_ldw diff --git a/llvm/test/DebugInfo/instcombine-sink-latest-assignment.ll b/llvm/test/DebugInfo/instcombine-sink-latest-assignment.ll index bcdcfef8a43121..4b304496f2efff 100644 --- a/llvm/test/DebugInfo/instcombine-sink-latest-assignment.ll +++ b/llvm/test/DebugInfo/instcombine-sink-latest-assignment.ll @@ -3,7 +3,7 @@ ; ; CHECK-LABEL: for.body: ; CHECK-NEXT: %sub.ptr.rhs.cast.i.i = ptrtoint ptr %call2.i.i to i64, -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i64 %sub.ptr.rhs.cast.i.i, metadata !{{[0-9]*}}, metadata !DIExpression(DW_OP_LLVM_convert, 64, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_constu, 1, DW_OP_minus, DW_OP_stack_value) +; CHECK-NEXT: #dbg_value(i64 %sub.ptr.rhs.cast.i.i, !{{[0-9]*}}, !DIExpression(DW_OP_LLVM_convert, 64, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_constu, 1, DW_OP_minus, DW_OP_stack_value) ; ;; The code below is representative of a common situation: where we've had a ;; loop be completely optimised out, leaving dbg.values representing the diff --git a/llvm/test/DebugInfo/salvage-cast-debug-info.ll b/llvm/test/DebugInfo/salvage-cast-debug-info.ll index b72f717a4f2de7..747ad45e7dc1ed 100644 --- a/llvm/test/DebugInfo/salvage-cast-debug-info.ll +++ b/llvm/test/DebugInfo/salvage-cast-debug-info.ll @@ -2,9 +2,9 @@ ; RUN: opt --experimental-debuginfo-iterators=false %s -passes=debugify,early-cse -earlycse-debug-hash -S | FileCheck %s define i32 @foo(i64 %nose, i32 %more) { ; CHECK-LABEL: @foo( -; CHECK: call void @llvm.dbg.value(metadata i64 %nose, metadata [[V1:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_convert, 64, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned -; CHECK: call void @llvm.dbg.value(metadata i64 %nose.shift, metadata [[V2:![0-9]+]] -; CHECK: call void @llvm.dbg.value(metadata i64 
%nose.shift, metadata [[V3:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_convert, 64, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned +; CHECK: #dbg_value(i64 %nose, [[V1:![0-9]+]], !DIExpression(DW_OP_LLVM_convert, 64, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned +; CHECK: #dbg_value(i64 %nose.shift, [[V2:![0-9]+]] +; CHECK: #dbg_value(i64 %nose.shift, [[V3:![0-9]+]], !DIExpression(DW_OP_LLVM_convert, 64, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned entry: %nose.trunc = trunc i64 %nose to i32 diff --git a/llvm/test/DebugInfo/salvage-duplicate-values.ll b/llvm/test/DebugInfo/salvage-duplicate-values.ll index 8764dbc1dfc77d..217512577400ec 100644 --- a/llvm/test/DebugInfo/salvage-duplicate-values.ll +++ b/llvm/test/DebugInfo/salvage-duplicate-values.ll @@ -3,13 +3,13 @@ ; Tests the results of salvaging variadic dbg.values that use the same SSA value ; multiple times. -; CHECK: call void @llvm.dbg.value(metadata !DIArgList(i32 %a, i32 %a), +; CHECK: #dbg_value(!DIArgList(i32 %a, i32 %a), ; CHECK-SAME: ![[VAR_C:[0-9]+]], -; CHECK-SAME: !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_plus_uconst, 5, DW_OP_LLVM_arg, 1, DW_OP_plus_uconst, 5, DW_OP_plus, DW_OP_stack_value)) +; CHECK-SAME: !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_plus_uconst, 5, DW_OP_LLVM_arg, 1, DW_OP_plus_uconst, 5, DW_OP_plus, DW_OP_stack_value), -; CHECK: call void @llvm.dbg.value(metadata !DIArgList(i32 %a, i32 %a, i32 %b, i32 %b), +; CHECK: #dbg_value(!DIArgList(i32 %a, i32 %a, i32 %b, i32 %b), ; CHECK-SAME: ![[VAR_C]], -; CHECK-SAME: !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 2, DW_OP_plus, DW_OP_LLVM_arg, 1, DW_OP_LLVM_arg, 3, DW_OP_plus, DW_OP_plus, DW_OP_stack_value)) +; CHECK-SAME: !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 2, DW_OP_plus, DW_OP_LLVM_arg, 1, DW_OP_LLVM_arg, 3, DW_OP_plus, DW_OP_plus, DW_OP_stack_value), ; CHECK: ![[VAR_C]] = !DILocalVariable(name: "c" diff --git a/llvm/test/DebugInfo/salvage-gep.ll b/llvm/test/DebugInfo/salvage-gep.ll index 
01191da1ed8fa7..5973fc4f6c0933 100644 --- a/llvm/test/DebugInfo/salvage-gep.ll +++ b/llvm/test/DebugInfo/salvage-gep.ll @@ -9,15 +9,15 @@ %zero = type [0 x [10 x i32]] ;; The constant and variable offsets should be applied correctly. -; CHECK: call void @llvm.dbg.value(metadata !DIArgList(ptr %ptr, i64 %offset), +; CHECK: #dbg_value(!DIArgList(ptr %ptr, i64 %offset), ; CHECK-SAME: ![[VAR_OFFSET_PTR:[0-9]+]], -; CHECK-SAME: !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_constu, 8, DW_OP_mul, DW_OP_plus, DW_OP_plus_uconst, 4, DW_OP_stack_value)) +; CHECK-SAME: !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_constu, 8, DW_OP_mul, DW_OP_plus, DW_OP_plus_uconst, 4, DW_OP_stack_value), ;; The variable offset should be ignored, as it applies to a type of width 0, ;; leaving only the constant offset. -; CHECK: call void @llvm.dbg.value(metadata ptr %zptr, +; CHECK: #dbg_value(ptr %zptr, ; CHECK-SAME: ![[VAR_ZERO_PTR:[0-9]+]], -; CHECK-SAME: !DIExpression(DW_OP_plus_uconst, 44, DW_OP_stack_value)) +; CHECK-SAME: !DIExpression(DW_OP_plus_uconst, 44, DW_OP_stack_value), ; CHECK: ![[VAR_OFFSET_PTR]] = !DILocalVariable(name: "offset_ptr" ; CHECK: ![[VAR_ZERO_PTR]] = !DILocalVariable(name: "zero_ptr" diff --git a/llvm/test/DebugInfo/salvage-icmp.ll b/llvm/test/DebugInfo/salvage-icmp.ll index f47c20e7992e65..de3605b2ffd909 100644 --- a/llvm/test/DebugInfo/salvage-icmp.ll +++ b/llvm/test/DebugInfo/salvage-icmp.ll @@ -3,13 +3,13 @@ ; Tests the results of salvaging variadic dbg.values that use the same SSA value ; multiple times. 
-; CHECK: call void @llvm.dbg.value(metadata i32 %a, +; CHECK: #dbg_value(i32 %a, ; CHECK-SAME: ![[VAR_C:[0-9]+]], -; CHECK-SAME: !DIExpression(DW_OP_lit0, DW_OP_ne, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_lit0, DW_OP_eq, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_constu, 1, DW_OP_gt, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_consts, 18446744073709551615, DW_OP_gt, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_constu, 2, DW_OP_ge, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_consts, 18446744073709551614, DW_OP_ge, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_constu, 3, DW_OP_lt, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_consts, 18446744073709551613, DW_OP_lt, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_constu, 4, DW_OP_le, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_consts, 18446744073709551612, DW_OP_le, DW_OP_stack_value)) +; CHECK-SAME: !DIExpression(DW_OP_lit0, DW_OP_ne, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_lit0, DW_OP_eq, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_constu, 1, DW_OP_gt, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_consts, 18446744073709551615, DW_OP_gt, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_constu, 2, DW_OP_ge, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_consts, 18446744073709551614, DW_OP_ge, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_constu, 3, DW_OP_lt, DW_OP_LLVM_convert, 1, 
DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_consts, 18446744073709551613, DW_OP_lt, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_constu, 4, DW_OP_le, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_consts, 18446744073709551612, DW_OP_le, DW_OP_stack_value), -; CHECK: call void @llvm.dbg.value(metadata !DIArgList(i32 %a, i32 %a, i32 %a, i32 %b, i32 %a, i32 %b, i32 %b, i32 %a, i32 %a, i32 %b, i32 %b), +; CHECK: #dbg_value(!DIArgList(i32 %a, i32 %a, i32 %a, i32 %b, i32 %a, i32 %b, i32 %b, i32 %a, i32 %a, i32 %b, i32 %b), ; CHECK-SAME: ![[VAR_C:[0-9]+]], -; CHECK-SAME: !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 10, DW_OP_ne, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_arg, 9, DW_OP_eq, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_arg, 8, DW_OP_gt, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_arg, 7, DW_OP_gt, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_arg, 6, DW_OP_ge, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_arg, 5, DW_OP_ge, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_arg, 4, DW_OP_lt, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_arg, 3, DW_OP_lt, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_arg, 2, DW_OP_le, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_arg, 1, DW_OP_le, DW_OP_stack_value)) +; CHECK-SAME: !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 10, DW_OP_ne, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_arg, 9, DW_OP_eq, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, 
DW_ATE_unsigned, DW_OP_LLVM_arg, 8, DW_OP_gt, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_arg, 7, DW_OP_gt, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_arg, 6, DW_OP_ge, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_arg, 5, DW_OP_ge, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_arg, 4, DW_OP_lt, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_arg, 3, DW_OP_lt, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_arg, 2, DW_OP_le, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_arg, 1, DW_OP_le, DW_OP_stack_value), ; CHECK: ![[VAR_C]] = !DILocalVariable(name: "c" diff --git a/llvm/test/DebugInfo/salvage-limit-expr-size.ll b/llvm/test/DebugInfo/salvage-limit-expr-size.ll index 379a4ecec43eb9..0ca7fc9175d6c8 100644 --- a/llvm/test/DebugInfo/salvage-limit-expr-size.ll +++ b/llvm/test/DebugInfo/salvage-limit-expr-size.ll @@ -9,13 +9,13 @@ entry: %add.1 = add nsw i32 %a, 5, !dbg !14 %add.2 = add nsw i32 %a, %b, !dbg !14 ;; These expressions should salvage successfully, up to exactly 128 elements. 
- ; CHECK: call void @llvm.dbg.value(metadata i32 %a, metadata ![[VAR_C:[0-9]+]] - ; CHECK-NEXT: call void @llvm.dbg.value(metadata !DIArgList(i32 %a, i32 %b), metadata ![[VAR_C]] + ; CHECK: #dbg_value(i32 %a, ![[VAR_C:[0-9]+]] + ; CHECK-NEXT: #dbg_value(!DIArgList(i32 %a, i32 %b), ![[VAR_C]] call void @llvm.dbg.value(metadata i32 %add.1, metadata !12, metadata !DIExpression(DW_OP_lit0, DW_OP_ne, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, 
DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_stack_value)), !dbg !13 call void @llvm.dbg.value(metadata i32 %add.2, metadata !12, metadata !DIExpression(DW_OP_lit0, DW_OP_ne, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_stack_value)), !dbg !13 ;; These expressions should be set undef, as they would salvage up to exactly 129 elements. 
- ; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 poison, metadata ![[VAR_C]] - ; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 poison, metadata ![[VAR_C]] + ; CHECK-NEXT: #dbg_value(i32 poison, ![[VAR_C]] + ; CHECK-NEXT: #dbg_value(i32 poison, ![[VAR_C]] call void @llvm.dbg.value(metadata i32 %add.1, metadata !12, metadata !DIExpression(DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, 
DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_stack_value)), !dbg !13 call void @llvm.dbg.value(metadata i32 %add.2, metadata !12, metadata !DIExpression(DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_stack_value)), !dbg !13 %mul = mul nsw i32 %a, %b, !dbg !15 diff --git a/llvm/test/DebugInfo/salvage-nonconst-binop.ll b/llvm/test/DebugInfo/salvage-nonconst-binop.ll index 
7b0f9562e5b2f8..f6e5c255b589fa 100644 --- a/llvm/test/DebugInfo/salvage-nonconst-binop.ll +++ b/llvm/test/DebugInfo/salvage-nonconst-binop.ll @@ -3,9 +3,9 @@ ; Tests the salvaging of binary operators that use more than one non-constant ; SSA value. -; CHECK: call void @llvm.dbg.value(metadata !DIArgList(i32 %a, i32 %b), +; CHECK: #dbg_value(!DIArgList(i32 %a, i32 %b), ; CHECK-SAME: ![[VAR_C:[0-9]+]], -; CHECK-SAME: !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus, DW_OP_stack_value)) +; CHECK-SAME: !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus, DW_OP_stack_value), ; CHECK: ![[VAR_C]] = !DILocalVariable(name: "c" diff --git a/llvm/test/Instrumentation/AddressSanitizer/debug_info.ll b/llvm/test/Instrumentation/AddressSanitizer/debug_info.ll index edd63c614857fe..531a53c127b19f 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/debug_info.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/debug_info.ll @@ -28,8 +28,8 @@ entry: ; Note: these dbg.declares used to contain `ptrtoint` operands. The instruction ; selector would then decline to put the variable in the MachineFunction side ; table. Check that the dbg.declares have `alloca` operands. 
-; CHECK: call void @llvm.dbg.declare(metadata ptr [[MyAlloca]], metadata ![[ARG_ID:[0-9]+]], metadata !DIExpression(DW_OP_plus_uconst, 32)) -; CHECK: call void @llvm.dbg.declare(metadata ptr [[MyAlloca]], metadata ![[VAR_ID:[0-9]+]], metadata !DIExpression(DW_OP_plus_uconst, 48)) +; CHECK: #dbg_declare(ptr [[MyAlloca]], ![[ARG_ID:[0-9]+]], !DIExpression(DW_OP_plus_uconst, 32), +; CHECK: #dbg_declare(ptr [[MyAlloca]], ![[VAR_ID:[0-9]+]], !DIExpression(DW_OP_plus_uconst, 48), declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone diff --git a/llvm/test/Instrumentation/AddressSanitizer/local_stack_base.ll b/llvm/test/Instrumentation/AddressSanitizer/local_stack_base.ll index 4e8466b685689b..afa46e44e2824c 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/local_stack_base.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/local_stack_base.ll @@ -19,7 +19,7 @@ entry: ; CHECK: %[[ALLOCA:.*]] = ptrtoint ptr %MyAlloca to i64 ; CHECK: %[[PHI:.*]] = phi i64 {{.*}} %[[ALLOCA]], ; CHECK: store i64 %[[PHI]], ptr %asan_local_stack_base -; CHECK: call void @llvm.dbg.declare(metadata ptr %asan_local_stack_base, metadata [[VAR_I:![0-9]+]], metadata !DIExpression(DW_OP_deref, DW_OP_plus_uconst, 32)), !dbg [[LOC_I:![0-9]+]] +; CHECK: #dbg_declare(ptr %asan_local_stack_base, [[VAR_I:![0-9]+]], !DIExpression(DW_OP_deref, DW_OP_plus_uconst, 32), [[LOC_I:![0-9]+]] %0 = load i32, ptr %i.addr, align 4, !dbg !14 %add = add nsw i32 %0, 2, !dbg !15 ret i32 %add, !dbg !16 diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/RISCV/alloca.ll b/llvm/test/Instrumentation/HWAddressSanitizer/RISCV/alloca.ll index 7c899095ffdef6..23b1043c700165 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/RISCV/alloca.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/RISCV/alloca.ll @@ -11,25 +11,25 @@ target triple = "riscv64-unknown-linux" declare void @use32(ptr) ;. 
-; DYNAMIC-SHADOW: @[[LLVM_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x ptr] [ptr @hwasan.module_ctor], section "llvm.metadata" -; DYNAMIC-SHADOW: @[[LLVM_GLOBAL_CTORS:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 0, ptr @hwasan.module_ctor, ptr @hwasan.module_ctor }] -; DYNAMIC-SHADOW: @[[__START_HWASAN_GLOBALS:[a-zA-Z0-9_$"\\.-]+]] = external hidden constant [0 x i8] -; DYNAMIC-SHADOW: @[[__STOP_HWASAN_GLOBALS:[a-zA-Z0-9_$"\\.-]+]] = external hidden constant [0 x i8] -; DYNAMIC-SHADOW: @[[HWASAN_NOTE:[a-zA-Z0-9_$"\\.-]+]] = private constant { i32, i32, i32, [8 x i8], i32, i32 } { i32 8, i32 8, i32 3, [8 x i8] c"LLVM\00\00\00\00", i32 trunc (i64 sub (i64 ptrtoint (ptr @__start_hwasan_globals to i64), i64 ptrtoint (ptr @hwasan.note to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @__stop_hwasan_globals to i64), i64 ptrtoint (ptr @hwasan.note to i64)) to i32) }, section ".note.hwasan.globals", comdat($hwasan.module_ctor), align 4 -; DYNAMIC-SHADOW: @[[HWASAN_DUMMY_GLOBAL:[a-zA-Z0-9_$"\\.-]+]] = private constant [0 x i8] zeroinitializer, section "hwasan_globals", comdat($hwasan.module_ctor), !associated !0 -; DYNAMIC-SHADOW: @[[__HWASAN_TLS:[a-zA-Z0-9_$"\\.-]+]] = external thread_local(initialexec) global i64 -; DYNAMIC-SHADOW: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [3 x ptr] [ptr @hwasan.note, ptr @hwasan.dummy.global, ptr @__hwasan_tls], section "llvm.metadata" -; DYNAMIC-SHADOW: @[[__HWASAN_SHADOW:[a-zA-Z0-9_$"\\.-]+]] = external global [0 x i8] +; DYNAMIC-SHADOW: @llvm.used = appending global [1 x ptr] [ptr @hwasan.module_ctor], section "llvm.metadata" +; DYNAMIC-SHADOW: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 0, ptr @hwasan.module_ctor, ptr @hwasan.module_ctor }] +; DYNAMIC-SHADOW: @__start_hwasan_globals = external hidden constant [0 x i8] +; DYNAMIC-SHADOW: @__stop_hwasan_globals = external hidden constant [0 x i8] +; DYNAMIC-SHADOW: 
@hwasan.note = private constant { i32, i32, i32, [8 x i8], i32, i32 } { i32 8, i32 8, i32 3, [8 x i8] c"LLVM\00\00\00\00", i32 trunc (i64 sub (i64 ptrtoint (ptr @__start_hwasan_globals to i64), i64 ptrtoint (ptr @hwasan.note to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @__stop_hwasan_globals to i64), i64 ptrtoint (ptr @hwasan.note to i64)) to i32) }, section ".note.hwasan.globals", comdat($hwasan.module_ctor), align 4 +; DYNAMIC-SHADOW: @hwasan.dummy.global = private constant [0 x i8] zeroinitializer, section "hwasan_globals", comdat($hwasan.module_ctor), !associated [[META0:![0-9]+]] +; DYNAMIC-SHADOW: @__hwasan_tls = external thread_local(initialexec) global i64 +; DYNAMIC-SHADOW: @llvm.compiler.used = appending global [3 x ptr] [ptr @hwasan.note, ptr @hwasan.dummy.global, ptr @__hwasan_tls], section "llvm.metadata" +; DYNAMIC-SHADOW: @__hwasan_shadow = external global [0 x i8] ;. -; ZERO-BASED-SHADOW: @[[LLVM_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x ptr] [ptr @hwasan.module_ctor], section "llvm.metadata" -; ZERO-BASED-SHADOW: @[[LLVM_GLOBAL_CTORS:[a-zA-Z0-9_$"\\.-]+]] = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 0, ptr @hwasan.module_ctor, ptr @hwasan.module_ctor }] -; ZERO-BASED-SHADOW: @[[__START_HWASAN_GLOBALS:[a-zA-Z0-9_$"\\.-]+]] = external hidden constant [0 x i8] -; ZERO-BASED-SHADOW: @[[__STOP_HWASAN_GLOBALS:[a-zA-Z0-9_$"\\.-]+]] = external hidden constant [0 x i8] -; ZERO-BASED-SHADOW: @[[HWASAN_NOTE:[a-zA-Z0-9_$"\\.-]+]] = private constant { i32, i32, i32, [8 x i8], i32, i32 } { i32 8, i32 8, i32 3, [8 x i8] c"LLVM\00\00\00\00", i32 trunc (i64 sub (i64 ptrtoint (ptr @__start_hwasan_globals to i64), i64 ptrtoint (ptr @hwasan.note to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @__stop_hwasan_globals to i64), i64 ptrtoint (ptr @hwasan.note to i64)) to i32) }, section ".note.hwasan.globals", comdat($hwasan.module_ctor), align 4 -; ZERO-BASED-SHADOW: @[[HWASAN_DUMMY_GLOBAL:[a-zA-Z0-9_$"\\.-]+]] = private 
constant [0 x i8] zeroinitializer, section "hwasan_globals", comdat($hwasan.module_ctor), !associated !0 -; ZERO-BASED-SHADOW: @[[__HWASAN_TLS:[a-zA-Z0-9_$"\\.-]+]] = external thread_local(initialexec) global i64 -; ZERO-BASED-SHADOW: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [3 x ptr] [ptr @hwasan.note, ptr @hwasan.dummy.global, ptr @__hwasan_tls], section "llvm.metadata" -; ZERO-BASED-SHADOW: @[[__HWASAN_SHADOW:[a-zA-Z0-9_$"\\.-]+]] = external global [0 x i8] +; ZERO-BASED-SHADOW: @llvm.used = appending global [1 x ptr] [ptr @hwasan.module_ctor], section "llvm.metadata" +; ZERO-BASED-SHADOW: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 0, ptr @hwasan.module_ctor, ptr @hwasan.module_ctor }] +; ZERO-BASED-SHADOW: @__start_hwasan_globals = external hidden constant [0 x i8] +; ZERO-BASED-SHADOW: @__stop_hwasan_globals = external hidden constant [0 x i8] +; ZERO-BASED-SHADOW: @hwasan.note = private constant { i32, i32, i32, [8 x i8], i32, i32 } { i32 8, i32 8, i32 3, [8 x i8] c"LLVM\00\00\00\00", i32 trunc (i64 sub (i64 ptrtoint (ptr @__start_hwasan_globals to i64), i64 ptrtoint (ptr @hwasan.note to i64)) to i32), i32 trunc (i64 sub (i64 ptrtoint (ptr @__stop_hwasan_globals to i64), i64 ptrtoint (ptr @hwasan.note to i64)) to i32) }, section ".note.hwasan.globals", comdat($hwasan.module_ctor), align 4 +; ZERO-BASED-SHADOW: @hwasan.dummy.global = private constant [0 x i8] zeroinitializer, section "hwasan_globals", comdat($hwasan.module_ctor), !associated [[META0:![0-9]+]] +; ZERO-BASED-SHADOW: @__hwasan_tls = external thread_local(initialexec) global i64 +; ZERO-BASED-SHADOW: @llvm.compiler.used = appending global [3 x ptr] [ptr @hwasan.note, ptr @hwasan.dummy.global, ptr @__hwasan_tls], section "llvm.metadata" +; ZERO-BASED-SHADOW: @__hwasan_shadow = external global [0 x i8] ;. 
define void @test_alloca() sanitize_hwaddress !dbg !15 { ; DYNAMIC-SHADOW-LABEL: define void @test_alloca @@ -42,23 +42,23 @@ define void @test_alloca() sanitize_hwaddress !dbg !15 { ; DYNAMIC-SHADOW-NEXT: [[HWASAN_STACK_BASE_TAG:%.*]] = xor i64 [[TMP1]], [[TMP2]] ; DYNAMIC-SHADOW-NEXT: [[HWASAN_UAR_TAG:%.*]] = lshr i64 [[TMP1]], 56 ; DYNAMIC-SHADOW-NEXT: [[X:%.*]] = alloca { i32, [12 x i8] }, align 16 -; DYNAMIC-SHADOW-NEXT: call void @llvm.dbg.value(metadata !DIArgList(ptr [[X]], ptr [[X]]), metadata [[META11:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_tag_offset, 0, DW_OP_LLVM_arg, 1, DW_OP_LLVM_tag_offset, 0, DW_OP_plus, DW_OP_deref)), !dbg [[DBG13:![0-9]+]] -; DYNAMIC-SHADOW-NEXT: [[TMP3:%.*]] = xor i64 [[HWASAN_STACK_BASE_TAG]], 0, !dbg [[DBG10:![0-9]+]] -; DYNAMIC-SHADOW-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG10]] -; DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 72057594037927935, !dbg [[DBG10]] -; DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = shl i64 [[TMP3]], 56, !dbg [[DBG10]] -; DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = or i64 [[TMP5]], [[TMP6]], !dbg [[DBG10]] -; DYNAMIC-SHADOW-NEXT: [[X_HWASAN:%.*]] = inttoptr i64 [[TMP7]] to ptr, !dbg [[DBG10]] -; DYNAMIC-SHADOW-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP3]] to i8, !dbg [[DBG10]] -; DYNAMIC-SHADOW-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG10]] -; DYNAMIC-SHADOW-NEXT: [[TMP10:%.*]] = and i64 [[TMP9]], 72057594037927935, !dbg [[DBG10]] -; DYNAMIC-SHADOW-NEXT: [[TMP11:%.*]] = lshr i64 [[TMP10]], 4, !dbg [[DBG10]] -; DYNAMIC-SHADOW-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP11]], !dbg [[DBG10]] -; DYNAMIC-SHADOW-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP12]], i32 0, !dbg [[DBG10]] -; DYNAMIC-SHADOW-NEXT: store i8 4, ptr [[TMP13]], align 1, !dbg [[DBG10]] -; DYNAMIC-SHADOW-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i32 15, !dbg [[DBG10]] -; DYNAMIC-SHADOW-NEXT: store i8 [[TMP8]], ptr [[TMP14]], align 1, !dbg [[DBG10]] -; 
DYNAMIC-SHADOW-NEXT: call void @use32(ptr nonnull [[X_HWASAN]]), !dbg [[DBG10]] +; DYNAMIC-SHADOW-NEXT: #dbg_value(!DIArgList(ptr [[X]], ptr [[X]]), [[META10:![0-9]+]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_tag_offset, 0, DW_OP_LLVM_arg, 1, DW_OP_LLVM_tag_offset, 0, DW_OP_plus, DW_OP_deref), [[META12:![0-9]+]]) +; DYNAMIC-SHADOW-NEXT: [[TMP3:%.*]] = xor i64 [[HWASAN_STACK_BASE_TAG]], 0, !dbg [[DBG13:![0-9]+]] +; DYNAMIC-SHADOW-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 72057594037927935, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: [[TMP6:%.*]] = shl i64 [[TMP3]], 56, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: [[TMP7:%.*]] = or i64 [[TMP5]], [[TMP6]], !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: [[X_HWASAN:%.*]] = inttoptr i64 [[TMP7]] to ptr, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP3]] to i8, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: [[TMP10:%.*]] = and i64 [[TMP9]], 72057594037927935, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: [[TMP11:%.*]] = lshr i64 [[TMP10]], 4, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[DOTHWASAN_SHADOW]], i64 [[TMP11]], !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP12]], i32 0, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: store i8 4, ptr [[TMP13]], align 1, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i32 15, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: store i8 [[TMP8]], ptr [[TMP14]], align 1, !dbg [[DBG13]] +; DYNAMIC-SHADOW-NEXT: call void @use32(ptr nonnull [[X_HWASAN]]), !dbg [[DBG13]] ; DYNAMIC-SHADOW-NEXT: [[TMP15:%.*]] = trunc i64 [[HWASAN_UAR_TAG]] to i8, !dbg [[DBG14:![0-9]+]] ; DYNAMIC-SHADOW-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG14]] ; DYNAMIC-SHADOW-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 72057594037927935, !dbg [[DBG14]] @@ -77,23 +77,23 @@ 
define void @test_alloca() sanitize_hwaddress !dbg !15 { ; ZERO-BASED-SHADOW-NEXT: [[HWASAN_STACK_BASE_TAG:%.*]] = xor i64 [[TMP1]], [[TMP2]] ; ZERO-BASED-SHADOW-NEXT: [[HWASAN_UAR_TAG:%.*]] = lshr i64 [[TMP1]], 56 ; ZERO-BASED-SHADOW-NEXT: [[X:%.*]] = alloca { i32, [12 x i8] }, align 16 -; ZERO-BASED-SHADOW-NEXT: call void @llvm.dbg.value(metadata !DIArgList(ptr [[X]], ptr [[X]]), metadata [[META11:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_tag_offset, 0, DW_OP_LLVM_arg, 1, DW_OP_LLVM_tag_offset, 0, DW_OP_plus, DW_OP_deref)), !dbg [[DBG13:![0-9]+]] -; ZERO-BASED-SHADOW-NEXT: [[TMP3:%.*]] = xor i64 [[HWASAN_STACK_BASE_TAG]], 0, !dbg [[DBG10:![0-9]+]] -; ZERO-BASED-SHADOW-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG10]] -; ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 72057594037927935, !dbg [[DBG10]] -; ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = shl i64 [[TMP3]], 56, !dbg [[DBG10]] -; ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = or i64 [[TMP5]], [[TMP6]], !dbg [[DBG10]] -; ZERO-BASED-SHADOW-NEXT: [[X_HWASAN:%.*]] = inttoptr i64 [[TMP7]] to ptr, !dbg [[DBG10]] -; ZERO-BASED-SHADOW-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP3]] to i8, !dbg [[DBG10]] -; ZERO-BASED-SHADOW-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG10]] -; ZERO-BASED-SHADOW-NEXT: [[TMP10:%.*]] = and i64 [[TMP9]], 72057594037927935, !dbg [[DBG10]] -; ZERO-BASED-SHADOW-NEXT: [[TMP11:%.*]] = lshr i64 [[TMP10]], 4, !dbg [[DBG10]] -; ZERO-BASED-SHADOW-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr, !dbg [[DBG10]] -; ZERO-BASED-SHADOW-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP12]], i32 0, !dbg [[DBG10]] -; ZERO-BASED-SHADOW-NEXT: store i8 4, ptr [[TMP13]], align 1, !dbg [[DBG10]] -; ZERO-BASED-SHADOW-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i32 15, !dbg [[DBG10]] -; ZERO-BASED-SHADOW-NEXT: store i8 [[TMP8]], ptr [[TMP14]], align 1, !dbg [[DBG10]] -; ZERO-BASED-SHADOW-NEXT: call void @use32(ptr nonnull [[X_HWASAN]]), !dbg [[DBG10]] +; 
ZERO-BASED-SHADOW-NEXT: #dbg_value(!DIArgList(ptr [[X]], ptr [[X]]), [[META10:![0-9]+]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_tag_offset, 0, DW_OP_LLVM_arg, 1, DW_OP_LLVM_tag_offset, 0, DW_OP_plus, DW_OP_deref), [[META12:![0-9]+]]) +; ZERO-BASED-SHADOW-NEXT: [[TMP3:%.*]] = xor i64 [[HWASAN_STACK_BASE_TAG]], 0, !dbg [[DBG13:![0-9]+]] +; ZERO-BASED-SHADOW-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 72057594037927935, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: [[TMP6:%.*]] = shl i64 [[TMP3]], 56, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: [[TMP7:%.*]] = or i64 [[TMP5]], [[TMP6]], !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: [[X_HWASAN:%.*]] = inttoptr i64 [[TMP7]] to ptr, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP3]] to i8, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: [[TMP10:%.*]] = and i64 [[TMP9]], 72057594037927935, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: [[TMP11:%.*]] = lshr i64 [[TMP10]], 4, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP12]], i32 0, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: store i8 4, ptr [[TMP13]], align 1, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[X]], i32 15, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: store i8 [[TMP8]], ptr [[TMP14]], align 1, !dbg [[DBG13]] +; ZERO-BASED-SHADOW-NEXT: call void @use32(ptr nonnull [[X_HWASAN]]), !dbg [[DBG13]] ; ZERO-BASED-SHADOW-NEXT: [[TMP15:%.*]] = trunc i64 [[HWASAN_UAR_TAG]] to i8, !dbg [[DBG14:![0-9]+]] ; ZERO-BASED-SHADOW-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG14]] ; ZERO-BASED-SHADOW-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 72057594037927935, !dbg [[DBG14]] @@ -132,46 +132,44 @@ declare void @llvm.dbg.value(metadata, metadata, 
metadata) !24 = !DILocation(line: 8, column: 1, scope: !15) ;. ; DYNAMIC-SHADOW: attributes #[[ATTR0]] = { sanitize_hwaddress } -; DYNAMIC-SHADOW: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -; DYNAMIC-SHADOW: attributes #[[ATTR2:[0-9]+]] = { nounwind } -; DYNAMIC-SHADOW: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } -; DYNAMIC-SHADOW: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } +; DYNAMIC-SHADOW: attributes #[[ATTR1:[0-9]+]] = { nounwind } +; DYNAMIC-SHADOW: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; DYNAMIC-SHADOW: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } ;. ; ZERO-BASED-SHADOW: attributes #[[ATTR0]] = { sanitize_hwaddress } -; ZERO-BASED-SHADOW: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -; ZERO-BASED-SHADOW: attributes #[[ATTR2:[0-9]+]] = { nounwind } -; ZERO-BASED-SHADOW: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } -; ZERO-BASED-SHADOW: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } +; ZERO-BASED-SHADOW: attributes #[[ATTR1:[0-9]+]] = { nounwind } +; ZERO-BASED-SHADOW: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; ZERO-BASED-SHADOW: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } ;. 
-; DYNAMIC-SHADOW: [[META0:![0-9]+]] = !{ptr @hwasan.note} -; DYNAMIC-SHADOW: [[META1:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !2, producer: "clang version 13.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !3, splitDebugInlining: false, nameTableKind: None) -; DYNAMIC-SHADOW: [[META2:![0-9]+]] = !DIFile(filename: "alloca.cpp", directory: "/") -; DYNAMIC-SHADOW: [[META3:![0-9]+]] = !{} +; DYNAMIC-SHADOW: [[META0]] = !{ptr @hwasan.note} +; DYNAMIC-SHADOW: [[META1:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: [[META2:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: [[META3:![0-9]+]], splitDebugInlining: false, nameTableKind: None) +; DYNAMIC-SHADOW: [[META2]] = !DIFile(filename: "alloca.cpp", directory: {{.*}}) +; DYNAMIC-SHADOW: [[META3]] = !{} ; DYNAMIC-SHADOW: [[META4:![0-9]+]] = !{i32 7, !"Dwarf Version", i32 4} ; DYNAMIC-SHADOW: [[META5:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3} -; DYNAMIC-SHADOW: [[META6:![0-9]+]] = !{!"clang version 13.0.0"} -; DYNAMIC-SHADOW: [[DBG7]] = distinct !DISubprogram(name: "test_alloca", linkageName: "_Z11test_allocav", scope: !2, file: !2, line: 4, type: !8, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !1, retainedNodes: !3) -; DYNAMIC-SHADOW: [[META8:![0-9]+]] = !DISubroutineType(types: !9) -; DYNAMIC-SHADOW: [[META9:![0-9]+]] = !{null} -; DYNAMIC-SHADOW: [[META11]] = !DILocalVariable(name: "x", scope: !7, file: !2, line: 5, type: [[META12:![0-9]+]]) -; DYNAMIC-SHADOW: [[META12]] = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -; DYNAMIC-SHADOW: [[DBG13]] = !DILocation(line: 0, scope: !7) -; DYNAMIC-SHADOW: [[DBG10]] = !DILocation(line: 7, column: 5, scope: !7) -; DYNAMIC-SHADOW: [[DBG14]] = !DILocation(line: 8, column: 1, scope: !7) +; DYNAMIC-SHADOW: [[META6:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +; 
DYNAMIC-SHADOW: [[DBG7]] = distinct !DISubprogram(name: "test_alloca", linkageName: "_Z11test_allocav", scope: [[META2]], file: [[META2]], line: 4, type: [[META8:![0-9]+]], scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: [[META1]], retainedNodes: [[META3]]) +; DYNAMIC-SHADOW: [[META8]] = !DISubroutineType(types: [[META9:![0-9]+]]) +; DYNAMIC-SHADOW: [[META9]] = !{null} +; DYNAMIC-SHADOW: [[META10]] = !DILocalVariable(name: "x", scope: [[DBG7]], file: [[META2]], line: 5, type: [[META11:![0-9]+]]) +; DYNAMIC-SHADOW: [[META11]] = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +; DYNAMIC-SHADOW: [[META12]] = !DILocation(line: 0, scope: [[DBG7]]) +; DYNAMIC-SHADOW: [[DBG13]] = !DILocation(line: 7, column: 5, scope: [[DBG7]]) +; DYNAMIC-SHADOW: [[DBG14]] = !DILocation(line: 8, column: 1, scope: [[DBG7]]) ;. -; ZERO-BASED-SHADOW: [[META0:![0-9]+]] = !{ptr @hwasan.note} -; ZERO-BASED-SHADOW: [[META1:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !2, producer: "clang version 13.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !3, splitDebugInlining: false, nameTableKind: None) -; ZERO-BASED-SHADOW: [[META2:![0-9]+]] = !DIFile(filename: "alloca.cpp", directory: "/") -; ZERO-BASED-SHADOW: [[META3:![0-9]+]] = !{} +; ZERO-BASED-SHADOW: [[META0]] = !{ptr @hwasan.note} +; ZERO-BASED-SHADOW: [[META1:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: [[META2:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: [[META3:![0-9]+]], splitDebugInlining: false, nameTableKind: None) +; ZERO-BASED-SHADOW: [[META2]] = !DIFile(filename: "alloca.cpp", directory: {{.*}}) +; ZERO-BASED-SHADOW: [[META3]] = !{} ; ZERO-BASED-SHADOW: [[META4:![0-9]+]] = !{i32 7, !"Dwarf Version", i32 4} ; ZERO-BASED-SHADOW: [[META5:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3} -; ZERO-BASED-SHADOW: [[META6:![0-9]+]] = 
!{!"clang version 13.0.0"} -; ZERO-BASED-SHADOW: [[DBG7]] = distinct !DISubprogram(name: "test_alloca", linkageName: "_Z11test_allocav", scope: !2, file: !2, line: 4, type: !8, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !1, retainedNodes: !3) -; ZERO-BASED-SHADOW: [[META8:![0-9]+]] = !DISubroutineType(types: !9) -; ZERO-BASED-SHADOW: [[META9:![0-9]+]] = !{null} -; ZERO-BASED-SHADOW: [[META11]] = !DILocalVariable(name: "x", scope: !7, file: !2, line: 5, type: [[META12:![0-9]+]]) -; ZERO-BASED-SHADOW: [[META12]] = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -; ZERO-BASED-SHADOW: [[DBG13]] = !DILocation(line: 0, scope: !7) -; ZERO-BASED-SHADOW: [[DBG10]] = !DILocation(line: 7, column: 5, scope: !7) -; ZERO-BASED-SHADOW: [[DBG14]] = !DILocation(line: 8, column: 1, scope: !7) +; ZERO-BASED-SHADOW: [[META6:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"} +; ZERO-BASED-SHADOW: [[DBG7]] = distinct !DISubprogram(name: "test_alloca", linkageName: "_Z11test_allocav", scope: [[META2]], file: [[META2]], line: 4, type: [[META8:![0-9]+]], scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: [[META1]], retainedNodes: [[META3]]) +; ZERO-BASED-SHADOW: [[META8]] = !DISubroutineType(types: [[META9:![0-9]+]]) +; ZERO-BASED-SHADOW: [[META9]] = !{null} +; ZERO-BASED-SHADOW: [[META10]] = !DILocalVariable(name: "x", scope: [[DBG7]], file: [[META2]], line: 5, type: [[META11:![0-9]+]]) +; ZERO-BASED-SHADOW: [[META11]] = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +; ZERO-BASED-SHADOW: [[META12]] = !DILocation(line: 0, scope: [[DBG7]]) +; ZERO-BASED-SHADOW: [[DBG13]] = !DILocation(line: 7, column: 5, scope: [[DBG7]]) +; ZERO-BASED-SHADOW: [[DBG14]] = !DILocation(line: 8, column: 1, scope: [[DBG7]]) ;. 
diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/alloca-uninteresting.ll b/llvm/test/Instrumentation/HWAddressSanitizer/alloca-uninteresting.ll index e9f3c3d8b48b9e..1b62e4f06051de 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/alloca-uninteresting.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/alloca-uninteresting.ll @@ -13,7 +13,7 @@ define void @test_dyn_alloca(i32 %n) sanitize_hwaddress !dbg !15 { ; CHECK-LABEL: @test_dyn_alloca( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[X:%.*]] = alloca i32, i32 [[N:%.*]], align 4 -; CHECK-NEXT: call void @llvm.dbg.value(metadata !DIArgList(ptr [[X]], ptr [[X]]), metadata [[META10:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus, DW_OP_deref)), !dbg [[DBG12:![0-9]+]] +; CHECK-NEXT: #dbg_value(!DIArgList(ptr [[X]], ptr [[X]]), [[META10:![0-9]+]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus, DW_OP_deref), [[META12:![0-9]+]]) ; CHECK-NEXT: call void @use32(ptr nonnull [[X]]), !dbg [[DBG13:![0-9]+]] ; CHECK-NEXT: ret void, !dbg [[DBG14:![0-9]+]] ; diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll b/llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll index 9b64c06ddd5556..4bd23ea76c159b 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/alloca.ll @@ -43,7 +43,7 @@ define void @test_alloca() sanitize_hwaddress !dbg !15 { ; DYNAMIC-SHADOW-NEXT: [[HWASAN_STACK_BASE_TAG:%.*]] = xor i64 [[TMP1]], [[TMP2]] ; DYNAMIC-SHADOW-NEXT: [[HWASAN_UAR_TAG:%.*]] = lshr i64 [[TMP1]], 56 ; DYNAMIC-SHADOW-NEXT: [[X:%.*]] = alloca { i32, [12 x i8] }, align 16 -; DYNAMIC-SHADOW-NEXT: tail call void @llvm.dbg.value(metadata !DIArgList(ptr [[X]], ptr [[X]]), metadata [[META10:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_tag_offset, 0, DW_OP_LLVM_arg, 1, DW_OP_LLVM_tag_offset, 0, DW_OP_plus, DW_OP_deref)), !dbg [[DBG12:![0-9]+]] +; DYNAMIC-SHADOW-NEXT: 
#dbg_value(!DIArgList(ptr [[X]], ptr [[X]]), [[META10:![0-9]+]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_tag_offset, 0, DW_OP_LLVM_arg, 1, DW_OP_LLVM_tag_offset, 0, DW_OP_plus, DW_OP_deref), [[META12:![0-9]+]]) ; DYNAMIC-SHADOW-NEXT: [[TMP3:%.*]] = xor i64 [[HWASAN_STACK_BASE_TAG]], 0, !dbg [[DBG13:![0-9]+]] ; DYNAMIC-SHADOW-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG13]] ; DYNAMIC-SHADOW-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 72057594037927935, !dbg [[DBG13]] @@ -78,7 +78,7 @@ define void @test_alloca() sanitize_hwaddress !dbg !15 { ; ZERO-BASED-SHADOW-NEXT: [[HWASAN_STACK_BASE_TAG:%.*]] = xor i64 [[TMP1]], [[TMP2]] ; ZERO-BASED-SHADOW-NEXT: [[HWASAN_UAR_TAG:%.*]] = lshr i64 [[TMP1]], 56 ; ZERO-BASED-SHADOW-NEXT: [[X:%.*]] = alloca { i32, [12 x i8] }, align 16 -; ZERO-BASED-SHADOW-NEXT: tail call void @llvm.dbg.value(metadata !DIArgList(ptr [[X]], ptr [[X]]), metadata [[META10:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_tag_offset, 0, DW_OP_LLVM_arg, 1, DW_OP_LLVM_tag_offset, 0, DW_OP_plus, DW_OP_deref)), !dbg [[DBG12:![0-9]+]] +; ZERO-BASED-SHADOW-NEXT: #dbg_value(!DIArgList(ptr [[X]], ptr [[X]]), [[META10:![0-9]+]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_tag_offset, 0, DW_OP_LLVM_arg, 1, DW_OP_LLVM_tag_offset, 0, DW_OP_plus, DW_OP_deref), [[META12:![0-9]+]]) ; ZERO-BASED-SHADOW-NEXT: [[TMP3:%.*]] = xor i64 [[HWASAN_STACK_BASE_TAG]], 0, !dbg [[DBG13:![0-9]+]] ; ZERO-BASED-SHADOW-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[X]] to i64, !dbg [[DBG13]] ; ZERO-BASED-SHADOW-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 72057594037927935, !dbg [[DBG13]] @@ -151,16 +151,14 @@ declare void @llvm.dbg.value(metadata, metadata, metadata) !24 = !DILocation(line: 8, column: 1, scope: !15) ;. 
; DYNAMIC-SHADOW: attributes #[[ATTR0]] = { sanitize_hwaddress } -; DYNAMIC-SHADOW: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -; DYNAMIC-SHADOW: attributes #[[ATTR2:[0-9]+]] = { nounwind } -; DYNAMIC-SHADOW: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } -; DYNAMIC-SHADOW: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } +; DYNAMIC-SHADOW: attributes #[[ATTR1:[0-9]+]] = { nounwind } +; DYNAMIC-SHADOW: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; DYNAMIC-SHADOW: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } ;. ; ZERO-BASED-SHADOW: attributes #[[ATTR0]] = { sanitize_hwaddress } -; ZERO-BASED-SHADOW: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -; ZERO-BASED-SHADOW: attributes #[[ATTR2:[0-9]+]] = { nounwind } -; ZERO-BASED-SHADOW: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } -; ZERO-BASED-SHADOW: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } +; ZERO-BASED-SHADOW: attributes #[[ATTR1:[0-9]+]] = { nounwind } +; ZERO-BASED-SHADOW: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; ZERO-BASED-SHADOW: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } ;. 
; DYNAMIC-SHADOW: [[META0]] = !{ptr @hwasan.note} ; DYNAMIC-SHADOW: [[META1:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: [[META2:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: [[META3:![0-9]+]], splitDebugInlining: false, nameTableKind: None) @@ -174,7 +172,7 @@ declare void @llvm.dbg.value(metadata, metadata, metadata) ; DYNAMIC-SHADOW: [[META9]] = !{null} ; DYNAMIC-SHADOW: [[META10]] = !DILocalVariable(name: "x", scope: [[DBG7]], file: [[META2]], line: 5, type: [[META11:![0-9]+]]) ; DYNAMIC-SHADOW: [[META11]] = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -; DYNAMIC-SHADOW: [[DBG12]] = !DILocation(line: 0, scope: [[DBG7]]) +; DYNAMIC-SHADOW: [[META12]] = !DILocation(line: 0, scope: [[DBG7]]) ; DYNAMIC-SHADOW: [[DBG13]] = !DILocation(line: 7, column: 5, scope: [[DBG7]]) ; DYNAMIC-SHADOW: [[DBG14]] = !DILocation(line: 8, column: 1, scope: [[DBG7]]) ;. @@ -190,7 +188,7 @@ declare void @llvm.dbg.value(metadata, metadata, metadata) ; ZERO-BASED-SHADOW: [[META9]] = !{null} ; ZERO-BASED-SHADOW: [[META10]] = !DILocalVariable(name: "x", scope: [[DBG7]], file: [[META2]], line: 5, type: [[META11:![0-9]+]]) ; ZERO-BASED-SHADOW: [[META11]] = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) -; ZERO-BASED-SHADOW: [[DBG12]] = !DILocation(line: 0, scope: [[DBG7]]) +; ZERO-BASED-SHADOW: [[META12]] = !DILocation(line: 0, scope: [[DBG7]]) ; ZERO-BASED-SHADOW: [[DBG13]] = !DILocation(line: 7, column: 5, scope: [[DBG7]]) ; ZERO-BASED-SHADOW: [[DBG14]] = !DILocation(line: 8, column: 1, scope: [[DBG7]]) ;. 
diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/dbg-assign-tag-offset.ll b/llvm/test/Instrumentation/HWAddressSanitizer/dbg-assign-tag-offset.ll index ec8d0340de4579..f2eae934cbb501 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/dbg-assign-tag-offset.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/dbg-assign-tag-offset.ll @@ -16,11 +16,11 @@ entry: %nodebug3 = alloca ptr, align 8 ; CHECK: %a = alloca{{.*}} !DIAssignID ![[ID1:[0-9]+]] %a = alloca ptr, align 8, !DIAssignID !13 - ; CHECK: @llvm.dbg.assign{{.*}} metadata ![[ID1]]{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 32) + ; CHECK: #dbg_assign{{.*}} ![[ID1]]{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 32) call void @llvm.dbg.assign(metadata i1 undef, metadata !14, metadata !DIExpression(), metadata !13, metadata ptr %a, metadata !DIExpression()), !dbg !15 ; CHECK: %b = alloca{{.*}} !DIAssignID ![[ID2:[0-9]+]] %b = alloca ptr, align 8, !DIAssignID !16 - ; CHECK: @llvm.dbg.assign{{.*}} metadata ![[ID2]]{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 96) + ; CHECK: #dbg_assign{{.*}} ![[ID2]]{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 96) call void @llvm.dbg.assign(metadata i1 undef, metadata !17, metadata !DIExpression(), metadata !16, metadata ptr %b, metadata !DIExpression()), !dbg !15 call void @g(ptr %nodebug0, ptr %nodebug1, ptr %nodebug2, ptr %nodebug3, ptr %a, ptr %b) ret void, !dbg !18 diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/dbg-declare-tag-offset.ll b/llvm/test/Instrumentation/HWAddressSanitizer/dbg-declare-tag-offset.ll index 50bad19250267c..817673dd928b02 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/dbg-declare-tag-offset.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/dbg-declare-tag-offset.ll @@ -17,14 +17,14 @@ entry: %nodebug2 = alloca ptr %nodebug3 = alloca ptr %a = alloca ptr - ; CHECK: @llvm.dbg.declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 32) + ; CHECK: #dbg_declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 32) call void 
@llvm.dbg.declare(metadata ptr %a, metadata !12, metadata !DIExpression()), !dbg !14 - ; CHECK: @llvm.dbg.declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 32) + ; CHECK: #dbg_declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 32) call void @llvm.dbg.declare(metadata ptr %a, metadata !12, metadata !DIExpression()), !dbg !14 %b = alloca ptr - ; CHECK: @llvm.dbg.declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 96) + ; CHECK: #dbg_declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 96) call void @llvm.dbg.declare(metadata ptr %b, metadata !13, metadata !DIExpression()), !dbg !14 - ; CHECK: @llvm.dbg.declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 96) + ; CHECK: #dbg_declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 96) call void @llvm.dbg.declare(metadata ptr %b, metadata !13, metadata !DIExpression()), !dbg !14 call void @g(ptr %nodebug0, ptr %nodebug1, ptr %nodebug2, ptr %nodebug3, ptr %a, ptr %b) ret void, !dbg !15 diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/dbg-value-tag-offset-nopad.ll b/llvm/test/Instrumentation/HWAddressSanitizer/dbg-value-tag-offset-nopad.ll index 05df221abd8128..efd4bb6f9053c4 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/dbg-value-tag-offset-nopad.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/dbg-value-tag-offset-nopad.ll @@ -6,13 +6,13 @@ target triple = "aarch64-unknown-linux-android24" define dso_local void @f() sanitize_hwaddress !dbg !14 { %a1 = alloca i128, align 4 %a2 = alloca i128, align 4 -; CHECK: call void @llvm.dbg.value(metadata i128 1, {{.*}}, metadata !DIExpression()) +; CHECK: #dbg_value(i128 1, {{.*}}, !DIExpression(), call void @llvm.dbg.value(metadata i128 1, metadata !20, metadata !DIExpression()), !dbg !22 store i128 1, ptr %a2, align 4, !dbg !23, !tbaa !24 -; CHECK: call void @llvm.dbg.value(metadata ptr %a1, {{.*}}, metadata !DIExpression(DW_OP_LLVM_tag_offset, 0, DW_OP_deref)) +; CHECK: #dbg_value(ptr %a1, {{.*}}, !DIExpression(DW_OP_LLVM_tag_offset, 0, DW_OP_deref), call void 
@llvm.dbg.value(metadata ptr %a1, metadata !18, metadata !DIExpression(DW_OP_deref)), !dbg !22 call void @use(ptr nonnull %a1), !dbg !28 -; CHECK: call void @llvm.dbg.value(metadata ptr %a2, {{.*}}, metadata !DIExpression(DW_OP_LLVM_tag_offset, 128, DW_OP_deref)) +; CHECK: #dbg_value(ptr %a2, {{.*}}, !DIExpression(DW_OP_LLVM_tag_offset, 128, DW_OP_deref), call void @llvm.dbg.value(metadata ptr %a2, metadata !20, metadata !DIExpression(DW_OP_deref)), !dbg !22 call void @use(ptr nonnull %a2), !dbg !29 ret void, !dbg !30 diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/dbg-value-tag-offset.ll b/llvm/test/Instrumentation/HWAddressSanitizer/dbg-value-tag-offset.ll index 8cba9fb3592749..2316725515232f 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/dbg-value-tag-offset.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/dbg-value-tag-offset.ll @@ -6,13 +6,13 @@ target triple = "aarch64-unknown-linux-android24" define dso_local void @f() sanitize_hwaddress !dbg !14 { %a1 = alloca i32, align 4 %a2 = alloca i32, align 4 -; CHECK: call void @llvm.dbg.value(metadata i32 1, {{.*}}, metadata !DIExpression()) +; CHECK: #dbg_value(i32 1, {{.*}}, !DIExpression(), call void @llvm.dbg.value(metadata i32 1, metadata !20, metadata !DIExpression()), !dbg !22 store i32 1, ptr %a2, align 4, !dbg !23, !tbaa !24 -; CHECK: call void @llvm.dbg.value(metadata ptr %a1, {{.*}} metadata !DIExpression(DW_OP_LLVM_tag_offset, 0, DW_OP_deref)) +; CHECK: #dbg_value(ptr %a1, {{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 0, DW_OP_deref), call void @llvm.dbg.value(metadata ptr %a1, metadata !18, metadata !DIExpression(DW_OP_deref)), !dbg !22 call void @use(ptr nonnull %a1), !dbg !28 -; CHECK: call void @llvm.dbg.value(metadata ptr %a2, {{.*}} metadata !DIExpression(DW_OP_LLVM_tag_offset, 128, DW_OP_deref)) +; CHECK: #dbg_value(ptr %a2, {{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 128, DW_OP_deref), call void @llvm.dbg.value(metadata ptr %a2, metadata !20, metadata 
!DIExpression(DW_OP_deref)), !dbg !22 call void @use(ptr nonnull %a2), !dbg !29 ret void, !dbg !30 diff --git a/llvm/test/Linker/DbgDeclare.ll b/llvm/test/Linker/DbgDeclare.ll index 5ca04c892f8302..c16f4870c94074 100644 --- a/llvm/test/Linker/DbgDeclare.ll +++ b/llvm/test/Linker/DbgDeclare.ll @@ -4,12 +4,12 @@ ; rdar://13089880 ; CHECK: define i32 @main(i32 %argc, ptr %argv) -; CHECK: call void @llvm.dbg.declare(metadata ptr %argc.addr, metadata !{{[0-9]+}}, metadata {{.*}}) -; CHECK: call void @llvm.dbg.declare(metadata ptr %argv.addr, metadata !{{[0-9]+}}, metadata {{.*}}) +; CHECK: #dbg_declare(ptr %argc.addr, !{{[0-9]+}}, {{.*}}) +; CHECK: #dbg_declare(ptr %argv.addr, !{{[0-9]+}}, {{.*}}) ; CHECK: define void @test(i32 %argc, ptr %argv) -; CHECK: call void @llvm.dbg.declare(metadata ptr %argc.addr, metadata !{{[0-9]+}}, metadata {{.*}}) -; CHECK: call void @llvm.dbg.declare(metadata ptr %argv.addr, metadata !{{[0-9]+}}, metadata {{.*}}) -; CHECK: call void @llvm.dbg.declare(metadata ptr %i, metadata !{{[0-9]+}}, metadata {{.*}}) +; CHECK: #dbg_declare(ptr %argc.addr, !{{[0-9]+}}, {{.*}}) +; CHECK: #dbg_declare(ptr %argv.addr, !{{[0-9]+}}, {{.*}}) +; CHECK: #dbg_declare(ptr %i, !{{[0-9]+}}, {{.*}}) target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.9.0" diff --git a/llvm/test/Linker/debug-info-use-before-def.ll b/llvm/test/Linker/debug-info-use-before-def.ll index d32dd97f4ffafe..c2885894591070 100644 --- a/llvm/test/Linker/debug-info-use-before-def.ll +++ b/llvm/test/Linker/debug-info-use-before-def.ll @@ -6,8 +6,8 @@ ; the value. 
; CHECK-LABEL: @test -; CHECK: call void @llvm.dbg.value(metadata i32 %A, -; CHECK-NEXT: call void @llvm.dbg.value(metadata !DIArgList(i32 0, i32 %A), +; CHECK: #dbg_value(i32 %A, +; CHECK-NEXT: #dbg_value(!DIArgList(i32 0, i32 %A), ; CHECK-NEXT: %A = target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" diff --git a/llvm/test/Transforms/ADCE/adce-salvage-dbg-value.ll b/llvm/test/Transforms/ADCE/adce-salvage-dbg-value.ll index 8af6f01e4f26fc..97156e957d7fd1 100644 --- a/llvm/test/Transforms/ADCE/adce-salvage-dbg-value.ll +++ b/llvm/test/Transforms/ADCE/adce-salvage-dbg-value.ll @@ -14,9 +14,9 @@ declare void @will_return(i32) #1 define void @test(i32 %a) !dbg !6 { ; CHECK-LABEL: @test( ; CHECK-NEXT: [[B:%.*]] = add i32 [[A:%.*]], 1 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[B]] +; CHECK-NEXT: #dbg_value(i32 [[B]] ; CHECK-NEXT: call void @may_not_return(i32 [[B]]) -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[B]], {{.*}}DIExpression(DW_OP_plus_uconst, 1, DW_OP_stack_value) +; CHECK-NEXT: #dbg_value(i32 [[B]], {{.*}}DIExpression(DW_OP_plus_uconst, 1, DW_OP_stack_value) ; CHECK-NEXT: ret void ; %b = add i32 %a, 1, !dbg !12 diff --git a/llvm/test/Transforms/ADCE/debug-info-intrinsic.ll b/llvm/test/Transforms/ADCE/debug-info-intrinsic.ll index 5d87a1cbbe0cb4..8ad76148820a36 100644 --- a/llvm/test/Transforms/ADCE/debug-info-intrinsic.ll +++ b/llvm/test/Transforms/ADCE/debug-info-intrinsic.ll @@ -45,7 +45,7 @@ entry: ; CHECK-LABEL: define void @variable_in_parent_scope( define void @variable_in_parent_scope() !dbg !7 { ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @llvm.dbg.value +; CHECK-NEXT: #dbg_value ; CHECK-NEXT: call void @sink ; CHECK-NEXT: ret void entry: diff --git a/llvm/test/Transforms/AggressiveInstCombine/AArch64/combine_ignore_debug.ll b/llvm/test/Transforms/AggressiveInstCombine/AArch64/combine_ignore_debug.ll index 68455a1f9074ec..317dd88f611f41 100644 --- 
a/llvm/test/Transforms/AggressiveInstCombine/AArch64/combine_ignore_debug.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/AArch64/combine_ignore_debug.ll @@ -16,7 +16,7 @@ define void @test() { ; DBG-LABEL: define void @test() { ; DBG-NEXT: entry: ; DBG-NEXT: [[L1:%.*]] = load i32, ptr @e, align 1 -; DBG-NEXT: call void @llvm.dbg.value(metadata i32 undef, metadata [[META3:![0-9]+]], metadata !DIExpression()), !dbg [[DBG5:![0-9]+]] +; DBG-NEXT: #dbg_value(i32 undef, [[META3:![0-9]+]], !DIExpression(), [[META5:![0-9]+]]) ; DBG-NEXT: store i32 [[L1]], ptr @l, align 1 ; DBG-NEXT: ret void ; diff --git a/llvm/test/Transforms/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll b/llvm/test/Transforms/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll index 59c768a3939976..8db0a28e680587 100644 --- a/llvm/test/Transforms/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll +++ b/llvm/test/Transforms/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll @@ -15,7 +15,7 @@ define void @foo() { define internal void @bar(ptr %p) { ; CHECK-LABEL: define {{.*}}void @bar() -; CHECK-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata !3, metadata !DIExpression()), !dbg !5 +; CHECK-NEXT: #dbg_value(ptr undef, !3, !DIExpression(), !5 call void @llvm.dbg.value(metadata ptr %p, metadata !3, metadata !DIExpression()), !dbg !5 ret void } diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll index 8c02ca4e866083..a4797a093abc05 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/pr33641_remove_arg_dbgvalue.ll @@ -30,7 +30,7 @@ define internal void @bar(%p_t %p) { ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) ; CGSCC-LABEL: define {{[^@]+}}@bar ; CGSCC-SAME: (ptr nocapture nofree readnone [[P:%.*]]) #[[ATTR0]] { -; 
CGSCC-NEXT: tail call void @llvm.dbg.value(metadata ptr [[P]], metadata [[META3:![0-9]+]], metadata !DIExpression()), !dbg [[DBG5:![0-9]+]] +; CGSCC-NEXT: #dbg_value(ptr [[P]], [[META3:![0-9]+]], !DIExpression(), [[META5:![0-9]+]]) ; CGSCC-NEXT: ret void ; call void @llvm.dbg.value(metadata %p_t %p, metadata !4, metadata !5), !dbg !6 @@ -51,10 +51,8 @@ declare void @llvm.dbg.value(metadata, metadata, metadata) !6 = !DILocation(line: 1, column: 1, scope: !3) ;. ; TUNIT: attributes #[[ATTR0]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) } -; TUNIT: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ;. ; CGSCC: attributes #[[ATTR0]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) } -; CGSCC: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ;. ; TUNIT: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C, file: [[META1:![0-9]+]], isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug) ; TUNIT: [[META1]] = !DIFile(filename: "test.c", directory: "") @@ -65,7 +63,7 @@ declare void @llvm.dbg.value(metadata, metadata, metadata) ; CGSCC: [[META2:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3} ; CGSCC: [[META3]] = !DILocalVariable(name: "p", scope: [[META4:![0-9]+]]) ; CGSCC: [[META4]] = distinct !DISubprogram(name: "bar", scope: null, spFlags: DISPFlagDefinition, unit: [[META0]]) -; CGSCC: [[DBG5]] = !DILocation(line: 1, column: 1, scope: [[META4]]) +; CGSCC: [[META5]] = !DILocation(line: 1, column: 1, scope: [[META4]]) ;. ;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: ; TUNIT: {{.*}} diff --git a/llvm/test/Transforms/BDCE/basic.ll b/llvm/test/Transforms/BDCE/basic.ll index ef0b7be2da0dcc..6127f1fdf688b0 100644 --- a/llvm/test/Transforms/BDCE/basic.ll +++ b/llvm/test/Transforms/BDCE/basic.ll @@ -388,7 +388,7 @@ define signext i16 @tar9(i32 signext %x) #0 { entry: %call = tail call signext i32 @foo(i32 signext 5) #0 %and = and i32 %call, 33554432 -; DEBUGIFY: call void @llvm.dbg.value(metadata i32 %call, metadata {{.*}}, metadata !DIExpression(DW_OP_constu, 33554432, DW_OP_and, DW_OP_stack_value)) +; DEBUGIFY: #dbg_value(i32 %call, {{.*}}, !DIExpression(DW_OP_constu, 33554432, DW_OP_and, DW_OP_stack_value), %cast = trunc i32 %call to i16 ret i16 %cast } diff --git a/llvm/test/Transforms/BDCE/dbg-multipleuses.ll b/llvm/test/Transforms/BDCE/dbg-multipleuses.ll index 6b39804e772228..288b46ffca956b 100644 --- a/llvm/test/Transforms/BDCE/dbg-multipleuses.ll +++ b/llvm/test/Transforms/BDCE/dbg-multipleuses.ll @@ -8,7 +8,7 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: entry: ; CHECK-NEXT: tail call void (...) @h() ; CHECK-NEXT: %[[CALL:.*]] = tail call i32 (...) @g() -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 %[[CALL:.*]] +; CHECK-NEXT: #dbg_value(i32 %[[CALL:.*]] define void @f() !dbg !6 { entry: diff --git a/llvm/test/Transforms/BDCE/pr26587.ll b/llvm/test/Transforms/BDCE/pr26587.ll index 44f64d3ecaaa79..d8a87e8016a575 100644 --- a/llvm/test/Transforms/BDCE/pr26587.ll +++ b/llvm/test/Transforms/BDCE/pr26587.ll @@ -8,7 +8,7 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: entry: ; CHECK-NEXT: tail call void (...) @h() ; CHECK-NEXT: %[[CALL:.*]] = tail call i32 (...) 
@g() -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 %[[CALL:.*]] +; CHECK-NEXT: #dbg_value(i32 %[[CALL:.*]] define void @f() !dbg !6 { entry: diff --git a/llvm/test/Transforms/BDCE/pr41925.ll b/llvm/test/Transforms/BDCE/pr41925.ll index e24a93e1c625dd..b8d894e952e8ab 100644 --- a/llvm/test/Transforms/BDCE/pr41925.ll +++ b/llvm/test/Transforms/BDCE/pr41925.ll @@ -18,8 +18,8 @@ define dso_local i32 @main() local_unnamed_addr !dbg !13 { entry: -;CHECK: call void @llvm.dbg.value(metadata i8 2 -;CHECK: call void @llvm.dbg.value(metadata i8 2 +;CHECK: #dbg_value(i8 2 +;CHECK: #dbg_value(i8 2 call void @llvm.dbg.value(metadata i8 2, metadata !17, metadata !DIExpression()), !dbg !18 %.pr = load i8, ptr @b, align 1, !dbg !19 call void @llvm.dbg.value(metadata i8 2, metadata !17, metadata !DIExpression()), !dbg !18 @@ -29,10 +29,10 @@ entry: for.cond2thread-pre-split.preheader: ; preds = %entry br label %for.cond2thread-pre-split, !dbg !23 for.cond2thread-pre-split: ; preds = %for.cond2thread-pre-split.preheader, %for.inc7 -;CHECK: call void @llvm.dbg.value(metadata i8 poison +;CHECK: #dbg_value(i8 poison %l_177.06 = phi i8 [ %l_177.1.lcssa, %for.inc7 ], [ 2, %for.cond2thread-pre-split.preheader ] call void @llvm.dbg.value(metadata i8 %l_177.06, metadata !17, metadata !DIExpression()), !dbg !18 -;CHECK: call void @llvm.dbg.value(metadata i8 poison +;CHECK: #dbg_value(i8 poison %.pr1 = load i8, ptr @a, align 1, !dbg !24 call void @llvm.dbg.value(metadata i8 %l_177.06, metadata !17, metadata !DIExpression()), !dbg !18 %cmp42 = icmp sgt i8 %.pr1, -1, !dbg !27 @@ -42,8 +42,8 @@ for.body6.preheader: ; preds = %for.cond2thread-pre br label %for.body6, !dbg !28 for.body6: ; preds = %for.body6.preheader, %for.body6 -;CHECK: call void @llvm.dbg.value(metadata i8 poison -;CHECK: call void @llvm.dbg.value(metadata i8 poison +;CHECK: #dbg_value(i8 poison +;CHECK: #dbg_value(i8 poison %l_177.13 = phi i8 [ %inc, %for.body6 ], [ %l_177.06, %for.body6.preheader ] call void 
@llvm.dbg.value(metadata i8 %l_177.13, metadata !17, metadata !DIExpression()), !dbg !18 call void @llvm.dbg.value(metadata i8 %l_177.13, metadata !17, metadata !DIExpression(DW_OP_plus_uconst, 1, DW_OP_stack_value)), !dbg !18 @@ -61,7 +61,7 @@ for.inc7.loopexit: ; preds = %for.body6 br label %for.inc7, !dbg !35 for.inc7: ; preds = %for.inc7.loopexit, %for.cond2thread-pre-split -;CHECK: call void @llvm.dbg.value(metadata i8 poison +;CHECK: #dbg_value(i8 poison %l_177.1.lcssa = phi i8 [ %l_177.06, %for.cond2thread-pre-split ], [ %inc.lcssa, %for.inc7.loopexit ], !dbg !18 %1 = load i8, ptr @b, align 1, !dbg !35 %inc8 = add i8 %1, 1, !dbg !35 diff --git a/llvm/test/Transforms/CallSiteSplitting/callsite-split-debug.ll b/llvm/test/Transforms/CallSiteSplitting/callsite-split-debug.ll index 68c906d616c92d..d6d99e672ed172 100644 --- a/llvm/test/Transforms/CallSiteSplitting/callsite-split-debug.ll +++ b/llvm/test/Transforms/CallSiteSplitting/callsite-split-debug.ll @@ -63,23 +63,23 @@ attributes #0 = { nounwind readnone speculatable } ; CHECK-LABEL: @foo ; CHECK-LABEL: bb1.split: -; CHECK-DEBUG: call void @llvm.dbg.value(metadata i16 0, metadata ![[DBG_1:[0-9]+]], {{.*}} -; CHECK-DEBUG: call void @llvm.dbg.value(metadata i16 2, metadata ![[DBG_1]], {{.*}} -; CHECK-DEBUG: call void @llvm.dbg.value(metadata !DIArgList(i16 0, i16 2), {{.*}} -; CHECK-DEBUG: call void @llvm.dbg.value(metadata !DIArgList(i16 2, i16 2), {{.*}} -; CHECK-DEBUG: call void @llvm.dbg.assign(metadata i16 0, metadata ![[DBG_2:[0-9]+]], {{.*}} -; CHECK-DEBUG: call void @llvm.dbg.assign(metadata i16 2, metadata ![[DBG_2]], {{.*}} -; CHECK-DEBUG: call void @llvm.dbg.assign(metadata i16 0, metadata ![[DBG_2]], metadata !DIExpression(), metadata ![[ID_1:[0-9]+]], metadata ptr %a, {{.*}} +; CHECK-DEBUG: #dbg_value(i16 0, ![[DBG_1:[0-9]+]], {{.*}} +; CHECK-DEBUG: #dbg_value(i16 2, ![[DBG_1]], {{.*}} +; CHECK-DEBUG: #dbg_value(!DIArgList(i16 0, i16 2), {{.*}} +; CHECK-DEBUG: #dbg_value(!DIArgList(i16 2, i16 2), 
{{.*}} +; CHECK-DEBUG: #dbg_assign(i16 0, ![[DBG_2:[0-9]+]], {{.*}} +; CHECK-DEBUG: #dbg_assign(i16 2, ![[DBG_2]], {{.*}} +; CHECK-DEBUG: #dbg_assign(i16 0, ![[DBG_2]], !DIExpression(), ![[ID_1:[0-9]+]], ptr %a, {{.*}} ; CHECK: [[TMP1:%[0-9]+]] = call i16 @bar(i16 0, i16 5) ; CHECK-LABEL: bb2.split: -; CHECK-DEBUG: call void @llvm.dbg.value(metadata i16 1, metadata ![[DBG_1]], {{.*}} -; CHECK-DEBUG: call void @llvm.dbg.value(metadata i16 3, metadata ![[DBG_1]], {{.*}} -; CHECK-DEBUG: call void @llvm.dbg.value(metadata !DIArgList(i16 1, i16 3), {{.*}} -; CHECK-DEBUG: call void @llvm.dbg.value(metadata !DIArgList(i16 3, i16 3), {{.*}} -; CHECK-DEBUG: call void @llvm.dbg.assign(metadata i16 1, metadata ![[DBG_2]], {{.*}} -; CHECK-DEBUG: call void @llvm.dbg.assign(metadata i16 3, metadata ![[DBG_2]], {{.*}} -; CHECK-DEBUG: call void @llvm.dbg.assign(metadata i16 1, metadata ![[DBG_2]], metadata !DIExpression(), metadata ![[ID_1:[0-9]+]], metadata ptr %a, {{.*}} +; CHECK-DEBUG: #dbg_value(i16 1, ![[DBG_1]], {{.*}} +; CHECK-DEBUG: #dbg_value(i16 3, ![[DBG_1]], {{.*}} +; CHECK-DEBUG: #dbg_value(!DIArgList(i16 1, i16 3), {{.*}} +; CHECK-DEBUG: #dbg_value(!DIArgList(i16 3, i16 3), {{.*}} +; CHECK-DEBUG: #dbg_assign(i16 1, ![[DBG_2]], {{.*}} +; CHECK-DEBUG: #dbg_assign(i16 3, ![[DBG_2]], {{.*}} +; CHECK-DEBUG: #dbg_assign(i16 1, ![[DBG_2]], !DIExpression(), ![[ID_1:[0-9]+]], ptr %a, {{.*}} ; CHECK: [[TMP2:%[0-9]+]] = call i16 @bar(i16 1, i16 5) ; CHECK-LABEL: CallsiteBB diff --git a/llvm/test/Transforms/CallSiteSplitting/callsite-split-preserve-debug.ll b/llvm/test/Transforms/CallSiteSplitting/callsite-split-preserve-debug.ll index e185286304a686..f54ca6cfe29097 100644 --- a/llvm/test/Transforms/CallSiteSplitting/callsite-split-preserve-debug.ll +++ b/llvm/test/Transforms/CallSiteSplitting/callsite-split-preserve-debug.ll @@ -3,16 +3,15 @@ ;; Test that DebugLocs are preserved, and that dbg.values are duplicated. 
-; CHECK: declare void @llvm.dbg.value(metadata, ; CHECK-LABEL: @test1 -; CHECK: call void @llvm.dbg.value(metadata i32 0, +; CHECK: #dbg_value(i32 0, ; CHECK-NEXT: [[R1:%.+]] = call i32 @callee(i32 0, i32 %dd), !dbg [[DBG1:!.*]] -; CHECK: call void @llvm.dbg.value(metadata i32 0, +; CHECK: #dbg_value(i32 0, ; CHECK-NEXT: [[R2:%.+]] = call i32 @callee(i32 1, i32 %dd), !dbg [[DBG1]] ; CHECK-LABEL: CallSite: ; CHECK-NEXT: phi i32 [ [[R2]], %land.rhs.split ], [ [[R1]], %entry.split ], !dbg [[DBG1]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 1, +; CHECK-NEXT: #dbg_value(i32 1, declare void @llvm.dbg.value(metadata, metadata, metadata) @@ -33,15 +32,15 @@ CallSite: ; preds = %land.rhs, %entry ; CHECK-LABEL: @test2 ; CHECK: [[LV1:%.*]] = load i32, ptr %ptr, align 4, !dbg [[DBG_LV:!.*]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 0, +; CHECK-NEXT: #dbg_value(i32 0, ; CHECK-NEXT: [[R1:%.+]] = call i32 @callee(i32 0, i32 10), !dbg [[DBG_CALL:!.*]] ; CHECK: [[LV2:%.*]] = load i32, ptr %ptr, align 4, !dbg [[DBG_LV]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 0, +; CHECK-NEXT: #dbg_value(i32 0, ; CHECK-NEXT: [[R2:%.+]] = call i32 @callee(i32 0, i32 %i), !dbg [[DBG_CALL]] ; CHECK-LABEL: CallSite: ; CHECK-NEXT: phi i32 [ [[LV1]], %Header.split ], [ [[LV2]], %TBB.split ], !dbg [[DBG_LV]] ; CHECK-NEXT: phi i32 [ [[R1]], %Header.split ], [ [[R2]], %TBB.split ], !dbg [[DBG_CALL]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 1, +; CHECK-NEXT: #dbg_value(i32 1, define void @test2(ptr %ptr, i32 %i) !dbg !19 { Header: diff --git a/llvm/test/Transforms/CodeExtractor/LoopExtractor_alloca.ll b/llvm/test/Transforms/CodeExtractor/LoopExtractor_alloca.ll index 48e7816c79595f..1026e393c9d26a 100644 --- a/llvm/test/Transforms/CodeExtractor/LoopExtractor_alloca.ll +++ b/llvm/test/Transforms/CodeExtractor/LoopExtractor_alloca.ll @@ -11,7 +11,7 @@ ; CHECK-LABEL: define void @test() ; CHECK-NEXT: entry: ; CHECK-NEXT: %v1 = alloca i32 -; CHECK-NEXT: call void 
@llvm.dbg.value(metadata ptr %v1 +; CHECK-NEXT: #dbg_value(ptr %v1 ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 undef, ptr %v1, i64 4, i1 true) ; CHECK-LABEL: define internal void @test.loop2() diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll b/llvm/test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll index cb617671827e72..6d19937e68c868 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll @@ -87,11 +87,11 @@ catch: ; CHECK: catch.dispatch: ; CHECK-NEXT: phi ptr ; CHECK-NEXT: catchswitch -; CHECK-NOT: llvm.dbg.value +; CHECK-NOT: #dbg_value ; CHECK: catch: ; CHECK-NEXT: catchpad -; CHECK-NEXT: call void @llvm.dbg.value +; CHECK-NEXT: #dbg_value } !llvm.dbg.cu = !{!0} diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll b/llvm/test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll index 3a3a5327da8dfb..06909d950addb6 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/cttz-ctlz.ll @@ -53,7 +53,7 @@ define i64 @cttz(i64 %A) { ; DEBUGINFO-NEXT: br label [[COND_END]], !dbg [[DBG12:![0-9]+]] ; DEBUGINFO: cond.end: ; DEBUGINFO-NEXT: [[CTZ:%.*]] = phi i64 [ 64, [[ENTRY:%.*]] ], [ [[Z]], [[COND_FALSE]] ], !dbg [[DBG12]] -; DEBUGINFO-NEXT: tail call void @llvm.dbg.value(metadata i64 [[CTZ]], metadata [[META9:![0-9]+]], metadata !DIExpression()), !dbg [[DBG11]] +; DEBUGINFO-NEXT: #dbg_value(i64 [[CTZ]], [[META9:![0-9]+]], !DIExpression(), [[DBG11]]) ; DEBUGINFO-NEXT: ret i64 [[CTZ]], !dbg [[DBG12]] ; entry: @@ -101,7 +101,7 @@ define i64 @ctlz(i64 %A) { ; DEBUGINFO-NEXT: br label [[COND_END]], !dbg [[DBG17:![0-9]+]] ; DEBUGINFO: cond.end: ; DEBUGINFO-NEXT: [[CTZ:%.*]] = phi i64 [ 64, [[ENTRY:%.*]] ], [ [[Z]], [[COND_FALSE]] ], !dbg [[DBG17]] -; DEBUGINFO-NEXT: tail call void @llvm.dbg.value(metadata i64 [[CTZ]], metadata [[META15:![0-9]+]], metadata !DIExpression()), !dbg [[DBG16]] 
+; DEBUGINFO-NEXT: #dbg_value(i64 [[CTZ]], [[META15:![0-9]+]], !DIExpression(), [[DBG16]]) ; DEBUGINFO-NEXT: ret i64 [[CTZ]], !dbg [[DBG17]] ; entry: diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/select.ll b/llvm/test/Transforms/CodeGenPrepare/X86/select.ll index 08dd77e9e4c3d5..66fe2e9c657586 100644 --- a/llvm/test/Transforms/CodeGenPrepare/X86/select.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/select.ll @@ -15,6 +15,16 @@ define i32 @no_sink(double %a, ptr %b, i32 %x, i32 %y) { ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[X:%.*]], i32 [[Y:%.*]] ; CHECK-NEXT: ret i32 [[SEL]] ; +; DEBUG-LABEL: @no_sink( +; DEBUG-NEXT: entry: +; DEBUG-NEXT: [[LOAD:%.*]] = load double, ptr [[B:%.*]], align 8, !dbg [[DBG15:![0-9]+]] +; DEBUG-NEXT: #dbg_value(double [[LOAD]], [[META9:![0-9]+]], !DIExpression(), [[DBG15]]) +; DEBUG-NEXT: [[CMP:%.*]] = fcmp olt double [[LOAD]], [[A:%.*]], !dbg [[DBG16:![0-9]+]] +; DEBUG-NEXT: #dbg_value(i1 [[CMP]], [[META11:![0-9]+]], !DIExpression(), [[DBG16]]) +; DEBUG-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[X:%.*]], i32 [[Y:%.*]], !dbg [[DBG17:![0-9]+]] +; DEBUG-NEXT: #dbg_value(i32 [[SEL]], [[META13:![0-9]+]], !DIExpression(), [[DBG17]]) +; DEBUG-NEXT: ret i32 [[SEL]], !dbg [[DBG18:![0-9]+]] +; entry: %load = load double, ptr %b, align 8 %cmp = fcmp olt double %load, %a @@ -28,8 +38,8 @@ entry: define float @fdiv_true_sink(float %a, float %b) { ; CHECK-LABEL: @fdiv_true_sink( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[SEL_FR:%.*]] = freeze float [[A:%.*]] -; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[SEL_FR]], 1.000000e+00 +; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze float [[A:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[SEL_FROZEN]], 1.000000e+00 ; CHECK-NEXT: br i1 [[CMP]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]] ; CHECK: select.true.sink: ; CHECK-NEXT: [[DIV:%.*]] = fdiv float [[A]], [[B:%.*]] @@ -40,18 +50,18 @@ define float @fdiv_true_sink(float %a, float %b) { ; ; DEBUG-LABEL: @fdiv_true_sink( ; 
DEBUG-NEXT: entry: -; DEBUG-NEXT: [[SEL_FR:%.*]] = freeze float [[A:%.*]] -; DEBUG-NEXT: [[CMP:%.*]] = fcmp ogt float [[SEL_FR]], 1.000000e+00, !dbg -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i1 [[CMP]] -; DEBUG-NEXT: br i1 [[CMP]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !dbg +; DEBUG-NEXT: [[SEL_FROZEN:%.*]] = freeze float [[A:%.*]] +; DEBUG-NEXT: [[CMP:%.*]] = fcmp ogt float [[SEL_FROZEN]], 1.000000e+00, !dbg [[DBG24:![0-9]+]] +; DEBUG-NEXT: #dbg_value(i1 [[CMP]], [[META22:![0-9]+]], !DIExpression(), [[DBG24]]) +; DEBUG-NEXT: br i1 [[CMP]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_END:%.*]], !dbg [[DBG25:![0-9]+]] ; DEBUG: select.true.sink: -; DEBUG-NEXT: [[DIV:%.*]] = fdiv float [[A]], [[B:%.*]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata float [[DIV]] -; DEBUG-NEXT: br label [[SELECT_END]], !dbg +; DEBUG-NEXT: [[DIV:%.*]] = fdiv float [[A]], [[B:%.*]], !dbg [[DBG26:![0-9]+]] +; DEBUG-NEXT: #dbg_value(float [[DIV]], [[META21:![0-9]+]], !DIExpression(), [[DBG26]]) +; DEBUG-NEXT: br label [[SELECT_END]], !dbg [[DBG27:![0-9]+]] ; DEBUG: select.end: -; DEBUG-NEXT: [[SEL:%.*]] = phi float [ [[DIV]], [[SELECT_TRUE_SINK]] ], [ 2.000000e+00, [[ENTRY:%.*]] ], !dbg -; DEBUG-NEXT: call void @llvm.dbg.value(metadata float [[SEL]] -; DEBUG-NEXT: ret float [[SEL]] +; DEBUG-NEXT: [[SEL:%.*]] = phi float [ [[DIV]], [[SELECT_TRUE_SINK]] ], [ 2.000000e+00, [[ENTRY:%.*]] ], !dbg [[DBG25]] +; DEBUG-NEXT: #dbg_value(float [[SEL]], [[META23:![0-9]+]], !DIExpression(), [[DBG25]]) +; DEBUG-NEXT: ret float [[SEL]], !dbg [[DBG27]] ; entry: %div = fdiv float %a, %b @@ -63,8 +73,8 @@ entry: define float @fdiv_false_sink(float %a, float %b) { ; CHECK-LABEL: @fdiv_false_sink( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[SEL_FR:%.*]] = freeze float [[A:%.*]] -; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[SEL_FR]], 3.000000e+00 +; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze float [[A:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[SEL_FROZEN]], 3.000000e+00 ; 
CHECK-NEXT: br i1 [[CMP]], label [[SELECT_END:%.*]], label [[SELECT_FALSE_SINK:%.*]] ; CHECK: select.false.sink: ; CHECK-NEXT: [[DIV:%.*]] = fdiv float [[A]], [[B:%.*]] @@ -75,18 +85,18 @@ define float @fdiv_false_sink(float %a, float %b) { ; ; DEBUG-LABEL: @fdiv_false_sink( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: [[SEL_FR:%.*]] = freeze float [[A:%.*]] -; DEBUG-NEXT: [[CMP:%.*]] = fcmp ogt float [[SEL_FR]], 3.000000e+00, !dbg !33 -; DEBUG-NEXT: call void @llvm.dbg.value(metadata i1 [[CMP]] -; DEBUG-NEXT: br i1 [[CMP]], label [[SELECT_END:%.*]], label [[SELECT_FALSE_SINK:%.*]], !dbg +; DEBUG-NEXT: [[SEL_FROZEN:%.*]] = freeze float [[A:%.*]] +; DEBUG-NEXT: [[CMP:%.*]] = fcmp ogt float [[SEL_FROZEN]], 3.000000e+00, !dbg [[DBG33:![0-9]+]] +; DEBUG-NEXT: #dbg_value(i1 [[CMP]], [[META31:![0-9]+]], !DIExpression(), [[DBG33]]) +; DEBUG-NEXT: br i1 [[CMP]], label [[SELECT_END:%.*]], label [[SELECT_FALSE_SINK:%.*]], !dbg [[DBG34:![0-9]+]] ; DEBUG: select.false.sink: -; DEBUG-NEXT: [[DIV:%.*]] = fdiv float [[A]], [[B:%.*]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata float [[DIV]] -; DEBUG-NEXT: br label [[SELECT_END]], !dbg +; DEBUG-NEXT: [[DIV:%.*]] = fdiv float [[A]], [[B:%.*]], !dbg [[DBG35:![0-9]+]] +; DEBUG-NEXT: #dbg_value(float [[DIV]], [[META30:![0-9]+]], !DIExpression(), [[DBG35]]) +; DEBUG-NEXT: br label [[SELECT_END]], !dbg [[DBG36:![0-9]+]] ; DEBUG: select.end: -; DEBUG-NEXT: [[SEL:%.*]] = phi float [ 4.000000e+00, [[ENTRY:%.*]] ], [ [[DIV]], [[SELECT_FALSE_SINK]] ], !dbg -; DEBUG-NEXT: call void @llvm.dbg.value(metadata float [[SEL]] -; DEBUG-NEXT: ret float [[SEL]], !dbg +; DEBUG-NEXT: [[SEL:%.*]] = phi float [ 4.000000e+00, [[ENTRY:%.*]] ], [ [[DIV]], [[SELECT_FALSE_SINK]] ], !dbg [[DBG34]] +; DEBUG-NEXT: #dbg_value(float [[SEL]], [[META32:![0-9]+]], !DIExpression(), [[DBG34]]) +; DEBUG-NEXT: ret float [[SEL]], !dbg [[DBG36]] ; entry: %div = fdiv float %a, %b @@ -98,8 +108,8 @@ entry: define float @fdiv_both_sink(float %a, float %b) { ; CHECK-LABEL: 
@fdiv_both_sink( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[SEL_FR:%.*]] = freeze float [[A:%.*]] -; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[SEL_FR]], 5.000000e+00 +; CHECK-NEXT: [[SEL_FROZEN:%.*]] = freeze float [[A:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[SEL_FROZEN]], 5.000000e+00 ; CHECK-NEXT: br i1 [[CMP]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_FALSE_SINK:%.*]] ; CHECK: select.true.sink: ; CHECK-NEXT: [[DIV1:%.*]] = fdiv float [[A]], [[B:%.*]] @@ -111,6 +121,25 @@ define float @fdiv_both_sink(float %a, float %b) { ; CHECK-NEXT: [[SEL:%.*]] = phi float [ [[DIV1]], [[SELECT_TRUE_SINK]] ], [ [[DIV2]], [[SELECT_FALSE_SINK]] ] ; CHECK-NEXT: ret float [[SEL]] ; +; DEBUG-LABEL: @fdiv_both_sink( +; DEBUG-NEXT: entry: +; DEBUG-NEXT: [[SEL_FROZEN:%.*]] = freeze float [[A:%.*]] +; DEBUG-NEXT: [[CMP:%.*]] = fcmp ogt float [[SEL_FROZEN]], 5.000000e+00, !dbg [[DBG43:![0-9]+]] +; DEBUG-NEXT: #dbg_value(i1 [[CMP]], [[META41:![0-9]+]], !DIExpression(), [[DBG43]]) +; DEBUG-NEXT: br i1 [[CMP]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_FALSE_SINK:%.*]], !dbg [[DBG44:![0-9]+]] +; DEBUG: select.true.sink: +; DEBUG-NEXT: [[DIV1:%.*]] = fdiv float [[A]], [[B:%.*]], !dbg [[DBG45:![0-9]+]] +; DEBUG-NEXT: #dbg_value(float [[DIV1]], [[META39:![0-9]+]], !DIExpression(), [[DBG45]]) +; DEBUG-NEXT: br label [[SELECT_END:%.*]], !dbg [[DBG46:![0-9]+]] +; DEBUG: select.false.sink: +; DEBUG-NEXT: [[DIV2:%.*]] = fdiv float [[B]], [[A]], !dbg [[DBG47:![0-9]+]] +; DEBUG-NEXT: #dbg_value(float [[DIV2]], [[META40:![0-9]+]], !DIExpression(), [[DBG47]]) +; DEBUG-NEXT: br label [[SELECT_END]], !dbg [[DBG46]] +; DEBUG: select.end: +; DEBUG-NEXT: [[SEL:%.*]] = phi float [ [[DIV1]], [[SELECT_TRUE_SINK]] ], [ [[DIV2]], [[SELECT_FALSE_SINK]] ], !dbg [[DBG44]] +; DEBUG-NEXT: #dbg_value(float [[SEL]], [[META42:![0-9]+]], !DIExpression(), [[DBG44]]) +; DEBUG-NEXT: ret float [[SEL]], !dbg [[DBG46]] +; entry: %div1 = fdiv float %a, %b %div2 = fdiv float %b, %a @@ -126,9 +155,19 @@ define 
float @unpredictable_select(float %a, float %b) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[DIV:%.*]] = fdiv float [[A:%.*]], [[B:%.*]] ; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[A]], 1.000000e+00 -; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[DIV]], float 2.000000e+00, !unpredictable !0 +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[DIV]], float 2.000000e+00, !unpredictable [[META0:![0-9]+]] ; CHECK-NEXT: ret float [[SEL]] ; +; DEBUG-LABEL: @unpredictable_select( +; DEBUG-NEXT: entry: +; DEBUG-NEXT: [[DIV:%.*]] = fdiv float [[A:%.*]], [[B:%.*]], !dbg [[DBG53:![0-9]+]] +; DEBUG-NEXT: #dbg_value(float [[DIV]], [[META50:![0-9]+]], !DIExpression(), [[DBG53]]) +; DEBUG-NEXT: [[CMP:%.*]] = fcmp ogt float [[A]], 1.000000e+00, !dbg [[DBG54:![0-9]+]] +; DEBUG-NEXT: #dbg_value(i1 [[CMP]], [[META51:![0-9]+]], !DIExpression(), [[DBG54]]) +; DEBUG-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[DIV]], float 2.000000e+00, !dbg [[DBG55:![0-9]+]], !unpredictable [[META7:![0-9]+]] +; DEBUG-NEXT: #dbg_value(float [[SEL]], [[META52:![0-9]+]], !DIExpression(), [[DBG55]]) +; DEBUG-NEXT: ret float [[SEL]], !dbg [[DBG56:![0-9]+]] +; entry: %div = fdiv float %a, %b %cmp = fcmp ogt float %a, 1.0 @@ -146,6 +185,15 @@ define float @fadd_no_sink(float %a, float %b) { ; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float 6.000000e+00, [[A]] ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[ADD]], float 7.000000e+00 ; CHECK-NEXT: ret float [[SEL]] +; +; DEBUG-LABEL: @fadd_no_sink( +; DEBUG-NEXT: [[ADD:%.*]] = fadd float [[A:%.*]], [[B:%.*]], !dbg [[DBG62:![0-9]+]] +; DEBUG-NEXT: #dbg_value(float [[ADD]], [[META59:![0-9]+]], !DIExpression(), [[DBG62]]) +; DEBUG-NEXT: [[CMP:%.*]] = fcmp ogt float 6.000000e+00, [[A]], !dbg [[DBG63:![0-9]+]] +; DEBUG-NEXT: #dbg_value(i1 [[CMP]], [[META60:![0-9]+]], !DIExpression(), [[DBG63]]) +; DEBUG-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[ADD]], float 7.000000e+00, !dbg [[DBG64:![0-9]+]] +; DEBUG-NEXT: #dbg_value(float [[SEL]], [[META61:![0-9]+]], 
!DIExpression(), [[DBG64]]) +; DEBUG-NEXT: ret float [[SEL]], !dbg [[DBG65:![0-9]+]] ; %add = fadd float %a, %b %cmp = fcmp ogt float 6.0, %a @@ -166,6 +214,18 @@ define float @fdiv_no_sink(float %a, float %b) { ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[ADD]], float 8.000000e+00 ; CHECK-NEXT: ret float [[SEL]] ; +; DEBUG-LABEL: @fdiv_no_sink( +; DEBUG-NEXT: entry: +; DEBUG-NEXT: [[DIV:%.*]] = fdiv float [[A:%.*]], [[B:%.*]], !dbg [[DBG72:![0-9]+]] +; DEBUG-NEXT: #dbg_value(float [[DIV]], [[META68:![0-9]+]], !DIExpression(), [[DBG72]]) +; DEBUG-NEXT: [[ADD:%.*]] = fadd float [[DIV]], [[B]], !dbg [[DBG73:![0-9]+]] +; DEBUG-NEXT: #dbg_value(float [[ADD]], [[META69:![0-9]+]], !DIExpression(), [[DBG73]]) +; DEBUG-NEXT: [[CMP:%.*]] = fcmp ogt float [[A]], 1.000000e+00, !dbg [[DBG74:![0-9]+]] +; DEBUG-NEXT: #dbg_value(i1 [[CMP]], [[META70:![0-9]+]], !DIExpression(), [[DBG74]]) +; DEBUG-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[ADD]], float 8.000000e+00, !dbg [[DBG75:![0-9]+]] +; DEBUG-NEXT: #dbg_value(float [[SEL]], [[META71:![0-9]+]], !DIExpression(), [[DBG75]]) +; DEBUG-NEXT: ret float [[SEL]], !dbg [[DBG76:![0-9]+]] +; entry: %div = fdiv float %a, %b %add = fadd float %div, %b @@ -186,6 +246,17 @@ define ptr @calls_no_sink(i32 %in) { ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[IN:%.*]], 0 ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[TOBOOL]], ptr [[CALL1]], ptr [[CALL2]] ; CHECK-NEXT: ret ptr [[SEL]] +; +; DEBUG-LABEL: @calls_no_sink( +; DEBUG-NEXT: [[CALL1:%.*]] = call ptr @bar(i32 1, i32 2, i32 3), !dbg [[DBG83:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr [[CALL1]], [[META79:![0-9]+]], !DIExpression(), [[DBG83]]) +; DEBUG-NEXT: [[CALL2:%.*]] = call ptr @baz(i32 1, i32 2, i32 3), !dbg [[DBG84:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr [[CALL2]], [[META80:![0-9]+]], !DIExpression(), [[DBG84]]) +; DEBUG-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[IN:%.*]], 0, !dbg [[DBG85:![0-9]+]] +; DEBUG-NEXT: #dbg_value(i1 [[TOBOOL]], [[META81:![0-9]+]], !DIExpression(), [[DBG85]]) 
+; DEBUG-NEXT: [[SEL:%.*]] = select i1 [[TOBOOL]], ptr [[CALL1]], ptr [[CALL2]], !dbg [[DBG86:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr [[SEL]], [[META82:![0-9]+]], !DIExpression(), [[DBG86]]) +; DEBUG-NEXT: ret ptr [[SEL]], !dbg [[DBG87:![0-9]+]] ; %call1 = call ptr @bar(i32 1, i32 2, i32 3) %call2 = call ptr @baz(i32 1, i32 2, i32 3) @@ -201,6 +272,17 @@ define i32 @sdiv_no_sink(i32 %a, i32 %b) { ; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A]], 5 ; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[DIV1]], i32 [[DIV2]] ; CHECK-NEXT: ret i32 [[SEL]] +; +; DEBUG-LABEL: @sdiv_no_sink( +; DEBUG-NEXT: [[DIV1:%.*]] = sdiv i32 [[A:%.*]], [[B:%.*]], !dbg [[DBG94:![0-9]+]] +; DEBUG-NEXT: #dbg_value(i32 [[DIV1]], [[META90:![0-9]+]], !DIExpression(), [[DBG94]]) +; DEBUG-NEXT: [[DIV2:%.*]] = sdiv i32 [[B]], [[A]], !dbg [[DBG95:![0-9]+]] +; DEBUG-NEXT: #dbg_value(i32 [[DIV2]], [[META91:![0-9]+]], !DIExpression(), [[DBG95]]) +; DEBUG-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A]], 5, !dbg [[DBG96:![0-9]+]] +; DEBUG-NEXT: #dbg_value(i1 [[CMP]], [[META92:![0-9]+]], !DIExpression(), [[DBG96]]) +; DEBUG-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 [[DIV1]], i32 [[DIV2]], !dbg [[DBG97:![0-9]+]] +; DEBUG-NEXT: #dbg_value(i32 [[SEL]], [[META93:![0-9]+]], !DIExpression(), [[DBG97]]) +; DEBUG-NEXT: ret i32 [[SEL]], !dbg [[DBG98:![0-9]+]] ; %div1 = sdiv i32 %a, %b %div2 = sdiv i32 %b, %a diff --git a/llvm/test/Transforms/CodeGenPrepare/debug-info-on-skipped-selects.ll b/llvm/test/Transforms/CodeGenPrepare/debug-info-on-skipped-selects.ll index ffd4a0170ac9d4..538719c58b9e00 100644 --- a/llvm/test/Transforms/CodeGenPrepare/debug-info-on-skipped-selects.ll +++ b/llvm/test/Transforms/CodeGenPrepare/debug-info-on-skipped-selects.ll @@ -5,8 +5,7 @@ ; Test that when we skip over multiple selects in CGP, that the debug-info ; attached to those selects is still fixed up. 
-; CHECK: declare void @llvm.dbg.value(metadata, -; CHECK: call void @llvm.dbg.value(metadata ptr %sunkaddr, +; CHECK: #dbg_value(ptr %sunkaddr, source_filename = "reduced.ll" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/CodeGenPrepare/sink-shift-and-trunc.ll b/llvm/test/Transforms/CodeGenPrepare/sink-shift-and-trunc.ll index e46f70c2c11222..2e820165e17a04 100644 --- a/llvm/test/Transforms/CodeGenPrepare/sink-shift-and-trunc.ll +++ b/llvm/test/Transforms/CodeGenPrepare/sink-shift-and-trunc.ll @@ -12,7 +12,7 @@ entry: %x.sroa.3.0.extract.shift = lshr i64 %arg1, 32, !dbg !37 call void @llvm.dbg.value(metadata i64 %x.sroa.3.0.extract.shift, metadata !13, metadata !DIExpression()), !dbg !37 -; CHECK: call void @llvm.dbg.value(metadata i64 %arg1, metadata {{.*}}, metadata !DIExpression(DW_OP_constu, 32, DW_OP_shr, DW_OP_stack_value)), !dbg [[shift2_loc:![0-9]+]] +; CHECK: #dbg_value(i64 %arg1, {{.*}}, !DIExpression(DW_OP_constu, 32, DW_OP_shr, DW_OP_stack_value), [[shift2_loc:![0-9]+]] %x.sroa.5.0.extract.shift = lshr i64 %arg1, 48, !dbg !38 %tobool = icmp eq i64 %x.sroa.5.0.extract.shift, 0, !dbg !39 diff --git a/llvm/test/Transforms/Coroutines/coro-debug-O2.ll b/llvm/test/Transforms/Coroutines/coro-debug-O2.ll index 3f9af30a92e343..7ffa2ac153c853 100644 --- a/llvm/test/Transforms/Coroutines/coro-debug-O2.ll +++ b/llvm/test/Transforms/Coroutines/coro-debug-O2.ll @@ -5,7 +5,7 @@ ; CHECK-LABEL: define internal fastcc void @f.resume({{.*}}) ; CHECK: entry.resume: -; CHECK: call void @llvm.dbg.declare(metadata ptr %begin, metadata ![[PROMISEVAR_RESUME:[0-9]+]], metadata !DIExpression( +; CHECK: #dbg_declare(ptr %begin, ![[PROMISEVAR_RESUME:[0-9]+]], !DIExpression( ; ; CHECK: ![[PROMISEVAR_RESUME]] = !DILocalVariable(name: "__promise" %promise_type = type { i32, i32, double } diff --git a/llvm/test/Transforms/Coroutines/coro-debug-coro-frame.ll 
b/llvm/test/Transforms/Coroutines/coro-debug-coro-frame.ll index 2978f85be23854..8e5c4ab52e78eb 100644 --- a/llvm/test/Transforms/Coroutines/coro-debug-coro-frame.ll +++ b/llvm/test/Transforms/Coroutines/coro-debug-coro-frame.ll @@ -6,12 +6,12 @@ ; CHECK-LABEL: define void @f( ; CHECK: coro.init: ; CHECK: %[[begin:.*]] = call noalias nonnull ptr @llvm.coro.begin( -; CHECK: call void @llvm.dbg.declare(metadata ptr %[[begin]], metadata ![[CORO_FRAME:[0-9]+]], metadata !DIExpression()) +; CHECK: #dbg_declare(ptr %[[begin]], ![[CORO_FRAME:[0-9]+]], !DIExpression(), ; ; CHECK: define internal fastcc void @f.resume( ; CHECK: entry.resume: ; CHECK: %[[FramePtr_RESUME:.*]] = alloca ptr -; CHECK: call void @llvm.dbg.declare(metadata ptr %[[FramePtr_RESUME]], metadata ![[CORO_FRAME_IN_RESUME:[0-9]+]], metadata !DIExpression(DW_OP_deref) +; CHECK: #dbg_declare(ptr %[[FramePtr_RESUME]], ![[CORO_FRAME_IN_RESUME:[0-9]+]], !DIExpression(DW_OP_deref) ; ; CHECK-DAG: ![[FILE:[0-9]+]] = !DIFile(filename: "coro-debug.cpp" ; CHECK-DAG: ![[RAMP:[0-9]+]] = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", diff --git a/llvm/test/Transforms/Coroutines/coro-debug-dbg.values-not_used_in_frame.ll b/llvm/test/Transforms/Coroutines/coro-debug-dbg.values-not_used_in_frame.ll index 79793dc293d0d9..8375193548e668 100644 --- a/llvm/test/Transforms/Coroutines/coro-debug-dbg.values-not_used_in_frame.ll +++ b/llvm/test/Transforms/Coroutines/coro-debug-dbg.values-not_used_in_frame.ll @@ -5,8 +5,8 @@ ; This file is based on coro-debug-frame-variable.ll. 
; CHECK: define internal fastcc void @f.resume(ptr noundef nonnull align 16 dereferenceable(80) %begin) !dbg ![[RESUME_FN_DBG_NUM:[0-9]+]] ; CHECK: await.ready: -; CHECK: call void @llvm.dbg.value(metadata i32 poison, metadata ![[IVAR_RESUME:[0-9]+]], metadata !DIExpression( -; CHECK: call void @llvm.dbg.value(metadata i32 poison, metadata ![[JVAR_RESUME:[0-9]+]], metadata !DIExpression( +; CHECK: #dbg_value(i32 poison, ![[IVAR_RESUME:[0-9]+]], !DIExpression( +; CHECK: #dbg_value(i32 poison, ![[JVAR_RESUME:[0-9]+]], !DIExpression( ; ; CHECK: ![[RESUME_FN_DBG_NUM]] = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov" ; CHECK: ![[IVAR_RESUME]] = !DILocalVariable(name: "i" diff --git a/llvm/test/Transforms/Coroutines/coro-debug-dbg.values.ll b/llvm/test/Transforms/Coroutines/coro-debug-dbg.values.ll index dd9310fe34f341..0b3acc30a1eee0 100644 --- a/llvm/test/Transforms/Coroutines/coro-debug-dbg.values.ll +++ b/llvm/test/Transforms/Coroutines/coro-debug-dbg.values.ll @@ -5,21 +5,21 @@ ; This file is based on coro-debug-frame-variable.ll. ; CHECK-LABEL: define void @f( ; CHECK: %[[frame:.*]] = call {{.*}} @llvm.coro.begin -; CHECK: call void @llvm.dbg.value(metadata ptr %[[frame]] -; CHECK-SAME: !DIExpression(DW_OP_plus_uconst, [[OffsetX:[0-9]*]])) +; CHECK: #dbg_value(ptr %[[frame]] +; CHECK-SAME: !DIExpression(DW_OP_plus_uconst, [[OffsetX:[0-9]*]]), ; ^ No deref at the end, as this variable ("x") is an array; ; its value is its address. The entire array is in the frame. -; CHECK: call void @llvm.dbg.assign(metadata ptr %[[frame]] +; CHECK: #dbg_assign(ptr %[[frame]] ; CHECK-SAME: !DIExpression(DW_OP_plus_uconst, [[OffsetX]]) ;; FIXME: Should we be updating the addresses on assigns here as well? 
-; CHECK-SAME: , metadata ptr %[[frame]], metadata !DIExpression()) +; CHECK-SAME: , ptr %[[frame]], !DIExpression(), -; CHECK: call void @llvm.dbg.value(metadata ptr %[[frame]] -; CHECK-SAME: !DIExpression(DW_OP_plus_uconst, [[OffsetSpill:[0-9]*]], DW_OP_deref)) -; CHECK: call void @llvm.dbg.value(metadata ptr %[[frame]] -; CHECK-SAME: !DIExpression(DW_OP_plus_uconst, [[OffsetI:[0-9]*]], DW_OP_deref)) -; CHECK: call void @llvm.dbg.value(metadata ptr %[[frame]] -; CHECK-SAME: !DIExpression(DW_OP_plus_uconst, [[OffsetJ:[0-9]*]], DW_OP_deref)) +; CHECK: #dbg_value(ptr %[[frame]] +; CHECK-SAME: !DIExpression(DW_OP_plus_uconst, [[OffsetSpill:[0-9]*]], DW_OP_deref), +; CHECK: #dbg_value(ptr %[[frame]] +; CHECK-SAME: !DIExpression(DW_OP_plus_uconst, [[OffsetI:[0-9]*]], DW_OP_deref), +; CHECK: #dbg_value(ptr %[[frame]] +; CHECK-SAME: !DIExpression(DW_OP_plus_uconst, [[OffsetJ:[0-9]*]], DW_OP_deref), ; CHECK-LABEL: void @f.resume( ; CHECK-SAME: ptr {{.*}} %[[frame:.*]]) @@ -27,14 +27,14 @@ ; CHECK: %[[frame_alloca:.*]] = alloca ptr ; CHECK-NEXT: store ptr %[[frame]], ptr %[[frame_alloca]] ; CHECK: init.ready: -; CHECK: call void @llvm.dbg.value(metadata ptr %[[frame_alloca]], metadata ![[XVAR_RESUME:[0-9]+]], +; CHECK: #dbg_value(ptr %[[frame_alloca]], ![[XVAR_RESUME:[0-9]+]], ; CHECK-SAME: !DIExpression(DW_OP_deref, DW_OP_plus_uconst, [[OffsetX]]) ; CHECK: await.ready: -; CHECK: call void @llvm.dbg.value(metadata ptr %[[frame_alloca]], metadata ![[SPILL_RESUME:[0-9]+]] +; CHECK: #dbg_value(ptr %[[frame_alloca]], ![[SPILL_RESUME:[0-9]+]] ; CHECK-SAME: !DIExpression(DW_OP_deref, DW_OP_plus_uconst, [[OffsetSpill]], DW_OP_deref) -; CHECK: call void @llvm.dbg.value(metadata ptr %[[frame_alloca]], metadata ![[IVAR_RESUME:[0-9]+]], +; CHECK: #dbg_value(ptr %[[frame_alloca]], ![[IVAR_RESUME:[0-9]+]], ; CHECK-SAME: !DIExpression(DW_OP_deref, DW_OP_plus_uconst, [[OffsetI]], DW_OP_deref) -; CHECK: call void @llvm.dbg.value(metadata ptr %[[frame_alloca]], metadata 
![[JVAR_RESUME:[0-9]+]], +; CHECK: #dbg_value(ptr %[[frame_alloca]], ![[JVAR_RESUME:[0-9]+]], ; CHECK-SAME: !DIExpression(DW_OP_deref, DW_OP_plus_uconst, [[OffsetJ]], DW_OP_deref) ; ; CHECK: ![[RESUME_FN_DBG_NUM]] = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov" diff --git a/llvm/test/Transforms/Coroutines/coro-debug-frame-variable.ll b/llvm/test/Transforms/Coroutines/coro-debug-frame-variable.ll index bf51218590c2f2..4f5cdcf15618c7 100644 --- a/llvm/test/Transforms/Coroutines/coro-debug-frame-variable.ll +++ b/llvm/test/Transforms/Coroutines/coro-debug-frame-variable.ll @@ -31,20 +31,20 @@ ; CHECK-LABEL: define void @f() {{.*}} { ; CHECK: entry: ; CHECK: %j = alloca i32, align 4 -; CHECK: call void @llvm.dbg.declare(metadata ptr %j, metadata ![[JVAR:[0-9]+]], metadata !DIExpression()), !dbg ![[JDBGLOC:[0-9]+]] +; CHECK: #dbg_declare(ptr %j, ![[JVAR:[0-9]+]], !DIExpression(), ![[JDBGLOC:[0-9]+]] ; CHECK: %[[MEMORY:.*]] = call ptr @new({{.+}}), !dbg ![[IDBGLOC:[0-9]+]] -; CHECK: call void @llvm.dbg.declare(metadata ptr %[[MEMORY]], metadata ![[XVAR:[0-9]+]], metadata !DIExpression(DW_OP_plus_uconst, 32)), !dbg ![[IDBGLOC]] -; CHECK: call void @llvm.dbg.declare(metadata ptr %[[MEMORY]], metadata ![[IVAR:[0-9]+]], metadata !DIExpression(DW_OP_plus_uconst, 20)), !dbg ![[IDBGLOC]] +; CHECK: #dbg_declare(ptr %[[MEMORY]], ![[XVAR:[0-9]+]], !DIExpression(DW_OP_plus_uconst, 32), ![[IDBGLOC]] +; CHECK: #dbg_declare(ptr %[[MEMORY]], ![[IVAR:[0-9]+]], !DIExpression(DW_OP_plus_uconst, 20), ![[IDBGLOC]] ; CHECK: await.ready: ; ; CHECK-LABEL: define internal fastcc void @f.resume({{.*}}) {{.*}} { ; CHECK: entry.resume: ; CHECK-NEXT: %[[DBG_PTR:.*]] = alloca ptr -; CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr %[[DBG_PTR]], metadata ![[XVAR_RESUME:[0-9]+]], metadata !DIExpression(DW_OP_deref, DW_OP_plus_uconst, 32)), !dbg -; CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr %[[DBG_PTR]], metadata ![[IVAR_RESUME:[0-9]+]], metadata !DIExpression(DW_OP_deref, 
DW_OP_plus_uconst, 20)), !dbg ![[IDBGLOC_RESUME:[0-9]+]] +; CHECK-NEXT: #dbg_declare(ptr %[[DBG_PTR]], ![[XVAR_RESUME:[0-9]+]], !DIExpression(DW_OP_deref, DW_OP_plus_uconst, 32), +; CHECK-NEXT: #dbg_declare(ptr %[[DBG_PTR]], ![[IVAR_RESUME:[0-9]+]], !DIExpression(DW_OP_deref, DW_OP_plus_uconst, 20), ![[IDBGLOC_RESUME:[0-9]+]] ; CHECK-NEXT: store ptr {{.*}}, ptr %[[DBG_PTR]] ; CHECK: %[[J:.*]] = alloca i32, align 4 -; CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr %[[J]], metadata ![[JVAR_RESUME:[0-9]+]], metadata !DIExpression()), !dbg ![[JDBGLOC_RESUME:[0-9]+]] +; CHECK-NEXT: #dbg_declare(ptr %[[J]], ![[JVAR_RESUME:[0-9]+]], !DIExpression(), ![[JDBGLOC_RESUME:[0-9]+]] ; CHECK: init.ready: ; CHECK: await.ready: ; diff --git a/llvm/test/Transforms/Coroutines/coro-debug-spill-dbg.declare.ll b/llvm/test/Transforms/Coroutines/coro-debug-spill-dbg.declare.ll index c943ea5ca22ec0..53dfb487fd1d80 100644 --- a/llvm/test/Transforms/Coroutines/coro-debug-spill-dbg.declare.ll +++ b/llvm/test/Transforms/Coroutines/coro-debug-spill-dbg.declare.ll @@ -21,17 +21,17 @@ ; CHECK: define internal fastcc void @foo.resume(ptr noundef nonnull align 8 dereferenceable(32) %[[HDL:.*]]) ; CHECK-NEXT: entry.resume: ; CHECK-NEXT: %[[HDL]].debug = alloca ptr, align 8 -; CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr %[[HDL]].debug, metadata ![[THIS_RESUME:[0-9]+]], metadata !DIExpression(DW_OP_deref, DW_OP_plus_uconst, 24)) +; CHECK-NEXT: #dbg_declare(ptr %[[HDL]].debug, ![[THIS_RESUME:[0-9]+]], !DIExpression(DW_OP_deref, DW_OP_plus_uconst, 24), ; ; CHECK: define internal fastcc void @foo.destroy(ptr noundef nonnull align 8 dereferenceable(32) %[[HDL]]) ; CHECK-NEXT: entry.destroy: ; CHECK-NEXT: %[[HDL]].debug = alloca ptr, align 8 -; CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr %[[HDL]].debug, metadata ![[THIS_DESTROY:[0-9]+]], metadata !DIExpression(DW_OP_deref, DW_OP_plus_uconst, 24)) +; CHECK-NEXT: #dbg_declare(ptr %[[HDL]].debug, ![[THIS_DESTROY:[0-9]+]], 
!DIExpression(DW_OP_deref, DW_OP_plus_uconst, 24), ; ; CHECK: define internal fastcc void @foo.cleanup(ptr noundef nonnull align 8 dereferenceable(32) %[[HDL]]) ; CHECK-NEXT: entry.cleanup: ; CHECK-NEXT: %[[HDL]].debug = alloca ptr, align 8 -; CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr %[[HDL]].debug, metadata ![[THIS_CLEANUP:[0-9]+]], metadata !DIExpression(DW_OP_deref, DW_OP_plus_uconst, 24)) +; CHECK-NEXT: #dbg_declare(ptr %[[HDL]].debug, ![[THIS_CLEANUP:[0-9]+]], !DIExpression(DW_OP_deref, DW_OP_plus_uconst, 24), ; ; CHECK: ![[THIS_RESUME]] = !DILocalVariable(name: "this" ; CHECK: ![[THIS_DESTROY]] = !DILocalVariable(name: "this" diff --git a/llvm/test/Transforms/Coroutines/coro-debug.ll b/llvm/test/Transforms/Coroutines/coro-debug.ll index 4792825f4ce080..1d8f245d8b7ebc 100644 --- a/llvm/test/Transforms/Coroutines/coro-debug.ll +++ b/llvm/test/Transforms/Coroutines/coro-debug.ll @@ -175,26 +175,26 @@ attributes #7 = { noduplicate } ; CHECK: define internal fastcc void @f.resume(ptr noundef nonnull align 8 dereferenceable(40) %0) #0 personality i32 0 !dbg ![[RESUME:[0-9]+]] ; CHECK: entry.resume: ; CHECK: %[[DBG_PTR:.*]] = alloca ptr -; CHECK: call void @llvm.dbg.declare(metadata ptr %[[DBG_PTR]], metadata ![[RESUME_COROHDL:[0-9]+]], metadata !DIExpression(DW_OP_deref, DW_OP_plus_uconst, -; CHECK: call void @llvm.dbg.declare(metadata ptr %[[DBG_PTR]], metadata ![[RESUME_X:[0-9]+]], metadata !DIExpression(DW_OP_deref, DW_OP_plus_uconst, [[EXPR_TAIL:.*]]) -; CHECK: call void @llvm.dbg.declare(metadata ptr %[[DBG_PTR]], metadata ![[RESUME_DIRECT:[0-9]+]], metadata !DIExpression(DW_OP_deref, DW_OP_plus_uconst, [[EXPR_TAIL]]) +; CHECK: #dbg_declare(ptr %[[DBG_PTR]], ![[RESUME_COROHDL:[0-9]+]], !DIExpression(DW_OP_deref, DW_OP_plus_uconst, +; CHECK: #dbg_declare(ptr %[[DBG_PTR]], ![[RESUME_X:[0-9]+]], !DIExpression(DW_OP_deref, DW_OP_plus_uconst, [[EXPR_TAIL:.*]]) +; CHECK: #dbg_declare(ptr %[[DBG_PTR]], ![[RESUME_DIRECT:[0-9]+]], !DIExpression(DW_OP_deref, 
DW_OP_plus_uconst, [[EXPR_TAIL]]) ; CHECK: store ptr {{.*}}, ptr %[[DBG_PTR]] ; CHECK-NOT: alloca ptr -; CHECK: call void @llvm.dbg.declare(metadata i8 0, metadata ![[RESUME_CONST:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_convert, 8, DW_ATE_signed, DW_OP_LLVM_convert, 32, DW_ATE_signed)) +; CHECK: #dbg_declare(i8 0, ![[RESUME_CONST:[0-9]+]], !DIExpression(DW_OP_LLVM_convert, 8, DW_ATE_signed, DW_OP_LLVM_convert, 32, DW_ATE_signed), ; Note that keeping the undef value here could be acceptable, too. -; CHECK-NOT: call void @llvm.dbg.declare(metadata ptr undef, metadata !{{[0-9]+}}, metadata !DIExpression()) +; CHECK-NOT: #dbg_declare(ptr undef, !{{[0-9]+}}, !DIExpression(), ; CHECK: call void @coro.devirt.trigger(ptr null) -; CHECK: call void @llvm.dbg.value(metadata ptr {{.*}}, metadata ![[RESUME_DIRECT_VALUE:[0-9]+]], metadata !DIExpression(DW_OP_deref, DW_OP_plus_uconst, {{[0-9]+}}, DW_OP_deref)) +; CHECK: #dbg_value(ptr {{.*}}, ![[RESUME_DIRECT_VALUE:[0-9]+]], !DIExpression(DW_OP_deref, DW_OP_plus_uconst, {{[0-9]+}}, DW_OP_deref), ; Check that the dbg.declare intrinsic of invoke instruction is hanled correctly. 
; CHECK: %[[ALLOCATED_STORAGE:.+]] = invoke ptr @allocate() ; CHECK-NEXT: to label %[[NORMAL_DEST:.+]] unwind ; CHECK: [[NORMAL_DEST]] -; CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr %[[ALLOCATED_STORAGE]] +; CHECK-NEXT: #dbg_declare(ptr %[[ALLOCATED_STORAGE]] ; CHECK: %[[CALLBR_RES:.+]] = callbr i32 asm ; CHECK-NEXT: to label %[[DEFAULT_DEST:.+]] [label ; CHECK: [[DEFAULT_DEST]]: ; CHECK-NOT: {{.*}}: -; CHECK: call void @llvm.dbg.declare(metadata i32 %[[CALLBR_RES]] +; CHECK: #dbg_declare(i32 %[[CALLBR_RES]] ; CHECK: define internal fastcc void @f.destroy(ptr noundef nonnull align 8 dereferenceable(40) %0) #0 personality i32 0 !dbg ![[DESTROY:[0-9]+]] ; CHECK: define internal fastcc void @f.cleanup(ptr noundef nonnull align 8 dereferenceable(40) %0) #0 personality i32 0 !dbg ![[CLEANUP:[0-9]+]] diff --git a/llvm/test/Transforms/Coroutines/swift-async-dbg.ll b/llvm/test/Transforms/Coroutines/swift-async-dbg.ll index 74edf7a3f3a540..8c90fcbe19bc5e 100644 --- a/llvm/test/Transforms/Coroutines/swift-async-dbg.ll +++ b/llvm/test/Transforms/Coroutines/swift-async-dbg.ll @@ -29,9 +29,9 @@ define swifttailcc void @coroutineA(ptr swiftasync %arg) !dbg !48 { %i3 = call ptr @llvm.coro.begin(token %i2, ptr null) ; CHECK-LABEL: define {{.*}} @coroutineA( ; CHECK-SAME: ptr swiftasync %[[frame_ptr:.*]]) -; CHECK: @llvm.dbg.declare(metadata ptr %[[frame_ptr]], {{.*}} !DIExpression( +; CHECK: #dbg_declare(ptr %[[frame_ptr]], {{.*}} !DIExpression( ; CHECK-SAME: DW_OP_plus_uconst, 16, DW_OP_plus_uconst, 8) -; CHECK: @llvm.dbg.value(metadata ptr %[[frame_ptr]], {{.*}} !DIExpression( +; CHECK: #dbg_value(ptr %[[frame_ptr]], {{.*}} !DIExpression( ; CHECK-SAME: DW_OP_plus_uconst, 16, DW_OP_deref) ; CHECK: call {{.*}} @swift_task_switch @@ -48,11 +48,11 @@ define swifttailcc void @coroutineA(ptr swiftasync %arg) !dbg !48 { ; CHECK-NOT: define ; CHECK-LABEL: define {{.*}} @coroutineATY0_( ; CHECK-SAME: ptr swiftasync %[[frame_ptr:.*]]) -; CHECK: @llvm.dbg.declare(metadata ptr 
%[[frame_ptr]], {{.*}} !DIExpression( +; CHECK: #dbg_declare(ptr %[[frame_ptr]], {{.*}} !DIExpression( ; CHECK-SAME: DW_OP_LLVM_entry_value, 1, DW_OP_plus_uconst, 16, DW_OP_plus_uconst, 8) -; CHECK: @llvm.dbg.value(metadata ptr %[[frame_ptr]], {{.*}} !DIExpression( +; CHECK: #dbg_value(ptr %[[frame_ptr]], {{.*}} !DIExpression( ; CHECK-SAME: DW_OP_LLVM_entry_value, 1, DW_OP_plus_uconst, 16, DW_OP_deref) -; CHECK: @llvm.dbg.value(metadata !DIArgList(ptr %[[frame_ptr]], i32 %{{.*}}), {{.*}} !DIExpression( +; CHECK: #dbg_value(!DIArgList(ptr %[[frame_ptr]], i32 %{{.*}}), {{.*}} !DIExpression( ; CHECK-SAME: DW_OP_LLVM_arg, 0, DW_OP_plus_uconst, 16, DW_OP_LLVM_arg, 1, DW_OP_plus, DW_OP_deref) ; CHECK: call {{.*}} @coroutineB @@ -69,9 +69,9 @@ define swifttailcc void @coroutineA(ptr swiftasync %arg) !dbg !48 { ; CHECK-LABEL: define {{.*}} @coroutineATQ1_( ; CHECK-SAME: ptr swiftasync %[[frame_ptr:.*]]) ; Note the extra level of indirection that shows up here! -; CHECK: @llvm.dbg.declare(metadata ptr %[[frame_ptr]], {{.*}} !DIExpression( +; CHECK: #dbg_declare(ptr %[[frame_ptr]], {{.*}} !DIExpression( ; CHECK-SAME: DW_OP_LLVM_entry_value, 1, DW_OP_deref, DW_OP_plus_uconst, 16, DW_OP_plus_uconst, 8) -; CHECK: @llvm.dbg.value(metadata ptr %[[frame_ptr]], {{.*}} !DIExpression( +; CHECK: #dbg_value(ptr %[[frame_ptr]], {{.*}} !DIExpression( ; CHECK-SAME: DW_OP_LLVM_entry_value, 1, DW_OP_deref, DW_OP_plus_uconst, 16, DW_OP_deref) ; CHECK: call {{.*}} @swift_task_switch @@ -84,7 +84,7 @@ define swifttailcc void @coroutineA(ptr swiftasync %arg) !dbg !48 { ; CHECK-NOT: define ; CHECK-LABEL: define {{.*}} @coroutineATY2_( ; CHECK-SAME: ptr swiftasync %[[frame_ptr:.*]]) -; CHECK: @llvm.dbg.declare(metadata ptr %[[frame_ptr]], {{.*}} !DIExpression( +; CHECK: #dbg_declare(ptr %[[frame_ptr]], {{.*}} !DIExpression( ; CHECK-SAME: DW_OP_LLVM_entry_value, 1, DW_OP_plus_uconst, 16, DW_OP_plus_uconst, 8) } diff --git a/llvm/test/Transforms/DCE/basic.ll b/llvm/test/Transforms/DCE/basic.ll 
index 154fde0d2e3606..af34a1d4735590 100644 --- a/llvm/test/Transforms/DCE/basic.ll +++ b/llvm/test/Transforms/DCE/basic.ll @@ -4,9 +4,9 @@ ; CHECK-LABEL: @test define void @test() { %add = add i32 1, 2 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 1, metadata [[add:![0-9]+]], metadata !DIExpression(DW_OP_plus_uconst, 2, DW_OP_stack_value)) +; CHECK-NEXT: #dbg_value(i32 1, [[add:![0-9]+]], !DIExpression(DW_OP_plus_uconst, 2, DW_OP_stack_value), %sub = sub i32 %add, 1 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 1, metadata [[sub:![0-9]+]], metadata !DIExpression(DW_OP_plus_uconst, 2, DW_OP_constu, 1, DW_OP_minus, DW_OP_stack_value)) +; CHECK-NEXT: #dbg_value(i32 1, [[sub:![0-9]+]], !DIExpression(DW_OP_plus_uconst, 2, DW_OP_constu, 1, DW_OP_minus, DW_OP_stack_value), ; CHECK-NEXT: ret void ret void } @@ -17,7 +17,7 @@ declare void @llvm.lifetime.end.p0(i64, ptr nocapture) nounwind ; CHECK-LABEL: @test_lifetime_alloca define i32 @test_lifetime_alloca() { ; Check that lifetime intrinsics are removed along with the pointer. -; CHECK-NEXT: @llvm.dbg.value +; CHECK-NEXT: #dbg_value ; CHECK-NEXT: ret i32 0 ; CHECK-NOT: llvm.lifetime.start ; CHECK-NOT: llvm.lifetime.end @@ -30,7 +30,7 @@ define i32 @test_lifetime_alloca() { ; CHECK-LABEL: @test_lifetime_arg define i32 @test_lifetime_arg(ptr) { ; Check that lifetime intrinsics are removed along with the pointer. -; CHECK-NEXT: llvm.dbg.value +; CHECK-NEXT: #dbg_value ; CHECK-NEXT: ret i32 0 ; CHECK-NOT: llvm.lifetime.start ; CHECK-NOT: llvm.lifetime.end @@ -44,7 +44,7 @@ define i32 @test_lifetime_arg(ptr) { ; CHECK-LABEL: @test_lifetime_global define i32 @test_lifetime_global() { ; Check that lifetime intrinsics are removed along with the pointer. 
-; CHECK-NEXT: llvm.dbg.value +; CHECK-NEXT: #dbg_value ; CHECK-NEXT: ret i32 0 ; CHECK-NOT: llvm.lifetime.start ; CHECK-NOT: llvm.lifetime.end @@ -59,7 +59,7 @@ define i32 @test_lifetime_bitcast(ptr %arg) { ; It's not uncommon for two bitcasts to be made: one for lifetime, one for use. ; TODO: Support the above case. ; CHECK-NEXT: bitcast -; CHECK-NEXT: llvm.dbg.value +; CHECK-NEXT: #dbg_value ; CHECK-NEXT: llvm.lifetime.start.p0(i64 -1, ptr %cast) ; CHECK-NEXT: llvm.lifetime.end.p0(i64 -1, ptr %cast) ; CHECK-NEXT: ret i32 0 diff --git a/llvm/test/Transforms/DCE/dbg-value-removal.ll b/llvm/test/Transforms/DCE/dbg-value-removal.ll index f8f01120d0a069..556a25a23f5cb1 100644 --- a/llvm/test/Transforms/DCE/dbg-value-removal.ll +++ b/llvm/test/Transforms/DCE/dbg-value-removal.ll @@ -10,29 +10,29 @@ define dso_local i16 @main(i16 %a1, i16 %a2) local_unnamed_addr #0 !dbg !7 { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[BB0:%.*]] ; CHECK: bb0: -; CHECK-NEXT: call void @llvm.dbg.value(metadata i16 13, metadata !13, metadata !DIExpression()), !dbg !16 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i16 14, metadata !14, metadata !DIExpression()), !dbg !18 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i16 13, metadata !13, metadata !DIExpression()), !dbg !18 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i16 12, metadata !12, metadata !DIExpression()), !dbg !18 +; CHECK-NEXT: #dbg_value(i16 13, [[META13:![0-9]+]], !DIExpression(), [[META16:![0-9]+]]) +; CHECK-NEXT: #dbg_value(i16 14, [[META14:![0-9]+]], !DIExpression(), [[META18:![0-9]+]]) +; CHECK-NEXT: #dbg_value(i16 13, [[META13]], !DIExpression(), [[META18]]) +; CHECK-NEXT: #dbg_value(i16 12, [[META12:![0-9]+]], !DIExpression(), [[META18]]) ; CHECK-NEXT: br label [[BB1:%.*]] ; CHECK: bb1: -; CHECK-NEXT: call void @llvm.dbg.value(metadata i16 [[A1:%.*]], metadata !14, metadata !DIExpression()), !dbg !18 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i16 888, metadata !13, metadata !DIExpression()), !dbg !18 
-; CHECK-NEXT: call void @llvm.dbg.value(metadata i16 [[A2:%.*]], metadata !12, metadata !DIExpression()), !dbg !18 +; CHECK-NEXT: #dbg_value(i16 [[A1:%.*]], [[META14]], !DIExpression(), [[META18]]) +; CHECK-NEXT: #dbg_value(i16 888, [[META13]], !DIExpression(), [[META18]]) +; CHECK-NEXT: #dbg_value(i16 [[A2:%.*]], [[META12]], !DIExpression(), [[META18]]) ; CHECK-NEXT: [[T1:%.*]] = call i16 @bar(i16 0) -; CHECK-NEXT: call void @llvm.dbg.value(metadata i16 [[T1]], metadata !13, metadata !DIExpression()), !dbg !18 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i16 [[A2]], metadata !12, metadata !DIExpression(DW_OP_constu, 2, DW_OP_shr, DW_OP_stack_value)), !dbg !18 +; CHECK-NEXT: #dbg_value(i16 [[T1]], [[META13]], !DIExpression(), [[META18]]) +; CHECK-NEXT: #dbg_value(i16 [[A2]], [[META12]], !DIExpression(DW_OP_constu, 2, DW_OP_shr, DW_OP_stack_value), [[META18]]) ; CHECK-NEXT: br label [[BB2:%.*]] ; CHECK: bb2: -; CHECK-NEXT: call void @llvm.dbg.value(metadata i16 [[A1]], metadata !13, metadata !DIExpression(DW_OP_LLVM_fragment, 0, 8)), !dbg !18 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i16 [[A1]], metadata !13, metadata !DIExpression(DW_OP_LLVM_fragment, 8, 8)), !dbg !18 +; CHECK-NEXT: #dbg_value(i16 [[A1]], [[META13]], !DIExpression(DW_OP_LLVM_fragment, 0, 8), [[META18]]) +; CHECK-NEXT: #dbg_value(i16 [[A1]], [[META13]], !DIExpression(DW_OP_LLVM_fragment, 8, 8), [[META18]]) ; CHECK-NEXT: [[T2:%.*]] = call i16 @bar(i16 [[T1]]) -; CHECK-NEXT: call void @llvm.dbg.value(metadata i16 [[T2]], metadata !13, metadata !DIExpression(DW_OP_LLVM_fragment, 0, 8)), !dbg !18 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i16 [[A1]], metadata !13, metadata !DIExpression(DW_OP_LLVM_fragment, 8, 8)), !dbg !19 +; CHECK-NEXT: #dbg_value(i16 [[T2]], [[META13]], !DIExpression(DW_OP_LLVM_fragment, 0, 8), [[META18]]) +; CHECK-NEXT: #dbg_value(i16 [[A1]], [[META13]], !DIExpression(DW_OP_LLVM_fragment, 8, 8), [[META19:![0-9]+]]) ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: 
bb3: -; CHECK-NEXT: call void @llvm.dbg.value(metadata i16 [[A1]], metadata !13, metadata !DIExpression(DW_OP_LLVM_fragment, 0, 8)), !dbg !19 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i16 [[A1]], metadata !13, metadata !DIExpression()), !dbg !18 +; CHECK-NEXT: #dbg_value(i16 [[A1]], [[META13]], !DIExpression(DW_OP_LLVM_fragment, 0, 8), [[META19]]) +; CHECK-NEXT: #dbg_value(i16 [[A1]], [[META13]], !DIExpression(), [[META18]]) ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret i16 [[T2]] diff --git a/llvm/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll b/llvm/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll index 5e49437db0a741..485275b11160ff 100644 --- a/llvm/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll +++ b/llvm/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll @@ -11,11 +11,11 @@ define ptr @vfs_addname(ptr %name, i32 %len, i32 %hash, i32 %flags) nounwind ssp ; CHECK-LABEL: define {{[^@]+}}@vfs_addname ; CHECK-SAME: (ptr [[NAME:%.*]], i32 [[LEN:%.*]], i32 [[HASH:%.*]], i32 [[FLAGS:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG4:![0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata ptr [[NAME]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG12:![0-9]+]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 [[LEN]], metadata [[META13:![0-9]+]], metadata !DIExpression()), !dbg [[DBG12]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 [[HASH]], metadata [[META14:![0-9]+]], metadata !DIExpression()), !dbg [[DBG12]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 [[FLAGS]], metadata [[META15:![0-9]+]], metadata !DIExpression()), !dbg [[DBG12]] -; CHECK-NEXT: [[TMP0:%.*]] = call fastcc ptr @add_name_internal(ptr [[NAME]], i32 [[HASH]]) #[[ATTR3:[0-9]+]], !dbg [[DBG16:![0-9]+]] +; CHECK-NEXT: #dbg_value(ptr [[NAME]], [[META11:![0-9]+]], !DIExpression(), [[META12:![0-9]+]]) +; CHECK-NEXT: #dbg_value(i32 [[LEN]], [[META13:![0-9]+]], !DIExpression(), [[META12]]) +; 
CHECK-NEXT: #dbg_value(i32 [[HASH]], [[META14:![0-9]+]], !DIExpression(), [[META12]]) +; CHECK-NEXT: #dbg_value(i32 [[FLAGS]], [[META15:![0-9]+]], !DIExpression(), [[META12]]) +; CHECK-NEXT: [[TMP0:%.*]] = call fastcc ptr @add_name_internal(ptr [[NAME]], i32 [[HASH]]) #[[ATTR2:[0-9]+]], !dbg [[DBG16:![0-9]+]] ; CHECK-NEXT: ret ptr [[TMP0]], !dbg [[DBG16]] ; entry: @@ -34,11 +34,11 @@ define internal fastcc ptr @add_name_internal(ptr %name, i32 %len, i32 %hash, i8 ; CHECK-LABEL: define {{[^@]+}}@add_name_internal ; CHECK-SAME: (ptr [[NAME:%.*]], i32 [[HASH:%.*]]) #[[ATTR1:[0-9]+]] !dbg [[DBG18:![0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata ptr [[NAME]], metadata [[META22:![0-9]+]], metadata !DIExpression()), !dbg [[DBG23:![0-9]+]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 poison, metadata [[META24:![0-9]+]], metadata !DIExpression()), !dbg [[DBG23]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 [[HASH]], metadata [[META25:![0-9]+]], metadata !DIExpression()), !dbg [[DBG23]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i8 poison, metadata [[META26:![0-9]+]], metadata !DIExpression()), !dbg [[DBG23]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 poison, metadata [[META27:![0-9]+]], metadata !DIExpression()), !dbg [[DBG23]] +; CHECK-NEXT: #dbg_value(ptr [[NAME]], [[META22:![0-9]+]], !DIExpression(), [[META23:![0-9]+]]) +; CHECK-NEXT: #dbg_value(i32 poison, [[META24:![0-9]+]], !DIExpression(), [[META23]]) +; CHECK-NEXT: #dbg_value(i32 [[HASH]], [[META25:![0-9]+]], !DIExpression(), [[META23]]) +; CHECK-NEXT: #dbg_value(i8 poison, [[META26:![0-9]+]], !DIExpression(), [[META23]]) +; CHECK-NEXT: #dbg_value(i32 poison, [[META27:![0-9]+]], !DIExpression(), [[META23]]) ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[HASH]], 0, !dbg [[DBG28:![0-9]+]] ; CHECK-NEXT: br i1 [[TMP0]], label [[BB:%.*]], label [[BB1:%.*]], !dbg [[DBG28]] ; CHECK: bb: @@ -72,8 +72,7 @@ bb2: ; preds = %bb1, %bb 
declare void @llvm.dbg.value(metadata, metadata, metadata) nounwind readnone ; CHECK: attributes #0 = { nounwind ssp } -; CHECK: attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -; CHECK: attributes #2 = { noinline nounwind ssp } +; CHECK: attributes #1 = { noinline nounwind ssp } !llvm.dbg.cu = !{!3} !llvm.module.flags = !{!30} diff --git a/llvm/test/Transforms/DeadArgElim/dbginfo-preserve-dbgloc.ll b/llvm/test/Transforms/DeadArgElim/dbginfo-preserve-dbgloc.ll index faec4ba039e709..8eb0f3ad448626 100644 --- a/llvm/test/Transforms/DeadArgElim/dbginfo-preserve-dbgloc.ll +++ b/llvm/test/Transforms/DeadArgElim/dbginfo-preserve-dbgloc.ll @@ -36,9 +36,9 @@ declare ptr @foo(...) local_unnamed_addr #1 define internal zeroext i1 @f1(i1 zeroext %is_y, ptr %str) #4 !dbg !34 { entry: %frombool = zext i1 %is_y to i8 -; CHECK: call void @llvm.dbg.value(metadata i1 %is_y, metadata !39, metadata !DIExpression()), !dbg !42 +; CHECK: #dbg_value(i1 %is_y, !39, !DIExpression(), !42 call void @llvm.dbg.value(metadata i1 %is_y, metadata !39, metadata !DIExpression()), !dbg !42 -; CHECK: call void @llvm.dbg.value(metadata ptr %str, metadata !40, metadata !DIExpression()), !dbg !43 +; CHECK: #dbg_value(ptr %str, !40, !DIExpression(), !43 call void @llvm.dbg.value(metadata ptr %str, metadata !40, metadata !DIExpression()), !dbg !43 call void @llvm.dbg.value(metadata ptr null, metadata !41, metadata !DIExpression()), !dbg !44 %tobool = icmp ne ptr %str, null, !dbg !45 diff --git a/llvm/test/Transforms/DeadArgElim/dbginfo-update-dbgval-local.ll b/llvm/test/Transforms/DeadArgElim/dbginfo-update-dbgval-local.ll index 4a6d7ab9be55ca..0e834013fe40b4 100644 --- a/llvm/test/Transforms/DeadArgElim/dbginfo-update-dbgval-local.ll +++ b/llvm/test/Transforms/DeadArgElim/dbginfo-update-dbgval-local.ll @@ -6,7 +6,7 @@ ; Reproducer for PR23260. 
; CHECK-LABEL: define internal void @bar() -; CHECK: call void @llvm.dbg.value(metadata i32 poison, metadata ![[LOCAL1:[0-9]+]] +; CHECK: #dbg_value(i32 poison, ![[LOCAL1:[0-9]+]] ; CHECK: call void @sink() ; Function Attrs: alwaysinline nounwind uwtable @@ -19,7 +19,7 @@ entry: ; CHECK-LABEL: define void @foo() ; CHECK: call void @bar() -; CHECK: call void @llvm.dbg.value(metadata i32 poison, metadata ![[LOCAL2:[0-9]+]] +; CHECK: #dbg_value(i32 poison, ![[LOCAL2:[0-9]+]] ; CHECK: call void @bar() ; Function Attrs: nounwind uwtable diff --git a/llvm/test/Transforms/DeadArgElim/dbginfo-update-dbgval.ll b/llvm/test/Transforms/DeadArgElim/dbginfo-update-dbgval.ll index e6be5d0b2413c6..3007d5bae7da9b 100644 --- a/llvm/test/Transforms/DeadArgElim/dbginfo-update-dbgval.ll +++ b/llvm/test/Transforms/DeadArgElim/dbginfo-update-dbgval.ll @@ -24,7 +24,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; Function Attrs: noinline nounwind uwtable define dso_local void @f2(i32 %k) local_unnamed_addr !dbg !11 { entry: -; CHECK: call void @llvm.dbg.value(metadata i32 poison, metadata !15, metadata !DIExpression()), !dbg !16 +; CHECK: #dbg_value(i32 poison, !15, !DIExpression(), !16 call void @llvm.dbg.value(metadata i32 %k, metadata !15, metadata !DIExpression()), !dbg !16 %0 = load i32, ptr @s, align 4, !dbg !17 %inc = add nsw i32 %0, 1, !dbg !17 diff --git a/llvm/test/Transforms/DeadStoreElimination/debuginfo.ll b/llvm/test/Transforms/DeadStoreElimination/debuginfo.ll index 0ad495ed0353b8..f316277bb927cb 100644 --- a/llvm/test/Transforms/DeadStoreElimination/debuginfo.ll +++ b/llvm/test/Transforms/DeadStoreElimination/debuginfo.ll @@ -11,8 +11,8 @@ define ptr @test_salvage(i32 %arg) { ; Check that all four original local variables have their values preserved. 
; CHECK-LABEL: @test_salvage( ; CHECK-NEXT: malloc -; CHECK-NEXT: @llvm.dbg.value(metadata ptr %p, metadata ![[p:.*]], metadata !DIExpression()) -; CHECK-NEXT: @llvm.dbg.value(metadata i32 %arg, metadata ![[DEAD:.*]], metadata !DIExpression(DW_OP_plus_uconst, 1, DW_OP_stack_value)) +; CHECK-NEXT: #dbg_value(ptr %p, ![[p:.*]], !DIExpression(), +; CHECK-NEXT: #dbg_value(i32 %arg, ![[DEAD:.*]], !DIExpression(DW_OP_plus_uconst, 1, DW_OP_stack_value), ; CHECK-NEXT: call void @test_f() ; CHECK-NEXT: store i32 0, ptr %p diff --git a/llvm/test/Transforms/EarlyCSE/debug-info-undef.ll b/llvm/test/Transforms/EarlyCSE/debug-info-undef.ll index 921fff196a31f0..4c9a83ad481b83 100644 --- a/llvm/test/Transforms/EarlyCSE/debug-info-undef.ll +++ b/llvm/test/Transforms/EarlyCSE/debug-info-undef.ll @@ -10,7 +10,7 @@ entry: %0 = load i8, ptr @a, align 1, !dbg !19, !tbaa !20 %conv = sext i8 %0 to i16, !dbg !19 -; CHECK: call void @llvm.dbg.value(metadata i8 %0, metadata !17, metadata !DIExpression(DW_OP_LLVM_convert, 8, DW_ATE_signed, DW_OP_LLVM_convert, 16, DW_ATE_signed, DW_OP_stack_value)), !dbg !18 +; CHECK: #dbg_value(i8 %0, !17, !DIExpression(DW_OP_LLVM_convert, 8, DW_ATE_signed, DW_OP_LLVM_convert, 16, DW_ATE_signed, DW_OP_stack_value), !18 ; CHECK-NEXT: call i32 (...) 
@optimize_me_not() call void @llvm.dbg.value(metadata i16 %conv, metadata !17, metadata !DIExpression()), !dbg !18 diff --git a/llvm/test/Transforms/EarlyCSE/debuginfo-dce.ll b/llvm/test/Transforms/EarlyCSE/debuginfo-dce.ll index cd2abf559cd1b1..4637f3729eca51 100644 --- a/llvm/test/Transforms/EarlyCSE/debuginfo-dce.ll +++ b/llvm/test/Transforms/EarlyCSE/debuginfo-dce.ll @@ -7,7 +7,7 @@ entry: %0 = call i64 @llvm.ctpop.i64(i64 0), !dbg !14 %1 = inttoptr i64 %0 to ptr, !dbg !14 call void @llvm.dbg.value(metadata ptr %1, i64 0, metadata !11, metadata !13), !dbg !14 -; CHECK: call void @llvm.dbg.value(metadata i64 0, metadata !11, metadata !DIExpression()), !dbg !13 +; CHECK: #dbg_value(i64 0, !11, !DIExpression(), !13 %call = call ptr (...) @baa(), !dbg !15 %2 = ptrtoint ptr %call to i64, !dbg !16 %3 = inttoptr i64 %2 to ptr, !dbg !16 diff --git a/llvm/test/Transforms/GVN/load-through-select-dbg.ll b/llvm/test/Transforms/GVN/load-through-select-dbg.ll index 933e727c09cf73..0308a671bf9af0 100644 --- a/llvm/test/Transforms/GVN/load-through-select-dbg.ll +++ b/llvm/test/Transforms/GVN/load-through-select-dbg.ll @@ -8,7 +8,7 @@ define i32 @foo(ptr %a, ptr %b) { ; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A]], align 4 -; CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr undef, metadata [[META4:![0-9]+]], metadata !DIExpression()), !dbg [[DBG10:![0-9]+]] +; CHECK-NEXT: #dbg_declare(ptr undef, [[META4:![0-9]+]], !DIExpression(), [[META10:![0-9]+]]) ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[B]], align 4 ; CHECK-NEXT: [[COND:%.*]] = icmp slt i32 [[TMP0]], [[TMP1]] ; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[COND]], i32 [[TMP0]], i32 [[TMP1]] diff --git a/llvm/test/Transforms/GlobalOpt/deadglobal-diarglist-use.ll b/llvm/test/Transforms/GlobalOpt/deadglobal-diarglist-use.ll index f077e0013278d0..b10a3778cf440e 100644 --- a/llvm/test/Transforms/GlobalOpt/deadglobal-diarglist-use.ll +++ 
b/llvm/test/Transforms/GlobalOpt/deadglobal-diarglist-use.ll @@ -6,7 +6,7 @@ ; CHECK-NOT: @s -; CHECK: call void @llvm.dbg.value(metadata !DIArgList(ptr poison +; CHECK: #dbg_value(!DIArgList(ptr poison %struct.S = type { i32 } diff --git a/llvm/test/Transforms/GlobalOpt/localize-constexpr-debuginfo.ll b/llvm/test/Transforms/GlobalOpt/localize-constexpr-debuginfo.ll index 5d6cc7db5a41f3..9faa71e0a06828 100644 --- a/llvm/test/Transforms/GlobalOpt/localize-constexpr-debuginfo.ll +++ b/llvm/test/Transforms/GlobalOpt/localize-constexpr-debuginfo.ll @@ -16,7 +16,7 @@ define i32 @main(i32 %argc, ptr %argv) norecurse !dbg !18 { ; CHECK: alloca ptr ; Make sure the metadata is sane. Currently, we just drop the metadata, ; so it points to nothing. -; CHECK: call void @llvm.dbg.value(metadata !2, +; CHECK: #dbg_value(!2, ; CHECK: !2 = !{} entry: call void @llvm.dbg.value(metadata i32 %argc, metadata !22, metadata !23), !dbg !24 diff --git a/llvm/test/Transforms/GlobalOpt/shrink-global-to-bool-check-debug.ll b/llvm/test/Transforms/GlobalOpt/shrink-global-to-bool-check-debug.ll index 00de0f53a4e63f..00fe58800e9b4d 100644 --- a/llvm/test/Transforms/GlobalOpt/shrink-global-to-bool-check-debug.ll +++ b/llvm/test/Transforms/GlobalOpt/shrink-global-to-bool-check-debug.ll @@ -19,7 +19,7 @@ entry: ;CHECK-NEXT: entry: ;CHECK-NEXT: %.b = load i1, ptr @foo, align 1, !dbg ![[DbgLocLoadSel:[0-9]+]] ;CHECK-NEXT: %0 = select i1 %.b, i32 5, i32 0, !dbg ![[DbgLocLoadSel]] -;CHECK-NEXT: call void @llvm.dbg.value({{.*}}), !dbg ![[DbgLocLoadSel]] +;CHECK-NEXT: #dbg_value({{.*}}, ![[DbgLocLoadSel]] ;CHECK-NEXT: ret i32 %0, !dbg ![[DbgLocRet:[0-9]+]] ;CHECK: ![[DbgLocLoadSel]] = !DILocation(line: 3, diff --git a/llvm/test/Transforms/HotColdSplit/split-out-dbg-label.ll b/llvm/test/Transforms/HotColdSplit/split-out-dbg-label.ll index 97bb13d4bdcfb9..da6c19d604c7c7 100644 --- a/llvm/test/Transforms/HotColdSplit/split-out-dbg-label.ll +++ b/llvm/test/Transforms/HotColdSplit/split-out-dbg-label.ll @@ 
-11,9 +11,9 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.14.0" ; CHECK-LABEL: define {{.*}}@foo.cold.1 -; CHECK: llvm.dbg.label(metadata [[LABEL:![0-9]+]]), !dbg [[LINE:![0-9]+]] -; CHECK: llvm.dbg.label(metadata [[LABEL_IN_INLINE_ME:![0-9]+]]), !dbg [[LINE2:![0-9]+]] -; CHECK: llvm.dbg.label(metadata [[SCOPED_LABEL:![0-9]+]]), !dbg [[LINE]] +; CHECK: #dbg_label([[LABEL:![0-9]+]], [[LINE:![0-9]+]] +; CHECK: #dbg_label([[LABEL_IN_INLINE_ME:![0-9]+]], [[LINE2:![0-9]+]] +; CHECK: #dbg_label([[SCOPED_LABEL:![0-9]+]], [[LINE]] ; CHECK: [[FILE:![0-9]+]] = !DIFile ; CHECK: [[INLINE_ME_SCOPE:![0-9]+]] = distinct !DISubprogram(name: "inline_me" diff --git a/llvm/test/Transforms/HotColdSplit/transfer-debug-info.ll b/llvm/test/Transforms/HotColdSplit/transfer-debug-info.ll index eea11209c36fda..e8c1b464ab0c61 100644 --- a/llvm/test/Transforms/HotColdSplit/transfer-debug-info.ll +++ b/llvm/test/Transforms/HotColdSplit/transfer-debug-info.ll @@ -10,7 +10,7 @@ target triple = "x86_64-apple-macosx10.14.0" ; - The llvm.dbg.value intrinsic pointing to an argument in @foo (%arg1) is ; dropped -; CHECK-NOT: llvm.dbg.value +; CHECK-NOT: #dbg_value ; - Instructions without locations in the original function have no ; location in the new function @@ -23,21 +23,21 @@ target triple = "x86_64-apple-macosx10.14.0" ; CHECK-NEXT: call void @sink(i32 [[ADD1]]), !dbg [[LINE1:![0-9]+]] ; - llvm.dbg.value intrinsics for values local to @foo.cold.1 are preserved -; CHECK-NEXT: llvm.dbg.value(metadata i32 [[ADD1]], metadata [[VAR1:![0-9]+]], metadata !DIExpression()), !dbg [[LINE1]] +; CHECK-NEXT: #dbg_value(i32 [[ADD1]], [[VAR1:![0-9]+]], !DIExpression(), [[LINE1]] ; - Expressions inside of dbg.value intrinsics are preserved -; CHECK-NEXT: llvm.dbg.value(metadata i32 [[ADD1]], metadata [[VAR1]], metadata !DIExpression(DW_OP_constu, 1, DW_OP_plus, DW_OP_stack_value) +; CHECK-NEXT: #dbg_value(i32 [[ADD1]], [[VAR1]], !DIExpression(DW_OP_constu, 
1, DW_OP_plus, DW_OP_stack_value) ; CHECK-NEXT: call void @sink(i32 [[ADD1]]), !dbg [[LINE2:![0-9]+]] ; CHECK-NEXT: call void @sink(i32 [[ADD1]]), !dbg [[LINE3:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[ADD1]] -; CHECK-SAME: metadata [[VAR_FROM_INLINE_ME:![0-9]+]] -; CHECK-SAME: !dbg [[LINE2]] +; CHECK-NEXT: #dbg_value(i32 [[ADD1]] +; CHECK-SAME: [[VAR_FROM_INLINE_ME:![0-9]+]] +; CHECK-SAME: [[LINE2]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[ADD1]] -; CHECK-SAME: metadata [[VAR2:![0-9]+]] -; CHECK-SAME: !dbg [[LINE4:![0-9]+]] +; CHECK-NEXT: #dbg_value(i32 [[ADD1]] +; CHECK-SAME: [[VAR2:![0-9]+]] +; CHECK-SAME: [[LINE4:![0-9]+]] ; - The DISubprogram for @foo.cold.1 has an empty DISubroutineType diff --git a/llvm/test/Transforms/IROutliner/legal-debug.ll b/llvm/test/Transforms/IROutliner/legal-debug.ll index be1182b38fa2d8..39bb3f509bac7a 100644 --- a/llvm/test/Transforms/IROutliner/legal-debug.ll +++ b/llvm/test/Transforms/IROutliner/legal-debug.ll @@ -103,11 +103,11 @@ attributes #0 = { nounwind readnone speculatable willreturn } ; CHECK-LABEL: @function1( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4, !dbg [[DBG17:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata ptr [[A]], metadata [[META9:![0-9]+]], metadata !DIExpression()), !dbg [[DBG17]] +; CHECK-NEXT: #dbg_value(ptr [[A]], [[META9:![0-9]+]], !DIExpression(), [[DBG17]]) ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4, !dbg [[DBG18:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata ptr [[B]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG18]] +; CHECK-NEXT: #dbg_value(ptr [[B]], [[META11:![0-9]+]], !DIExpression(), [[DBG18]]) ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4, !dbg [[DBG19:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata ptr [[C]], metadata [[META12:![0-9]+]], metadata !DIExpression()), !dbg [[DBG19]] +; CHECK-NEXT: #dbg_value(ptr [[C]], [[META12:![0-9]+]], !DIExpression(), [[DBG19]]) ; 
CHECK-NEXT: call void @outlined_ir_func_0(ptr [[A]], ptr [[B]], ptr [[C]]), !dbg [[DBG20:![0-9]+]] ; CHECK-NEXT: ret void, !dbg [[DBG21:![0-9]+]] ; @@ -115,22 +115,26 @@ attributes #0 = { nounwind readnone speculatable willreturn } ; CHECK-LABEL: @function2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4, !dbg [[DBG30:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata ptr [[A]], metadata [[META24:![0-9]+]], metadata !DIExpression()), !dbg [[DBG30]] +; CHECK-NEXT: #dbg_value(ptr [[A]], [[META24:![0-9]+]], !DIExpression(), [[DBG30]]) ; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4, !dbg [[DBG31:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata ptr [[B]], metadata [[META25:![0-9]+]], metadata !DIExpression()), !dbg [[DBG31]] +; CHECK-NEXT: #dbg_value(ptr [[B]], [[META25:![0-9]+]], !DIExpression(), [[DBG31]]) ; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4, !dbg [[DBG32:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata ptr [[C]], metadata [[META26:![0-9]+]], metadata !DIExpression()), !dbg [[DBG32]] +; CHECK-NEXT: #dbg_value(ptr [[C]], [[META26:![0-9]+]], !DIExpression(), [[DBG32]]) ; CHECK-NEXT: call void @outlined_ir_func_0(ptr [[A]], ptr [[B]], ptr [[C]]), !dbg [[DBG33:![0-9]+]] ; CHECK-NEXT: ret void, !dbg [[DBG34:![0-9]+]] ; ; -; CHECK: @outlined_ir_func_0(ptr [[TMP0:%.*]], ptr [[TMP1:%.*]], ptr [[TMP2:%.*]]) +; CHECK-LABEL: define {{.+}} @outlined_ir_func_0( +; CHECK-NEXT: newFuncRoot: +; CHECK-NEXT: br label [[ENTRY_TO_OUTLINE:%.*]] ; CHECK: entry_to_outline: -; CHECK-NEXT: store i32 2, ptr [[TMP0]], align 4 -; CHECK-NEXT: store i32 3, ptr [[TMP1]], align 4 -; CHECK-NEXT: store i32 4, ptr [[TMP2]], align 4 +; CHECK-NEXT: store i32 2, ptr [[TMP0:%.*]], align 4 +; CHECK-NEXT: store i32 3, ptr [[TMP1:%.*]], align 4 +; CHECK-NEXT: store i32 4, ptr [[TMP2:%.*]], align 4 ; CHECK-NEXT: [[AL:%.*]] = load i32, ptr [[TMP0]], align 4 ; CHECK-NEXT: [[BL:%.*]] = load i32, ptr [[TMP1]], align 4 ; CHECK-NEXT: [[CL:%.*]] = load i32, 
ptr [[TMP2]], align 4 ; CHECK-NEXT: br label [[ENTRY_AFTER_OUTLINE_EXITSTUB:%.*]] +; CHECK: entry_after_outline.exitStub: +; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/IndVarSimplify/X86/indvar-debug-value.ll b/llvm/test/Transforms/IndVarSimplify/X86/indvar-debug-value.ll index 8a9afb85ab7af0..fb13717e5da425 100644 --- a/llvm/test/Transforms/IndVarSimplify/X86/indvar-debug-value.ll +++ b/llvm/test/Transforms/IndVarSimplify/X86/indvar-debug-value.ll @@ -16,9 +16,9 @@ ; opt -passes=mem2reg,loop-rotate -scalar-evolution ; CHECK: @main -; CHECK: llvm.dbg.value(metadata i32 1, metadata [[METADATA_IDX1:![0-9]+]] +; CHECK: #dbg_value(i32 1, [[METADATA_IDX1:![0-9]+]] ; CHECK: %[[VAR_NAME:.*]] = add nuw nsw i64 -; CHECK: llvm.dbg.value(metadata i64 %[[VAR_NAME]], metadata [[METADATA_IDX1]], metadata !DIExpression()) +; CHECK: #dbg_value(i64 %[[VAR_NAME]], [[METADATA_IDX1]], !DIExpression(), ; CHECK: DICompileUnit ; CHECK: [[METADATA_IDX1]] = !DILocalVariable(name: "ArgIndex" diff --git a/llvm/test/Transforms/IndVarSimplify/X86/indvar-debug-value2.ll b/llvm/test/Transforms/IndVarSimplify/X86/indvar-debug-value2.ll index 3b9b4b7e587a72..4078ac9bb8f876 100644 --- a/llvm/test/Transforms/IndVarSimplify/X86/indvar-debug-value2.ll +++ b/llvm/test/Transforms/IndVarSimplify/X86/indvar-debug-value2.ll @@ -16,9 +16,9 @@ ; opt -passes=mem2reg -scalar-evolution ; CHECK: @main -; CHECK: llvm.dbg.value(metadata i32 1, metadata [[METADATA_IDX1:![0-9]+]] +; CHECK: #dbg_value(i32 1, [[METADATA_IDX1:![0-9]+]] ; CHECK: %[[VAR_NAME:.*]] = add nuw nsw i64 -; CHECK: llvm.dbg.value(metadata i64 %[[VAR_NAME]], metadata [[METADATA_IDX1]], metadata !DIExpression()) +; CHECK: #dbg_value(i64 %[[VAR_NAME]], [[METADATA_IDX1]], !DIExpression(), ; CHECK: DICompileUnit ; CHECK: [[METADATA_IDX1]] = !DILocalVariable(name: "ArgIndex" diff --git a/llvm/test/Transforms/IndVarSimplify/X86/scev-phi-debug-info.ll b/llvm/test/Transforms/IndVarSimplify/X86/scev-phi-debug-info.ll index 
b12201ecea3f84..62c8e29ef70af0 100644 --- a/llvm/test/Transforms/IndVarSimplify/X86/scev-phi-debug-info.ll +++ b/llvm/test/Transforms/IndVarSimplify/X86/scev-phi-debug-info.ll @@ -14,7 +14,7 @@ entry: for.cond: ; preds = %for.body, %entry ; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] - ; CHECK: call void @llvm.dbg.value(metadata i64 %indvars.iv, metadata !23, metadata !DIExpression()), !dbg !24 + ; CHECK: #dbg_value(i64 %indvars.iv, !23, !DIExpression(), !24 %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] tail call void @llvm.dbg.value(metadata i32 %i.0, metadata !23, metadata !DIExpression()), !dbg !24 %cmp = icmp slt i32 %i.0, 32, !dbg !24 diff --git a/llvm/test/Transforms/Inline/alloca-dbgdeclare.ll b/llvm/test/Transforms/Inline/alloca-dbgdeclare.ll index 9592333aeb5420..e923d3df52e7cb 100644 --- a/llvm/test/Transforms/Inline/alloca-dbgdeclare.ll +++ b/llvm/test/Transforms/Inline/alloca-dbgdeclare.ll @@ -48,7 +48,7 @@ entry: ; CHECK-NEXT: br label %while.body ; CHECK: while.body: ; CHECK-NEXT: llvm.lifetime.start -; CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr %agg.tmp.sroa.3.i, +; CHECK-NEXT: #dbg_declare(ptr %agg.tmp.sroa.3.i, %agg.tmp.sroa.3 = alloca [20 x i8], align 4 tail call void @llvm.dbg.declare(metadata ptr %agg.tmp.sroa.3, metadata !25, metadata !30), !dbg !31 %agg.tmp.sroa.0.0.copyload = load i32, ptr @b, align 8, !dbg !33 diff --git a/llvm/test/Transforms/Inline/inline_dbg_declare.ll b/llvm/test/Transforms/Inline/inline_dbg_declare.ll index 0952d9f01cef0c..b7063b840e1cf7 100644 --- a/llvm/test/Transforms/Inline/inline_dbg_declare.ll +++ b/llvm/test/Transforms/Inline/inline_dbg_declare.ll @@ -44,7 +44,7 @@ entry: ; CHECK: [[x_addr_i:%.+]] = alloca float, align 4 ; CHECK: store float {{.*}}, ptr [[x_addr_i]] -; CHECK-NEXT: void @llvm.dbg.declare(metadata ptr [[x_addr_i]], metadata [[m23:![0-9]+]], metadata !DIExpression()), !dbg [[m24:![0-9]+]] +; CHECK-NEXT: #dbg_declare(ptr [[x_addr_i]], [[m23:![0-9]+]], 
!DIExpression(), [[m24:![0-9]+]] %dst.addr = alloca ptr, align 4 store ptr %dst, ptr %dst.addr, align 4 diff --git a/llvm/test/Transforms/Inline/local-as-metadata-undominated-use.ll b/llvm/test/Transforms/Inline/local-as-metadata-undominated-use.ll index 83f9a3a778d7f5..640e74cdcefbb3 100644 --- a/llvm/test/Transforms/Inline/local-as-metadata-undominated-use.ll +++ b/llvm/test/Transforms/Inline/local-as-metadata-undominated-use.ll @@ -23,7 +23,7 @@ define i32 @caller(i32 %i) { entry: ; Although the inliner shouldn't crash, it can't be expected to get the ; "correct" SSA value since its assumptions have been violated. -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 %add.i, +; CHECK-NEXT: #dbg_value(i32 %add.i, ; CHECK-NEXT: %{{.*}} = add nsw %call = tail call i32 @foo(i32 %i) ret i32 %call diff --git a/llvm/test/Transforms/InstCombine/alloca-cast-debuginfo.ll b/llvm/test/Transforms/InstCombine/alloca-cast-debuginfo.ll index 9c0f7ec04d4a20..08ce83b389786c 100644 --- a/llvm/test/Transforms/InstCombine/alloca-cast-debuginfo.ll +++ b/llvm/test/Transforms/InstCombine/alloca-cast-debuginfo.ll @@ -27,7 +27,7 @@ define void @f(ptr %p) !dbg !11 { ; CHECK-LABEL: @f( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[LOCAL:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4 -; CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr [[LOCAL]], metadata [[META22:![0-9]+]], metadata !DIExpression()), !dbg [[DBG23:![0-9]+]] +; CHECK-NEXT: #dbg_declare(ptr [[LOCAL]], [[META22:![0-9]+]], !DIExpression(), [[META23:![0-9]+]]) ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[P:%.*]], align 8, !dbg [[DBG24:![0-9]+]], !tbaa [[TBAA25:![0-9]+]] ; CHECK-NEXT: store i64 [[TMP0]], ptr [[LOCAL]], align 4, !dbg [[DBG29:![0-9]+]], !tbaa [[TBAA25]] ; CHECK-NEXT: call void @escape(ptr nonnull [[LOCAL]]), !dbg [[DBG30:![0-9]+]] diff --git a/llvm/test/Transforms/InstCombine/assume.ll b/llvm/test/Transforms/InstCombine/assume.ll index 87c75fb2b55592..474da9968b66ad 100644 --- 
a/llvm/test/Transforms/InstCombine/assume.ll +++ b/llvm/test/Transforms/InstCombine/assume.ll @@ -405,7 +405,7 @@ define i32 @assumption_conflicts_with_known_bits(i32 %a, i32 %b) { define void @debug_interference(i8 %x) { ; CHECK-LABEL: @debug_interference( -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 5, metadata [[META7:![0-9]+]], metadata !DIExpression()), !dbg [[DBG9:![0-9]+]] +; CHECK-NEXT: #dbg_value(i32 5, [[META7:![0-9]+]], !DIExpression(), [[META9:![0-9]+]]) ; CHECK-NEXT: store i1 true, ptr poison, align 1 ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/InstCombine/cast-mul-select.ll b/llvm/test/Transforms/InstCombine/cast-mul-select.ll index 79d7ac9e6d0bed..d185e226805231 100644 --- a/llvm/test/Transforms/InstCombine/cast-mul-select.ll +++ b/llvm/test/Transforms/InstCombine/cast-mul-select.ll @@ -13,12 +13,12 @@ define i32 @mul(i32 %x, i32 %y) { ; CHECK-NEXT: ret i32 [[D]] ; ; DBGINFO-LABEL: @mul( -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[X:%.*]], metadata [[META9:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_stack_value)), !dbg [[DBG15:![0-9]+]] -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[Y:%.*]], metadata [[META11:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_stack_value)), !dbg [[DBG16:![0-9]+]] +; DBGINFO-NEXT: #dbg_value(i32 [[X:%.*]], [[META9:![0-9]+]], !DIExpression(DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_stack_value), [[META15:![0-9]+]]) +; DBGINFO-NEXT: #dbg_value(i32 [[Y:%.*]], [[META11:![0-9]+]], !DIExpression(DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_stack_value), [[META16:![0-9]+]]) ; DBGINFO-NEXT: [[C:%.*]] = mul i32 [[X]], [[Y]], !dbg [[DBG17:![0-9]+]] ; DBGINFO-NEXT: [[D:%.*]] = and i32 [[C]], 255, !dbg [[DBG18:![0-9]+]] -; DBGINFO-NEXT: call void 
@llvm.dbg.value(metadata i32 [[C]], metadata [[META12:![0-9]+]], metadata !DIExpression()), !dbg [[DBG17]] -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[D]], metadata [[META13:![0-9]+]], metadata !DIExpression()), !dbg [[DBG18]] +; DBGINFO-NEXT: #dbg_value(i32 [[C]], [[META12:![0-9]+]], !DIExpression(), [[DBG17]]) +; DBGINFO-NEXT: #dbg_value(i32 [[D]], [[META13:![0-9]+]], !DIExpression(), [[DBG18]]) ; DBGINFO-NEXT: ret i32 [[D]], !dbg [[DBG19:![0-9]+]] ; @@ -41,15 +41,15 @@ define i32 @select1(i1 %cond, i32 %x, i32 %y, i32 %z) { ; CHECK-NEXT: ret i32 [[F]] ; ; DBGINFO-LABEL: @select1( -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[X:%.*]], metadata [[META22:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_stack_value)), !dbg [[DBG28:![0-9]+]] -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[Y:%.*]], metadata [[META23:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_stack_value)), !dbg [[DBG29:![0-9]+]] -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[Z:%.*]], metadata [[META24:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_stack_value)), !dbg [[DBG30:![0-9]+]] +; DBGINFO-NEXT: #dbg_value(i32 [[X:%.*]], [[META22:![0-9]+]], !DIExpression(DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_stack_value), [[META28:![0-9]+]]) +; DBGINFO-NEXT: #dbg_value(i32 [[Y:%.*]], [[META23:![0-9]+]], !DIExpression(DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_stack_value), [[META29:![0-9]+]]) +; DBGINFO-NEXT: #dbg_value(i32 [[Z:%.*]], [[META24:![0-9]+]], !DIExpression(DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_stack_value), [[META30:![0-9]+]]) ; DBGINFO-NEXT: [[D:%.*]] = add i32 [[X]], [[Y]], !dbg 
[[DBG31:![0-9]+]] -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata !DIArgList(i32 [[X]], i32 [[Y]]), metadata [[META25:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_LLVM_arg, 1, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_plus, DW_OP_stack_value)), !dbg [[DBG31]] +; DBGINFO-NEXT: #dbg_value(!DIArgList(i32 [[X]], i32 [[Y]]), [[META25:![0-9]+]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_LLVM_arg, 1, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_plus, DW_OP_stack_value), [[DBG31]]) ; DBGINFO-NEXT: [[E:%.*]] = select i1 [[COND:%.*]], i32 [[Z]], i32 [[D]], !dbg [[DBG32:![0-9]+]] ; DBGINFO-NEXT: [[F:%.*]] = and i32 [[E]], 255, !dbg [[DBG33:![0-9]+]] -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[E]], metadata [[META26:![0-9]+]], metadata !DIExpression()), !dbg [[DBG32]] -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[F]], metadata [[META27:![0-9]+]], metadata !DIExpression()), !dbg [[DBG33]] +; DBGINFO-NEXT: #dbg_value(i32 [[E]], [[META26:![0-9]+]], !DIExpression(), [[DBG32]]) +; DBGINFO-NEXT: #dbg_value(i32 [[F]], [[META27:![0-9]+]], !DIExpression(), [[DBG33]]) ; DBGINFO-NEXT: ret i32 [[F]], !dbg [[DBG34:![0-9]+]] ; %A = trunc i32 %x to i8 @@ -68,14 +68,14 @@ define i8 @select2(i1 %cond, i8 %x, i8 %y, i8 %z) { ; CHECK-NEXT: ret i8 [[E]] ; ; DBGINFO-LABEL: @select2( -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i8 [[X:%.*]], metadata [[META37:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_stack_value)), !dbg [[DBG43:![0-9]+]] -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i8 [[Y:%.*]], metadata [[META38:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, 
DW_ATE_unsigned, DW_OP_stack_value)), !dbg [[DBG44:![0-9]+]] -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i8 [[Z:%.*]], metadata [[META39:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_stack_value)), !dbg [[DBG45:![0-9]+]] +; DBGINFO-NEXT: #dbg_value(i8 [[X:%.*]], [[META37:![0-9]+]], !DIExpression(DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_stack_value), [[META43:![0-9]+]]) +; DBGINFO-NEXT: #dbg_value(i8 [[Y:%.*]], [[META38:![0-9]+]], !DIExpression(DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_stack_value), [[META44:![0-9]+]]) +; DBGINFO-NEXT: #dbg_value(i8 [[Z:%.*]], [[META39:![0-9]+]], !DIExpression(DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_stack_value), [[META45:![0-9]+]]) ; DBGINFO-NEXT: [[D:%.*]] = add i8 [[X]], [[Y]], !dbg [[DBG46:![0-9]+]] -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata !DIArgList(i8 [[X]], i8 [[Y]]), metadata [[META40:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_arg, 1, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_plus, DW_OP_stack_value)), !dbg [[DBG46]] +; DBGINFO-NEXT: #dbg_value(!DIArgList(i8 [[X]], i8 [[Y]]), [[META40:![0-9]+]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_LLVM_arg, 1, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_plus, DW_OP_stack_value), [[DBG46]]) ; DBGINFO-NEXT: [[E:%.*]] = select i1 [[COND:%.*]], i8 [[Z]], i8 [[D]], !dbg [[DBG47:![0-9]+]] -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 poison, metadata [[META41:![0-9]+]], metadata !DIExpression()), !dbg [[DBG47]] -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i8 [[E]], metadata 
[[META42:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48:![0-9]+]] +; DBGINFO-NEXT: #dbg_value(i32 poison, [[META41:![0-9]+]], !DIExpression(), [[DBG47]]) +; DBGINFO-NEXT: #dbg_value(i8 [[E]], [[META42:![0-9]+]], !DIExpression(), [[META48:![0-9]+]]) ; DBGINFO-NEXT: ret i8 [[E]], !dbg [[DBG49:![0-9]+]] ; %A = zext i8 %x to i32 @@ -100,13 +100,13 @@ define i32 @eval_trunc_multi_use_in_one_inst(i32 %x) { ; ; DBGINFO-LABEL: @eval_trunc_multi_use_in_one_inst( ; DBGINFO-NEXT: [[Z:%.*]] = zext i32 [[X:%.*]] to i64, !dbg [[DBG57:![0-9]+]] -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i64 [[Z]], metadata [[META52:![0-9]+]], metadata !DIExpression()), !dbg [[DBG57]] +; DBGINFO-NEXT: #dbg_value(i64 [[Z]], [[META52:![0-9]+]], !DIExpression(), [[DBG57]]) ; DBGINFO-NEXT: [[A:%.*]] = add nuw nsw i64 [[Z]], 15, !dbg [[DBG58:![0-9]+]] -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i64 [[A]], metadata [[META54:![0-9]+]], metadata !DIExpression()), !dbg [[DBG58]] +; DBGINFO-NEXT: #dbg_value(i64 [[A]], [[META54:![0-9]+]], !DIExpression(), [[DBG58]]) ; DBGINFO-NEXT: [[M:%.*]] = mul i64 [[A]], [[A]], !dbg [[DBG59:![0-9]+]] -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i64 [[M]], metadata [[META55:![0-9]+]], metadata !DIExpression()), !dbg [[DBG59]] +; DBGINFO-NEXT: #dbg_value(i64 [[M]], [[META55:![0-9]+]], !DIExpression(), [[DBG59]]) ; DBGINFO-NEXT: [[T:%.*]] = trunc i64 [[M]] to i32, !dbg [[DBG60:![0-9]+]] -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[T]], metadata [[META56:![0-9]+]], metadata !DIExpression()), !dbg [[DBG60]] +; DBGINFO-NEXT: #dbg_value(i32 [[T]], [[META56:![0-9]+]], !DIExpression(), [[DBG60]]) ; DBGINFO-NEXT: ret i32 [[T]], !dbg [[DBG61:![0-9]+]] ; %z = zext i32 %x to i64 @@ -126,13 +126,13 @@ define i32 @eval_zext_multi_use_in_one_inst(i32 %x) { ; ; DBGINFO-LABEL: @eval_zext_multi_use_in_one_inst( ; DBGINFO-NEXT: [[T:%.*]] = trunc i32 [[X:%.*]] to i16, !dbg [[DBG69:![0-9]+]] -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i16 
[[T]], metadata [[META64:![0-9]+]], metadata !DIExpression()), !dbg [[DBG69]] +; DBGINFO-NEXT: #dbg_value(i16 [[T]], [[META64:![0-9]+]], !DIExpression(), [[DBG69]]) ; DBGINFO-NEXT: [[A:%.*]] = and i16 [[T]], 5, !dbg [[DBG70:![0-9]+]] -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i16 [[A]], metadata [[META66:![0-9]+]], metadata !DIExpression()), !dbg [[DBG70]] +; DBGINFO-NEXT: #dbg_value(i16 [[A]], [[META66:![0-9]+]], !DIExpression(), [[DBG70]]) ; DBGINFO-NEXT: [[M:%.*]] = mul nuw nsw i16 [[A]], [[A]], !dbg [[DBG71:![0-9]+]] -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i16 [[M]], metadata [[META67:![0-9]+]], metadata !DIExpression()), !dbg [[DBG71]] +; DBGINFO-NEXT: #dbg_value(i16 [[M]], [[META67:![0-9]+]], !DIExpression(), [[DBG71]]) ; DBGINFO-NEXT: [[R:%.*]] = zext nneg i16 [[M]] to i32, !dbg [[DBG72:![0-9]+]] -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[R]], metadata [[META68:![0-9]+]], metadata !DIExpression()), !dbg [[DBG72]] +; DBGINFO-NEXT: #dbg_value(i32 [[R]], [[META68:![0-9]+]], !DIExpression(), [[DBG72]]) ; DBGINFO-NEXT: ret i32 [[R]], !dbg [[DBG73:![0-9]+]] ; %t = trunc i32 %x to i16 @@ -153,15 +153,15 @@ define i32 @eval_sext_multi_use_in_one_inst(i32 %x) { ; ; DBGINFO-LABEL: @eval_sext_multi_use_in_one_inst( ; DBGINFO-NEXT: [[T:%.*]] = trunc i32 [[X:%.*]] to i16, !dbg [[DBG81:![0-9]+]] -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i16 [[T]], metadata [[META76:![0-9]+]], metadata !DIExpression()), !dbg [[DBG81]] +; DBGINFO-NEXT: #dbg_value(i16 [[T]], [[META76:![0-9]+]], !DIExpression(), [[DBG81]]) ; DBGINFO-NEXT: [[A:%.*]] = and i16 [[T]], 14, !dbg [[DBG82:![0-9]+]] -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i16 [[A]], metadata [[META77:![0-9]+]], metadata !DIExpression()), !dbg [[DBG82]] +; DBGINFO-NEXT: #dbg_value(i16 [[A]], [[META77:![0-9]+]], !DIExpression(), [[DBG82]]) ; DBGINFO-NEXT: [[M:%.*]] = mul nuw nsw i16 [[A]], [[A]], !dbg [[DBG83:![0-9]+]] -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i16 
[[M]], metadata [[META78:![0-9]+]], metadata !DIExpression()), !dbg [[DBG83]] +; DBGINFO-NEXT: #dbg_value(i16 [[M]], [[META78:![0-9]+]], !DIExpression(), [[DBG83]]) ; DBGINFO-NEXT: [[O:%.*]] = or disjoint i16 [[M]], -32768, !dbg [[DBG84:![0-9]+]] -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i16 [[O]], metadata [[META79:![0-9]+]], metadata !DIExpression()), !dbg [[DBG84]] +; DBGINFO-NEXT: #dbg_value(i16 [[O]], [[META79:![0-9]+]], !DIExpression(), [[DBG84]]) ; DBGINFO-NEXT: [[R:%.*]] = sext i16 [[O]] to i32, !dbg [[DBG85:![0-9]+]] -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[R]], metadata [[META80:![0-9]+]], metadata !DIExpression()), !dbg [[DBG85]] +; DBGINFO-NEXT: #dbg_value(i32 [[R]], [[META80:![0-9]+]], !DIExpression(), [[DBG85]]) ; DBGINFO-NEXT: ret i32 [[R]], !dbg [[DBG86:![0-9]+]] ; %t = trunc i32 %x to i16 @@ -207,13 +207,13 @@ define void @PR36225(i32 %a, i32 %b, i1 %c1, i3 %v1, i3 %v2) { ; DBGINFO-NEXT: entry: ; DBGINFO-NEXT: br label [[WHILE_BODY:%.*]], !dbg [[DBG94:![0-9]+]] ; DBGINFO: while.body: -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[B:%.*]], metadata [[META89:![0-9]+]], metadata !DIExpression(DW_OP_lit0, DW_OP_eq, DW_OP_stack_value)), !dbg [[DBG95:![0-9]+]] +; DBGINFO-NEXT: #dbg_value(i32 [[B:%.*]], [[META89:![0-9]+]], !DIExpression(DW_OP_lit0, DW_OP_eq, DW_OP_stack_value), [[META95:![0-9]+]]) ; DBGINFO-NEXT: br i1 [[C1:%.*]], label [[FOR_BODY3_US:%.*]], label [[FOR_BODY3:%.*]], !dbg [[DBG96:![0-9]+]] ; DBGINFO: for.body3.us: -; DBGINFO-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[B]], 0, !dbg [[DBG95]] -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i1 [[TOBOOL]], metadata [[META89]], metadata !DIExpression()), !dbg [[DBG95]] +; DBGINFO-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[B]], 0, !dbg [[META95]] +; DBGINFO-NEXT: #dbg_value(i1 [[TOBOOL]], [[META89]], !DIExpression(), [[META95]]) ; DBGINFO-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[TOBOOL]], i8 0, i8 4, !dbg [[DBG97:![0-9]+]] -; DBGINFO-NEXT: call void 
@llvm.dbg.value(metadata i8 [[SPEC_SELECT]], metadata [[META90:![0-9]+]], metadata !DIExpression()), !dbg [[DBG97]] +; DBGINFO-NEXT: #dbg_value(i8 [[SPEC_SELECT]], [[META90:![0-9]+]], !DIExpression(), [[DBG97]]) ; DBGINFO-NEXT: switch i3 [[V1:%.*]], label [[EXIT:%.*]] [ ; DBGINFO-NEXT: i3 0, label [[FOR_END:%.*]] ; DBGINFO-NEXT: i3 -1, label [[FOR_END]] @@ -225,11 +225,11 @@ define void @PR36225(i32 %a, i32 %b, i1 %c1, i3 %v1, i3 %v2) { ; DBGINFO-NEXT: ], !dbg [[DBG99:![0-9]+]] ; DBGINFO: for.end: ; DBGINFO-NEXT: [[H:%.*]] = phi i8 [ [[SPEC_SELECT]], [[FOR_BODY3_US]] ], [ [[SPEC_SELECT]], [[FOR_BODY3_US]] ], [ 0, [[FOR_BODY3]] ], [ 0, [[FOR_BODY3]] ], !dbg [[DBG100:![0-9]+]] -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i8 [[H]], metadata [[META91:![0-9]+]], metadata !DIExpression()), !dbg [[DBG100]] +; DBGINFO-NEXT: #dbg_value(i8 [[H]], [[META91:![0-9]+]], !DIExpression(), [[DBG100]]) ; DBGINFO-NEXT: [[CONV:%.*]] = zext nneg i8 [[H]] to i32, !dbg [[DBG101:![0-9]+]] -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[CONV]], metadata [[META92:![0-9]+]], metadata !DIExpression()), !dbg [[DBG101]] +; DBGINFO-NEXT: #dbg_value(i32 [[CONV]], [[META92:![0-9]+]], !DIExpression(), [[DBG101]]) ; DBGINFO-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV]], [[A:%.*]], !dbg [[DBG102:![0-9]+]] -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i1 [[CMP]], metadata [[META93:![0-9]+]], metadata !DIExpression()), !dbg [[DBG102]] +; DBGINFO-NEXT: #dbg_value(i1 [[CMP]], [[META93:![0-9]+]], !DIExpression(), [[DBG102]]) ; DBGINFO-NEXT: br i1 [[CMP]], label [[EXIT]], label [[EXIT2:%.*]], !dbg [[DBG103:![0-9]+]] ; DBGINFO: exit2: ; DBGINFO-NEXT: unreachable, !dbg [[DBG104:![0-9]+]] @@ -275,7 +275,7 @@ define i1 @foo(i1 zeroext %b) { ; CHECK-NEXT: ret i1 [[B:%.*]] ; ; DBGINFO-LABEL: @foo( -; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i1 [[B:%.*]], metadata [[META108:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, 
DW_OP_stack_value)), !dbg [[DBG109:![0-9]+]] +; DBGINFO-NEXT: #dbg_value(i1 [[B:%.*]], [[META108:![0-9]+]], !DIExpression(DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_stack_value), [[META109:![0-9]+]]) ; DBGINFO-NEXT: ret i1 [[B]], !dbg [[DBG110:![0-9]+]] ; diff --git a/llvm/test/Transforms/InstCombine/cast-set-preserve-signed-dbg-val.ll b/llvm/test/Transforms/InstCombine/cast-set-preserve-signed-dbg-val.ll index f9a476c24246d7..4462193ec421ab 100644 --- a/llvm/test/Transforms/InstCombine/cast-set-preserve-signed-dbg-val.ll +++ b/llvm/test/Transforms/InstCombine/cast-set-preserve-signed-dbg-val.ll @@ -14,14 +14,14 @@ define i16 @test5(i16 %A) !dbg !34 { ; Preserve the dbg.value for the DCE'd 32-bit 'and'. ; ; The high 16 bits of the original 'and' require sign-extending the new 16-bit and: - ; CHECK-NEXT: call void @llvm.dbg.value(metadata i16 [[and]], metadata [[C:![0-9]+]], - ; CHECK-SAME: metadata !DIExpression(DW_OP_LLVM_convert, 16, DW_ATE_signed, DW_OP_LLVM_convert, 32, DW_ATE_signed, DW_OP_stack_value) + ; CHECK-NEXT: #dbg_value(i16 [[and]], [[C:![0-9]+]], + ; CHECK-SAME: !DIExpression(DW_OP_LLVM_convert, 16, DW_ATE_signed, DW_OP_LLVM_convert, 32, DW_ATE_signed, DW_OP_stack_value) %D = trunc i32 %C to i16, !dbg !42 call void @llvm.dbg.value(metadata i16 %D, metadata !38, metadata !DIExpression()), !dbg !42 ; The dbg.value for a truncate should simply point to the result of the 16-bit 'and'. 
- ; CHECK-NEXT: call void @llvm.dbg.value(metadata i16 [[and]], metadata [[D:![0-9]+]], metadata !DIExpression()) + ; CHECK-NEXT: #dbg_value(i16 [[and]], [[D:![0-9]+]], !DIExpression(), ret i16 %D, !dbg !43 ; CHECK-NEXT: ret i16 [[and]] diff --git a/llvm/test/Transforms/InstCombine/consecutive-fences.ll b/llvm/test/Transforms/InstCombine/consecutive-fences.ll index ce8274811416ce..2ba8ac46a43c79 100644 --- a/llvm/test/Transforms/InstCombine/consecutive-fences.ll +++ b/llvm/test/Transforms/InstCombine/consecutive-fences.ll @@ -98,7 +98,7 @@ define void @acquire_single_thread_scope() { ; CHECK-LABEL: define void @debug ; CHECK-NOT: fence -; CHECK: call void @llvm.dbg.value +; CHECK: #dbg_value ; CHECK: fence seq_cst define void @debug() { fence seq_cst diff --git a/llvm/test/Transforms/InstCombine/dbg-scalable-store-fixed-frag.ll b/llvm/test/Transforms/InstCombine/dbg-scalable-store-fixed-frag.ll index 54c096b42e4931..eeea1458a67d40 100644 --- a/llvm/test/Transforms/InstCombine/dbg-scalable-store-fixed-frag.ll +++ b/llvm/test/Transforms/InstCombine/dbg-scalable-store-fixed-frag.ll @@ -7,7 +7,7 @@ define i32 @foo( %x) { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ARR:%.*]] = alloca i32, align 4 -; CHECK-NEXT: call void @llvm.dbg.value(metadata undef, metadata [[META8:![0-9]+]], metadata !DIExpression()), !dbg [[DBG14:![0-9]+]] +; CHECK-NEXT: #dbg_value( undef, [[META8:![0-9]+]], !DIExpression(), [[META14:![0-9]+]]) ; CHECK-NEXT: store [[X:%.*]], ptr [[ARR]], align 4 ; CHECK-NEXT: [[RES:%.*]] = load i32, ptr [[ARR]], align 4 ; CHECK-NEXT: ret i32 [[RES]] @@ -24,7 +24,7 @@ define i32 @foo2( %x) { ; CHECK-LABEL: @foo2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[ARR:%.*]] = alloca [4 x i32], align 4 -; CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr [[ARR]], metadata [[META15:![0-9]+]], metadata !DIExpression()), !dbg [[DBG17:![0-9]+]] +; CHECK-NEXT: #dbg_declare(ptr [[ARR]], [[META15:![0-9]+]], !DIExpression(), [[META17:![0-9]+]]) ; CHECK-NEXT: store 
[[X:%.*]], ptr [[ARR]], align 4 ; CHECK-NEXT: [[RES:%.*]] = load i32, ptr [[ARR]], align 4 ; CHECK-NEXT: ret i32 [[RES]] diff --git a/llvm/test/Transforms/InstCombine/dbg-simplify-alloca-size.ll b/llvm/test/Transforms/InstCombine/dbg-simplify-alloca-size.ll index 03e8e44109e6b6..343a679950e66f 100644 --- a/llvm/test/Transforms/InstCombine/dbg-simplify-alloca-size.ll +++ b/llvm/test/Transforms/InstCombine/dbg-simplify-alloca-size.ll @@ -9,7 +9,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) ; CHECK-LABEL: @toplevel( ; CHECK: entry: ; CHECK-NEXT: %pixels1 = alloca [3 x i8], align 1 -; CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr %pixels1, metadata ![[MD:[0-9]+]], metadata !DIExpression()), !dbg ![[DBG:[0-9]+]] +; CHECK-NEXT: #dbg_declare(ptr %pixels1, ![[MD:[0-9]+]], !DIExpression(), ![[DBG:[0-9]+]] ; CHECK-NEXT: call void @foo(ptr nonnull %pixels1) ; CHECK-NEXT: ret void define dso_local void @toplevel() { diff --git a/llvm/test/Transforms/InstCombine/debuginfo-dce.ll b/llvm/test/Transforms/InstCombine/debuginfo-dce.ll index 27e0580804cae7..5fcf26362a3412 100644 --- a/llvm/test/Transforms/InstCombine/debuginfo-dce.ll +++ b/llvm/test/Transforms/InstCombine/debuginfo-dce.ll @@ -35,7 +35,7 @@ entry: call void @llvm.dbg.value(metadata ptr %1, metadata !18, metadata !20), !dbg !19 ; CHECK: define void @salvage_load ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @llvm.dbg.value(metadata ptr poison +; CHECK-NEXT: #dbg_value(ptr poison store ptr %1, ptr %im_not_dead, align 8 ret void, !dbg !21 } @@ -46,8 +46,8 @@ entry: call void @llvm.dbg.value(metadata ptr %queue, metadata !24, metadata !20), !dbg !23 ; CHECK: define void @salvage_bitcast ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @llvm.dbg.value(metadata ptr %queue, -; CHECK-SAME: metadata !DIExpression(DW_OP_plus_uconst, 0)) +; CHECK-NEXT: #dbg_value(ptr %queue, +; CHECK-SAME: !DIExpression(DW_OP_plus_uconst, 0), store ptr %queue, ptr %im_not_dead, align 8 ret void, !dbg !23 } @@ -60,8 +60,8 
@@ entry: call void @llvm.dbg.value(metadata ptr %1, metadata !27, metadata !20), !dbg !26 ; CHECK: define void @salvage_gep0 ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @llvm.dbg.value(metadata ptr %queue, -; CHECK-SAME: metadata !DIExpression(DW_OP_constu, 8, DW_OP_minus, DW_OP_stack_value)) +; CHECK-NEXT: #dbg_value(ptr %queue, +; CHECK-SAME: !DIExpression(DW_OP_constu, 8, DW_OP_minus, DW_OP_stack_value), store ptr %1, ptr %im_not_dead, align 8 ret void, !dbg !26 } @@ -74,8 +74,8 @@ entry: call void @llvm.dbg.value(metadata ptr %1, metadata !30, metadata !DIExpression(DW_OP_LLVM_fragment, 0, 32)), !dbg !29 ; CHECK: define void @salvage_gep1 ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @llvm.dbg.value(metadata ptr %queue, -; CHECK-SAME: metadata !DIExpression(DW_OP_constu, 8, DW_OP_minus, DW_OP_stack_value, DW_OP_LLVM_fragment, 0, 32)) +; CHECK-NEXT: #dbg_value(ptr %queue, +; CHECK-SAME: !DIExpression(DW_OP_constu, 8, DW_OP_minus, DW_OP_stack_value, DW_OP_LLVM_fragment, 0, 32), store ptr %1, ptr %im_not_dead, align 8 ret void, !dbg !29 } @@ -88,8 +88,8 @@ entry: call void @llvm.dbg.value(metadata ptr %1, metadata !33, metadata !DIExpression(DW_OP_stack_value)), !dbg !32 ; CHECK: define void @salvage_gep2 ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @llvm.dbg.value(metadata ptr %queue, -; CHECK-SAME: metadata !DIExpression(DW_OP_constu, 8, DW_OP_minus, DW_OP_stack_value)) +; CHECK-NEXT: #dbg_value(ptr %queue, +; CHECK-SAME: !DIExpression(DW_OP_constu, 8, DW_OP_minus, DW_OP_stack_value), store ptr %1, ptr %im_not_dead, align 8 ret void, !dbg !32 } diff --git a/llvm/test/Transforms/InstCombine/debuginfo-dce2.ll b/llvm/test/Transforms/InstCombine/debuginfo-dce2.ll index f4f85f396c6e1b..87981ee5694d45 100644 --- a/llvm/test/Transforms/InstCombine/debuginfo-dce2.ll +++ b/llvm/test/Transforms/InstCombine/debuginfo-dce2.ll @@ -26,9 +26,9 @@ entry: } ; CHECK-LABEL: define void @f(ptr %p) -; CHECK: call void @llvm.dbg.value(metadata ptr %p, metadata ![[P_VAR:[0-9]+]], 
metadata !DIExpression()) +; CHECK: #dbg_value(ptr %p, ![[P_VAR:[0-9]+]], !DIExpression(), ; CHECK-NOT: bitcast -; CHECK: call void @llvm.dbg.value(metadata ptr %p, metadata ![[Q_VAR:[0-9]+]], metadata !DIExpression()) +; CHECK: #dbg_value(ptr %p, ![[Q_VAR:[0-9]+]], !DIExpression(), ; CHECK-NOT: bitcast ; CHECK: ret void diff --git a/llvm/test/Transforms/InstCombine/debuginfo-sink.ll b/llvm/test/Transforms/InstCombine/debuginfo-sink.ll index 63f09d26cce4f9..c02aefe0723c51 100644 --- a/llvm/test/Transforms/InstCombine/debuginfo-sink.ll +++ b/llvm/test/Transforms/InstCombine/debuginfo-sink.ll @@ -9,8 +9,8 @@ declare void @llvm.dbg.value(metadata, metadata, metadata) ; into, to maximise liveness. ; ; CHECK-LABEL: define i32 @foo(ptr -; CHECK: call void @llvm.dbg.value(metadata ptr %a, metadata !{{[0-9]+}}, -; CHECK-SAME: metadata !DIExpression(DW_OP_plus_uconst, 4, DW_OP_stack_value)) +; CHECK: #dbg_value(ptr %a, !{{[0-9]+}}, +; CHECK-SAME: !DIExpression(DW_OP_plus_uconst, 4, DW_OP_stack_value), ; CHECK-NEXT: br label %sink1 define i32 @foo(ptr %a) !dbg !7 { @@ -21,8 +21,8 @@ entry: sink1: ; CHECK-LABEL: sink1: -; CHECK: call void @llvm.dbg.value(metadata ptr %gep, -; CHECK-SAME: metadata !{{[0-9]+}}, metadata !DIExpression()) +; CHECK: #dbg_value(ptr %gep, +; CHECK-SAME: !{{[0-9]+}}, !DIExpression(), ; CHECK-NEXT: load %0 = load i32, ptr %gep, align 4, !dbg !15 ret i32 %0, !dbg !15 @@ -33,7 +33,7 @@ sink1: ; value range. 
; CHECK-LABEL: define i32 @bar( -; CHECK: call void @llvm.dbg.value(metadata ptr poison, +; CHECK: #dbg_value(ptr poison, ; CHECK-NEXT: br label %sink2 define i32 @bar(ptr %a, i32 %b) !dbg !70 { @@ -44,8 +44,8 @@ entry: sink2: ; CHECK-LABEL: sink2: -; CHECK: call void @llvm.dbg.value(metadata ptr %gep, -; CHECK-SAME: metadata !{{[0-9]+}}, metadata !DIExpression()) +; CHECK: #dbg_value(ptr %gep, +; CHECK-SAME: !{{[0-9]+}}, !DIExpression(), ; CHECK-NEXT: load ; CHECK-NEXT: extractelement ; CHECK-NEXT: ret @@ -59,10 +59,10 @@ sink2: ; original dbg.values are salvaged. ; ; CHECK-LABEL: define i32 @baz(ptr -; CHECK: call void @llvm.dbg.value(metadata ptr %a, metadata !{{[0-9]+}}, -; CHECK-SAME: metadata !DIExpression(DW_OP_plus_uconst, 4, DW_OP_stack_value)) -; CHECK-NEXT: call void @llvm.dbg.value(metadata ptr %a, metadata !{{[0-9]+}}, -; CHECK-SAME: metadata !DIExpression(DW_OP_plus_uconst, 9, DW_OP_stack_value)) +; CHECK: #dbg_value(ptr %a, !{{[0-9]+}}, +; CHECK-SAME: !DIExpression(DW_OP_plus_uconst, 4, DW_OP_stack_value), +; CHECK-NEXT: #dbg_value(ptr %a, !{{[0-9]+}}, +; CHECK-SAME: !DIExpression(DW_OP_plus_uconst, 9, DW_OP_stack_value), ; CHECK-NEXT: br label %sink1 define i32 @baz(ptr %a) !dbg !80 { @@ -74,8 +74,8 @@ entry: sink1: ; CHECK-LABEL: sink1: -; CHECK: call void @llvm.dbg.value(metadata ptr %gep, -; CHECK-SAME: metadata !{{[0-9]+}}, metadata !DIExpression(DW_OP_plus_uconst, 5)) +; CHECK: #dbg_value(ptr %gep, +; CHECK-SAME: !{{[0-9]+}}, !DIExpression(DW_OP_plus_uconst, 5), ; CHECK-NEXT: load %0 = load i32, ptr %gep, align 4, !dbg !85 ret i32 %0, !dbg !85 diff --git a/llvm/test/Transforms/InstCombine/debuginfo-skip.ll b/llvm/test/Transforms/InstCombine/debuginfo-skip.ll index ce6a675559acd2..9aebbabf4eb1a2 100644 --- a/llvm/test/Transforms/InstCombine/debuginfo-skip.ll +++ b/llvm/test/Transforms/InstCombine/debuginfo-skip.ll @@ -18,8 +18,8 @@ entry: ; should convert the declare to dbg value. 
; CHECK-LABEL: define i32 @foo(i32 %j) ; CHECK-NOT: alloca -; CHECK: call void @llvm.dbg.value(metadata i32 %j, {{.*}}) -; CHECK: call void @llvm.dbg.value(metadata i32 10, {{.*}}) +; CHECK: #dbg_value(i32 %j, {{.*}}) +; CHECK: #dbg_value(i32 10, {{.*}}) ; CHECK: ret i32 %j declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 diff --git a/llvm/test/Transforms/InstCombine/debuginfo-variables.ll b/llvm/test/Transforms/InstCombine/debuginfo-variables.ll index 546433fc6779dd..f25cf2782e0955 100644 --- a/llvm/test/Transforms/InstCombine/debuginfo-variables.ll +++ b/llvm/test/Transforms/InstCombine/debuginfo-variables.ll @@ -6,8 +6,8 @@ declare void @escape32(i32) define i64 @test_sext_zext(i16 %A) { ; CHECK-LABEL: @test_sext_zext( ; CHECK-NEXT: [[C2:%.*]] = zext i16 %A to i64 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[C2]], {{.*}}, metadata !DIExpression()) -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[C2]], {{.*}}, metadata !DIExpression()) +; CHECK-NEXT: #dbg_value(i64 [[C2]], {{.*}}, !DIExpression(), +; CHECK-NEXT: #dbg_value(i64 [[C2]], {{.*}}, !DIExpression(), %c1 = zext i16 %A to i32 %c2 = sext i32 %c1 to i64 ret i64 %c2 @@ -16,9 +16,9 @@ define i64 @test_sext_zext(i16 %A) { define i64 @test_used_sext_zext(i16 %A) { ; CHECK-LABEL: @test_used_sext_zext( ; CHECK-NEXT: [[C1:%.*]] = zext i16 %A to i32 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[C1]], {{.*}}, metadata !DIExpression()) +; CHECK-NEXT: #dbg_value(i32 [[C1]], {{.*}}, !DIExpression(), ; CHECK-NEXT: [[C2:%.*]] = zext i16 %A to i64 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[C2]], {{.*}}, metadata !DIExpression()) +; CHECK-NEXT: #dbg_value(i64 [[C2]], {{.*}}, !DIExpression(), ; CHECK-NEXT: call void @escape32(i32 %c1) ; CHECK-NEXT: ret i64 %c2 %c1 = zext i16 %A to i32 @@ -30,8 +30,8 @@ define i64 @test_used_sext_zext(i16 %A) { define i32 @test_cast_select(i1 %cond) { ; CHECK-LABEL: @test_cast_select( ; CHECK-NEXT: [[sel:%.*]] = select i1 %cond, i32 3, 
i32 5 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[sel]], {{.*}}, metadata !DIExpression()) -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[sel]], {{.*}}, metadata !DIExpression()) +; CHECK-NEXT: #dbg_value(i32 [[sel]], {{.*}}, !DIExpression(), +; CHECK-NEXT: #dbg_value(i32 [[sel]], {{.*}}, !DIExpression(), ; CHECK-NEXT: ret i32 [[sel]] %sel = select i1 %cond, i16 3, i16 5 %cast = zext i16 %sel to i32 @@ -40,84 +40,84 @@ define i32 @test_cast_select(i1 %cond) { define void @test_or(i64 %A) { ; CHECK-LABEL: @test_or( -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 %A, {{.*}}, metadata !DIExpression(DW_OP_constu, 256, DW_OP_or, DW_OP_stack_value)) +; CHECK-NEXT: #dbg_value(i64 %A, {{.*}}, !DIExpression(DW_OP_constu, 256, DW_OP_or, DW_OP_stack_value), %1 = or i64 %A, 256 ret void } define void @test_xor(i32 %A) { ; CHECK-LABEL: @test_xor( -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %A, {{.*}}, metadata !DIExpression(DW_OP_constu, 1, DW_OP_xor, DW_OP_stack_value)) +; CHECK-NEXT: #dbg_value(i32 %A, {{.*}}, !DIExpression(DW_OP_constu, 1, DW_OP_xor, DW_OP_stack_value), %1 = xor i32 %A, 1 ret void } define void @test_sub_neg(i64 %A) { ; CHECK-LABEL: @test_sub_neg( -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 %A, {{.*}}, metadata !DIExpression(DW_OP_plus_uconst, 1, DW_OP_stack_value)) +; CHECK-NEXT: #dbg_value(i64 %A, {{.*}}, !DIExpression(DW_OP_plus_uconst, 1, DW_OP_stack_value), %1 = sub i64 %A, -1 ret void } define void @test_sub_pos(i64 %A) { ; CHECK-LABEL: @test_sub_pos( -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 %A, {{.*}}, metadata !DIExpression(DW_OP_constu, 1, DW_OP_minus, DW_OP_stack_value)) +; CHECK-NEXT: #dbg_value(i64 %A, {{.*}}, !DIExpression(DW_OP_constu, 1, DW_OP_minus, DW_OP_stack_value), %1 = sub i64 %A, 1 ret void } define void @test_shl(i64 %A) { ; CHECK-LABEL: @test_shl( -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 %A, {{.*}}, metadata !DIExpression(DW_OP_constu, 7, DW_OP_shl, 
DW_OP_stack_value)) +; CHECK-NEXT: #dbg_value(i64 %A, {{.*}}, !DIExpression(DW_OP_constu, 7, DW_OP_shl, DW_OP_stack_value), %1 = shl i64 %A, 7 ret void } define void @test_lshr(i64 %A) { ; CHECK-LABEL: @test_lshr( -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 %A, {{.*}}, metadata !DIExpression(DW_OP_constu, 7, DW_OP_shr, DW_OP_stack_value)) +; CHECK-NEXT: #dbg_value(i64 %A, {{.*}}, !DIExpression(DW_OP_constu, 7, DW_OP_shr, DW_OP_stack_value), %1 = lshr i64 %A, 7 ret void } define void @test_ashr(i64 %A) { ; CHECK-LABEL: @test_ashr( -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 %A, {{.*}}, metadata !DIExpression(DW_OP_constu, 7, DW_OP_shra, DW_OP_stack_value)) +; CHECK-NEXT: #dbg_value(i64 %A, {{.*}}, !DIExpression(DW_OP_constu, 7, DW_OP_shra, DW_OP_stack_value), %1 = ashr i64 %A, 7 ret void } define void @test_mul(i64 %A) { ; CHECK-LABEL: @test_mul( -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 %A, {{.*}}, metadata !DIExpression(DW_OP_constu, 7, DW_OP_mul, DW_OP_stack_value)) +; CHECK-NEXT: #dbg_value(i64 %A, {{.*}}, !DIExpression(DW_OP_constu, 7, DW_OP_mul, DW_OP_stack_value), %1 = mul i64 %A, 7 ret void } define void @test_sdiv(i64 %A) { ; CHECK-LABEL: @test_sdiv( -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 %A, {{.*}}, metadata !DIExpression(DW_OP_constu, 7, DW_OP_div, DW_OP_stack_value)) +; CHECK-NEXT: #dbg_value(i64 %A, {{.*}}, !DIExpression(DW_OP_constu, 7, DW_OP_div, DW_OP_stack_value), %1 = sdiv i64 %A, 7 ret void } define void @test_srem(i64 %A) { ; CHECK-LABEL: @test_srem( -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 %A, {{.*}}, metadata !DIExpression(DW_OP_constu, 7, DW_OP_mod, DW_OP_stack_value)) +; CHECK-NEXT: #dbg_value(i64 %A, {{.*}}, !DIExpression(DW_OP_constu, 7, DW_OP_mod, DW_OP_stack_value), %1 = srem i64 %A, 7 ret void } define void @test_ptrtoint(ptr %P) { ; CHECK-LABEL: @test_ptrtoint -; CHECK-NEXT: call void @llvm.dbg.value(metadata ptr %P, {{.*}}, metadata !DIExpression()) +; CHECK-NEXT: 
#dbg_value(ptr %P, {{.*}}, !DIExpression(), %1 = ptrtoint ptr %P to i64 ret void } define void @test_and(i64 %A) { ; CHECK-LABEL: @test_and( -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 %A, {{.*}}, metadata !DIExpression(DW_OP_constu, 256, DW_OP_and, DW_OP_stack_value)) +; CHECK-NEXT: #dbg_value(i64 %A, {{.*}}, !DIExpression(DW_OP_constu, 256, DW_OP_and, DW_OP_stack_value), %1 = and i64 %A, 256 ret void } diff --git a/llvm/test/Transforms/InstCombine/debuginfo.ll b/llvm/test/Transforms/InstCombine/debuginfo.ll index 0e25f2e74b7ed5..73e925236d0a76 100644 --- a/llvm/test/Transforms/InstCombine/debuginfo.ll +++ b/llvm/test/Transforms/InstCombine/debuginfo.ll @@ -40,12 +40,12 @@ entry: ; CHECK-LABEL: define ptr @passthru(ptr %a, i32 %b, i64 %c) ; CHECK-NOT: alloca ; CHECK-NOT: store -; CHECK-NOT: call void @llvm.dbg.declare -; CHECK: call void @llvm.dbg.value(metadata ptr %a, {{.*}}) +; CHECK-NOT: #dbg_declare +; CHECK: #dbg_value(ptr %a, {{.*}}) ; CHECK-NOT: store -; CHECK: call void @llvm.dbg.value(metadata i32 %b, {{.*}}) +; CHECK: #dbg_value(i32 %b, {{.*}}) ; CHECK-NOT: store -; CHECK: call void @llvm.dbg.value(metadata i64 %c, {{.*}}) +; CHECK: #dbg_value(i64 %c, {{.*}}) ; CHECK-NOT: store ; CHECK: call ptr @passthru_callee(ptr %a, i32 %b, i64 %c, i64 %{{.*}}) @@ -72,12 +72,12 @@ entry: ; NOLOWER-LABEL: define void @tworegs(i64 %o.coerce0, i64 %o.coerce1) ; NOLOWER-NOT: alloca ; NOLOWER-NOT: store -; NOLOWER-NOT: call void @llvm.dbg.declare +; NOLOWER-NOT: #dbg_declare ; Here we want to find: call void @llvm.dbg.value(metadata i64 %o.coerce0, metadata [[VARIABLE_O]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)) -; NOLOWER: call void @llvm.dbg.value(metadata i64 undef, {{.*}}) +; NOLOWER: #dbg_value(i64 undef, {{.*}}) ; NOLOWER-NOT: store ; Here we want to find: call void @llvm.dbg.value(metadata i64 %o.coerce1, metadata [[VARIABLE_O]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)) -; NOLOWER: call void @llvm.dbg.value(metadata i64 undef, 
{{.*}}) +; NOLOWER: #dbg_value(i64 undef, {{.*}}) ; NOLOWER-NOT: store ; NOLOWER: call void @tworegs_callee(i64 %o.coerce0, i64 %o.coerce1) diff --git a/llvm/test/Transforms/InstCombine/debuginfo_add.ll b/llvm/test/Transforms/InstCombine/debuginfo_add.ll index 2d1e613af30704..98ac6662547b96 100644 --- a/llvm/test/Transforms/InstCombine/debuginfo_add.ll +++ b/llvm/test/Transforms/InstCombine/debuginfo_add.ll @@ -37,8 +37,8 @@ for.body.lr.ph: ; preds = %entry ; The add is later eliminated, so we verify that the dbg.value is salvaged by using DW_OP_minus. ; CHECK-LABEL: for.body.lr.ph: ; CHECK-NEXT: %0 = load - ; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 %0, metadata !25, metadata !DIExpression()), !dbg ! - ; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 %0, metadata !26, metadata !DIExpression(DW_OP_constu, 4096, DW_OP_minus, DW_OP_stack_value)), !dbg ! + ; CHECK-NEXT: #dbg_value(i64 %0, !25, !DIExpression(), ! + ; CHECK-NEXT: #dbg_value(i64 %0, !26, !DIExpression(DW_OP_constu, 4096, DW_OP_minus, DW_OP_stack_value), ! br label %for.body, !dbg !32 for.body: ; preds = %for.body.lr.ph, %for.body @@ -50,7 +50,7 @@ for.body: ; preds = %for.body.lr.ph, %fo %sub2 = add i32 %head_size.09, -4096, !dbg !37 %offset.0 = add i64 %offset.010, -4096 tail call void @llvm.dbg.value(metadata i64 %offset.0, metadata !26, metadata !DIExpression()), !dbg !30 - ; CHECK: call void @llvm.dbg.value(metadata i64 %offset.010, metadata !26, metadata !DIExpression(DW_OP_constu, 4096, DW_OP_minus, DW_OP_stack_value)), !dbg ! + ; CHECK: #dbg_value(i64 %offset.010, !26, !DIExpression(DW_OP_constu, 4096, DW_OP_minus, DW_OP_stack_value), ! 
tail call void @llvm.dbg.value(metadata i32 %sub2, metadata !23, metadata !DIExpression()), !dbg !31 %tobool = icmp eq i32 %sub2, 0, !dbg !32 br i1 %tobool, label %for.end, label %for.body, !dbg !32, !llvm.loop !38 diff --git a/llvm/test/Transforms/InstCombine/erase-dbg-values-at-dead-alloc-site.ll b/llvm/test/Transforms/InstCombine/erase-dbg-values-at-dead-alloc-site.ll index 1e79d8283b6ab3..27c6c5c305f941 100644 --- a/llvm/test/Transforms/InstCombine/erase-dbg-values-at-dead-alloc-site.ll +++ b/llvm/test/Transforms/InstCombine/erase-dbg-values-at-dead-alloc-site.ll @@ -12,8 +12,8 @@ ; be. ; ; RUN-ONCE-LABEL: @t1( -; RUN-ONCE-NEXT: llvm.dbg.value(metadata i32 %0, metadata [[t1_arg0:![0-9]+]], metadata !DIExpression()) -; RUN-ONCE-NEXT: llvm.dbg.value(metadata ptr poison, metadata [[t1_fake_ptr:![0-9]+]], metadata !DIExpression()) +; RUN-ONCE-NEXT: #dbg_value(i32 %0, [[t1_arg0:![0-9]+]], !DIExpression(), +; RUN-ONCE-NEXT: #dbg_value(ptr poison, [[t1_fake_ptr:![0-9]+]], !DIExpression(), ; RUN-ONCE-NEXT: ret void define void @t1(i32) !dbg !9 { %2 = alloca i32, align 4 diff --git a/llvm/test/Transforms/InstCombine/lifetime-no-null-opt.ll b/llvm/test/Transforms/InstCombine/lifetime-no-null-opt.ll index 1f85dd8646009a..b729ec321876ff 100644 --- a/llvm/test/Transforms/InstCombine/lifetime-no-null-opt.ll +++ b/llvm/test/Transforms/InstCombine/lifetime-no-null-opt.ll @@ -18,7 +18,7 @@ define void @bar(i1 %flag) #0 !dbg !4 { ; CHECK: bb2: ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb3: -; CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr [[TEXT]], metadata [[META16:![0-9]+]], metadata !DIExpression()), !dbg [[DBG24:![0-9]+]] +; CHECK-NEXT: #dbg_declare(ptr [[TEXT]], [[META16:![0-9]+]], !DIExpression(), [[META24:![0-9]+]]) ; CHECK-NEXT: br label [[FIN:%.*]] ; CHECK: else: ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr nonnull [[TEXT]]) diff --git a/llvm/test/Transforms/InstCombine/lifetime.ll b/llvm/test/Transforms/InstCombine/lifetime.ll index 
fa1c38b7bdd009..74b96d2c1005da 100644 --- a/llvm/test/Transforms/InstCombine/lifetime.ll +++ b/llvm/test/Transforms/InstCombine/lifetime.ll @@ -18,7 +18,7 @@ define void @bar(i1 %flag) !dbg !4 { ; CHECK: bb2: ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb3: -; CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr [[TEXT]], metadata [[META16:![0-9]+]], metadata !DIExpression()), !dbg [[DBG24:![0-9]+]] +; CHECK-NEXT: #dbg_declare(ptr [[TEXT]], [[META16:![0-9]+]], !DIExpression(), [[META24:![0-9]+]]) ; CHECK-NEXT: br label [[FIN:%.*]] ; CHECK: else: ; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr nonnull [[TEXT]]) diff --git a/llvm/test/Transforms/InstCombine/lower-dbg-declare.ll b/llvm/test/Transforms/InstCombine/lower-dbg-declare.ll index 5c37c8e5cb6176..4f7e44bd154f3c 100644 --- a/llvm/test/Transforms/InstCombine/lower-dbg-declare.ll +++ b/llvm/test/Transforms/InstCombine/lower-dbg-declare.ll @@ -28,14 +28,14 @@ entry: %d1 = alloca i32, align 4 store i32 0, ptr %retval, align 4 call void @llvm.lifetime.start.p0(i64 4, ptr %d1) #4, !dbg !17 -; CHECK: dbg.value(metadata i32 42, metadata [[METADATA_IDX1:![0-9]+]], metadata !DIExpression()) +; CHECK: #dbg_value(i32 42, [[METADATA_IDX1:![0-9]+]], !DIExpression(), ; CHECK-NEXT: store call void @llvm.dbg.declare(metadata ptr %d1, metadata !16, metadata !DIExpression()), !dbg !17 store i32 42, ptr %d1, align 4, !dbg !17 br label %while.cond, !dbg !22 while.cond: ; preds = %while.body, %entry -; CHECK: dbg.value(metadata i32 %0, metadata [[METADATA_IDX1]], metadata !DIExpression()) +; CHECK: #dbg_value(i32 %0, [[METADATA_IDX1]], !DIExpression(), ; CHECK-NEXT: call zeroext i1 @_ZL5emptyi %0 = load i32, ptr %d1, align 4, !dbg !22 %call = call zeroext i1 @_ZL5emptyi(i32 %0), !dbg !22 @@ -43,7 +43,7 @@ while.cond: ; preds = %while.body, %entry br i1 %lnot, label %while.body, label %while.end, !dbg !22 while.body: ; preds = %while.cond -; CHECK: dbg.value(metadata ptr %d1, metadata [[METADATA_IDX1]], metadata 
!DIExpression(DW_OP_deref)) +; CHECK: #dbg_value(ptr %d1, [[METADATA_IDX1]], !DIExpression(DW_OP_deref), ; CHECK-NEXT: call void @_ZL6escapeRi call void @_ZL6escapeRi(ptr dereferenceable(4) %d1), !dbg !23 br label %while.cond, !dbg !22, !llvm.loop !24 diff --git a/llvm/test/Transforms/InstCombine/pr43893.ll b/llvm/test/Transforms/InstCombine/pr43893.ll index 9f42c6e41ecf21..32e742f5197d9e 100644 --- a/llvm/test/Transforms/InstCombine/pr43893.ll +++ b/llvm/test/Transforms/InstCombine/pr43893.ll @@ -10,9 +10,9 @@ entry: %0 = load i8, ptr @a, align 1, !dbg !17 %dec = add i8 %0, -1, !dbg !17 store i8 %dec, ptr @a, align 1, !dbg !17 -;CHECK: call void @llvm.dbg.value(metadata i32 poison -;CHECK: call void @llvm.dbg.value(metadata i32 -8 -;CHECK: call void @llvm.dbg.value(metadata i32 poison +;CHECK: #dbg_value(i32 poison +;CHECK: #dbg_value(i32 -8 +;CHECK: #dbg_value(i32 poison %conv = sext i8 %dec to i32, !dbg !17 %udiv = udiv i32 %conv, 4, !dbg !17 call void @llvm.dbg.value(metadata i32 %udiv, metadata !18, metadata !DIExpression()), !dbg !19 diff --git a/llvm/test/Transforms/InstCombine/salvage-dbg-declare.ll b/llvm/test/Transforms/InstCombine/salvage-dbg-declare.ll index 8554296a406d8c..6538810639842a 100644 --- a/llvm/test/Transforms/InstCombine/salvage-dbg-declare.ll +++ b/llvm/test/Transforms/InstCombine/salvage-dbg-declare.ll @@ -6,7 +6,7 @@ declare dso_local i32 @bar(ptr) ; Function Attrs: nounwind define internal i32 @foo() #0 !dbg !1 { ; CHECK: %[[VLA:.*]] = alloca [2 x i32] -; CHECK: call void @llvm.dbg.declare(metadata ptr %[[VLA]], {{.*}}, metadata !DIExpression()) +; CHECK: #dbg_declare(ptr %[[VLA]], {{.*}}, !DIExpression(), entry: %vla = alloca i32, i64 2, align 4, !dbg !16 diff --git a/llvm/test/Transforms/InstCombine/sink-instruction-introduces-unnecessary-poison-value.ll b/llvm/test/Transforms/InstCombine/sink-instruction-introduces-unnecessary-poison-value.ll index 427e7e84ac0b07..b48e5795b7181a 100644 --- 
a/llvm/test/Transforms/InstCombine/sink-instruction-introduces-unnecessary-poison-value.ll +++ b/llvm/test/Transforms/InstCombine/sink-instruction-introduces-unnecessary-poison-value.ll @@ -22,8 +22,8 @@ ; CHECK-LABEL: sw.bb: ; CHECK: %[[REG:[0-9]+]] = load i32, ptr @"?Two{{.*}} -; CHECK: call void @llvm.dbg.value(metadata i32 %[[REG]], metadata ![[DBG1:[0-9]+]], {{.*}} -; CHECK: call void @llvm.dbg.value(metadata i32 %[[REG]], metadata ![[DBG2:[0-9]+]], {{.*}} +; CHECK: #dbg_value(i32 %[[REG]], ![[DBG1:[0-9]+]], {{.*}} +; CHECK: #dbg_value(i32 %[[REG]], ![[DBG2:[0-9]+]], {{.*}} ; CHECK-DAG: ![[DBG1]] = !DILocalVariable(name: "Four"{{.*}}) ; CHECK-DAG: ![[DBG2]] = !DILocalVariable(name: "Three"{{.*}}) diff --git a/llvm/test/Transforms/InstCombine/stacksave-debuginfo.ll b/llvm/test/Transforms/InstCombine/stacksave-debuginfo.ll index fa9a35f09d1871..1f98cdbf97b465 100644 --- a/llvm/test/Transforms/InstCombine/stacksave-debuginfo.ll +++ b/llvm/test/Transforms/InstCombine/stacksave-debuginfo.ll @@ -9,11 +9,11 @@ declare void @llvm.stackrestore(ptr) #0 define ptr @test1(i32 %P) !dbg !6 { ; CHECK-LABEL: @test1( -; CHECK-NEXT: call void @llvm.dbg.value(metadata ptr poison -; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[P:%.*]] to i64, !dbg !13 -; CHECK-NEXT: [[A:%.*]] = alloca i32, i64 [[TMP1]], align 4, !dbg !13 -; CHECK-NEXT: call void @llvm.dbg.value(metadata ptr [[A]] -; CHECK-NEXT: ret ptr [[A]], !dbg !14 +; CHECK-NEXT: #dbg_value(ptr poison, [[META9:![0-9]+]], !DIExpression(), [[META12:![0-9]+]]) +; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[P:%.*]] to i64, !dbg [[DBG13:![0-9]+]] +; CHECK-NEXT: [[A:%.*]] = alloca i32, i64 [[TMP1]], align 4, !dbg [[DBG13]] +; CHECK-NEXT: #dbg_value(ptr [[A]], [[META11:![0-9]+]], !DIExpression(), [[DBG13]]) +; CHECK-NEXT: ret ptr [[A]], !dbg [[DBG14:![0-9]+]] ; %tmp = call ptr @llvm.stacksave(), !dbg !12 call void @llvm.dbg.value(metadata ptr %tmp, metadata !9, metadata !DIExpression()), !dbg !12 diff --git 
a/llvm/test/Transforms/InstCombine/unavailable-debug.ll b/llvm/test/Transforms/InstCombine/unavailable-debug.ll index 7dc9ed19ea8699..bf17f1b7f40f13 100644 --- a/llvm/test/Transforms/InstCombine/unavailable-debug.ll +++ b/llvm/test/Transforms/InstCombine/unavailable-debug.ll @@ -3,7 +3,7 @@ ; Make sure to update the debug value after dead code elimination. ; CHECK: %call = call signext i8 @b(i32 6), !dbg !39 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 %call, metadata !30, metadata !DIExpression(DW_OP_LLVM_convert, 8, DW_ATE_signed, DW_OP_LLVM_convert, 32, DW_ATE_signed, DW_OP_stack_value)), !dbg !38 +; CHECK-NEXT: #dbg_value(i8 %call, !30, !DIExpression(DW_OP_LLVM_convert, 8, DW_ATE_signed, DW_OP_LLVM_convert, 32, DW_ATE_signed, DW_OP_stack_value), !38 @e = common local_unnamed_addr global i8 0, align 1, !dbg !0 @c = common local_unnamed_addr global i32 0, align 4, !dbg !6 diff --git a/llvm/test/Transforms/JumpThreading/guard-split-debuginfo.ll b/llvm/test/Transforms/JumpThreading/guard-split-debuginfo.ll index 38fbe4de51ad28..8b06e37d9581a7 100644 --- a/llvm/test/Transforms/JumpThreading/guard-split-debuginfo.ll +++ b/llvm/test/Transforms/JumpThreading/guard-split-debuginfo.ll @@ -23,13 +23,13 @@ define i32 @branch_implies_guard(i32 %a) !dbg !7 { ; CHECK-NEXT: br i1 [[COND]], label [[T1_SPLIT:%.*]], label [[F1_SPLIT:%.*]], !dbg [[DBG12:![0-9]+]] ; CHECK: T1.split: ; CHECK-NEXT: [[V1:%.*]] = call i32 @f1(), !dbg [[DBG12]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 0, metadata [[META13:![0-9]+]], metadata !DIExpression()), !dbg [[DBG14:![0-9]+]] +; CHECK-NEXT: #dbg_value(i32 0, [[META13:![0-9]+]], !DIExpression(), [[META14:![0-9]+]]) ; CHECK-NEXT: [[RETVAL3:%.*]] = add i32 [[V1]], 10, !dbg [[DBG12]] ; CHECK-NEXT: [[CONDGUARD4:%.*]] = icmp slt i32 [[A]], 20, !dbg [[DBG12]] ; CHECK-NEXT: br label [[MERGE:%.*]] ; CHECK: F1.split: ; CHECK-NEXT: [[V2:%.*]] = call i32 @f2(), !dbg [[DBG12]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 
0, metadata [[META13]], metadata !DIExpression()), !dbg [[DBG14]] +; CHECK-NEXT: #dbg_value(i32 0, [[META13]], !DIExpression(), [[META14]]) ; CHECK-NEXT: [[RETVAL1:%.*]] = add i32 [[V2]], 10, !dbg [[DBG12]] ; CHECK-NEXT: [[CONDGUARD2:%.*]] = icmp slt i32 [[A]], 20, !dbg [[DBG12]] ; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[CONDGUARD2]]) [ "deopt"() ] @@ -81,8 +81,6 @@ Merge: !19 = distinct !DILexicalBlock(scope: !7, file: !1, line: 8, column: 7) !26 = !DILocation(line: 13, column: 3, scope: !7) -;. -; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ;. ; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: [[META1:![0-9]+]], isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: [[META2:![0-9]+]], splitDebugInlining: false, nameTableKind: None) ; CHECK: [[META1]] = !DIFile(filename: "test.c", directory: {{.*}}) @@ -98,5 +96,5 @@ Merge: ; CHECK: [[META11]] = !DIBasicType(name: "long int", size: 64, encoding: DW_ATE_signed) ; CHECK: [[DBG12]] = !DILocation(line: 13, column: 3, scope: [[META7]]) ; CHECK: [[META13]] = !DILocalVariable(name: "bar", arg: 1, scope: [[META7]], file: [[META1]], line: 3, type: [[META11]]) -; CHECK: [[DBG14]] = !DILocation(line: 0, scope: [[META7]]) +; CHECK: [[META14]] = !DILocation(line: 0, scope: [[META7]]) ;. 
diff --git a/llvm/test/Transforms/JumpThreading/redundant-dbg-info.ll b/llvm/test/Transforms/JumpThreading/redundant-dbg-info.ll index 8aaed55788c5ce..e392cdcd15fcd0 100644 --- a/llvm/test/Transforms/JumpThreading/redundant-dbg-info.ll +++ b/llvm/test/Transforms/JumpThreading/redundant-dbg-info.ll @@ -20,9 +20,9 @@ if.end: ; preds = %if.then, %entry br i1 %tobool1, label %if.else, label %if.then2, !dbg !23 ; CHECK-LABEL: if.then2: -; CHECK: call void @llvm.dbg.value({{.+}}, metadata ![[B:[0-9]+]], metadata !DIExpression()) -; CHECK: call void @llvm.dbg.value({{.+}}, metadata ![[B:[0-9]+]], metadata !DIExpression()) -; CHECK-NOT: call void @llvm.dbg.value({{.+}}, metadata ![[B]], metadata !DIExpression()) +; CHECK: #dbg_value({{.+}}, ![[B:[0-9]+]], !DIExpression(), +; CHECK: #dbg_value({{.+}}, ![[B:[0-9]+]], !DIExpression(), +; CHECK-NOT: #dbg_value({{.+}}, ![[B]], !DIExpression(), if.then2: ; preds = %if.end call void @llvm.dbg.value(metadata i32 4, metadata !24, metadata !DIExpression()), !dbg !13 br label %if.end3, !dbg !25 diff --git a/llvm/test/Transforms/JumpThreading/thread-debug-info.ll b/llvm/test/Transforms/JumpThreading/thread-debug-info.ll index cc9442b0974036..cd7b0b1c05a801 100644 --- a/llvm/test/Transforms/JumpThreading/thread-debug-info.ll +++ b/llvm/test/Transforms/JumpThreading/thread-debug-info.ll @@ -7,13 +7,13 @@ define void @test1(i32 %cond1, i32 %cond2) { ; CHECK: [[globalptr:@.*]] = global i32 0, align 4 ; CHECK: bb.cond2: -; CHECK: call void @llvm.dbg.value(metadata ptr null, metadata ![[DBG1ptr:[0-9]+]], metadata !DIExpression()), !dbg ![[DBG2ptr:[0-9]+]] +; CHECK: #dbg_value(ptr null, ![[DBG1ptr:[0-9]+]], !DIExpression(), ![[DBG2ptr:[0-9]+]] ; CHECK-NEXT: [[TOBOOL1:%.*]] = icmp eq i32 %cond2, 0, !dbg ![[DBGLOCtobool1:[0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata !DIArgList(ptr null, i1 [[TOBOOL1]], i1 [[TOBOOL1]]), metadata !{{[0-9]+}}, metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_LLVM_arg, 2, 
DW_OP_plus)), !dbg !{{[0-9]+}} +; CHECK-NEXT: #dbg_value(!DIArgList(ptr null, i1 [[TOBOOL1]], i1 [[TOBOOL1]]), !{{[0-9]+}}, !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_LLVM_arg, 2, DW_OP_plus), !{{[0-9]+}} ; CHECK: bb.cond2.thread: -; CHECK-NEXT: call void @llvm.dbg.value(metadata ptr [[globalptr]], metadata ![[DBG1ptr]], metadata !DIExpression()), !dbg ![[DBG2ptr]] +; CHECK-NEXT: #dbg_value(ptr [[globalptr]], ![[DBG1ptr]], !DIExpression(), ![[DBG2ptr]] ; CHECK-NEXT: [[TOBOOL12:%.*]] = icmp eq i32 %cond2, 0, !dbg ![[DBGLOCtobool1]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata !DIArgList(ptr [[globalptr]], i1 [[TOBOOL12]], i1 [[TOBOOL12]]), metadata !{{[0-9]+}}, metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_LLVM_arg, 2, DW_OP_plus)), !dbg !{{[0-9]+}} +; CHECK-NEXT: #dbg_value(!DIArgList(ptr [[globalptr]], i1 [[TOBOOL12]], i1 [[TOBOOL12]]), !{{[0-9]+}}, !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_LLVM_arg, 2, DW_OP_plus), !{{[0-9]+}} entry: %tobool = icmp eq i32 %cond1, 0, !dbg !15 call void @llvm.dbg.value(metadata i1 %tobool, metadata !9, metadata !DIExpression()), !dbg !15 @@ -54,10 +54,10 @@ exit: ; preds = %bb.f4, %bb.f3, %bb. ; inside to correctly take any new definitions. 
define void @test2(i32 %cond1, i32 %cond2) !dbg !5 { ; CHECK: bb.f3 -; CHECK: call void @llvm.dbg.value(metadata ptr @a, metadata !{{[0-9]+}}, metadata !DIExpression()), !dbg !{{[0-9]+}} +; CHECK: #dbg_value(ptr @a, !{{[0-9]+}}, !DIExpression(), !{{[0-9]+}} ; CHECK: bb.f4 ; CHECK-NEXT: [[PTR3:%.*]] = phi ptr [ null, %bb.cond2 ] -; CHECK-NEXT: call void @llvm.dbg.value(metadata ptr [[PTR3]], metadata !{{[0-9]+}}, metadata !DIExpression()), !dbg !{{[0-9]+}} +; CHECK-NEXT: #dbg_value(ptr [[PTR3]], !{{[0-9]+}}, !DIExpression(), !{{[0-9]+}} entry: %tobool = icmp eq i32 %cond1, 0, !dbg !15 br i1 %tobool, label %bb.cond2, label %bb.f1, !dbg !16 @@ -109,14 +109,14 @@ lor.lhs.false.i: br i1 %c3, label %land.end, label %land.end, !dbg !33 ; CHECK-LABEL: land.end.thr_comm: -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 0, -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 1, +; CHECK-NEXT: #dbg_value(i32 0, +; CHECK-NEXT: #dbg_value(i32 1, ; CHECK-NEXT: call void @f1() ; CHECK-NEXT: br i1 %c4, ; CHECK-LABEL: land.end: ; CHECK-NEXT: %0 = phi i1 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 0, +; CHECK-NEXT: #dbg_value(i32 0, land.end: %0 = phi i1 [ true, %entry ], [ false, %land.rhs ], [false, %lor.lhs.false.i], [false, %lor.lhs.false.i] call void @llvm.dbg.value(metadata i32 0, metadata !32, metadata !DIExpression()), !dbg !33 diff --git a/llvm/test/Transforms/LCSSA/rewrite-existing-dbg-values.ll b/llvm/test/Transforms/LCSSA/rewrite-existing-dbg-values.ll index f4b8fff5a0d738..134cfb73225529 100644 --- a/llvm/test/Transforms/LCSSA/rewrite-existing-dbg-values.ll +++ b/llvm/test/Transforms/LCSSA/rewrite-existing-dbg-values.ll @@ -13,16 +13,16 @@ ; CHECK-LABEL: inner.body: ; CHECK: %add = add nsw i32 0, 2 -; CHECK: call void @llvm.dbg.value(metadata i32 %add, metadata [[VAR:![0-9]+]], metadata !DIExpression()) +; CHECK: #dbg_value(i32 %add, [[VAR:![0-9]+]], !DIExpression(), ; CHECK-LABEL: outer.exit: ; CHECK-NEXT: [[PN:%[^ ]*]] = phi i32 [ %add.lcssa, 
%outer.latch ] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[PN]], metadata [[VAR]], metadata !DIExpression()) +; CHECK-NEXT: #dbg_value(i32 [[PN]], [[VAR]], !DIExpression(), ; CHECK-NEXT: call void @bar(i32 [[PN]]) ; CHECK-LABEL: exit: -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[PN]], metadata [[VAR]], metadata !DIExpression()) +; CHECK-NEXT: #dbg_value(i32 [[PN]], [[VAR]], !DIExpression(), define void @single_exit() !dbg !6 { entry: @@ -60,23 +60,23 @@ exit: ; preds = %outer.exit ; CHECK-LABEL: for.header: ; CHECK-NEXT: %add = add nsw i32 0, 2 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %add, metadata [[VAR2:![0-9]+]], metadata !DIExpression()) +; CHECK-NEXT: #dbg_value(i32 %add, [[VAR2:![0-9]+]], !DIExpression(), ; CHECK-LABEL: for.exit1: ; CHECK-NEXT: [[PN1:%[^ ]*]] = phi i32 [ %add, %for.header ] ; CHECK-NEXT: br label %for.exit1.succ ; CHECK-LABEL: for.exit1.succ: -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[PN1]], metadata [[VAR2]], metadata !DIExpression()) +; CHECK-NEXT: #dbg_value(i32 [[PN1]], [[VAR2]], !DIExpression(), ; CHECK-NEXT: call void @bar(i32 [[PN1]]) ; CHECK-LABEL: for.exit2: ; CHECK-NEXT: [[PN2:%[^ ]*]] = phi i32 [ %add, %for.latch ] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[PN2]], metadata [[VAR2]], metadata !DIExpression()) +; CHECK-NEXT: #dbg_value(i32 [[PN2]], [[VAR2]], !DIExpression(), ; CHECK-NEXT: call void @bar(i32 [[PN2]]) ; CHECK-LABEL: exit: -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %add, metadata [[VAR2]], metadata !DIExpression()) +; CHECK-NEXT: #dbg_value(i32 %add, [[VAR2]], !DIExpression(), define void @multi_exit() !dbg !13 { entry: diff --git a/llvm/test/Transforms/LICM/dbg-value-sink.ll b/llvm/test/Transforms/LICM/dbg-value-sink.ll index e0bd4d57a67fe8..7865774b781b31 100644 --- a/llvm/test/Transforms/LICM/dbg-value-sink.ll +++ b/llvm/test/Transforms/LICM/dbg-value-sink.ll @@ -28,7 +28,7 @@ define void @test1(i32 %i) { ; CHECK: Out: ; CHECK-NEXT: 
[[X2_LCSSA:%.*]] = phi i32 [ [[X2]], [[LOOP]] ] ; CHECK-NEXT: store i32 [[X2_LCSSA]], ptr @X, align 4, !dbg [[DBG5]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 0, metadata [[META12:![0-9]+]], metadata !DIExpression()), !dbg [[DBG5]] +; CHECK-NEXT: #dbg_value(i32 0, [[META12:![0-9]+]], !DIExpression(), [[DBG5]]) ; CHECK-NEXT: ret void, !dbg [[DBG5]] ; Entry: diff --git a/llvm/test/Transforms/LICM/debug-value.ll b/llvm/test/Transforms/LICM/debug-value.ll index b15fafcf975560..17ad57f58898df 100644 --- a/llvm/test/Transforms/LICM/debug-value.ll +++ b/llvm/test/Transforms/LICM/debug-value.ll @@ -17,7 +17,7 @@ if.then: ; preds = %for.body br i1 undef, label %if.then27, label %if.end.if.end.split_crit_edge.critedge, !dbg !16 if.then27: ; preds = %if.then -; CHECK: tail call void @llvm.dbg.value +; CHECK: #dbg_value tail call void @llvm.dbg.value(metadata double undef, metadata !19, metadata !DIExpression()), !dbg !21 br label %for.body61.us diff --git a/llvm/test/Transforms/LICM/sinking-debugify.ll b/llvm/test/Transforms/LICM/sinking-debugify.ll index 75bed63f2aecfc..fdd24e597e74eb 100644 --- a/llvm/test/Transforms/LICM/sinking-debugify.ll +++ b/llvm/test/Transforms/LICM/sinking-debugify.ll @@ -13,7 +13,7 @@ define void @test11() { ; CHECK-LABEL: @test11( ; CHECK-NEXT: br label [[LOOP:%.*]], !dbg [[DBG11:![0-9]+]] ; CHECK: Loop: -; CHECK-NEXT: call void @llvm.dbg.value(metadata ptr @X2, metadata [[META9:![0-9]+]], metadata !DIExpression(DW_OP_plus_uconst, 4, DW_OP_stack_value)), !dbg [[DBG12:![0-9]+]] +; CHECK-NEXT: #dbg_value(ptr @X2, [[META9:![0-9]+]], !DIExpression(DW_OP_plus_uconst, 4, DW_OP_stack_value), [[META12:![0-9]+]]) ; CHECK-NEXT: br i1 false, label [[LOOP]], label [[OUT:%.*]], !dbg [[DBG13:![0-9]+]] ; CHECK: Out: ; CHECK-NEXT: ret void, !dbg [[DBG14:![0-9]+]] diff --git a/llvm/test/Transforms/LoopDeletion/diundef.ll b/llvm/test/Transforms/LoopDeletion/diundef.ll index 7b6178bcc2ae3d..0f37be535aec3a 100644 --- 
a/llvm/test/Transforms/LoopDeletion/diundef.ll +++ b/llvm/test/Transforms/LoopDeletion/diundef.ll @@ -8,10 +8,10 @@ target triple = "x86_64-apple-macosx10.14.0" define i32 @b() local_unnamed_addr !dbg !12 { ; CHECK-LABEL: entry -; CHECK: call void @llvm.dbg.value(metadata i32 0, metadata ![[IVAR:[0-9]+]], +; CHECK: #dbg_value(i32 0, ![[IVAR:[0-9]+]], ; CHECK-LABEL: for.end: -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 undef, metadata ![[IVAR]], metadata !DIExpression()), !dbg !17 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 undef, metadata ![[JVAR:[0-9]+]], metadata !DIExpression()), !dbg !17 +; CHECK-NEXT: #dbg_value(i32 undef, ![[IVAR]], !DIExpression(), !17 +; CHECK-NEXT: #dbg_value(i32 undef, ![[JVAR:[0-9]+]], !DIExpression(), !17 ; CHECK-NEXT: %call = tail call i32 {{.*}} @patatino() entry: call void @llvm.dbg.value(metadata i32 0, metadata !16, metadata !DIExpression()), !dbg !17 @@ -40,7 +40,6 @@ entry: ret i32 0, !dbg !36 } -; CHECK: declare void @llvm.dbg.value(metadata, ; CHECK: ![[IVAR]] = !DILocalVariable(name: "i", ; CHECK: ![[JVAR]] = !DILocalVariable(name: "j", diff --git a/llvm/test/Transforms/LoopDeletion/over-defensive-undefing-dbg-values.ll b/llvm/test/Transforms/LoopDeletion/over-defensive-undefing-dbg-values.ll index 6f71038a74672c..6b060a5b84064c 100644 --- a/llvm/test/Transforms/LoopDeletion/over-defensive-undefing-dbg-values.ll +++ b/llvm/test/Transforms/LoopDeletion/over-defensive-undefing-dbg-values.ll @@ -20,8 +20,8 @@ ;; loop is preserved. 
; CHECK-LABEL: for.end: -; CHECK-NEXT: @llvm.dbg.value({{.+}} undef, metadata ![[VAR1:[0-9]+]],{{.+}}), !dbg ![[DBG1:[0-9]+]] -; CHECK-NEXT: @llvm.dbg.value({{.+}} 5, metadata ![[VAR2:[0-9]+]],{{.+}}), !dbg ![[DBG2:[0-9]+]] +; CHECK-NEXT: #dbg_value({{.+}} undef, ![[VAR1:[0-9]+]],{{.+}}, ![[DBG1:[0-9]+]] +; CHECK-NEXT: #dbg_value({{.+}} 5, ![[VAR2:[0-9]+]],{{.+}}, ![[DBG2:[0-9]+]] ; CHECK-DAG: ![[VAR1]] = !DILocalVariable(name: "Index" ; CHECK-DAG: ![[VAR2]] = !DILocalVariable(name: "Constant" diff --git a/llvm/test/Transforms/LoopIdiom/X86/arithmetic-right-shift-until-zero.ll b/llvm/test/Transforms/LoopIdiom/X86/arithmetic-right-shift-until-zero.ll index e862823f8c4c9e..9de9f1568147a5 100644 --- a/llvm/test/Transforms/LoopIdiom/X86/arithmetic-right-shift-until-zero.ll +++ b/llvm/test/Transforms/LoopIdiom/X86/arithmetic-right-shift-until-zero.ll @@ -16,15 +16,15 @@ define i8 @p0(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; NOLZCNT-NEXT: br label [[LOOP:%.*]], !dbg [[DBG20:![0-9]+]] ; NOLZCNT: loop: ; NOLZCNT-NEXT: [[IV:%.*]] = phi i8 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG21:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META9:![0-9]+]], metadata !DIExpression()), !dbg [[DBG21]] +; NOLZCNT-NEXT: #dbg_value(i8 [[IV]], [[META9:![0-9]+]], !DIExpression(), [[DBG21]]) ; NOLZCNT-NEXT: [[NBITS:%.*]] = add nsw i8 [[IV]], [[EXTRAOFFSET:%.*]], !dbg [[DBG22:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG22]] +; NOLZCNT-NEXT: #dbg_value(i8 [[NBITS]], [[META11:![0-9]+]], !DIExpression(), [[DBG22]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL:%.*]], [[NBITS]], !dbg [[DBG23:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META12:![0-9]+]], metadata !DIExpression()), !dbg [[DBG23]] +; NOLZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META12:![0-9]+]], 
!DIExpression(), [[DBG23]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i8 [[VAL_SHIFTED]], 0, !dbg [[DBG24:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata [[META13:![0-9]+]], metadata !DIExpression()), !dbg [[DBG24]] +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META13:![0-9]+]], !DIExpression(), [[DBG24]]) ; NOLZCNT-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1, !dbg [[DBG25:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata [[META14:![0-9]+]], metadata !DIExpression()), !dbg [[DBG25]] +; NOLZCNT-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META14:![0-9]+]], !DIExpression(), [[DBG25]]) ; NOLZCNT-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i8 [[IV_NEXT]]), !dbg [[DBG26:![0-9]+]] ; NOLZCNT-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG27:![0-9]+]] ; NOLZCNT: end: @@ -33,11 +33,11 @@ define i8 @p0(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; NOLZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG30:![0-9]+]] ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG31:![0-9]+]] ; NOLZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG32:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META15:![0-9]+]], metadata !DIExpression()), !dbg [[DBG28]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS_RES]], metadata [[META16:![0-9]+]], metadata !DIExpression()), !dbg [[DBG29]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META17:![0-9]+]], metadata !DIExpression()), !dbg [[DBG30]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META18:![0-9]+]], metadata !DIExpression()), !dbg [[DBG31]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 
[[IV_NEXT_RES]], metadata [[META19:![0-9]+]], metadata !DIExpression()), !dbg [[DBG32]] +; NOLZCNT-NEXT: #dbg_value(i8 [[IV_RES]], [[META15:![0-9]+]], !DIExpression(), [[DBG28]]) +; NOLZCNT-NEXT: #dbg_value(i8 [[NBITS_RES]], [[META16:![0-9]+]], !DIExpression(), [[DBG29]]) +; NOLZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED_RES]], [[META17:![0-9]+]], !DIExpression(), [[DBG30]]) +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META18:![0-9]+]], !DIExpression(), [[DBG31]]) +; NOLZCNT-NEXT: #dbg_value(i8 [[IV_NEXT_RES]], [[META19:![0-9]+]], !DIExpression(), [[DBG32]]) ; NOLZCNT-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG33:![0-9]+]] ; NOLZCNT-NEXT: ret i8 [[IV_RES]], !dbg [[DBG34:![0-9]+]] ; @@ -56,14 +56,14 @@ define i8 @p0(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i8 [[LOOP_IV]], 1, !dbg [[DBG22:![0-9]+]] ; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i8 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG22]] ; LZCNT-NEXT: [[IV:%.*]] = add nsw i8 [[LOOP_IV]], [[START]], !dbg [[DBG22]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META9:![0-9]+]], metadata !DIExpression()), !dbg [[DBG20]] +; LZCNT-NEXT: #dbg_value(i8 [[IV]], [[META9:![0-9]+]], !DIExpression(), [[DBG20]]) ; LZCNT-NEXT: [[NBITS:%.*]] = add nsw i8 [[IV]], [[EXTRAOFFSET]], !dbg [[DBG22]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG22]] +; LZCNT-NEXT: #dbg_value(i8 [[NBITS]], [[META11:![0-9]+]], !DIExpression(), [[DBG22]]) ; LZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL]], [[NBITS]], !dbg [[DBG23:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META12:![0-9]+]], metadata !DIExpression()), !dbg [[DBG23]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[LOOP_IVCHECK]], metadata 
[[META13:![0-9]+]], metadata !DIExpression()), !dbg [[DBG24:![0-9]+]] +; LZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META12:![0-9]+]], !DIExpression(), [[DBG23]]) +; LZCNT-NEXT: #dbg_value(i1 [[LOOP_IVCHECK]], [[META13:![0-9]+]], !DIExpression(), [[META24:![0-9]+]]) ; LZCNT-NEXT: [[IV_NEXT:%.*]] = add i8 [[IV]], 1, !dbg [[DBG25:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata [[META14:![0-9]+]], metadata !DIExpression()), !dbg [[DBG25]] +; LZCNT-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META14:![0-9]+]], !DIExpression(), [[DBG25]]) ; LZCNT-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[LOOP_IVCHECK]], i8 [[IV_NEXT]]), !dbg [[DBG26:![0-9]+]] ; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG27:![0-9]+]] ; LZCNT: end: @@ -72,11 +72,11 @@ define i8 @p0(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; LZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG30:![0-9]+]] ; LZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[LOOP_IVCHECK]], [[LOOP]] ], !dbg [[DBG31:![0-9]+]] ; LZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG32:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META15:![0-9]+]], metadata !DIExpression()), !dbg [[DBG28]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS_RES]], metadata [[META16:![0-9]+]], metadata !DIExpression()), !dbg [[DBG29]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META17:![0-9]+]], metadata !DIExpression()), !dbg [[DBG30]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META18:![0-9]+]], metadata !DIExpression()), !dbg [[DBG31]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META19:![0-9]+]], metadata !DIExpression()), !dbg [[DBG32]] +; LZCNT-NEXT: #dbg_value(i8 [[IV_RES]], [[META15:![0-9]+]], 
!DIExpression(), [[DBG28]]) +; LZCNT-NEXT: #dbg_value(i8 [[NBITS_RES]], [[META16:![0-9]+]], !DIExpression(), [[DBG29]]) +; LZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED_RES]], [[META17:![0-9]+]], !DIExpression(), [[DBG30]]) +; LZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META18:![0-9]+]], !DIExpression(), [[DBG31]]) +; LZCNT-NEXT: #dbg_value(i8 [[IV_NEXT_RES]], [[META19:![0-9]+]], !DIExpression(), [[DBG32]]) ; LZCNT-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG33:![0-9]+]] ; LZCNT-NEXT: ret i8 [[IV_RES]], !dbg [[DBG34:![0-9]+]] ; @@ -113,15 +113,15 @@ define i8 @p1(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; NOLZCNT-NEXT: br label [[LOOP:%.*]], !dbg [[DBG47:![0-9]+]] ; NOLZCNT: loop: ; NOLZCNT-NEXT: [[IV:%.*]] = phi i8 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG48:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META37:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48]] +; NOLZCNT-NEXT: #dbg_value(i8 [[IV]], [[META37:![0-9]+]], !DIExpression(), [[DBG48]]) ; NOLZCNT-NEXT: [[NBITS:%.*]] = add nuw i8 [[IV]], [[EXTRAOFFSET:%.*]], !dbg [[DBG49:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS]], metadata [[META38:![0-9]+]], metadata !DIExpression()), !dbg [[DBG49]] +; NOLZCNT-NEXT: #dbg_value(i8 [[NBITS]], [[META38:![0-9]+]], !DIExpression(), [[DBG49]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL:%.*]], [[NBITS]], !dbg [[DBG50:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META39:![0-9]+]], metadata !DIExpression()), !dbg [[DBG50]] +; NOLZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META39:![0-9]+]], !DIExpression(), [[DBG50]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i8 [[VAL_SHIFTED]], 0, !dbg [[DBG51:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata 
[[META40:![0-9]+]], metadata !DIExpression()), !dbg [[DBG51]] +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META40:![0-9]+]], !DIExpression(), [[DBG51]]) ; NOLZCNT-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1, !dbg [[DBG52:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata [[META41:![0-9]+]], metadata !DIExpression()), !dbg [[DBG52]] +; NOLZCNT-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META41:![0-9]+]], !DIExpression(), [[DBG52]]) ; NOLZCNT-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i8 [[IV_NEXT]]), !dbg [[DBG53:![0-9]+]] ; NOLZCNT-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG54:![0-9]+]] ; NOLZCNT: end: @@ -130,11 +130,11 @@ define i8 @p1(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; NOLZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG57:![0-9]+]] ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG58:![0-9]+]] ; NOLZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG59:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META42:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS_RES]], metadata [[META43:![0-9]+]], metadata !DIExpression()), !dbg [[DBG56]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META44:![0-9]+]], metadata !DIExpression()), !dbg [[DBG57]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META45:![0-9]+]], metadata !DIExpression()), !dbg [[DBG58]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META46:![0-9]+]], metadata !DIExpression()), !dbg [[DBG59]] +; NOLZCNT-NEXT: #dbg_value(i8 [[IV_RES]], [[META42:![0-9]+]], !DIExpression(), [[DBG55]]) +; NOLZCNT-NEXT: #dbg_value(i8 
[[NBITS_RES]], [[META43:![0-9]+]], !DIExpression(), [[DBG56]]) +; NOLZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED_RES]], [[META44:![0-9]+]], !DIExpression(), [[DBG57]]) +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META45:![0-9]+]], !DIExpression(), [[DBG58]]) +; NOLZCNT-NEXT: #dbg_value(i8 [[IV_NEXT_RES]], [[META46:![0-9]+]], !DIExpression(), [[DBG59]]) ; NOLZCNT-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG60:![0-9]+]] ; NOLZCNT-NEXT: ret i8 [[IV_RES]], !dbg [[DBG61:![0-9]+]] ; @@ -153,14 +153,14 @@ define i8 @p1(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i8 [[LOOP_IV]], 1, !dbg [[DBG49:![0-9]+]] ; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i8 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG49]] ; LZCNT-NEXT: [[IV:%.*]] = add nsw i8 [[LOOP_IV]], [[START]], !dbg [[DBG49]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META37:![0-9]+]], metadata !DIExpression()), !dbg [[DBG47]] +; LZCNT-NEXT: #dbg_value(i8 [[IV]], [[META37:![0-9]+]], !DIExpression(), [[DBG47]]) ; LZCNT-NEXT: [[NBITS:%.*]] = add nuw i8 [[IV]], [[EXTRAOFFSET]], !dbg [[DBG49]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS]], metadata [[META38:![0-9]+]], metadata !DIExpression()), !dbg [[DBG49]] +; LZCNT-NEXT: #dbg_value(i8 [[NBITS]], [[META38:![0-9]+]], !DIExpression(), [[DBG49]]) ; LZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL]], [[NBITS]], !dbg [[DBG50:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META39:![0-9]+]], metadata !DIExpression()), !dbg [[DBG50]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[LOOP_IVCHECK]], metadata [[META40:![0-9]+]], metadata !DIExpression()), !dbg [[DBG51:![0-9]+]] +; LZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META39:![0-9]+]], !DIExpression(), [[DBG50]]) +; LZCNT-NEXT: #dbg_value(i1 [[LOOP_IVCHECK]], 
[[META40:![0-9]+]], !DIExpression(), [[META51:![0-9]+]]) ; LZCNT-NEXT: [[IV_NEXT:%.*]] = add i8 [[IV]], 1, !dbg [[DBG52:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata [[META41:![0-9]+]], metadata !DIExpression()), !dbg [[DBG52]] +; LZCNT-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META41:![0-9]+]], !DIExpression(), [[DBG52]]) ; LZCNT-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[LOOP_IVCHECK]], i8 [[IV_NEXT]]), !dbg [[DBG53:![0-9]+]] ; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG54:![0-9]+]] ; LZCNT: end: @@ -169,11 +169,11 @@ define i8 @p1(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; LZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG57:![0-9]+]] ; LZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[LOOP_IVCHECK]], [[LOOP]] ], !dbg [[DBG58:![0-9]+]] ; LZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG59:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META42:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS_RES]], metadata [[META43:![0-9]+]], metadata !DIExpression()), !dbg [[DBG56]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META44:![0-9]+]], metadata !DIExpression()), !dbg [[DBG57]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META45:![0-9]+]], metadata !DIExpression()), !dbg [[DBG58]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META46:![0-9]+]], metadata !DIExpression()), !dbg [[DBG59]] +; LZCNT-NEXT: #dbg_value(i8 [[IV_RES]], [[META42:![0-9]+]], !DIExpression(), [[DBG55]]) +; LZCNT-NEXT: #dbg_value(i8 [[NBITS_RES]], [[META43:![0-9]+]], !DIExpression(), [[DBG56]]) +; LZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED_RES]], [[META44:![0-9]+]], !DIExpression(), [[DBG57]]) +; 
LZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META45:![0-9]+]], !DIExpression(), [[DBG58]]) +; LZCNT-NEXT: #dbg_value(i8 [[IV_NEXT_RES]], [[META46:![0-9]+]], !DIExpression(), [[DBG59]]) ; LZCNT-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG60:![0-9]+]] ; LZCNT-NEXT: ret i8 [[IV_RES]], !dbg [[DBG61:![0-9]+]] ; @@ -210,15 +210,15 @@ define i8 @p2(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; NOLZCNT-NEXT: br label [[LOOP:%.*]], !dbg [[DBG74:![0-9]+]] ; NOLZCNT: loop: ; NOLZCNT-NEXT: [[IV:%.*]] = phi i8 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG75:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META64:![0-9]+]], metadata !DIExpression()), !dbg [[DBG75]] +; NOLZCNT-NEXT: #dbg_value(i8 [[IV]], [[META64:![0-9]+]], !DIExpression(), [[DBG75]]) ; NOLZCNT-NEXT: [[NBITS:%.*]] = sub nsw i8 [[IV]], [[EXTRAOFFSET:%.*]], !dbg [[DBG76:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS]], metadata [[META65:![0-9]+]], metadata !DIExpression()), !dbg [[DBG76]] +; NOLZCNT-NEXT: #dbg_value(i8 [[NBITS]], [[META65:![0-9]+]], !DIExpression(), [[DBG76]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL:%.*]], [[NBITS]], !dbg [[DBG77:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META66:![0-9]+]], metadata !DIExpression()), !dbg [[DBG77]] +; NOLZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META66:![0-9]+]], !DIExpression(), [[DBG77]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i8 [[VAL_SHIFTED]], 0, !dbg [[DBG78:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata [[META67:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78]] +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META67:![0-9]+]], !DIExpression(), [[DBG78]]) ; NOLZCNT-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1, !dbg 
[[DBG79:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata [[META68:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]] +; NOLZCNT-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META68:![0-9]+]], !DIExpression(), [[DBG79]]) ; NOLZCNT-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i8 [[IV_NEXT]]), !dbg [[DBG80:![0-9]+]] ; NOLZCNT-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG81:![0-9]+]] ; NOLZCNT: end: @@ -227,11 +227,11 @@ define i8 @p2(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; NOLZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG84:![0-9]+]] ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG85:![0-9]+]] ; NOLZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG86:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META69:![0-9]+]], metadata !DIExpression()), !dbg [[DBG82]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS_RES]], metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG83]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META71:![0-9]+]], metadata !DIExpression()), !dbg [[DBG84]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META72:![0-9]+]], metadata !DIExpression()), !dbg [[DBG85]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META73:![0-9]+]], metadata !DIExpression()), !dbg [[DBG86]] +; NOLZCNT-NEXT: #dbg_value(i8 [[IV_RES]], [[META69:![0-9]+]], !DIExpression(), [[DBG82]]) +; NOLZCNT-NEXT: #dbg_value(i8 [[NBITS_RES]], [[META70:![0-9]+]], !DIExpression(), [[DBG83]]) +; NOLZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED_RES]], [[META71:![0-9]+]], !DIExpression(), [[DBG84]]) +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], 
[[META72:![0-9]+]], !DIExpression(), [[DBG85]]) +; NOLZCNT-NEXT: #dbg_value(i8 [[IV_NEXT_RES]], [[META73:![0-9]+]], !DIExpression(), [[DBG86]]) ; NOLZCNT-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG87:![0-9]+]] ; NOLZCNT-NEXT: ret i8 [[IV_RES]], !dbg [[DBG88:![0-9]+]] ; @@ -249,14 +249,14 @@ define i8 @p2(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i8 [[LOOP_IV]], 1, !dbg [[DBG76:![0-9]+]] ; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i8 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG76]] ; LZCNT-NEXT: [[IV:%.*]] = add nsw i8 [[LOOP_IV]], [[START]], !dbg [[DBG76]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META64:![0-9]+]], metadata !DIExpression()), !dbg [[DBG74]] +; LZCNT-NEXT: #dbg_value(i8 [[IV]], [[META64:![0-9]+]], !DIExpression(), [[DBG74]]) ; LZCNT-NEXT: [[NBITS:%.*]] = sub nsw i8 [[IV]], [[EXTRAOFFSET]], !dbg [[DBG76]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS]], metadata [[META65:![0-9]+]], metadata !DIExpression()), !dbg [[DBG76]] +; LZCNT-NEXT: #dbg_value(i8 [[NBITS]], [[META65:![0-9]+]], !DIExpression(), [[DBG76]]) ; LZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL]], [[NBITS]], !dbg [[DBG77:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META66:![0-9]+]], metadata !DIExpression()), !dbg [[DBG77]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[LOOP_IVCHECK]], metadata [[META67:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78:![0-9]+]] +; LZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META66:![0-9]+]], !DIExpression(), [[DBG77]]) +; LZCNT-NEXT: #dbg_value(i1 [[LOOP_IVCHECK]], [[META67:![0-9]+]], !DIExpression(), [[META78:![0-9]+]]) ; LZCNT-NEXT: [[IV_NEXT:%.*]] = add i8 [[IV]], 1, !dbg [[DBG79:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata [[META68:![0-9]+]], 
metadata !DIExpression()), !dbg [[DBG79]] +; LZCNT-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META68:![0-9]+]], !DIExpression(), [[DBG79]]) ; LZCNT-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[LOOP_IVCHECK]], i8 [[IV_NEXT]]), !dbg [[DBG80:![0-9]+]] ; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG81:![0-9]+]] ; LZCNT: end: @@ -265,11 +265,11 @@ define i8 @p2(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; LZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG84:![0-9]+]] ; LZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[LOOP_IVCHECK]], [[LOOP]] ], !dbg [[DBG85:![0-9]+]] ; LZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG86:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META69:![0-9]+]], metadata !DIExpression()), !dbg [[DBG82]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS_RES]], metadata [[META70:![0-9]+]], metadata !DIExpression()), !dbg [[DBG83]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META71:![0-9]+]], metadata !DIExpression()), !dbg [[DBG84]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META72:![0-9]+]], metadata !DIExpression()), !dbg [[DBG85]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META73:![0-9]+]], metadata !DIExpression()), !dbg [[DBG86]] +; LZCNT-NEXT: #dbg_value(i8 [[IV_RES]], [[META69:![0-9]+]], !DIExpression(), [[DBG82]]) +; LZCNT-NEXT: #dbg_value(i8 [[NBITS_RES]], [[META70:![0-9]+]], !DIExpression(), [[DBG83]]) +; LZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED_RES]], [[META71:![0-9]+]], !DIExpression(), [[DBG84]]) +; LZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META72:![0-9]+]], !DIExpression(), [[DBG85]]) +; LZCNT-NEXT: #dbg_value(i8 [[IV_NEXT_RES]], [[META73:![0-9]+]], !DIExpression(), [[DBG86]]) ; LZCNT-NEXT: call void 
@escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG87:![0-9]+]] ; LZCNT-NEXT: ret i8 [[IV_RES]], !dbg [[DBG88:![0-9]+]] ; @@ -306,15 +306,15 @@ define i8 @n3(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; CHECK-NEXT: br label [[LOOP:%.*]], !dbg [[DBG101:![0-9]+]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG102:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META91:![0-9]+]], metadata !DIExpression()), !dbg [[DBG102]] +; CHECK-NEXT: #dbg_value(i8 [[IV]], [[META91:![0-9]+]], !DIExpression(), [[DBG102]]) ; CHECK-NEXT: [[NBITS:%.*]] = sub nuw i8 [[IV]], [[EXTRAOFFSET:%.*]], !dbg [[DBG103:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS]], metadata [[META92:![0-9]+]], metadata !DIExpression()), !dbg [[DBG103]] +; CHECK-NEXT: #dbg_value(i8 [[NBITS]], [[META92:![0-9]+]], !DIExpression(), [[DBG103]]) ; CHECK-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL:%.*]], [[NBITS]], !dbg [[DBG104:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META93:![0-9]+]], metadata !DIExpression()), !dbg [[DBG104]] +; CHECK-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META93:![0-9]+]], !DIExpression(), [[DBG104]]) ; CHECK-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i8 [[VAL_SHIFTED]], 0, !dbg [[DBG105:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata [[META94:![0-9]+]], metadata !DIExpression()), !dbg [[DBG105]] +; CHECK-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META94:![0-9]+]], !DIExpression(), [[DBG105]]) ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1, !dbg [[DBG106:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata [[META95:![0-9]+]], metadata !DIExpression()), !dbg [[DBG106]] +; CHECK-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META95:![0-9]+]], !DIExpression(), 
[[DBG106]]) ; CHECK-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i8 [[IV_NEXT]]), !dbg [[DBG107:![0-9]+]] ; CHECK-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG108:![0-9]+]] ; CHECK: end: @@ -323,11 +323,11 @@ define i8 @n3(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; CHECK-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG111:![0-9]+]] ; CHECK-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG112:![0-9]+]] ; CHECK-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG113:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META96:![0-9]+]], metadata !DIExpression()), !dbg [[DBG109]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS_RES]], metadata [[META97:![0-9]+]], metadata !DIExpression()), !dbg [[DBG110]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META98:![0-9]+]], metadata !DIExpression()), !dbg [[DBG111]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META99:![0-9]+]], metadata !DIExpression()), !dbg [[DBG112]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META100:![0-9]+]], metadata !DIExpression()), !dbg [[DBG113]] +; CHECK-NEXT: #dbg_value(i8 [[IV_RES]], [[META96:![0-9]+]], !DIExpression(), [[DBG109]]) +; CHECK-NEXT: #dbg_value(i8 [[NBITS_RES]], [[META97:![0-9]+]], !DIExpression(), [[DBG110]]) +; CHECK-NEXT: #dbg_value(i8 [[VAL_SHIFTED_RES]], [[META98:![0-9]+]], !DIExpression(), [[DBG111]]) +; CHECK-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META99:![0-9]+]], !DIExpression(), [[DBG112]]) +; CHECK-NEXT: #dbg_value(i8 [[IV_NEXT_RES]], [[META100:![0-9]+]], !DIExpression(), [[DBG113]]) ; CHECK-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 
[[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG114:![0-9]+]] ; CHECK-NEXT: ret i8 [[IV_RES]], !dbg [[DBG115:![0-9]+]] ; @@ -364,15 +364,15 @@ define i8 @n4(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; CHECK-NEXT: br label [[LOOP:%.*]], !dbg [[DBG128:![0-9]+]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG129:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META118:![0-9]+]], metadata !DIExpression()), !dbg [[DBG129]] +; CHECK-NEXT: #dbg_value(i8 [[IV]], [[META118:![0-9]+]], !DIExpression(), [[DBG129]]) ; CHECK-NEXT: [[NBITS:%.*]] = sub i8 [[IV]], [[EXTRAOFFSET:%.*]], !dbg [[DBG130:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS]], metadata [[META119:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130]] +; CHECK-NEXT: #dbg_value(i8 [[NBITS]], [[META119:![0-9]+]], !DIExpression(), [[DBG130]]) ; CHECK-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL:%.*]], [[NBITS]], !dbg [[DBG131:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META120:![0-9]+]], metadata !DIExpression()), !dbg [[DBG131]] +; CHECK-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META120:![0-9]+]], !DIExpression(), [[DBG131]]) ; CHECK-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i8 [[VAL_SHIFTED]], 0, !dbg [[DBG132:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata [[META121:![0-9]+]], metadata !DIExpression()), !dbg [[DBG132]] +; CHECK-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META121:![0-9]+]], !DIExpression(), [[DBG132]]) ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1, !dbg [[DBG133:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata [[META122:![0-9]+]], metadata !DIExpression()), !dbg [[DBG133]] +; CHECK-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META122:![0-9]+]], !DIExpression(), [[DBG133]]) ; CHECK-NEXT: call void @escape_inner(i8 [[IV]], i8 
[[NBITS]], i8 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i8 [[IV_NEXT]]), !dbg [[DBG134:![0-9]+]] ; CHECK-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG135:![0-9]+]] ; CHECK: end: @@ -381,11 +381,11 @@ define i8 @n4(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; CHECK-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG138:![0-9]+]] ; CHECK-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG139:![0-9]+]] ; CHECK-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG140:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META123:![0-9]+]], metadata !DIExpression()), !dbg [[DBG136]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS_RES]], metadata [[META124:![0-9]+]], metadata !DIExpression()), !dbg [[DBG137]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META125:![0-9]+]], metadata !DIExpression()), !dbg [[DBG138]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META126:![0-9]+]], metadata !DIExpression()), !dbg [[DBG139]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META127:![0-9]+]], metadata !DIExpression()), !dbg [[DBG140]] +; CHECK-NEXT: #dbg_value(i8 [[IV_RES]], [[META123:![0-9]+]], !DIExpression(), [[DBG136]]) +; CHECK-NEXT: #dbg_value(i8 [[NBITS_RES]], [[META124:![0-9]+]], !DIExpression(), [[DBG137]]) +; CHECK-NEXT: #dbg_value(i8 [[VAL_SHIFTED_RES]], [[META125:![0-9]+]], !DIExpression(), [[DBG138]]) +; CHECK-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META126:![0-9]+]], !DIExpression(), [[DBG139]]) +; CHECK-NEXT: #dbg_value(i8 [[IV_NEXT_RES]], [[META127:![0-9]+]], !DIExpression(), [[DBG140]]) ; CHECK-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG141:![0-9]+]] ; 
CHECK-NEXT: ret i8 [[IV_RES]], !dbg [[DBG142:![0-9]+]] ; @@ -422,15 +422,15 @@ define i8 @n5(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; CHECK-NEXT: br label [[LOOP:%.*]], !dbg [[DBG155:![0-9]+]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG156:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META145:![0-9]+]], metadata !DIExpression()), !dbg [[DBG156]] +; CHECK-NEXT: #dbg_value(i8 [[IV]], [[META145:![0-9]+]], !DIExpression(), [[DBG156]]) ; CHECK-NEXT: [[NBITS:%.*]] = add i8 [[IV]], [[EXTRAOFFSET:%.*]], !dbg [[DBG157:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS]], metadata [[META146:![0-9]+]], metadata !DIExpression()), !dbg [[DBG157]] +; CHECK-NEXT: #dbg_value(i8 [[NBITS]], [[META146:![0-9]+]], !DIExpression(), [[DBG157]]) ; CHECK-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL:%.*]], [[NBITS]], !dbg [[DBG158:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META147:![0-9]+]], metadata !DIExpression()), !dbg [[DBG158]] +; CHECK-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META147:![0-9]+]], !DIExpression(), [[DBG158]]) ; CHECK-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i8 [[VAL_SHIFTED]], 0, !dbg [[DBG159:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata [[META148:![0-9]+]], metadata !DIExpression()), !dbg [[DBG159]] +; CHECK-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META148:![0-9]+]], !DIExpression(), [[DBG159]]) ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1, !dbg [[DBG160:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata [[META149:![0-9]+]], metadata !DIExpression()), !dbg [[DBG160]] +; CHECK-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META149:![0-9]+]], !DIExpression(), [[DBG160]]) ; CHECK-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i8 [[IV_NEXT]]), !dbg 
[[DBG161:![0-9]+]] ; CHECK-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG162:![0-9]+]] ; CHECK: end: @@ -439,11 +439,11 @@ define i8 @n5(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; CHECK-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG165:![0-9]+]] ; CHECK-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG166:![0-9]+]] ; CHECK-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG167:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META150:![0-9]+]], metadata !DIExpression()), !dbg [[DBG163]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS_RES]], metadata [[META151:![0-9]+]], metadata !DIExpression()), !dbg [[DBG164]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META152:![0-9]+]], metadata !DIExpression()), !dbg [[DBG165]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META153:![0-9]+]], metadata !DIExpression()), !dbg [[DBG166]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META154:![0-9]+]], metadata !DIExpression()), !dbg [[DBG167]] +; CHECK-NEXT: #dbg_value(i8 [[IV_RES]], [[META150:![0-9]+]], !DIExpression(), [[DBG163]]) +; CHECK-NEXT: #dbg_value(i8 [[NBITS_RES]], [[META151:![0-9]+]], !DIExpression(), [[DBG164]]) +; CHECK-NEXT: #dbg_value(i8 [[VAL_SHIFTED_RES]], [[META152:![0-9]+]], !DIExpression(), [[DBG165]]) +; CHECK-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META153:![0-9]+]], !DIExpression(), [[DBG166]]) +; CHECK-NEXT: #dbg_value(i8 [[IV_NEXT_RES]], [[META154:![0-9]+]], !DIExpression(), [[DBG167]]) ; CHECK-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG168:![0-9]+]] ; CHECK-NEXT: ret i8 [[IV_RES]], !dbg [[DBG169:![0-9]+]] ; @@ -480,13 +480,13 @@ 
define i8 @p6(i8 %val, i8 %start) mustprogress { ; NOLZCNT-NEXT: br label [[LOOP:%.*]], !dbg [[DBG180:![0-9]+]] ; NOLZCNT: loop: ; NOLZCNT-NEXT: [[IV:%.*]] = phi i8 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG181:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META172:![0-9]+]], metadata !DIExpression()), !dbg [[DBG181]] +; NOLZCNT-NEXT: #dbg_value(i8 [[IV]], [[META172:![0-9]+]], !DIExpression(), [[DBG181]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL:%.*]], [[IV]], !dbg [[DBG182:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META173:![0-9]+]], metadata !DIExpression()), !dbg [[DBG182]] +; NOLZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META173:![0-9]+]], !DIExpression(), [[DBG182]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i8 [[VAL_SHIFTED]], 0, !dbg [[DBG183:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata [[META174:![0-9]+]], metadata !DIExpression()), !dbg [[DBG183]] +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META174:![0-9]+]], !DIExpression(), [[DBG183]]) ; NOLZCNT-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1, !dbg [[DBG184:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata [[META175:![0-9]+]], metadata !DIExpression()), !dbg [[DBG184]] +; NOLZCNT-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META175:![0-9]+]], !DIExpression(), [[DBG184]]) ; NOLZCNT-NEXT: call void @escape_inner(i8 [[IV]], i8 [[IV]], i8 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i8 [[IV_NEXT]]), !dbg [[DBG185:![0-9]+]] ; NOLZCNT-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG186:![0-9]+]] ; NOLZCNT: end: @@ -494,10 +494,10 @@ define i8 @p6(i8 %val, i8 %start) mustprogress { ; NOLZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG188:![0-9]+]] ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ 
[[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG189:![0-9]+]] ; NOLZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG190:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META176:![0-9]+]], metadata !DIExpression()), !dbg [[DBG187]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META177:![0-9]+]], metadata !DIExpression()), !dbg [[DBG188]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META178:![0-9]+]], metadata !DIExpression()), !dbg [[DBG189]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META179:![0-9]+]], metadata !DIExpression()), !dbg [[DBG190]] +; NOLZCNT-NEXT: #dbg_value(i8 [[IV_RES]], [[META176:![0-9]+]], !DIExpression(), [[DBG187]]) +; NOLZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED_RES]], [[META177:![0-9]+]], !DIExpression(), [[DBG188]]) +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META178:![0-9]+]], !DIExpression(), [[DBG189]]) +; NOLZCNT-NEXT: #dbg_value(i8 [[IV_NEXT_RES]], [[META179:![0-9]+]], !DIExpression(), [[DBG190]]) ; NOLZCNT-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[IV_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG191:![0-9]+]] ; NOLZCNT-NEXT: ret i8 [[IV_RES]], !dbg [[DBG192:![0-9]+]] ; @@ -515,12 +515,12 @@ define i8 @p6(i8 %val, i8 %start) mustprogress { ; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i8 [[LOOP_IV]], 1, !dbg [[DBG182:![0-9]+]] ; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i8 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG182]] ; LZCNT-NEXT: [[IV:%.*]] = add nsw i8 [[LOOP_IV]], [[START]], !dbg [[DBG182]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META172:![0-9]+]], metadata !DIExpression()), !dbg [[DBG180]] +; LZCNT-NEXT: #dbg_value(i8 [[IV]], [[META172:![0-9]+]], !DIExpression(), [[DBG180]]) ; LZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL]], [[IV]], 
!dbg [[DBG182]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META173:![0-9]+]], metadata !DIExpression()), !dbg [[DBG182]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[LOOP_IVCHECK]], metadata [[META174:![0-9]+]], metadata !DIExpression()), !dbg [[DBG183:![0-9]+]] +; LZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META173:![0-9]+]], !DIExpression(), [[DBG182]]) +; LZCNT-NEXT: #dbg_value(i1 [[LOOP_IVCHECK]], [[META174:![0-9]+]], !DIExpression(), [[META183:![0-9]+]]) ; LZCNT-NEXT: [[IV_NEXT:%.*]] = add i8 [[IV]], 1, !dbg [[DBG184:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata [[META175:![0-9]+]], metadata !DIExpression()), !dbg [[DBG184]] +; LZCNT-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META175:![0-9]+]], !DIExpression(), [[DBG184]]) ; LZCNT-NEXT: call void @escape_inner(i8 [[IV]], i8 [[IV]], i8 [[VAL_SHIFTED]], i1 [[LOOP_IVCHECK]], i8 [[IV_NEXT]]), !dbg [[DBG185:![0-9]+]] ; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG186:![0-9]+]] ; LZCNT: end: @@ -528,10 +528,10 @@ define i8 @p6(i8 %val, i8 %start) mustprogress { ; LZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG188:![0-9]+]] ; LZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[LOOP_IVCHECK]], [[LOOP]] ], !dbg [[DBG189:![0-9]+]] ; LZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG190:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META176:![0-9]+]], metadata !DIExpression()), !dbg [[DBG187]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META177:![0-9]+]], metadata !DIExpression()), !dbg [[DBG188]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META178:![0-9]+]], metadata !DIExpression()), !dbg [[DBG189]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META179:![0-9]+]], metadata 
!DIExpression()), !dbg [[DBG190]] +; LZCNT-NEXT: #dbg_value(i8 [[IV_RES]], [[META176:![0-9]+]], !DIExpression(), [[DBG187]]) +; LZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED_RES]], [[META177:![0-9]+]], !DIExpression(), [[DBG188]]) +; LZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META178:![0-9]+]], !DIExpression(), [[DBG189]]) +; LZCNT-NEXT: #dbg_value(i8 [[IV_NEXT_RES]], [[META179:![0-9]+]], !DIExpression(), [[DBG190]]) ; LZCNT-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[IV_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG191:![0-9]+]] ; LZCNT-NEXT: ret i8 [[IV_RES]], !dbg [[DBG192:![0-9]+]] ; @@ -569,15 +569,15 @@ define i7 @p7(i7 %val, i7 %start, i7 %extraoffset) mustprogress { ; NOLZCNT-NEXT: br label [[LOOP:%.*]], !dbg [[DBG205:![0-9]+]] ; NOLZCNT: loop: ; NOLZCNT-NEXT: [[IV:%.*]] = phi i7 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG206:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i7 [[IV]], metadata [[META195:![0-9]+]], metadata !DIExpression()), !dbg [[DBG206]] +; NOLZCNT-NEXT: #dbg_value(i7 [[IV]], [[META195:![0-9]+]], !DIExpression(), [[DBG206]]) ; NOLZCNT-NEXT: [[NBITS:%.*]] = add nsw i7 [[IV]], [[EXTRAOFFSET:%.*]], !dbg [[DBG207:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i7 [[NBITS]], metadata [[META196:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]] +; NOLZCNT-NEXT: #dbg_value(i7 [[NBITS]], [[META196:![0-9]+]], !DIExpression(), [[DBG207]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i7 [[VAL:%.*]], [[NBITS]], !dbg [[DBG208:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i7 [[VAL_SHIFTED]], metadata [[META197:![0-9]+]], metadata !DIExpression()), !dbg [[DBG208]] +; NOLZCNT-NEXT: #dbg_value(i7 [[VAL_SHIFTED]], [[META197:![0-9]+]], !DIExpression(), [[DBG208]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i7 [[VAL_SHIFTED]], 0, !dbg [[DBG209:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 
[[VAL_SHIFTED_ISZERO]], metadata [[META198:![0-9]+]], metadata !DIExpression()), !dbg [[DBG209]] +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META198:![0-9]+]], !DIExpression(), [[DBG209]]) ; NOLZCNT-NEXT: [[IV_NEXT]] = add i7 [[IV]], 1, !dbg [[DBG210:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i7 [[IV_NEXT]], metadata [[META199:![0-9]+]], metadata !DIExpression()), !dbg [[DBG210]] +; NOLZCNT-NEXT: #dbg_value(i7 [[IV_NEXT]], [[META199:![0-9]+]], !DIExpression(), [[DBG210]]) ; NOLZCNT-NEXT: call void @escape_inner.i7(i7 [[IV]], i7 [[NBITS]], i7 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i7 [[IV_NEXT]]), !dbg [[DBG211:![0-9]+]] ; NOLZCNT-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG212:![0-9]+]] ; NOLZCNT: end: @@ -586,11 +586,11 @@ define i7 @p7(i7 %val, i7 %start, i7 %extraoffset) mustprogress { ; NOLZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i7 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG215:![0-9]+]] ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG216:![0-9]+]] ; NOLZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i7 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG217:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i7 [[IV_RES]], metadata [[META200:![0-9]+]], metadata !DIExpression()), !dbg [[DBG213]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i7 [[NBITS_RES]], metadata [[META201:![0-9]+]], metadata !DIExpression()), !dbg [[DBG214]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i7 [[VAL_SHIFTED_RES]], metadata [[META202:![0-9]+]], metadata !DIExpression()), !dbg [[DBG215]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META203:![0-9]+]], metadata !DIExpression()), !dbg [[DBG216]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i7 [[IV_NEXT_RES]], metadata [[META204:![0-9]+]], metadata !DIExpression()), !dbg [[DBG217]] +; NOLZCNT-NEXT: #dbg_value(i7 [[IV_RES]], [[META200:![0-9]+]], 
!DIExpression(), [[DBG213]]) +; NOLZCNT-NEXT: #dbg_value(i7 [[NBITS_RES]], [[META201:![0-9]+]], !DIExpression(), [[DBG214]]) +; NOLZCNT-NEXT: #dbg_value(i7 [[VAL_SHIFTED_RES]], [[META202:![0-9]+]], !DIExpression(), [[DBG215]]) +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META203:![0-9]+]], !DIExpression(), [[DBG216]]) +; NOLZCNT-NEXT: #dbg_value(i7 [[IV_NEXT_RES]], [[META204:![0-9]+]], !DIExpression(), [[DBG217]]) ; NOLZCNT-NEXT: call void @escape_outer.i7(i7 [[IV_RES]], i7 [[NBITS_RES]], i7 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i7 [[IV_NEXT_RES]]), !dbg [[DBG218:![0-9]+]] ; NOLZCNT-NEXT: ret i7 [[IV_RES]], !dbg [[DBG219:![0-9]+]] ; @@ -609,14 +609,14 @@ define i7 @p7(i7 %val, i7 %start, i7 %extraoffset) mustprogress { ; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i7 [[LOOP_IV]], 1, !dbg [[DBG207:![0-9]+]] ; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i7 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG207]] ; LZCNT-NEXT: [[IV:%.*]] = add nsw i7 [[LOOP_IV]], [[START]], !dbg [[DBG207]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i7 [[IV]], metadata [[META195:![0-9]+]], metadata !DIExpression()), !dbg [[DBG205]] +; LZCNT-NEXT: #dbg_value(i7 [[IV]], [[META195:![0-9]+]], !DIExpression(), [[DBG205]]) ; LZCNT-NEXT: [[NBITS:%.*]] = add nsw i7 [[IV]], [[EXTRAOFFSET]], !dbg [[DBG207]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i7 [[NBITS]], metadata [[META196:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]] +; LZCNT-NEXT: #dbg_value(i7 [[NBITS]], [[META196:![0-9]+]], !DIExpression(), [[DBG207]]) ; LZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i7 [[VAL]], [[NBITS]], !dbg [[DBG208:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i7 [[VAL_SHIFTED]], metadata [[META197:![0-9]+]], metadata !DIExpression()), !dbg [[DBG208]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[LOOP_IVCHECK]], metadata [[META198:![0-9]+]], metadata !DIExpression()), !dbg [[DBG209:![0-9]+]] +; LZCNT-NEXT: #dbg_value(i7 [[VAL_SHIFTED]], 
[[META197:![0-9]+]], !DIExpression(), [[DBG208]]) +; LZCNT-NEXT: #dbg_value(i1 [[LOOP_IVCHECK]], [[META198:![0-9]+]], !DIExpression(), [[META209:![0-9]+]]) ; LZCNT-NEXT: [[IV_NEXT:%.*]] = add i7 [[IV]], 1, !dbg [[DBG210:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i7 [[IV_NEXT]], metadata [[META199:![0-9]+]], metadata !DIExpression()), !dbg [[DBG210]] +; LZCNT-NEXT: #dbg_value(i7 [[IV_NEXT]], [[META199:![0-9]+]], !DIExpression(), [[DBG210]]) ; LZCNT-NEXT: call void @escape_inner.i7(i7 [[IV]], i7 [[NBITS]], i7 [[VAL_SHIFTED]], i1 [[LOOP_IVCHECK]], i7 [[IV_NEXT]]), !dbg [[DBG211:![0-9]+]] ; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG212:![0-9]+]] ; LZCNT: end: @@ -625,11 +625,11 @@ define i7 @p7(i7 %val, i7 %start, i7 %extraoffset) mustprogress { ; LZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i7 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG215:![0-9]+]] ; LZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[LOOP_IVCHECK]], [[LOOP]] ], !dbg [[DBG216:![0-9]+]] ; LZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i7 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG217:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i7 [[IV_RES]], metadata [[META200:![0-9]+]], metadata !DIExpression()), !dbg [[DBG213]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i7 [[NBITS_RES]], metadata [[META201:![0-9]+]], metadata !DIExpression()), !dbg [[DBG214]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i7 [[VAL_SHIFTED_RES]], metadata [[META202:![0-9]+]], metadata !DIExpression()), !dbg [[DBG215]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META203:![0-9]+]], metadata !DIExpression()), !dbg [[DBG216]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i7 [[IV_NEXT_RES]], metadata [[META204:![0-9]+]], metadata !DIExpression()), !dbg [[DBG217]] +; LZCNT-NEXT: #dbg_value(i7 [[IV_RES]], [[META200:![0-9]+]], !DIExpression(), [[DBG213]]) +; LZCNT-NEXT: #dbg_value(i7 [[NBITS_RES]], [[META201:![0-9]+]], 
!DIExpression(), [[DBG214]]) +; LZCNT-NEXT: #dbg_value(i7 [[VAL_SHIFTED_RES]], [[META202:![0-9]+]], !DIExpression(), [[DBG215]]) +; LZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META203:![0-9]+]], !DIExpression(), [[DBG216]]) +; LZCNT-NEXT: #dbg_value(i7 [[IV_NEXT_RES]], [[META204:![0-9]+]], !DIExpression(), [[DBG217]]) ; LZCNT-NEXT: call void @escape_outer.i7(i7 [[IV_RES]], i7 [[NBITS_RES]], i7 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i7 [[IV_NEXT_RES]]), !dbg [[DBG218:![0-9]+]] ; LZCNT-NEXT: ret i7 [[IV_RES]], !dbg [[DBG219:![0-9]+]] ; @@ -666,15 +666,15 @@ define i8 @n8(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; CHECK-NEXT: br label [[LOOP:%.*]], !dbg [[DBG232:![0-9]+]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG233:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META222:![0-9]+]], metadata !DIExpression()), !dbg [[DBG233]] +; CHECK-NEXT: #dbg_value(i8 [[IV]], [[META222:![0-9]+]], !DIExpression(), [[DBG233]]) ; CHECK-NEXT: [[NBITS:%.*]] = add nsw i8 [[IV]], [[EXTRAOFFSET:%.*]], !dbg [[DBG234:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS]], metadata [[META223:![0-9]+]], metadata !DIExpression()), !dbg [[DBG234]] +; CHECK-NEXT: #dbg_value(i8 [[NBITS]], [[META223:![0-9]+]], !DIExpression(), [[DBG234]]) ; CHECK-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL:%.*]], [[NBITS]], !dbg [[DBG235:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META224:![0-9]+]], metadata !DIExpression()), !dbg [[DBG235]] +; CHECK-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META224:![0-9]+]], !DIExpression(), [[DBG235]]) ; CHECK-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i8 [[VAL_SHIFTED]], 0, !dbg [[DBG236:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata [[META225:![0-9]+]], metadata !DIExpression()), !dbg [[DBG236]] +; CHECK-NEXT: 
#dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META225:![0-9]+]], !DIExpression(), [[DBG236]]) ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 2, !dbg [[DBG237:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata [[META226:![0-9]+]], metadata !DIExpression()), !dbg [[DBG237]] +; CHECK-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META226:![0-9]+]], !DIExpression(), [[DBG237]]) ; CHECK-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i8 [[IV_NEXT]]), !dbg [[DBG238:![0-9]+]] ; CHECK-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG239:![0-9]+]] ; CHECK: end: @@ -683,11 +683,11 @@ define i8 @n8(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; CHECK-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG242:![0-9]+]] ; CHECK-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG243:![0-9]+]] ; CHECK-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG244:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META227:![0-9]+]], metadata !DIExpression()), !dbg [[DBG240]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS_RES]], metadata [[META228:![0-9]+]], metadata !DIExpression()), !dbg [[DBG241]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META229:![0-9]+]], metadata !DIExpression()), !dbg [[DBG242]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META230:![0-9]+]], metadata !DIExpression()), !dbg [[DBG243]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META231:![0-9]+]], metadata !DIExpression()), !dbg [[DBG244]] +; CHECK-NEXT: #dbg_value(i8 [[IV_RES]], [[META227:![0-9]+]], !DIExpression(), [[DBG240]]) +; CHECK-NEXT: #dbg_value(i8 [[NBITS_RES]], [[META228:![0-9]+]], !DIExpression(), [[DBG241]]) +; CHECK-NEXT: #dbg_value(i8 
[[VAL_SHIFTED_RES]], [[META229:![0-9]+]], !DIExpression(), [[DBG242]]) +; CHECK-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META230:![0-9]+]], !DIExpression(), [[DBG243]]) +; CHECK-NEXT: #dbg_value(i8 [[IV_NEXT_RES]], [[META231:![0-9]+]], !DIExpression(), [[DBG244]]) ; CHECK-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG245:![0-9]+]] ; CHECK-NEXT: ret i8 [[IV_RES]], !dbg [[DBG246:![0-9]+]] ; @@ -724,15 +724,15 @@ define i8 @t9(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; NOLZCNT-NEXT: br label [[LOOP:%.*]], !dbg [[DBG259:![0-9]+]] ; NOLZCNT: loop: ; NOLZCNT-NEXT: [[IV:%.*]] = phi i8 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG260:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META249:![0-9]+]], metadata !DIExpression()), !dbg [[DBG260]] +; NOLZCNT-NEXT: #dbg_value(i8 [[IV]], [[META249:![0-9]+]], !DIExpression(), [[DBG260]]) ; NOLZCNT-NEXT: [[NBITS:%.*]] = add nsw i8 [[IV]], [[EXTRAOFFSET:%.*]], !dbg [[DBG261:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS]], metadata [[META250:![0-9]+]], metadata !DIExpression()), !dbg [[DBG261]] +; NOLZCNT-NEXT: #dbg_value(i8 [[NBITS]], [[META250:![0-9]+]], !DIExpression(), [[DBG261]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL:%.*]], [[NBITS]], !dbg [[DBG262:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META251:![0-9]+]], metadata !DIExpression()), !dbg [[DBG262]] +; NOLZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META251:![0-9]+]], !DIExpression(), [[DBG262]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISNOTZERO:%.*]] = icmp ne i8 [[VAL_SHIFTED]], 0, !dbg [[DBG263:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISNOTZERO]], metadata [[META252:![0-9]+]], metadata !DIExpression()), !dbg [[DBG263]] +; NOLZCNT-NEXT: #dbg_value(i1 
[[VAL_SHIFTED_ISNOTZERO]], [[META252:![0-9]+]], !DIExpression(), [[DBG263]]) ; NOLZCNT-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1, !dbg [[DBG264:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata [[META253:![0-9]+]], metadata !DIExpression()), !dbg [[DBG264]] +; NOLZCNT-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META253:![0-9]+]], !DIExpression(), [[DBG264]]) ; NOLZCNT-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISNOTZERO]], i8 [[IV_NEXT]]), !dbg [[DBG265:![0-9]+]] ; NOLZCNT-NEXT: br i1 [[VAL_SHIFTED_ISNOTZERO]], label [[LOOP]], label [[END:%.*]], !dbg [[DBG266:![0-9]+]] ; NOLZCNT: end: @@ -741,11 +741,11 @@ define i8 @t9(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; NOLZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG269:![0-9]+]] ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISNOTZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISNOTZERO]], [[LOOP]] ], !dbg [[DBG270:![0-9]+]] ; NOLZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG271:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META254:![0-9]+]], metadata !DIExpression()), !dbg [[DBG267]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS_RES]], metadata [[META255:![0-9]+]], metadata !DIExpression()), !dbg [[DBG268]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META256:![0-9]+]], metadata !DIExpression()), !dbg [[DBG269]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISNOTZERO_RES]], metadata [[META257:![0-9]+]], metadata !DIExpression()), !dbg [[DBG270]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META258:![0-9]+]], metadata !DIExpression()), !dbg [[DBG271]] +; NOLZCNT-NEXT: #dbg_value(i8 [[IV_RES]], [[META254:![0-9]+]], !DIExpression(), [[DBG267]]) +; NOLZCNT-NEXT: #dbg_value(i8 [[NBITS_RES]], [[META255:![0-9]+]], !DIExpression(), 
[[DBG268]]) +; NOLZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED_RES]], [[META256:![0-9]+]], !DIExpression(), [[DBG269]]) +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISNOTZERO_RES]], [[META257:![0-9]+]], !DIExpression(), [[DBG270]]) +; NOLZCNT-NEXT: #dbg_value(i8 [[IV_NEXT_RES]], [[META258:![0-9]+]], !DIExpression(), [[DBG271]]) ; NOLZCNT-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISNOTZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG272:![0-9]+]] ; NOLZCNT-NEXT: ret i8 [[IV_RES]], !dbg [[DBG273:![0-9]+]] ; @@ -765,14 +765,14 @@ define i8 @t9(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i8 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG261]] ; LZCNT-NEXT: [[VAL_SHIFTED_ISNOTZERO:%.*]] = xor i1 [[LOOP_IVCHECK]], true, !dbg [[DBG261]] ; LZCNT-NEXT: [[IV:%.*]] = add nsw i8 [[LOOP_IV]], [[START]], !dbg [[DBG261]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META249:![0-9]+]], metadata !DIExpression()), !dbg [[DBG259]] +; LZCNT-NEXT: #dbg_value(i8 [[IV]], [[META249:![0-9]+]], !DIExpression(), [[DBG259]]) ; LZCNT-NEXT: [[NBITS:%.*]] = add nsw i8 [[IV]], [[EXTRAOFFSET]], !dbg [[DBG261]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS]], metadata [[META250:![0-9]+]], metadata !DIExpression()), !dbg [[DBG261]] +; LZCNT-NEXT: #dbg_value(i8 [[NBITS]], [[META250:![0-9]+]], !DIExpression(), [[DBG261]]) ; LZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL]], [[NBITS]], !dbg [[DBG262:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META251:![0-9]+]], metadata !DIExpression()), !dbg [[DBG262]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISNOTZERO]], metadata [[META252:![0-9]+]], metadata !DIExpression()), !dbg [[DBG263:![0-9]+]] +; LZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META251:![0-9]+]], !DIExpression(), [[DBG262]]) +; LZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISNOTZERO]], 
[[META252:![0-9]+]], !DIExpression(), [[META263:![0-9]+]]) ; LZCNT-NEXT: [[IV_NEXT:%.*]] = add i8 [[IV]], 1, !dbg [[DBG264:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata [[META253:![0-9]+]], metadata !DIExpression()), !dbg [[DBG264]] +; LZCNT-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META253:![0-9]+]], !DIExpression(), [[DBG264]]) ; LZCNT-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISNOTZERO]], i8 [[IV_NEXT]]), !dbg [[DBG265:![0-9]+]] ; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG266:![0-9]+]] ; LZCNT: end: @@ -781,11 +781,11 @@ define i8 @t9(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; LZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG269:![0-9]+]] ; LZCNT-NEXT: [[VAL_SHIFTED_ISNOTZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISNOTZERO]], [[LOOP]] ], !dbg [[DBG270:![0-9]+]] ; LZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG271:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META254:![0-9]+]], metadata !DIExpression()), !dbg [[DBG267]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS_RES]], metadata [[META255:![0-9]+]], metadata !DIExpression()), !dbg [[DBG268]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META256:![0-9]+]], metadata !DIExpression()), !dbg [[DBG269]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISNOTZERO_RES]], metadata [[META257:![0-9]+]], metadata !DIExpression()), !dbg [[DBG270]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META258:![0-9]+]], metadata !DIExpression()), !dbg [[DBG271]] +; LZCNT-NEXT: #dbg_value(i8 [[IV_RES]], [[META254:![0-9]+]], !DIExpression(), [[DBG267]]) +; LZCNT-NEXT: #dbg_value(i8 [[NBITS_RES]], [[META255:![0-9]+]], !DIExpression(), [[DBG268]]) +; LZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED_RES]], 
[[META256:![0-9]+]], !DIExpression(), [[DBG269]]) +; LZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISNOTZERO_RES]], [[META257:![0-9]+]], !DIExpression(), [[DBG270]]) +; LZCNT-NEXT: #dbg_value(i8 [[IV_NEXT_RES]], [[META258:![0-9]+]], !DIExpression(), [[DBG271]]) ; LZCNT-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISNOTZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG272:![0-9]+]] ; LZCNT-NEXT: ret i8 [[IV_RES]], !dbg [[DBG273:![0-9]+]] ; @@ -822,15 +822,15 @@ define i8 @n10(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; CHECK-NEXT: br label [[LOOP:%.*]], !dbg [[DBG286:![0-9]+]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG287:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META276:![0-9]+]], metadata !DIExpression()), !dbg [[DBG287]] +; CHECK-NEXT: #dbg_value(i8 [[IV]], [[META276:![0-9]+]], !DIExpression(), [[DBG287]]) ; CHECK-NEXT: [[NBITS:%.*]] = add nsw i8 [[IV]], [[EXTRAOFFSET:%.*]], !dbg [[DBG288:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS]], metadata [[META277:![0-9]+]], metadata !DIExpression()), !dbg [[DBG288]] +; CHECK-NEXT: #dbg_value(i8 [[NBITS]], [[META277:![0-9]+]], !DIExpression(), [[DBG288]]) ; CHECK-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL:%.*]], [[NBITS]], !dbg [[DBG289:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META278:![0-9]+]], metadata !DIExpression()), !dbg [[DBG289]] +; CHECK-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META278:![0-9]+]], !DIExpression(), [[DBG289]]) ; CHECK-NEXT: [[VAL_SHIFTED_ISNOTZERO:%.*]] = icmp ne i8 [[VAL_SHIFTED]], 0, !dbg [[DBG290:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISNOTZERO]], metadata [[META279:![0-9]+]], metadata !DIExpression()), !dbg [[DBG290]] +; CHECK-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISNOTZERO]], [[META279:![0-9]+]], !DIExpression(), 
[[DBG290]]) ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1, !dbg [[DBG291:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata [[META280:![0-9]+]], metadata !DIExpression()), !dbg [[DBG291]] +; CHECK-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META280:![0-9]+]], !DIExpression(), [[DBG291]]) ; CHECK-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISNOTZERO]], i8 [[IV_NEXT]]), !dbg [[DBG292:![0-9]+]] ; CHECK-NEXT: br i1 [[VAL_SHIFTED_ISNOTZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG293:![0-9]+]] ; CHECK: end: @@ -839,11 +839,11 @@ define i8 @n10(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; CHECK-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG296:![0-9]+]] ; CHECK-NEXT: [[VAL_SHIFTED_ISNOTZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISNOTZERO]], [[LOOP]] ], !dbg [[DBG297:![0-9]+]] ; CHECK-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG298:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META281:![0-9]+]], metadata !DIExpression()), !dbg [[DBG294]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS_RES]], metadata [[META282:![0-9]+]], metadata !DIExpression()), !dbg [[DBG295]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META283:![0-9]+]], metadata !DIExpression()), !dbg [[DBG296]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISNOTZERO_RES]], metadata [[META284:![0-9]+]], metadata !DIExpression()), !dbg [[DBG297]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META285:![0-9]+]], metadata !DIExpression()), !dbg [[DBG298]] +; CHECK-NEXT: #dbg_value(i8 [[IV_RES]], [[META281:![0-9]+]], !DIExpression(), [[DBG294]]) +; CHECK-NEXT: #dbg_value(i8 [[NBITS_RES]], [[META282:![0-9]+]], !DIExpression(), [[DBG295]]) +; CHECK-NEXT: #dbg_value(i8 [[VAL_SHIFTED_RES]], [[META283:![0-9]+]], !DIExpression(), 
[[DBG296]]) +; CHECK-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISNOTZERO_RES]], [[META284:![0-9]+]], !DIExpression(), [[DBG297]]) +; CHECK-NEXT: #dbg_value(i8 [[IV_NEXT_RES]], [[META285:![0-9]+]], !DIExpression(), [[DBG298]]) ; CHECK-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISNOTZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG299:![0-9]+]] ; CHECK-NEXT: ret i8 [[IV_RES]], !dbg [[DBG300:![0-9]+]] ; @@ -880,15 +880,15 @@ define i8 @n11(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; CHECK-NEXT: br label [[LOOP:%.*]], !dbg [[DBG313:![0-9]+]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG314:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META303:![0-9]+]], metadata !DIExpression()), !dbg [[DBG314]] +; CHECK-NEXT: #dbg_value(i8 [[IV]], [[META303:![0-9]+]], !DIExpression(), [[DBG314]]) ; CHECK-NEXT: [[NBITS:%.*]] = add nsw i8 [[IV]], [[EXTRAOFFSET:%.*]], !dbg [[DBG315:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS]], metadata [[META304:![0-9]+]], metadata !DIExpression()), !dbg [[DBG315]] +; CHECK-NEXT: #dbg_value(i8 [[NBITS]], [[META304:![0-9]+]], !DIExpression(), [[DBG315]]) ; CHECK-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL:%.*]], [[NBITS]], !dbg [[DBG316:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META305:![0-9]+]], metadata !DIExpression()), !dbg [[DBG316]] +; CHECK-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META305:![0-9]+]], !DIExpression(), [[DBG316]]) ; CHECK-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i8 [[VAL_SHIFTED]], 0, !dbg [[DBG317:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata [[META306:![0-9]+]], metadata !DIExpression()), !dbg [[DBG317]] +; CHECK-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META306:![0-9]+]], !DIExpression(), [[DBG317]]) ; CHECK-NEXT: [[IV_NEXT]] = add i8 
[[IV]], 1, !dbg [[DBG318:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata [[META307:![0-9]+]], metadata !DIExpression()), !dbg [[DBG318]] +; CHECK-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META307:![0-9]+]], !DIExpression(), [[DBG318]]) ; CHECK-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i8 [[IV_NEXT]]), !dbg [[DBG319:![0-9]+]] ; CHECK-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[LOOP]], label [[END:%.*]], !dbg [[DBG320:![0-9]+]] ; CHECK: end: @@ -897,11 +897,11 @@ define i8 @n11(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; CHECK-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG323:![0-9]+]] ; CHECK-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG324:![0-9]+]] ; CHECK-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG325:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META308:![0-9]+]], metadata !DIExpression()), !dbg [[DBG321]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS_RES]], metadata [[META309:![0-9]+]], metadata !DIExpression()), !dbg [[DBG322]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META310:![0-9]+]], metadata !DIExpression()), !dbg [[DBG323]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META311:![0-9]+]], metadata !DIExpression()), !dbg [[DBG324]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META312:![0-9]+]], metadata !DIExpression()), !dbg [[DBG325]] +; CHECK-NEXT: #dbg_value(i8 [[IV_RES]], [[META308:![0-9]+]], !DIExpression(), [[DBG321]]) +; CHECK-NEXT: #dbg_value(i8 [[NBITS_RES]], [[META309:![0-9]+]], !DIExpression(), [[DBG322]]) +; CHECK-NEXT: #dbg_value(i8 [[VAL_SHIFTED_RES]], [[META310:![0-9]+]], !DIExpression(), [[DBG323]]) +; CHECK-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], 
[[META311:![0-9]+]], !DIExpression(), [[DBG324]]) +; CHECK-NEXT: #dbg_value(i8 [[IV_NEXT_RES]], [[META312:![0-9]+]], !DIExpression(), [[DBG325]]) ; CHECK-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG326:![0-9]+]] ; CHECK-NEXT: ret i8 [[IV_RES]], !dbg [[DBG327:![0-9]+]] ; @@ -938,15 +938,15 @@ define i8 @n12(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; CHECK-NEXT: br label [[LOOP:%.*]], !dbg [[DBG340:![0-9]+]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG341:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META330:![0-9]+]], metadata !DIExpression()), !dbg [[DBG341]] +; CHECK-NEXT: #dbg_value(i8 [[IV]], [[META330:![0-9]+]], !DIExpression(), [[DBG341]]) ; CHECK-NEXT: [[NBITS:%.*]] = add nsw i8 [[IV]], [[EXTRAOFFSET:%.*]], !dbg [[DBG342:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS]], metadata [[META331:![0-9]+]], metadata !DIExpression()), !dbg [[DBG342]] +; CHECK-NEXT: #dbg_value(i8 [[NBITS]], [[META331:![0-9]+]], !DIExpression(), [[DBG342]]) ; CHECK-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL:%.*]], [[NBITS]], !dbg [[DBG343:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META332:![0-9]+]], metadata !DIExpression()), !dbg [[DBG343]] +; CHECK-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META332:![0-9]+]], !DIExpression(), [[DBG343]]) ; CHECK-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i8 [[VAL_SHIFTED]], 1, !dbg [[DBG344:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata [[META333:![0-9]+]], metadata !DIExpression()), !dbg [[DBG344]] +; CHECK-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META333:![0-9]+]], !DIExpression(), [[DBG344]]) ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1, !dbg [[DBG345:![0-9]+]] -; CHECK-NEXT: call void 
@llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata [[META334:![0-9]+]], metadata !DIExpression()), !dbg [[DBG345]] +; CHECK-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META334:![0-9]+]], !DIExpression(), [[DBG345]]) ; CHECK-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i8 [[IV_NEXT]]), !dbg [[DBG346:![0-9]+]] ; CHECK-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG347:![0-9]+]] ; CHECK: end: @@ -955,11 +955,11 @@ define i8 @n12(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; CHECK-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG350:![0-9]+]] ; CHECK-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG351:![0-9]+]] ; CHECK-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG352:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META335:![0-9]+]], metadata !DIExpression()), !dbg [[DBG348]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS_RES]], metadata [[META336:![0-9]+]], metadata !DIExpression()), !dbg [[DBG349]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META337:![0-9]+]], metadata !DIExpression()), !dbg [[DBG350]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META338:![0-9]+]], metadata !DIExpression()), !dbg [[DBG351]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META339:![0-9]+]], metadata !DIExpression()), !dbg [[DBG352]] +; CHECK-NEXT: #dbg_value(i8 [[IV_RES]], [[META335:![0-9]+]], !DIExpression(), [[DBG348]]) +; CHECK-NEXT: #dbg_value(i8 [[NBITS_RES]], [[META336:![0-9]+]], !DIExpression(), [[DBG349]]) +; CHECK-NEXT: #dbg_value(i8 [[VAL_SHIFTED_RES]], [[META337:![0-9]+]], !DIExpression(), [[DBG350]]) +; CHECK-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META338:![0-9]+]], !DIExpression(), [[DBG351]]) +; 
CHECK-NEXT: #dbg_value(i8 [[IV_NEXT_RES]], [[META339:![0-9]+]], !DIExpression(), [[DBG352]]) ; CHECK-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG353:![0-9]+]] ; CHECK-NEXT: ret i8 [[IV_RES]], !dbg [[DBG354:![0-9]+]] ; @@ -996,17 +996,17 @@ define i8 @n13(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; CHECK-NEXT: br label [[LOOP:%.*]], !dbg [[DBG367:![0-9]+]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_END:%.*]] ], !dbg [[DBG368:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META357:![0-9]+]], metadata !DIExpression()), !dbg [[DBG368]] +; CHECK-NEXT: #dbg_value(i8 [[IV]], [[META357:![0-9]+]], !DIExpression(), [[DBG368]]) ; CHECK-NEXT: br label [[LOOP_END]], !dbg [[DBG369:![0-9]+]] ; CHECK: loop.end: ; CHECK-NEXT: [[NBITS:%.*]] = add nsw i8 [[IV]], [[EXTRAOFFSET:%.*]], !dbg [[DBG370:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS]], metadata [[META358:![0-9]+]], metadata !DIExpression()), !dbg [[DBG370]] +; CHECK-NEXT: #dbg_value(i8 [[NBITS]], [[META358:![0-9]+]], !DIExpression(), [[DBG370]]) ; CHECK-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL:%.*]], [[NBITS]], !dbg [[DBG371:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META359:![0-9]+]], metadata !DIExpression()), !dbg [[DBG371]] +; CHECK-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META359:![0-9]+]], !DIExpression(), [[DBG371]]) ; CHECK-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i8 [[VAL_SHIFTED]], 0, !dbg [[DBG372:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata [[META360:![0-9]+]], metadata !DIExpression()), !dbg [[DBG372]] +; CHECK-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META360:![0-9]+]], !DIExpression(), [[DBG372]]) ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1, !dbg [[DBG373:![0-9]+]] -; 
CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata [[META361:![0-9]+]], metadata !DIExpression()), !dbg [[DBG373]] +; CHECK-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META361:![0-9]+]], !DIExpression(), [[DBG373]]) ; CHECK-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i8 [[IV_NEXT]]), !dbg [[DBG374:![0-9]+]] ; CHECK-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG375:![0-9]+]] ; CHECK: end: @@ -1015,11 +1015,11 @@ define i8 @n13(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; CHECK-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP_END]] ], !dbg [[DBG378:![0-9]+]] ; CHECK-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP_END]] ], !dbg [[DBG379:![0-9]+]] ; CHECK-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP_END]] ], !dbg [[DBG380:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META362:![0-9]+]], metadata !DIExpression()), !dbg [[DBG376]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS_RES]], metadata [[META363:![0-9]+]], metadata !DIExpression()), !dbg [[DBG377]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META364:![0-9]+]], metadata !DIExpression()), !dbg [[DBG378]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META365:![0-9]+]], metadata !DIExpression()), !dbg [[DBG379]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META366:![0-9]+]], metadata !DIExpression()), !dbg [[DBG380]] +; CHECK-NEXT: #dbg_value(i8 [[IV_RES]], [[META362:![0-9]+]], !DIExpression(), [[DBG376]]) +; CHECK-NEXT: #dbg_value(i8 [[NBITS_RES]], [[META363:![0-9]+]], !DIExpression(), [[DBG377]]) +; CHECK-NEXT: #dbg_value(i8 [[VAL_SHIFTED_RES]], [[META364:![0-9]+]], !DIExpression(), [[DBG378]]) +; CHECK-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META365:![0-9]+]], 
!DIExpression(), [[DBG379]]) +; CHECK-NEXT: #dbg_value(i8 [[IV_NEXT_RES]], [[META366:![0-9]+]], !DIExpression(), [[DBG380]]) ; CHECK-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG381:![0-9]+]] ; CHECK-NEXT: ret i8 [[IV_RES]], !dbg [[DBG382:![0-9]+]] ; @@ -1059,15 +1059,15 @@ define i8 @n14(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; CHECK-NEXT: br label [[LOOP:%.*]], !dbg [[DBG395:![0-9]+]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG396:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META385:![0-9]+]], metadata !DIExpression()), !dbg [[DBG396]] +; CHECK-NEXT: #dbg_value(i8 [[IV]], [[META385:![0-9]+]], !DIExpression(), [[DBG396]]) ; CHECK-NEXT: [[NBITS:%.*]] = add nsw i8 [[IV]], [[EXTRAOFFSET:%.*]], !dbg [[DBG397:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS]], metadata [[META386:![0-9]+]], metadata !DIExpression()), !dbg [[DBG397]] +; CHECK-NEXT: #dbg_value(i8 [[NBITS]], [[META386:![0-9]+]], !DIExpression(), [[DBG397]]) ; CHECK-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL:%.*]], [[NBITS]], !dbg [[DBG398:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META387:![0-9]+]], metadata !DIExpression()), !dbg [[DBG398]] +; CHECK-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META387:![0-9]+]], !DIExpression(), [[DBG398]]) ; CHECK-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp ult i8 [[VAL_SHIFTED]], 1, !dbg [[DBG399:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata [[META388:![0-9]+]], metadata !DIExpression()), !dbg [[DBG399]] +; CHECK-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META388:![0-9]+]], !DIExpression(), [[DBG399]]) ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1, !dbg [[DBG400:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 
[[IV_NEXT]], metadata [[META389:![0-9]+]], metadata !DIExpression()), !dbg [[DBG400]] +; CHECK-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META389:![0-9]+]], !DIExpression(), [[DBG400]]) ; CHECK-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i8 [[IV_NEXT]]), !dbg [[DBG401:![0-9]+]] ; CHECK-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG402:![0-9]+]] ; CHECK: end: @@ -1076,11 +1076,11 @@ define i8 @n14(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; CHECK-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG405:![0-9]+]] ; CHECK-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG406:![0-9]+]] ; CHECK-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG407:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META390:![0-9]+]], metadata !DIExpression()), !dbg [[DBG403]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS_RES]], metadata [[META391:![0-9]+]], metadata !DIExpression()), !dbg [[DBG404]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META392:![0-9]+]], metadata !DIExpression()), !dbg [[DBG405]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META393:![0-9]+]], metadata !DIExpression()), !dbg [[DBG406]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META394:![0-9]+]], metadata !DIExpression()), !dbg [[DBG407]] +; CHECK-NEXT: #dbg_value(i8 [[IV_RES]], [[META390:![0-9]+]], !DIExpression(), [[DBG403]]) +; CHECK-NEXT: #dbg_value(i8 [[NBITS_RES]], [[META391:![0-9]+]], !DIExpression(), [[DBG404]]) +; CHECK-NEXT: #dbg_value(i8 [[VAL_SHIFTED_RES]], [[META392:![0-9]+]], !DIExpression(), [[DBG405]]) +; CHECK-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META393:![0-9]+]], !DIExpression(), [[DBG406]]) +; CHECK-NEXT: #dbg_value(i8 
[[IV_NEXT_RES]], [[META394:![0-9]+]], !DIExpression(), [[DBG407]]) ; CHECK-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG408:![0-9]+]] ; CHECK-NEXT: ret i8 [[IV_RES]], !dbg [[DBG409:![0-9]+]] ; @@ -1117,15 +1117,15 @@ define i8 @t15(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; NOLZCNT-NEXT: br label [[LOOP:%.*]], !dbg [[DBG422:![0-9]+]] ; NOLZCNT: loop: ; NOLZCNT-NEXT: [[IV:%.*]] = phi i8 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG423:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META412:![0-9]+]], metadata !DIExpression()), !dbg [[DBG423]] +; NOLZCNT-NEXT: #dbg_value(i8 [[IV]], [[META412:![0-9]+]], !DIExpression(), [[DBG423]]) ; NOLZCNT-NEXT: [[NBITS:%.*]] = add nsw i8 [[EXTRAOFFSET:%.*]], [[IV]], !dbg [[DBG424:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS]], metadata [[META413:![0-9]+]], metadata !DIExpression()), !dbg [[DBG424]] +; NOLZCNT-NEXT: #dbg_value(i8 [[NBITS]], [[META413:![0-9]+]], !DIExpression(), [[DBG424]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL:%.*]], [[NBITS]], !dbg [[DBG425:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META414:![0-9]+]], metadata !DIExpression()), !dbg [[DBG425]] +; NOLZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META414:![0-9]+]], !DIExpression(), [[DBG425]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i8 [[VAL_SHIFTED]], 0, !dbg [[DBG426:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata [[META415:![0-9]+]], metadata !DIExpression()), !dbg [[DBG426]] +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META415:![0-9]+]], !DIExpression(), [[DBG426]]) ; NOLZCNT-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1, !dbg [[DBG427:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata 
[[META416:![0-9]+]], metadata !DIExpression()), !dbg [[DBG427]] +; NOLZCNT-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META416:![0-9]+]], !DIExpression(), [[DBG427]]) ; NOLZCNT-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i8 [[IV_NEXT]]), !dbg [[DBG428:![0-9]+]] ; NOLZCNT-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG429:![0-9]+]] ; NOLZCNT: end: @@ -1134,11 +1134,11 @@ define i8 @t15(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; NOLZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG432:![0-9]+]] ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG433:![0-9]+]] ; NOLZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG434:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META417:![0-9]+]], metadata !DIExpression()), !dbg [[DBG430]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS_RES]], metadata [[META418:![0-9]+]], metadata !DIExpression()), !dbg [[DBG431]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META419:![0-9]+]], metadata !DIExpression()), !dbg [[DBG432]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META420:![0-9]+]], metadata !DIExpression()), !dbg [[DBG433]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META421:![0-9]+]], metadata !DIExpression()), !dbg [[DBG434]] +; NOLZCNT-NEXT: #dbg_value(i8 [[IV_RES]], [[META417:![0-9]+]], !DIExpression(), [[DBG430]]) +; NOLZCNT-NEXT: #dbg_value(i8 [[NBITS_RES]], [[META418:![0-9]+]], !DIExpression(), [[DBG431]]) +; NOLZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED_RES]], [[META419:![0-9]+]], !DIExpression(), [[DBG432]]) +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META420:![0-9]+]], !DIExpression(), [[DBG433]]) +; NOLZCNT-NEXT: 
#dbg_value(i8 [[IV_NEXT_RES]], [[META421:![0-9]+]], !DIExpression(), [[DBG434]]) ; NOLZCNT-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG435:![0-9]+]] ; NOLZCNT-NEXT: ret i8 [[IV_RES]], !dbg [[DBG436:![0-9]+]] ; @@ -1157,14 +1157,14 @@ define i8 @t15(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i8 [[LOOP_IV]], 1, !dbg [[DBG424:![0-9]+]] ; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i8 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG424]] ; LZCNT-NEXT: [[IV:%.*]] = add nsw i8 [[LOOP_IV]], [[START]], !dbg [[DBG424]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META412:![0-9]+]], metadata !DIExpression()), !dbg [[DBG422]] +; LZCNT-NEXT: #dbg_value(i8 [[IV]], [[META412:![0-9]+]], !DIExpression(), [[DBG422]]) ; LZCNT-NEXT: [[NBITS:%.*]] = add nsw i8 [[EXTRAOFFSET]], [[IV]], !dbg [[DBG424]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS]], metadata [[META413:![0-9]+]], metadata !DIExpression()), !dbg [[DBG424]] +; LZCNT-NEXT: #dbg_value(i8 [[NBITS]], [[META413:![0-9]+]], !DIExpression(), [[DBG424]]) ; LZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL]], [[NBITS]], !dbg [[DBG425:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META414:![0-9]+]], metadata !DIExpression()), !dbg [[DBG425]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[LOOP_IVCHECK]], metadata [[META415:![0-9]+]], metadata !DIExpression()), !dbg [[DBG426:![0-9]+]] +; LZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META414:![0-9]+]], !DIExpression(), [[DBG425]]) +; LZCNT-NEXT: #dbg_value(i1 [[LOOP_IVCHECK]], [[META415:![0-9]+]], !DIExpression(), [[META426:![0-9]+]]) ; LZCNT-NEXT: [[IV_NEXT:%.*]] = add i8 [[IV]], 1, !dbg [[DBG427:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata [[META416:![0-9]+]], metadata !DIExpression()), !dbg 
[[DBG427]] +; LZCNT-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META416:![0-9]+]], !DIExpression(), [[DBG427]]) ; LZCNT-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[LOOP_IVCHECK]], i8 [[IV_NEXT]]), !dbg [[DBG428:![0-9]+]] ; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG429:![0-9]+]] ; LZCNT: end: @@ -1173,11 +1173,11 @@ define i8 @t15(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; LZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG432:![0-9]+]] ; LZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[LOOP_IVCHECK]], [[LOOP]] ], !dbg [[DBG433:![0-9]+]] ; LZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG434:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META417:![0-9]+]], metadata !DIExpression()), !dbg [[DBG430]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS_RES]], metadata [[META418:![0-9]+]], metadata !DIExpression()), !dbg [[DBG431]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META419:![0-9]+]], metadata !DIExpression()), !dbg [[DBG432]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META420:![0-9]+]], metadata !DIExpression()), !dbg [[DBG433]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META421:![0-9]+]], metadata !DIExpression()), !dbg [[DBG434]] +; LZCNT-NEXT: #dbg_value(i8 [[IV_RES]], [[META417:![0-9]+]], !DIExpression(), [[DBG430]]) +; LZCNT-NEXT: #dbg_value(i8 [[NBITS_RES]], [[META418:![0-9]+]], !DIExpression(), [[DBG431]]) +; LZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED_RES]], [[META419:![0-9]+]], !DIExpression(), [[DBG432]]) +; LZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META420:![0-9]+]], !DIExpression(), [[DBG433]]) +; LZCNT-NEXT: #dbg_value(i8 [[IV_NEXT_RES]], [[META421:![0-9]+]], !DIExpression(), [[DBG434]]) ; LZCNT-NEXT: call void 
@escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG435:![0-9]+]] ; LZCNT-NEXT: ret i8 [[IV_RES]], !dbg [[DBG436:![0-9]+]] ; @@ -1214,15 +1214,15 @@ define i8 @n16(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; CHECK-NEXT: br label [[LOOP:%.*]], !dbg [[DBG449:![0-9]+]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG450:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META439:![0-9]+]], metadata !DIExpression()), !dbg [[DBG450]] +; CHECK-NEXT: #dbg_value(i8 [[IV]], [[META439:![0-9]+]], !DIExpression(), [[DBG450]]) ; CHECK-NEXT: [[NBITS:%.*]] = sub nsw i8 [[EXTRAOFFSET:%.*]], [[IV]], !dbg [[DBG451:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS]], metadata [[META440:![0-9]+]], metadata !DIExpression()), !dbg [[DBG451]] +; CHECK-NEXT: #dbg_value(i8 [[NBITS]], [[META440:![0-9]+]], !DIExpression(), [[DBG451]]) ; CHECK-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL:%.*]], [[NBITS]], !dbg [[DBG452:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META441:![0-9]+]], metadata !DIExpression()), !dbg [[DBG452]] +; CHECK-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META441:![0-9]+]], !DIExpression(), [[DBG452]]) ; CHECK-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i8 [[VAL_SHIFTED]], 0, !dbg [[DBG453:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata [[META442:![0-9]+]], metadata !DIExpression()), !dbg [[DBG453]] +; CHECK-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META442:![0-9]+]], !DIExpression(), [[DBG453]]) ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1, !dbg [[DBG454:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata [[META443:![0-9]+]], metadata !DIExpression()), !dbg [[DBG454]] +; CHECK-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META443:![0-9]+]], 
!DIExpression(), [[DBG454]]) ; CHECK-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i8 [[IV_NEXT]]), !dbg [[DBG455:![0-9]+]] ; CHECK-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG456:![0-9]+]] ; CHECK: end: @@ -1231,11 +1231,11 @@ define i8 @n16(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; CHECK-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG459:![0-9]+]] ; CHECK-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG460:![0-9]+]] ; CHECK-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG461:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META444:![0-9]+]], metadata !DIExpression()), !dbg [[DBG457]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS_RES]], metadata [[META445:![0-9]+]], metadata !DIExpression()), !dbg [[DBG458]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META446:![0-9]+]], metadata !DIExpression()), !dbg [[DBG459]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META447:![0-9]+]], metadata !DIExpression()), !dbg [[DBG460]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META448:![0-9]+]], metadata !DIExpression()), !dbg [[DBG461]] +; CHECK-NEXT: #dbg_value(i8 [[IV_RES]], [[META444:![0-9]+]], !DIExpression(), [[DBG457]]) +; CHECK-NEXT: #dbg_value(i8 [[NBITS_RES]], [[META445:![0-9]+]], !DIExpression(), [[DBG458]]) +; CHECK-NEXT: #dbg_value(i8 [[VAL_SHIFTED_RES]], [[META446:![0-9]+]], !DIExpression(), [[DBG459]]) +; CHECK-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META447:![0-9]+]], !DIExpression(), [[DBG460]]) +; CHECK-NEXT: #dbg_value(i8 [[IV_NEXT_RES]], [[META448:![0-9]+]], !DIExpression(), [[DBG461]]) ; CHECK-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 
[[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG462:![0-9]+]] ; CHECK-NEXT: ret i8 [[IV_RES]], !dbg [[DBG463:![0-9]+]] ; @@ -1272,17 +1272,17 @@ define i8 @n17(i8 %val, i8 %start) mustprogress { ; CHECK-NEXT: br label [[LOOP:%.*]], !dbg [[DBG477:![0-9]+]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG478:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META466:![0-9]+]], metadata !DIExpression()), !dbg [[DBG478]] +; CHECK-NEXT: #dbg_value(i8 [[IV]], [[META466:![0-9]+]], !DIExpression(), [[DBG478]]) ; CHECK-NEXT: [[EXTRAOFFSET:%.*]] = call i8 @gen.i8(), !dbg [[DBG479:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[EXTRAOFFSET]], metadata [[META467:![0-9]+]], metadata !DIExpression()), !dbg [[DBG479]] +; CHECK-NEXT: #dbg_value(i8 [[EXTRAOFFSET]], [[META467:![0-9]+]], !DIExpression(), [[DBG479]]) ; CHECK-NEXT: [[NBITS:%.*]] = add nsw i8 [[IV]], [[EXTRAOFFSET]], !dbg [[DBG480:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS]], metadata [[META468:![0-9]+]], metadata !DIExpression()), !dbg [[DBG480]] +; CHECK-NEXT: #dbg_value(i8 [[NBITS]], [[META468:![0-9]+]], !DIExpression(), [[DBG480]]) ; CHECK-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL:%.*]], [[NBITS]], !dbg [[DBG481:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META469:![0-9]+]], metadata !DIExpression()), !dbg [[DBG481]] +; CHECK-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META469:![0-9]+]], !DIExpression(), [[DBG481]]) ; CHECK-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i8 [[VAL_SHIFTED]], 0, !dbg [[DBG482:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata [[META470:![0-9]+]], metadata !DIExpression()), !dbg [[DBG482]] +; CHECK-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META470:![0-9]+]], !DIExpression(), [[DBG482]]) ; CHECK-NEXT: [[IV_NEXT]] = add 
i8 [[IV]], 1, !dbg [[DBG483:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata [[META471:![0-9]+]], metadata !DIExpression()), !dbg [[DBG483]] +; CHECK-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META471:![0-9]+]], !DIExpression(), [[DBG483]]) ; CHECK-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i8 [[IV_NEXT]]), !dbg [[DBG484:![0-9]+]] ; CHECK-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG485:![0-9]+]] ; CHECK: end: @@ -1291,11 +1291,11 @@ define i8 @n17(i8 %val, i8 %start) mustprogress { ; CHECK-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG488:![0-9]+]] ; CHECK-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG489:![0-9]+]] ; CHECK-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG490:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META472:![0-9]+]], metadata !DIExpression()), !dbg [[DBG486]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS_RES]], metadata [[META473:![0-9]+]], metadata !DIExpression()), !dbg [[DBG487]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META474:![0-9]+]], metadata !DIExpression()), !dbg [[DBG488]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META475:![0-9]+]], metadata !DIExpression()), !dbg [[DBG489]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META476:![0-9]+]], metadata !DIExpression()), !dbg [[DBG490]] +; CHECK-NEXT: #dbg_value(i8 [[IV_RES]], [[META472:![0-9]+]], !DIExpression(), [[DBG486]]) +; CHECK-NEXT: #dbg_value(i8 [[NBITS_RES]], [[META473:![0-9]+]], !DIExpression(), [[DBG487]]) +; CHECK-NEXT: #dbg_value(i8 [[VAL_SHIFTED_RES]], [[META474:![0-9]+]], !DIExpression(), [[DBG488]]) +; CHECK-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], 
[[META475:![0-9]+]], !DIExpression(), [[DBG489]]) +; CHECK-NEXT: #dbg_value(i8 [[IV_NEXT_RES]], [[META476:![0-9]+]], !DIExpression(), [[DBG490]]) ; CHECK-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG491:![0-9]+]] ; CHECK-NEXT: ret i8 [[IV_RES]], !dbg [[DBG492:![0-9]+]] ; @@ -1333,17 +1333,17 @@ define i8 @n18(i8 %val, i8 %start) mustprogress { ; CHECK-NEXT: br label [[LOOP:%.*]], !dbg [[DBG506:![0-9]+]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG507:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META495:![0-9]+]], metadata !DIExpression()), !dbg [[DBG507]] +; CHECK-NEXT: #dbg_value(i8 [[IV]], [[META495:![0-9]+]], !DIExpression(), [[DBG507]]) ; CHECK-NEXT: [[EXTRAOFFSET:%.*]] = call i8 @gen.i8(), !dbg [[DBG508:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[EXTRAOFFSET]], metadata [[META496:![0-9]+]], metadata !DIExpression()), !dbg [[DBG508]] +; CHECK-NEXT: #dbg_value(i8 [[EXTRAOFFSET]], [[META496:![0-9]+]], !DIExpression(), [[DBG508]]) ; CHECK-NEXT: [[NBITS:%.*]] = sub nsw i8 [[IV]], [[EXTRAOFFSET]], !dbg [[DBG509:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS]], metadata [[META497:![0-9]+]], metadata !DIExpression()), !dbg [[DBG509]] +; CHECK-NEXT: #dbg_value(i8 [[NBITS]], [[META497:![0-9]+]], !DIExpression(), [[DBG509]]) ; CHECK-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL:%.*]], [[NBITS]], !dbg [[DBG510:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META498:![0-9]+]], metadata !DIExpression()), !dbg [[DBG510]] +; CHECK-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META498:![0-9]+]], !DIExpression(), [[DBG510]]) ; CHECK-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i8 [[VAL_SHIFTED]], 0, !dbg [[DBG511:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 
[[VAL_SHIFTED_ISZERO]], metadata [[META499:![0-9]+]], metadata !DIExpression()), !dbg [[DBG511]] +; CHECK-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META499:![0-9]+]], !DIExpression(), [[DBG511]]) ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1, !dbg [[DBG512:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata [[META500:![0-9]+]], metadata !DIExpression()), !dbg [[DBG512]] +; CHECK-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META500:![0-9]+]], !DIExpression(), [[DBG512]]) ; CHECK-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i8 [[IV_NEXT]]), !dbg [[DBG513:![0-9]+]] ; CHECK-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG514:![0-9]+]] ; CHECK: end: @@ -1352,11 +1352,11 @@ define i8 @n18(i8 %val, i8 %start) mustprogress { ; CHECK-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG517:![0-9]+]] ; CHECK-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG518:![0-9]+]] ; CHECK-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG519:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META501:![0-9]+]], metadata !DIExpression()), !dbg [[DBG515]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS_RES]], metadata [[META502:![0-9]+]], metadata !DIExpression()), !dbg [[DBG516]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META503:![0-9]+]], metadata !DIExpression()), !dbg [[DBG517]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META504:![0-9]+]], metadata !DIExpression()), !dbg [[DBG518]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META505:![0-9]+]], metadata !DIExpression()), !dbg [[DBG519]] +; CHECK-NEXT: #dbg_value(i8 [[IV_RES]], [[META501:![0-9]+]], !DIExpression(), [[DBG515]]) +; CHECK-NEXT: #dbg_value(i8 
[[NBITS_RES]], [[META502:![0-9]+]], !DIExpression(), [[DBG516]]) +; CHECK-NEXT: #dbg_value(i8 [[VAL_SHIFTED_RES]], [[META503:![0-9]+]], !DIExpression(), [[DBG517]]) +; CHECK-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META504:![0-9]+]], !DIExpression(), [[DBG518]]) +; CHECK-NEXT: #dbg_value(i8 [[IV_NEXT_RES]], [[META505:![0-9]+]], !DIExpression(), [[DBG519]]) ; CHECK-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG520:![0-9]+]] ; CHECK-NEXT: ret i8 [[IV_RES]], !dbg [[DBG521:![0-9]+]] ; @@ -1394,19 +1394,19 @@ define i8 @n19(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; CHECK-NEXT: br label [[LOOP_PREHEADER:%.*]], !dbg [[DBG535:![0-9]+]] ; CHECK: loop.preheader: ; CHECK-NEXT: [[NOTIV:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], !dbg [[DBG536:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[NOTIV]], metadata [[META524:![0-9]+]], metadata !DIExpression()), !dbg [[DBG536]] +; CHECK-NEXT: #dbg_value(i8 [[NOTIV]], [[META524:![0-9]+]], !DIExpression(), [[DBG536]]) ; CHECK-NEXT: br label [[LOOP:%.*]], !dbg [[DBG537:![0-9]+]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[START:%.*]], [[LOOP_PREHEADER]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG538:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META525:![0-9]+]], metadata !DIExpression()), !dbg [[DBG538]] +; CHECK-NEXT: #dbg_value(i8 [[IV]], [[META525:![0-9]+]], !DIExpression(), [[DBG538]]) ; CHECK-NEXT: [[NBITS:%.*]] = add nsw i8 [[NOTIV]], [[EXTRAOFFSET:%.*]], !dbg [[DBG539:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS]], metadata [[META526:![0-9]+]], metadata !DIExpression()), !dbg [[DBG539]] +; CHECK-NEXT: #dbg_value(i8 [[NBITS]], [[META526:![0-9]+]], !DIExpression(), [[DBG539]]) ; CHECK-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL:%.*]], [[NBITS]], !dbg [[DBG540:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 
[[VAL_SHIFTED]], metadata [[META527:![0-9]+]], metadata !DIExpression()), !dbg [[DBG540]] +; CHECK-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META527:![0-9]+]], !DIExpression(), [[DBG540]]) ; CHECK-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i8 [[VAL_SHIFTED]], 0, !dbg [[DBG541:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata [[META528:![0-9]+]], metadata !DIExpression()), !dbg [[DBG541]] +; CHECK-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META528:![0-9]+]], !DIExpression(), [[DBG541]]) ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1, !dbg [[DBG542:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata [[META529:![0-9]+]], metadata !DIExpression()), !dbg [[DBG542]] +; CHECK-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META529:![0-9]+]], !DIExpression(), [[DBG542]]) ; CHECK-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i8 [[IV_NEXT]]), !dbg [[DBG543:![0-9]+]] ; CHECK-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG544:![0-9]+]] ; CHECK: end: @@ -1415,11 +1415,11 @@ define i8 @n19(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; CHECK-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG547:![0-9]+]] ; CHECK-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG548:![0-9]+]] ; CHECK-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG549:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META530:![0-9]+]], metadata !DIExpression()), !dbg [[DBG545]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS_RES]], metadata [[META531:![0-9]+]], metadata !DIExpression()), !dbg [[DBG546]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META532:![0-9]+]], metadata !DIExpression()), !dbg [[DBG547]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 
[[VAL_SHIFTED_ISZERO_RES]], metadata [[META533:![0-9]+]], metadata !DIExpression()), !dbg [[DBG548]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META534:![0-9]+]], metadata !DIExpression()), !dbg [[DBG549]] +; CHECK-NEXT: #dbg_value(i8 [[IV_RES]], [[META530:![0-9]+]], !DIExpression(), [[DBG545]]) +; CHECK-NEXT: #dbg_value(i8 [[NBITS_RES]], [[META531:![0-9]+]], !DIExpression(), [[DBG546]]) +; CHECK-NEXT: #dbg_value(i8 [[VAL_SHIFTED_RES]], [[META532:![0-9]+]], !DIExpression(), [[DBG547]]) +; CHECK-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META533:![0-9]+]], !DIExpression(), [[DBG548]]) +; CHECK-NEXT: #dbg_value(i8 [[IV_NEXT_RES]], [[META534:![0-9]+]], !DIExpression(), [[DBG549]]) ; CHECK-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG550:![0-9]+]] ; CHECK-NEXT: ret i8 [[IV_RES]], !dbg [[DBG551:![0-9]+]] ; @@ -1460,15 +1460,15 @@ define i8 @n20(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; CHECK-NEXT: br label [[LOOP:%.*]], !dbg [[DBG564:![0-9]+]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = add i8 0, 0, !dbg [[DBG565:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META554:![0-9]+]], metadata !DIExpression()), !dbg [[DBG565]] +; CHECK-NEXT: #dbg_value(i8 [[IV]], [[META554:![0-9]+]], !DIExpression(), [[DBG565]]) ; CHECK-NEXT: [[NBITS:%.*]] = add nsw i8 [[IV]], [[EXTRAOFFSET:%.*]], !dbg [[DBG566:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS]], metadata [[META555:![0-9]+]], metadata !DIExpression()), !dbg [[DBG566]] +; CHECK-NEXT: #dbg_value(i8 [[NBITS]], [[META555:![0-9]+]], !DIExpression(), [[DBG566]]) ; CHECK-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL:%.*]], [[NBITS]], !dbg [[DBG567:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META556:![0-9]+]], metadata !DIExpression()), !dbg [[DBG567]] +; CHECK-NEXT: #dbg_value(i8 
[[VAL_SHIFTED]], [[META556:![0-9]+]], !DIExpression(), [[DBG567]]) ; CHECK-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i8 [[VAL_SHIFTED]], 0, !dbg [[DBG568:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata [[META557:![0-9]+]], metadata !DIExpression()), !dbg [[DBG568]] +; CHECK-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META557:![0-9]+]], !DIExpression(), [[DBG568]]) ; CHECK-NEXT: [[IV_NEXT:%.*]] = add i8 [[IV]], 1, !dbg [[DBG569:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata [[META558:![0-9]+]], metadata !DIExpression()), !dbg [[DBG569]] +; CHECK-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META558:![0-9]+]], !DIExpression(), [[DBG569]]) ; CHECK-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i8 [[IV_NEXT]]), !dbg [[DBG570:![0-9]+]] ; CHECK-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG571:![0-9]+]] ; CHECK: end: @@ -1477,11 +1477,11 @@ define i8 @n20(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; CHECK-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG574:![0-9]+]] ; CHECK-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG575:![0-9]+]] ; CHECK-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG576:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META559:![0-9]+]], metadata !DIExpression()), !dbg [[DBG572]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS_RES]], metadata [[META560:![0-9]+]], metadata !DIExpression()), !dbg [[DBG573]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META561:![0-9]+]], metadata !DIExpression()), !dbg [[DBG574]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META562:![0-9]+]], metadata !DIExpression()), !dbg [[DBG575]] -; CHECK-NEXT: call void 
@llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META563:![0-9]+]], metadata !DIExpression()), !dbg [[DBG576]] +; CHECK-NEXT: #dbg_value(i8 [[IV_RES]], [[META559:![0-9]+]], !DIExpression(), [[DBG572]]) +; CHECK-NEXT: #dbg_value(i8 [[NBITS_RES]], [[META560:![0-9]+]], !DIExpression(), [[DBG573]]) +; CHECK-NEXT: #dbg_value(i8 [[VAL_SHIFTED_RES]], [[META561:![0-9]+]], !DIExpression(), [[DBG574]]) +; CHECK-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META562:![0-9]+]], !DIExpression(), [[DBG575]]) +; CHECK-NEXT: #dbg_value(i8 [[IV_NEXT_RES]], [[META563:![0-9]+]], !DIExpression(), [[DBG576]]) ; CHECK-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG577:![0-9]+]] ; CHECK-NEXT: ret i8 [[IV_RES]], !dbg [[DBG578:![0-9]+]] ; @@ -1518,15 +1518,15 @@ define i8 @n21(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; CHECK-NEXT: br label [[LOOP:%.*]], !dbg [[DBG591:![0-9]+]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG592:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META581:![0-9]+]], metadata !DIExpression()), !dbg [[DBG592]] +; CHECK-NEXT: #dbg_value(i8 [[IV]], [[META581:![0-9]+]], !DIExpression(), [[DBG592]]) ; CHECK-NEXT: [[NBITS:%.*]] = add nsw i8 [[IV]], [[EXTRAOFFSET:%.*]], !dbg [[DBG593:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS]], metadata [[META582:![0-9]+]], metadata !DIExpression()), !dbg [[DBG593]] +; CHECK-NEXT: #dbg_value(i8 [[NBITS]], [[META582:![0-9]+]], !DIExpression(), [[DBG593]]) ; CHECK-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL:%.*]], [[NBITS]], !dbg [[DBG594:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META583:![0-9]+]], metadata !DIExpression()), !dbg [[DBG594]] +; CHECK-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META583:![0-9]+]], !DIExpression(), [[DBG594]]) 
; CHECK-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i8 [[VAL_SHIFTED]], 0, !dbg [[DBG595:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata [[META584:![0-9]+]], metadata !DIExpression()), !dbg [[DBG595]] +; CHECK-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META584:![0-9]+]], !DIExpression(), [[DBG595]]) ; CHECK-NEXT: [[IV_NEXT]] = add i8 0, 1, !dbg [[DBG596:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata [[META585:![0-9]+]], metadata !DIExpression()), !dbg [[DBG596]] +; CHECK-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META585:![0-9]+]], !DIExpression(), [[DBG596]]) ; CHECK-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i8 [[IV_NEXT]]), !dbg [[DBG597:![0-9]+]] ; CHECK-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG598:![0-9]+]] ; CHECK: end: @@ -1535,11 +1535,11 @@ define i8 @n21(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; CHECK-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG601:![0-9]+]] ; CHECK-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG602:![0-9]+]] ; CHECK-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG603:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META586:![0-9]+]], metadata !DIExpression()), !dbg [[DBG599]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS_RES]], metadata [[META587:![0-9]+]], metadata !DIExpression()), !dbg [[DBG600]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META588:![0-9]+]], metadata !DIExpression()), !dbg [[DBG601]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META589:![0-9]+]], metadata !DIExpression()), !dbg [[DBG602]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META590:![0-9]+]], 
metadata !DIExpression()), !dbg [[DBG603]] +; CHECK-NEXT: #dbg_value(i8 [[IV_RES]], [[META586:![0-9]+]], !DIExpression(), [[DBG599]]) +; CHECK-NEXT: #dbg_value(i8 [[NBITS_RES]], [[META587:![0-9]+]], !DIExpression(), [[DBG600]]) +; CHECK-NEXT: #dbg_value(i8 [[VAL_SHIFTED_RES]], [[META588:![0-9]+]], !DIExpression(), [[DBG601]]) +; CHECK-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META589:![0-9]+]], !DIExpression(), [[DBG602]]) +; CHECK-NEXT: #dbg_value(i8 [[IV_NEXT_RES]], [[META590:![0-9]+]], !DIExpression(), [[DBG603]]) ; CHECK-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG604:![0-9]+]] ; CHECK-NEXT: ret i8 [[IV_RES]], !dbg [[DBG605:![0-9]+]] ; @@ -1576,19 +1576,19 @@ define i8 @n22(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; NOLZCNT-NEXT: br label [[LOOP:%.*]], !dbg [[DBG620:![0-9]+]] ; NOLZCNT: loop: ; NOLZCNT-NEXT: [[IV:%.*]] = phi i8 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG621:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META608:![0-9]+]], metadata !DIExpression()), !dbg [[DBG621]] +; NOLZCNT-NEXT: #dbg_value(i8 [[IV]], [[META608:![0-9]+]], !DIExpression(), [[DBG621]]) ; NOLZCNT-NEXT: [[NBITS:%.*]] = add nsw i8 [[IV]], [[EXTRAOFFSET:%.*]], !dbg [[DBG622:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS]], metadata [[META609:![0-9]+]], metadata !DIExpression()), !dbg [[DBG622]] +; NOLZCNT-NEXT: #dbg_value(i8 [[NBITS]], [[META609:![0-9]+]], !DIExpression(), [[DBG622]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL:%.*]], [[NBITS]], !dbg [[DBG623:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META610:![0-9]+]], metadata !DIExpression()), !dbg [[DBG623]] +; NOLZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META610:![0-9]+]], !DIExpression(), [[DBG623]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq 
i8 [[VAL_SHIFTED]], 0, !dbg [[DBG624:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata [[META611:![0-9]+]], metadata !DIExpression()), !dbg [[DBG624]] +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META611:![0-9]+]], !DIExpression(), [[DBG624]]) ; NOLZCNT-NEXT: [[NOT_IV_NEXT:%.*]] = add i8 [[IV]], 1, !dbg [[DBG625:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NOT_IV_NEXT]], metadata [[META612:![0-9]+]], metadata !DIExpression()), !dbg [[DBG625]] +; NOLZCNT-NEXT: #dbg_value(i8 [[NOT_IV_NEXT]], [[META612:![0-9]+]], !DIExpression(), [[DBG625]]) ; NOLZCNT-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1, !dbg [[DBG626:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata [[META613:![0-9]+]], metadata !DIExpression()), !dbg [[DBG626]] +; NOLZCNT-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META613:![0-9]+]], !DIExpression(), [[DBG626]]) ; NOLZCNT-NEXT: [[ALSO_IV_NEXT:%.*]] = add i8 [[IV]], 1, !dbg [[DBG627:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[ALSO_IV_NEXT]], metadata [[META614:![0-9]+]], metadata !DIExpression()), !dbg [[DBG627]] +; NOLZCNT-NEXT: #dbg_value(i8 [[ALSO_IV_NEXT]], [[META614:![0-9]+]], !DIExpression(), [[DBG627]]) ; NOLZCNT-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i8 [[IV_NEXT]]), !dbg [[DBG628:![0-9]+]] ; NOLZCNT-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG629:![0-9]+]] ; NOLZCNT: end: @@ -1597,11 +1597,11 @@ define i8 @n22(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; NOLZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG632:![0-9]+]] ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG633:![0-9]+]] ; NOLZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG634:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 
[[IV_RES]], metadata [[META615:![0-9]+]], metadata !DIExpression()), !dbg [[DBG630]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS_RES]], metadata [[META616:![0-9]+]], metadata !DIExpression()), !dbg [[DBG631]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META617:![0-9]+]], metadata !DIExpression()), !dbg [[DBG632]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META618:![0-9]+]], metadata !DIExpression()), !dbg [[DBG633]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META619:![0-9]+]], metadata !DIExpression()), !dbg [[DBG634]] +; NOLZCNT-NEXT: #dbg_value(i8 [[IV_RES]], [[META615:![0-9]+]], !DIExpression(), [[DBG630]]) +; NOLZCNT-NEXT: #dbg_value(i8 [[NBITS_RES]], [[META616:![0-9]+]], !DIExpression(), [[DBG631]]) +; NOLZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED_RES]], [[META617:![0-9]+]], !DIExpression(), [[DBG632]]) +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META618:![0-9]+]], !DIExpression(), [[DBG633]]) +; NOLZCNT-NEXT: #dbg_value(i8 [[IV_NEXT_RES]], [[META619:![0-9]+]], !DIExpression(), [[DBG634]]) ; NOLZCNT-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG635:![0-9]+]] ; NOLZCNT-NEXT: ret i8 [[IV_RES]], !dbg [[DBG636:![0-9]+]] ; @@ -1620,18 +1620,18 @@ define i8 @n22(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i8 [[LOOP_IV]], 1, !dbg [[DBG622:![0-9]+]] ; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i8 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG622]] ; LZCNT-NEXT: [[IV:%.*]] = add nsw i8 [[LOOP_IV]], [[START]], !dbg [[DBG622]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META608:![0-9]+]], metadata !DIExpression()), !dbg [[DBG620]] +; LZCNT-NEXT: #dbg_value(i8 [[IV]], [[META608:![0-9]+]], !DIExpression(), [[DBG620]]) ; 
LZCNT-NEXT: [[NBITS:%.*]] = add nsw i8 [[IV]], [[EXTRAOFFSET]], !dbg [[DBG622]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS]], metadata [[META609:![0-9]+]], metadata !DIExpression()), !dbg [[DBG622]] +; LZCNT-NEXT: #dbg_value(i8 [[NBITS]], [[META609:![0-9]+]], !DIExpression(), [[DBG622]]) ; LZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL]], [[NBITS]], !dbg [[DBG623:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META610:![0-9]+]], metadata !DIExpression()), !dbg [[DBG623]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[LOOP_IVCHECK]], metadata [[META611:![0-9]+]], metadata !DIExpression()), !dbg [[DBG624:![0-9]+]] +; LZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META610:![0-9]+]], !DIExpression(), [[DBG623]]) +; LZCNT-NEXT: #dbg_value(i1 [[LOOP_IVCHECK]], [[META611:![0-9]+]], !DIExpression(), [[META624:![0-9]+]]) ; LZCNT-NEXT: [[NOT_IV_NEXT:%.*]] = add i8 [[IV]], 1, !dbg [[DBG625:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NOT_IV_NEXT]], metadata [[META612:![0-9]+]], metadata !DIExpression()), !dbg [[DBG625]] +; LZCNT-NEXT: #dbg_value(i8 [[NOT_IV_NEXT]], [[META612:![0-9]+]], !DIExpression(), [[DBG625]]) ; LZCNT-NEXT: [[IV_NEXT:%.*]] = add i8 [[IV]], 1, !dbg [[DBG626:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata [[META613:![0-9]+]], metadata !DIExpression()), !dbg [[DBG626]] +; LZCNT-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META613:![0-9]+]], !DIExpression(), [[DBG626]]) ; LZCNT-NEXT: [[ALSO_IV_NEXT:%.*]] = add i8 [[IV]], 1, !dbg [[DBG627:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[ALSO_IV_NEXT]], metadata [[META614:![0-9]+]], metadata !DIExpression()), !dbg [[DBG627]] +; LZCNT-NEXT: #dbg_value(i8 [[ALSO_IV_NEXT]], [[META614:![0-9]+]], !DIExpression(), [[DBG627]]) ; LZCNT-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[LOOP_IVCHECK]], i8 [[IV_NEXT]]), !dbg [[DBG628:![0-9]+]] ; LZCNT-NEXT: br i1 
[[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG629:![0-9]+]] ; LZCNT: end: @@ -1640,11 +1640,11 @@ define i8 @n22(i8 %val, i8 %start, i8 %extraoffset) mustprogress { ; LZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG632:![0-9]+]] ; LZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[LOOP_IVCHECK]], [[LOOP]] ], !dbg [[DBG633:![0-9]+]] ; LZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG634:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META615:![0-9]+]], metadata !DIExpression()), !dbg [[DBG630]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS_RES]], metadata [[META616:![0-9]+]], metadata !DIExpression()), !dbg [[DBG631]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META617:![0-9]+]], metadata !DIExpression()), !dbg [[DBG632]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META618:![0-9]+]], metadata !DIExpression()), !dbg [[DBG633]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META619:![0-9]+]], metadata !DIExpression()), !dbg [[DBG634]] +; LZCNT-NEXT: #dbg_value(i8 [[IV_RES]], [[META615:![0-9]+]], !DIExpression(), [[DBG630]]) +; LZCNT-NEXT: #dbg_value(i8 [[NBITS_RES]], [[META616:![0-9]+]], !DIExpression(), [[DBG631]]) +; LZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED_RES]], [[META617:![0-9]+]], !DIExpression(), [[DBG632]]) +; LZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META618:![0-9]+]], !DIExpression(), [[DBG633]]) +; LZCNT-NEXT: #dbg_value(i8 [[IV_NEXT_RES]], [[META619:![0-9]+]], !DIExpression(), [[DBG634]]) ; LZCNT-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG635:![0-9]+]] ; LZCNT-NEXT: ret i8 [[IV_RES]], !dbg [[DBG636:![0-9]+]] ; @@ -1682,17 +1682,17 @@ define i8 @n23(i8 %start, i8 %extraoffset) 
mustprogress { ; CHECK-NEXT: br label [[LOOP:%.*]], !dbg [[DBG650:![0-9]+]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG651:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META639:![0-9]+]], metadata !DIExpression()), !dbg [[DBG651]] +; CHECK-NEXT: #dbg_value(i8 [[IV]], [[META639:![0-9]+]], !DIExpression(), [[DBG651]]) ; CHECK-NEXT: [[NBITS:%.*]] = add nsw i8 [[IV]], [[EXTRAOFFSET:%.*]], !dbg [[DBG652:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS]], metadata [[META640:![0-9]+]], metadata !DIExpression()), !dbg [[DBG652]] +; CHECK-NEXT: #dbg_value(i8 [[NBITS]], [[META640:![0-9]+]], !DIExpression(), [[DBG652]]) ; CHECK-NEXT: [[VAL:%.*]] = call i8 @gen.i8(), !dbg [[DBG653:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL]], metadata [[META641:![0-9]+]], metadata !DIExpression()), !dbg [[DBG653]] +; CHECK-NEXT: #dbg_value(i8 [[VAL]], [[META641:![0-9]+]], !DIExpression(), [[DBG653]]) ; CHECK-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL]], [[NBITS]], !dbg [[DBG654:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META642:![0-9]+]], metadata !DIExpression()), !dbg [[DBG654]] +; CHECK-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META642:![0-9]+]], !DIExpression(), [[DBG654]]) ; CHECK-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i8 [[VAL_SHIFTED]], 0, !dbg [[DBG655:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata [[META643:![0-9]+]], metadata !DIExpression()), !dbg [[DBG655]] +; CHECK-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META643:![0-9]+]], !DIExpression(), [[DBG655]]) ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1, !dbg [[DBG656:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata [[META644:![0-9]+]], metadata !DIExpression()), !dbg [[DBG656]] +; CHECK-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META644:![0-9]+]], 
!DIExpression(), [[DBG656]]) ; CHECK-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i8 [[IV_NEXT]]), !dbg [[DBG657:![0-9]+]] ; CHECK-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG658:![0-9]+]] ; CHECK: end: @@ -1701,11 +1701,11 @@ define i8 @n23(i8 %start, i8 %extraoffset) mustprogress { ; CHECK-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG661:![0-9]+]] ; CHECK-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG662:![0-9]+]] ; CHECK-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG663:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META645:![0-9]+]], metadata !DIExpression()), !dbg [[DBG659]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS_RES]], metadata [[META646:![0-9]+]], metadata !DIExpression()), !dbg [[DBG660]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META647:![0-9]+]], metadata !DIExpression()), !dbg [[DBG661]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META648:![0-9]+]], metadata !DIExpression()), !dbg [[DBG662]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META649:![0-9]+]], metadata !DIExpression()), !dbg [[DBG663]] +; CHECK-NEXT: #dbg_value(i8 [[IV_RES]], [[META645:![0-9]+]], !DIExpression(), [[DBG659]]) +; CHECK-NEXT: #dbg_value(i8 [[NBITS_RES]], [[META646:![0-9]+]], !DIExpression(), [[DBG660]]) +; CHECK-NEXT: #dbg_value(i8 [[VAL_SHIFTED_RES]], [[META647:![0-9]+]], !DIExpression(), [[DBG661]]) +; CHECK-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META648:![0-9]+]], !DIExpression(), [[DBG662]]) +; CHECK-NEXT: #dbg_value(i8 [[IV_NEXT_RES]], [[META649:![0-9]+]], !DIExpression(), [[DBG663]]) ; CHECK-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 
[[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG664:![0-9]+]] ; CHECK-NEXT: ret i8 [[IV_RES]], !dbg [[DBG665:![0-9]+]] ; @@ -1750,13 +1750,13 @@ define i1 @t24_nooffset_i1(i1 %val, i1 %start) mustprogress { ; NOLZCNT-NEXT: br label [[LOOP:%.*]], !dbg [[DBG676:![0-9]+]] ; NOLZCNT: loop: ; NOLZCNT-NEXT: [[IV:%.*]] = phi i1 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG677:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[IV]], metadata [[META668:![0-9]+]], metadata !DIExpression()), !dbg [[DBG677]] +; NOLZCNT-NEXT: #dbg_value(i1 [[IV]], [[META668:![0-9]+]], !DIExpression(), [[DBG677]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i1 [[VAL:%.*]], [[IV]], !dbg [[DBG678:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED]], metadata [[META669:![0-9]+]], metadata !DIExpression()), !dbg [[DBG678]] +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED]], [[META669:![0-9]+]], !DIExpression(), [[DBG678]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i1 [[VAL_SHIFTED]], false, !dbg [[DBG679:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata [[META670:![0-9]+]], metadata !DIExpression()), !dbg [[DBG679]] +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META670:![0-9]+]], !DIExpression(), [[DBG679]]) ; NOLZCNT-NEXT: [[IV_NEXT]] = add i1 [[IV]], true, !dbg [[DBG680:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[IV_NEXT]], metadata [[META671:![0-9]+]], metadata !DIExpression()), !dbg [[DBG680]] +; NOLZCNT-NEXT: #dbg_value(i1 [[IV_NEXT]], [[META671:![0-9]+]], !DIExpression(), [[DBG680]]) ; NOLZCNT-NEXT: call void @escape_inner.i1(i1 [[IV]], i1 [[IV]], i1 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i1 [[IV_NEXT]]), !dbg [[DBG681:![0-9]+]] ; NOLZCNT-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG682:![0-9]+]] ; NOLZCNT: end: @@ -1764,10 +1764,10 @@ define i1 @t24_nooffset_i1(i1 %val, i1 %start) 
mustprogress { ; NOLZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i1 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG684:![0-9]+]] ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG685:![0-9]+]] ; NOLZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i1 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG686:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[IV_RES]], metadata [[META672:![0-9]+]], metadata !DIExpression()), !dbg [[DBG683]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_RES]], metadata [[META673:![0-9]+]], metadata !DIExpression()), !dbg [[DBG684]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META674:![0-9]+]], metadata !DIExpression()), !dbg [[DBG685]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[IV_NEXT_RES]], metadata [[META675:![0-9]+]], metadata !DIExpression()), !dbg [[DBG686]] +; NOLZCNT-NEXT: #dbg_value(i1 [[IV_RES]], [[META672:![0-9]+]], !DIExpression(), [[DBG683]]) +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_RES]], [[META673:![0-9]+]], !DIExpression(), [[DBG684]]) +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META674:![0-9]+]], !DIExpression(), [[DBG685]]) +; NOLZCNT-NEXT: #dbg_value(i1 [[IV_NEXT_RES]], [[META675:![0-9]+]], !DIExpression(), [[DBG686]]) ; NOLZCNT-NEXT: call void @escape_outer.i1(i1 [[IV_RES]], i1 [[IV_RES]], i1 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i1 [[IV_NEXT_RES]]), !dbg [[DBG687:![0-9]+]] ; NOLZCNT-NEXT: ret i1 [[IV_RES]], !dbg [[DBG688:![0-9]+]] ; @@ -1785,12 +1785,12 @@ define i1 @t24_nooffset_i1(i1 %val, i1 %start) mustprogress { ; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i1 [[LOOP_IV]], true, !dbg [[DBG678:![0-9]+]] ; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i1 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG678]] ; LZCNT-NEXT: [[IV:%.*]] = add nsw i1 [[LOOP_IV]], [[START]], !dbg [[DBG678]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[IV]], metadata 
[[META668:![0-9]+]], metadata !DIExpression()), !dbg [[DBG676]] +; LZCNT-NEXT: #dbg_value(i1 [[IV]], [[META668:![0-9]+]], !DIExpression(), [[DBG676]]) ; LZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i1 [[VAL]], [[IV]], !dbg [[DBG678]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED]], metadata [[META669:![0-9]+]], metadata !DIExpression()), !dbg [[DBG678]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[LOOP_IVCHECK]], metadata [[META670:![0-9]+]], metadata !DIExpression()), !dbg [[DBG679:![0-9]+]] +; LZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED]], [[META669:![0-9]+]], !DIExpression(), [[DBG678]]) +; LZCNT-NEXT: #dbg_value(i1 [[LOOP_IVCHECK]], [[META670:![0-9]+]], !DIExpression(), [[META679:![0-9]+]]) ; LZCNT-NEXT: [[IV_NEXT:%.*]] = add i1 [[IV]], true, !dbg [[DBG680:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[IV_NEXT]], metadata [[META671:![0-9]+]], metadata !DIExpression()), !dbg [[DBG680]] +; LZCNT-NEXT: #dbg_value(i1 [[IV_NEXT]], [[META671:![0-9]+]], !DIExpression(), [[DBG680]]) ; LZCNT-NEXT: call void @escape_inner.i1(i1 [[IV]], i1 [[IV]], i1 [[VAL_SHIFTED]], i1 [[LOOP_IVCHECK]], i1 [[IV_NEXT]]), !dbg [[DBG681:![0-9]+]] ; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG682:![0-9]+]] ; LZCNT: end: @@ -1798,10 +1798,10 @@ define i1 @t24_nooffset_i1(i1 %val, i1 %start) mustprogress { ; LZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i1 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG684:![0-9]+]] ; LZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[LOOP_IVCHECK]], [[LOOP]] ], !dbg [[DBG685:![0-9]+]] ; LZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i1 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG686:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[IV_RES]], metadata [[META672:![0-9]+]], metadata !DIExpression()), !dbg [[DBG683]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_RES]], metadata [[META673:![0-9]+]], metadata !DIExpression()), !dbg [[DBG684]] -; LZCNT-NEXT: call void 
@llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META674:![0-9]+]], metadata !DIExpression()), !dbg [[DBG685]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[IV_NEXT_RES]], metadata [[META675:![0-9]+]], metadata !DIExpression()), !dbg [[DBG686]] +; LZCNT-NEXT: #dbg_value(i1 [[IV_RES]], [[META672:![0-9]+]], !DIExpression(), [[DBG683]]) +; LZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_RES]], [[META673:![0-9]+]], !DIExpression(), [[DBG684]]) +; LZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META674:![0-9]+]], !DIExpression(), [[DBG685]]) +; LZCNT-NEXT: #dbg_value(i1 [[IV_NEXT_RES]], [[META675:![0-9]+]], !DIExpression(), [[DBG686]]) ; LZCNT-NEXT: call void @escape_outer.i1(i1 [[IV_RES]], i1 [[IV_RES]], i1 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i1 [[IV_NEXT_RES]]), !dbg [[DBG687:![0-9]+]] ; LZCNT-NEXT: ret i1 [[IV_RES]], !dbg [[DBG688:![0-9]+]] ; @@ -1834,13 +1834,13 @@ define i2 @t25_nooffset_i2(i2 %val, i2 %start) mustprogress { ; NOLZCNT-NEXT: br label [[LOOP:%.*]], !dbg [[DBG699:![0-9]+]] ; NOLZCNT: loop: ; NOLZCNT-NEXT: [[IV:%.*]] = phi i2 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG700:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[IV]], metadata [[META691:![0-9]+]], metadata !DIExpression()), !dbg [[DBG700]] +; NOLZCNT-NEXT: #dbg_value(i2 [[IV]], [[META691:![0-9]+]], !DIExpression(), [[DBG700]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i2 [[VAL:%.*]], [[IV]], !dbg [[DBG701:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[VAL_SHIFTED]], metadata [[META692:![0-9]+]], metadata !DIExpression()), !dbg [[DBG701]] +; NOLZCNT-NEXT: #dbg_value(i2 [[VAL_SHIFTED]], [[META692:![0-9]+]], !DIExpression(), [[DBG701]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i2 [[VAL_SHIFTED]], 0, !dbg [[DBG702:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata [[META693:![0-9]+]], metadata !DIExpression()), !dbg [[DBG702]] 
+; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META693:![0-9]+]], !DIExpression(), [[DBG702]]) ; NOLZCNT-NEXT: [[IV_NEXT]] = add i2 [[IV]], 1, !dbg [[DBG703:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[IV_NEXT]], metadata [[META694:![0-9]+]], metadata !DIExpression()), !dbg [[DBG703]] +; NOLZCNT-NEXT: #dbg_value(i2 [[IV_NEXT]], [[META694:![0-9]+]], !DIExpression(), [[DBG703]]) ; NOLZCNT-NEXT: call void @escape_inner.i2(i2 [[IV]], i2 [[IV]], i2 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i2 [[IV_NEXT]]), !dbg [[DBG704:![0-9]+]] ; NOLZCNT-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG705:![0-9]+]] ; NOLZCNT: end: @@ -1848,10 +1848,10 @@ define i2 @t25_nooffset_i2(i2 %val, i2 %start) mustprogress { ; NOLZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i2 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG707:![0-9]+]] ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG708:![0-9]+]] ; NOLZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i2 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG709:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[IV_RES]], metadata [[META695:![0-9]+]], metadata !DIExpression()), !dbg [[DBG706]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[VAL_SHIFTED_RES]], metadata [[META696:![0-9]+]], metadata !DIExpression()), !dbg [[DBG707]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META697:![0-9]+]], metadata !DIExpression()), !dbg [[DBG708]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[IV_NEXT_RES]], metadata [[META698:![0-9]+]], metadata !DIExpression()), !dbg [[DBG709]] +; NOLZCNT-NEXT: #dbg_value(i2 [[IV_RES]], [[META695:![0-9]+]], !DIExpression(), [[DBG706]]) +; NOLZCNT-NEXT: #dbg_value(i2 [[VAL_SHIFTED_RES]], [[META696:![0-9]+]], !DIExpression(), [[DBG707]]) +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META697:![0-9]+]], !DIExpression(), [[DBG708]]) +; 
NOLZCNT-NEXT: #dbg_value(i2 [[IV_NEXT_RES]], [[META698:![0-9]+]], !DIExpression(), [[DBG709]]) ; NOLZCNT-NEXT: call void @escape_outer.i2(i2 [[IV_RES]], i2 [[IV_RES]], i2 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i2 [[IV_NEXT_RES]]), !dbg [[DBG710:![0-9]+]] ; NOLZCNT-NEXT: ret i2 [[IV_RES]], !dbg [[DBG711:![0-9]+]] ; @@ -1869,12 +1869,12 @@ define i2 @t25_nooffset_i2(i2 %val, i2 %start) mustprogress { ; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw i2 [[LOOP_IV]], 1, !dbg [[DBG701:![0-9]+]] ; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i2 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG701]] ; LZCNT-NEXT: [[IV:%.*]] = add nsw i2 [[LOOP_IV]], [[START]], !dbg [[DBG701]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[IV]], metadata [[META691:![0-9]+]], metadata !DIExpression()), !dbg [[DBG699]] +; LZCNT-NEXT: #dbg_value(i2 [[IV]], [[META691:![0-9]+]], !DIExpression(), [[DBG699]]) ; LZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i2 [[VAL]], [[IV]], !dbg [[DBG701]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[VAL_SHIFTED]], metadata [[META692:![0-9]+]], metadata !DIExpression()), !dbg [[DBG701]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[LOOP_IVCHECK]], metadata [[META693:![0-9]+]], metadata !DIExpression()), !dbg [[DBG702:![0-9]+]] +; LZCNT-NEXT: #dbg_value(i2 [[VAL_SHIFTED]], [[META692:![0-9]+]], !DIExpression(), [[DBG701]]) +; LZCNT-NEXT: #dbg_value(i1 [[LOOP_IVCHECK]], [[META693:![0-9]+]], !DIExpression(), [[META702:![0-9]+]]) ; LZCNT-NEXT: [[IV_NEXT:%.*]] = add i2 [[IV]], 1, !dbg [[DBG703:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[IV_NEXT]], metadata [[META694:![0-9]+]], metadata !DIExpression()), !dbg [[DBG703]] +; LZCNT-NEXT: #dbg_value(i2 [[IV_NEXT]], [[META694:![0-9]+]], !DIExpression(), [[DBG703]]) ; LZCNT-NEXT: call void @escape_inner.i2(i2 [[IV]], i2 [[IV]], i2 [[VAL_SHIFTED]], i1 [[LOOP_IVCHECK]], i2 [[IV_NEXT]]), !dbg [[DBG704:![0-9]+]] ; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], 
!dbg [[DBG705:![0-9]+]] ; LZCNT: end: @@ -1882,10 +1882,10 @@ define i2 @t25_nooffset_i2(i2 %val, i2 %start) mustprogress { ; LZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i2 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG707:![0-9]+]] ; LZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[LOOP_IVCHECK]], [[LOOP]] ], !dbg [[DBG708:![0-9]+]] ; LZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i2 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG709:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[IV_RES]], metadata [[META695:![0-9]+]], metadata !DIExpression()), !dbg [[DBG706]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[VAL_SHIFTED_RES]], metadata [[META696:![0-9]+]], metadata !DIExpression()), !dbg [[DBG707]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META697:![0-9]+]], metadata !DIExpression()), !dbg [[DBG708]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[IV_NEXT_RES]], metadata [[META698:![0-9]+]], metadata !DIExpression()), !dbg [[DBG709]] +; LZCNT-NEXT: #dbg_value(i2 [[IV_RES]], [[META695:![0-9]+]], !DIExpression(), [[DBG706]]) +; LZCNT-NEXT: #dbg_value(i2 [[VAL_SHIFTED_RES]], [[META696:![0-9]+]], !DIExpression(), [[DBG707]]) +; LZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META697:![0-9]+]], !DIExpression(), [[DBG708]]) +; LZCNT-NEXT: #dbg_value(i2 [[IV_NEXT_RES]], [[META698:![0-9]+]], !DIExpression(), [[DBG709]]) ; LZCNT-NEXT: call void @escape_outer.i2(i2 [[IV_RES]], i2 [[IV_RES]], i2 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i2 [[IV_NEXT_RES]]), !dbg [[DBG710:![0-9]+]] ; LZCNT-NEXT: ret i2 [[IV_RES]], !dbg [[DBG711:![0-9]+]] ; @@ -1918,13 +1918,13 @@ define i3 @t26_nooffset_i3(i3 %val, i3 %start) mustprogress { ; NOLZCNT-NEXT: br label [[LOOP:%.*]], !dbg [[DBG722:![0-9]+]] ; NOLZCNT: loop: ; NOLZCNT-NEXT: [[IV:%.*]] = phi i3 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG723:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[IV]], metadata 
[[META714:![0-9]+]], metadata !DIExpression()), !dbg [[DBG723]] +; NOLZCNT-NEXT: #dbg_value(i3 [[IV]], [[META714:![0-9]+]], !DIExpression(), [[DBG723]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i3 [[VAL:%.*]], [[IV]], !dbg [[DBG724:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[VAL_SHIFTED]], metadata [[META715:![0-9]+]], metadata !DIExpression()), !dbg [[DBG724]] +; NOLZCNT-NEXT: #dbg_value(i3 [[VAL_SHIFTED]], [[META715:![0-9]+]], !DIExpression(), [[DBG724]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i3 [[VAL_SHIFTED]], 0, !dbg [[DBG725:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata [[META716:![0-9]+]], metadata !DIExpression()), !dbg [[DBG725]] +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META716:![0-9]+]], !DIExpression(), [[DBG725]]) ; NOLZCNT-NEXT: [[IV_NEXT]] = add i3 [[IV]], 1, !dbg [[DBG726:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[IV_NEXT]], metadata [[META717:![0-9]+]], metadata !DIExpression()), !dbg [[DBG726]] +; NOLZCNT-NEXT: #dbg_value(i3 [[IV_NEXT]], [[META717:![0-9]+]], !DIExpression(), [[DBG726]]) ; NOLZCNT-NEXT: call void @escape_inner.i3(i3 [[IV]], i3 [[IV]], i3 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i3 [[IV_NEXT]]), !dbg [[DBG727:![0-9]+]] ; NOLZCNT-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG728:![0-9]+]] ; NOLZCNT: end: @@ -1932,10 +1932,10 @@ define i3 @t26_nooffset_i3(i3 %val, i3 %start) mustprogress { ; NOLZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i3 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG730:![0-9]+]] ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG731:![0-9]+]] ; NOLZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i3 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG732:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[IV_RES]], metadata [[META718:![0-9]+]], metadata !DIExpression()), !dbg [[DBG729]] -; NOLZCNT-NEXT: call void 
@llvm.dbg.value(metadata i3 [[VAL_SHIFTED_RES]], metadata [[META719:![0-9]+]], metadata !DIExpression()), !dbg [[DBG730]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META720:![0-9]+]], metadata !DIExpression()), !dbg [[DBG731]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[IV_NEXT_RES]], metadata [[META721:![0-9]+]], metadata !DIExpression()), !dbg [[DBG732]] +; NOLZCNT-NEXT: #dbg_value(i3 [[IV_RES]], [[META718:![0-9]+]], !DIExpression(), [[DBG729]]) +; NOLZCNT-NEXT: #dbg_value(i3 [[VAL_SHIFTED_RES]], [[META719:![0-9]+]], !DIExpression(), [[DBG730]]) +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META720:![0-9]+]], !DIExpression(), [[DBG731]]) +; NOLZCNT-NEXT: #dbg_value(i3 [[IV_NEXT_RES]], [[META721:![0-9]+]], !DIExpression(), [[DBG732]]) ; NOLZCNT-NEXT: call void @escape_outer.i3(i3 [[IV_RES]], i3 [[IV_RES]], i3 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i3 [[IV_NEXT_RES]]), !dbg [[DBG733:![0-9]+]] ; NOLZCNT-NEXT: ret i3 [[IV_RES]], !dbg [[DBG734:![0-9]+]] ; @@ -1953,12 +1953,12 @@ define i3 @t26_nooffset_i3(i3 %val, i3 %start) mustprogress { ; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i3 [[LOOP_IV]], 1, !dbg [[DBG724:![0-9]+]] ; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i3 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG724]] ; LZCNT-NEXT: [[IV:%.*]] = add nsw i3 [[LOOP_IV]], [[START]], !dbg [[DBG724]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[IV]], metadata [[META714:![0-9]+]], metadata !DIExpression()), !dbg [[DBG722]] +; LZCNT-NEXT: #dbg_value(i3 [[IV]], [[META714:![0-9]+]], !DIExpression(), [[DBG722]]) ; LZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i3 [[VAL]], [[IV]], !dbg [[DBG724]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[VAL_SHIFTED]], metadata [[META715:![0-9]+]], metadata !DIExpression()), !dbg [[DBG724]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[LOOP_IVCHECK]], metadata [[META716:![0-9]+]], metadata !DIExpression()), !dbg 
[[DBG725:![0-9]+]] +; LZCNT-NEXT: #dbg_value(i3 [[VAL_SHIFTED]], [[META715:![0-9]+]], !DIExpression(), [[DBG724]]) +; LZCNT-NEXT: #dbg_value(i1 [[LOOP_IVCHECK]], [[META716:![0-9]+]], !DIExpression(), [[META725:![0-9]+]]) ; LZCNT-NEXT: [[IV_NEXT:%.*]] = add i3 [[IV]], 1, !dbg [[DBG726:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[IV_NEXT]], metadata [[META717:![0-9]+]], metadata !DIExpression()), !dbg [[DBG726]] +; LZCNT-NEXT: #dbg_value(i3 [[IV_NEXT]], [[META717:![0-9]+]], !DIExpression(), [[DBG726]]) ; LZCNT-NEXT: call void @escape_inner.i3(i3 [[IV]], i3 [[IV]], i3 [[VAL_SHIFTED]], i1 [[LOOP_IVCHECK]], i3 [[IV_NEXT]]), !dbg [[DBG727:![0-9]+]] ; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG728:![0-9]+]] ; LZCNT: end: @@ -1966,10 +1966,10 @@ define i3 @t26_nooffset_i3(i3 %val, i3 %start) mustprogress { ; LZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i3 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG730:![0-9]+]] ; LZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[LOOP_IVCHECK]], [[LOOP]] ], !dbg [[DBG731:![0-9]+]] ; LZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i3 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG732:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[IV_RES]], metadata [[META718:![0-9]+]], metadata !DIExpression()), !dbg [[DBG729]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[VAL_SHIFTED_RES]], metadata [[META719:![0-9]+]], metadata !DIExpression()), !dbg [[DBG730]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META720:![0-9]+]], metadata !DIExpression()), !dbg [[DBG731]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[IV_NEXT_RES]], metadata [[META721:![0-9]+]], metadata !DIExpression()), !dbg [[DBG732]] +; LZCNT-NEXT: #dbg_value(i3 [[IV_RES]], [[META718:![0-9]+]], !DIExpression(), [[DBG729]]) +; LZCNT-NEXT: #dbg_value(i3 [[VAL_SHIFTED_RES]], [[META719:![0-9]+]], !DIExpression(), [[DBG730]]) +; LZCNT-NEXT: #dbg_value(i1 
[[VAL_SHIFTED_ISZERO_RES]], [[META720:![0-9]+]], !DIExpression(), [[DBG731]]) +; LZCNT-NEXT: #dbg_value(i3 [[IV_NEXT_RES]], [[META721:![0-9]+]], !DIExpression(), [[DBG732]]) ; LZCNT-NEXT: call void @escape_outer.i3(i3 [[IV_RES]], i3 [[IV_RES]], i3 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i3 [[IV_NEXT_RES]]), !dbg [[DBG733:![0-9]+]] ; LZCNT-NEXT: ret i3 [[IV_RES]], !dbg [[DBG734:![0-9]+]] ; @@ -2003,15 +2003,15 @@ define i1 @t27_addnsw_i1(i1 %val, i1 %start, i1 %extraoffset) mustprogress { ; NOLZCNT-NEXT: br label [[LOOP:%.*]], !dbg [[DBG747:![0-9]+]] ; NOLZCNT: loop: ; NOLZCNT-NEXT: [[IV:%.*]] = phi i1 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG748:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[IV]], metadata [[META737:![0-9]+]], metadata !DIExpression()), !dbg [[DBG748]] +; NOLZCNT-NEXT: #dbg_value(i1 [[IV]], [[META737:![0-9]+]], !DIExpression(), [[DBG748]]) ; NOLZCNT-NEXT: [[NBITS:%.*]] = add nsw i1 [[IV]], [[EXTRAOFFSET:%.*]], !dbg [[DBG749:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[NBITS]], metadata [[META738:![0-9]+]], metadata !DIExpression()), !dbg [[DBG749]] +; NOLZCNT-NEXT: #dbg_value(i1 [[NBITS]], [[META738:![0-9]+]], !DIExpression(), [[DBG749]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i1 [[VAL:%.*]], [[NBITS]], !dbg [[DBG750:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED]], metadata [[META739:![0-9]+]], metadata !DIExpression()), !dbg [[DBG750]] +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED]], [[META739:![0-9]+]], !DIExpression(), [[DBG750]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i1 [[VAL_SHIFTED]], false, !dbg [[DBG751:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata [[META740:![0-9]+]], metadata !DIExpression()), !dbg [[DBG751]] +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META740:![0-9]+]], !DIExpression(), [[DBG751]]) ; NOLZCNT-NEXT: [[IV_NEXT]] = add i1 
[[IV]], true, !dbg [[DBG752:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[IV_NEXT]], metadata [[META741:![0-9]+]], metadata !DIExpression()), !dbg [[DBG752]] +; NOLZCNT-NEXT: #dbg_value(i1 [[IV_NEXT]], [[META741:![0-9]+]], !DIExpression(), [[DBG752]]) ; NOLZCNT-NEXT: call void @escape_inner.i1(i1 [[IV]], i1 [[NBITS]], i1 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i1 [[IV_NEXT]]), !dbg [[DBG753:![0-9]+]] ; NOLZCNT-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG754:![0-9]+]] ; NOLZCNT: end: @@ -2020,11 +2020,11 @@ define i1 @t27_addnsw_i1(i1 %val, i1 %start, i1 %extraoffset) mustprogress { ; NOLZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i1 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG757:![0-9]+]] ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG758:![0-9]+]] ; NOLZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i1 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG759:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[IV_RES]], metadata [[META742:![0-9]+]], metadata !DIExpression()), !dbg [[DBG755]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[NBITS_RES]], metadata [[META743:![0-9]+]], metadata !DIExpression()), !dbg [[DBG756]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_RES]], metadata [[META744:![0-9]+]], metadata !DIExpression()), !dbg [[DBG757]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META745:![0-9]+]], metadata !DIExpression()), !dbg [[DBG758]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[IV_NEXT_RES]], metadata [[META746:![0-9]+]], metadata !DIExpression()), !dbg [[DBG759]] +; NOLZCNT-NEXT: #dbg_value(i1 [[IV_RES]], [[META742:![0-9]+]], !DIExpression(), [[DBG755]]) +; NOLZCNT-NEXT: #dbg_value(i1 [[NBITS_RES]], [[META743:![0-9]+]], !DIExpression(), [[DBG756]]) +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_RES]], [[META744:![0-9]+]], !DIExpression(), [[DBG757]]) +; 
NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META745:![0-9]+]], !DIExpression(), [[DBG758]]) +; NOLZCNT-NEXT: #dbg_value(i1 [[IV_NEXT_RES]], [[META746:![0-9]+]], !DIExpression(), [[DBG759]]) ; NOLZCNT-NEXT: call void @escape_outer.i1(i1 [[IV_RES]], i1 [[NBITS_RES]], i1 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i1 [[IV_NEXT_RES]]), !dbg [[DBG760:![0-9]+]] ; NOLZCNT-NEXT: ret i1 [[IV_RES]], !dbg [[DBG761:![0-9]+]] ; @@ -2042,14 +2042,14 @@ define i1 @t27_addnsw_i1(i1 %val, i1 %start, i1 %extraoffset) mustprogress { ; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i1 [[LOOP_IV]], true, !dbg [[DBG749:![0-9]+]] ; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i1 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG749]] ; LZCNT-NEXT: [[IV:%.*]] = add nsw i1 [[LOOP_IV]], [[START]], !dbg [[DBG749]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[IV]], metadata [[META737:![0-9]+]], metadata !DIExpression()), !dbg [[DBG747]] +; LZCNT-NEXT: #dbg_value(i1 [[IV]], [[META737:![0-9]+]], !DIExpression(), [[DBG747]]) ; LZCNT-NEXT: [[NBITS:%.*]] = add nsw i1 [[IV]], [[EXTRAOFFSET]], !dbg [[DBG749]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[NBITS]], metadata [[META738:![0-9]+]], metadata !DIExpression()), !dbg [[DBG749]] +; LZCNT-NEXT: #dbg_value(i1 [[NBITS]], [[META738:![0-9]+]], !DIExpression(), [[DBG749]]) ; LZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i1 [[VAL]], [[NBITS]], !dbg [[DBG750:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED]], metadata [[META739:![0-9]+]], metadata !DIExpression()), !dbg [[DBG750]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[LOOP_IVCHECK]], metadata [[META740:![0-9]+]], metadata !DIExpression()), !dbg [[DBG751:![0-9]+]] +; LZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED]], [[META739:![0-9]+]], !DIExpression(), [[DBG750]]) +; LZCNT-NEXT: #dbg_value(i1 [[LOOP_IVCHECK]], [[META740:![0-9]+]], !DIExpression(), [[META751:![0-9]+]]) ; LZCNT-NEXT: [[IV_NEXT:%.*]] = add i1 [[IV]], true, !dbg 
[[DBG752:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[IV_NEXT]], metadata [[META741:![0-9]+]], metadata !DIExpression()), !dbg [[DBG752]] +; LZCNT-NEXT: #dbg_value(i1 [[IV_NEXT]], [[META741:![0-9]+]], !DIExpression(), [[DBG752]]) ; LZCNT-NEXT: call void @escape_inner.i1(i1 [[IV]], i1 [[NBITS]], i1 [[VAL_SHIFTED]], i1 [[LOOP_IVCHECK]], i1 [[IV_NEXT]]), !dbg [[DBG753:![0-9]+]] ; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG754:![0-9]+]] ; LZCNT: end: @@ -2058,11 +2058,11 @@ define i1 @t27_addnsw_i1(i1 %val, i1 %start, i1 %extraoffset) mustprogress { ; LZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i1 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG757:![0-9]+]] ; LZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[LOOP_IVCHECK]], [[LOOP]] ], !dbg [[DBG758:![0-9]+]] ; LZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i1 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG759:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[IV_RES]], metadata [[META742:![0-9]+]], metadata !DIExpression()), !dbg [[DBG755]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[NBITS_RES]], metadata [[META743:![0-9]+]], metadata !DIExpression()), !dbg [[DBG756]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_RES]], metadata [[META744:![0-9]+]], metadata !DIExpression()), !dbg [[DBG757]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META745:![0-9]+]], metadata !DIExpression()), !dbg [[DBG758]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[IV_NEXT_RES]], metadata [[META746:![0-9]+]], metadata !DIExpression()), !dbg [[DBG759]] +; LZCNT-NEXT: #dbg_value(i1 [[IV_RES]], [[META742:![0-9]+]], !DIExpression(), [[DBG755]]) +; LZCNT-NEXT: #dbg_value(i1 [[NBITS_RES]], [[META743:![0-9]+]], !DIExpression(), [[DBG756]]) +; LZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_RES]], [[META744:![0-9]+]], !DIExpression(), [[DBG757]]) +; LZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], 
[[META745:![0-9]+]], !DIExpression(), [[DBG758]]) +; LZCNT-NEXT: #dbg_value(i1 [[IV_NEXT_RES]], [[META746:![0-9]+]], !DIExpression(), [[DBG759]]) ; LZCNT-NEXT: call void @escape_outer.i1(i1 [[IV_RES]], i1 [[NBITS_RES]], i1 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i1 [[IV_NEXT_RES]]), !dbg [[DBG760:![0-9]+]] ; LZCNT-NEXT: ret i1 [[IV_RES]], !dbg [[DBG761:![0-9]+]] ; @@ -2097,15 +2097,15 @@ define i2 @t28_addnsw_i2(i2 %val, i2 %start, i2 %extraoffset) mustprogress { ; NOLZCNT-NEXT: br label [[LOOP:%.*]], !dbg [[DBG774:![0-9]+]] ; NOLZCNT: loop: ; NOLZCNT-NEXT: [[IV:%.*]] = phi i2 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG775:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[IV]], metadata [[META764:![0-9]+]], metadata !DIExpression()), !dbg [[DBG775]] +; NOLZCNT-NEXT: #dbg_value(i2 [[IV]], [[META764:![0-9]+]], !DIExpression(), [[DBG775]]) ; NOLZCNT-NEXT: [[NBITS:%.*]] = add nsw i2 [[IV]], [[EXTRAOFFSET:%.*]], !dbg [[DBG776:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[NBITS]], metadata [[META765:![0-9]+]], metadata !DIExpression()), !dbg [[DBG776]] +; NOLZCNT-NEXT: #dbg_value(i2 [[NBITS]], [[META765:![0-9]+]], !DIExpression(), [[DBG776]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i2 [[VAL:%.*]], [[NBITS]], !dbg [[DBG777:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[VAL_SHIFTED]], metadata [[META766:![0-9]+]], metadata !DIExpression()), !dbg [[DBG777]] +; NOLZCNT-NEXT: #dbg_value(i2 [[VAL_SHIFTED]], [[META766:![0-9]+]], !DIExpression(), [[DBG777]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i2 [[VAL_SHIFTED]], 0, !dbg [[DBG778:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata [[META767:![0-9]+]], metadata !DIExpression()), !dbg [[DBG778]] +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META767:![0-9]+]], !DIExpression(), [[DBG778]]) ; NOLZCNT-NEXT: [[IV_NEXT]] = add i2 [[IV]], 1, !dbg 
[[DBG779:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[IV_NEXT]], metadata [[META768:![0-9]+]], metadata !DIExpression()), !dbg [[DBG779]] +; NOLZCNT-NEXT: #dbg_value(i2 [[IV_NEXT]], [[META768:![0-9]+]], !DIExpression(), [[DBG779]]) ; NOLZCNT-NEXT: call void @escape_inner.i2(i2 [[IV]], i2 [[NBITS]], i2 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i2 [[IV_NEXT]]), !dbg [[DBG780:![0-9]+]] ; NOLZCNT-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG781:![0-9]+]] ; NOLZCNT: end: @@ -2114,11 +2114,11 @@ define i2 @t28_addnsw_i2(i2 %val, i2 %start, i2 %extraoffset) mustprogress { ; NOLZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i2 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG784:![0-9]+]] ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG785:![0-9]+]] ; NOLZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i2 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG786:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[IV_RES]], metadata [[META769:![0-9]+]], metadata !DIExpression()), !dbg [[DBG782]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[NBITS_RES]], metadata [[META770:![0-9]+]], metadata !DIExpression()), !dbg [[DBG783]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[VAL_SHIFTED_RES]], metadata [[META771:![0-9]+]], metadata !DIExpression()), !dbg [[DBG784]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META772:![0-9]+]], metadata !DIExpression()), !dbg [[DBG785]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[IV_NEXT_RES]], metadata [[META773:![0-9]+]], metadata !DIExpression()), !dbg [[DBG786]] +; NOLZCNT-NEXT: #dbg_value(i2 [[IV_RES]], [[META769:![0-9]+]], !DIExpression(), [[DBG782]]) +; NOLZCNT-NEXT: #dbg_value(i2 [[NBITS_RES]], [[META770:![0-9]+]], !DIExpression(), [[DBG783]]) +; NOLZCNT-NEXT: #dbg_value(i2 [[VAL_SHIFTED_RES]], [[META771:![0-9]+]], !DIExpression(), [[DBG784]]) +; NOLZCNT-NEXT: 
#dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META772:![0-9]+]], !DIExpression(), [[DBG785]]) +; NOLZCNT-NEXT: #dbg_value(i2 [[IV_NEXT_RES]], [[META773:![0-9]+]], !DIExpression(), [[DBG786]]) ; NOLZCNT-NEXT: call void @escape_outer.i2(i2 [[IV_RES]], i2 [[NBITS_RES]], i2 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i2 [[IV_NEXT_RES]]), !dbg [[DBG787:![0-9]+]] ; NOLZCNT-NEXT: ret i2 [[IV_RES]], !dbg [[DBG788:![0-9]+]] ; @@ -2137,14 +2137,14 @@ define i2 @t28_addnsw_i2(i2 %val, i2 %start, i2 %extraoffset) mustprogress { ; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw i2 [[LOOP_IV]], 1, !dbg [[DBG776:![0-9]+]] ; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i2 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG776]] ; LZCNT-NEXT: [[IV:%.*]] = add nsw i2 [[LOOP_IV]], [[START]], !dbg [[DBG776]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[IV]], metadata [[META764:![0-9]+]], metadata !DIExpression()), !dbg [[DBG774]] +; LZCNT-NEXT: #dbg_value(i2 [[IV]], [[META764:![0-9]+]], !DIExpression(), [[DBG774]]) ; LZCNT-NEXT: [[NBITS:%.*]] = add nsw i2 [[IV]], [[EXTRAOFFSET]], !dbg [[DBG776]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[NBITS]], metadata [[META765:![0-9]+]], metadata !DIExpression()), !dbg [[DBG776]] +; LZCNT-NEXT: #dbg_value(i2 [[NBITS]], [[META765:![0-9]+]], !DIExpression(), [[DBG776]]) ; LZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i2 [[VAL]], [[NBITS]], !dbg [[DBG777:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[VAL_SHIFTED]], metadata [[META766:![0-9]+]], metadata !DIExpression()), !dbg [[DBG777]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[LOOP_IVCHECK]], metadata [[META767:![0-9]+]], metadata !DIExpression()), !dbg [[DBG778:![0-9]+]] +; LZCNT-NEXT: #dbg_value(i2 [[VAL_SHIFTED]], [[META766:![0-9]+]], !DIExpression(), [[DBG777]]) +; LZCNT-NEXT: #dbg_value(i1 [[LOOP_IVCHECK]], [[META767:![0-9]+]], !DIExpression(), [[META778:![0-9]+]]) ; LZCNT-NEXT: [[IV_NEXT:%.*]] = add i2 [[IV]], 1, !dbg [[DBG779:![0-9]+]] -; LZCNT-NEXT: 
call void @llvm.dbg.value(metadata i2 [[IV_NEXT]], metadata [[META768:![0-9]+]], metadata !DIExpression()), !dbg [[DBG779]] +; LZCNT-NEXT: #dbg_value(i2 [[IV_NEXT]], [[META768:![0-9]+]], !DIExpression(), [[DBG779]]) ; LZCNT-NEXT: call void @escape_inner.i2(i2 [[IV]], i2 [[NBITS]], i2 [[VAL_SHIFTED]], i1 [[LOOP_IVCHECK]], i2 [[IV_NEXT]]), !dbg [[DBG780:![0-9]+]] ; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG781:![0-9]+]] ; LZCNT: end: @@ -2153,11 +2153,11 @@ define i2 @t28_addnsw_i2(i2 %val, i2 %start, i2 %extraoffset) mustprogress { ; LZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i2 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG784:![0-9]+]] ; LZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[LOOP_IVCHECK]], [[LOOP]] ], !dbg [[DBG785:![0-9]+]] ; LZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i2 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG786:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[IV_RES]], metadata [[META769:![0-9]+]], metadata !DIExpression()), !dbg [[DBG782]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[NBITS_RES]], metadata [[META770:![0-9]+]], metadata !DIExpression()), !dbg [[DBG783]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[VAL_SHIFTED_RES]], metadata [[META771:![0-9]+]], metadata !DIExpression()), !dbg [[DBG784]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META772:![0-9]+]], metadata !DIExpression()), !dbg [[DBG785]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[IV_NEXT_RES]], metadata [[META773:![0-9]+]], metadata !DIExpression()), !dbg [[DBG786]] +; LZCNT-NEXT: #dbg_value(i2 [[IV_RES]], [[META769:![0-9]+]], !DIExpression(), [[DBG782]]) +; LZCNT-NEXT: #dbg_value(i2 [[NBITS_RES]], [[META770:![0-9]+]], !DIExpression(), [[DBG783]]) +; LZCNT-NEXT: #dbg_value(i2 [[VAL_SHIFTED_RES]], [[META771:![0-9]+]], !DIExpression(), [[DBG784]]) +; LZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META772:![0-9]+]], !DIExpression(), [[DBG785]]) +; 
LZCNT-NEXT: #dbg_value(i2 [[IV_NEXT_RES]], [[META773:![0-9]+]], !DIExpression(), [[DBG786]]) ; LZCNT-NEXT: call void @escape_outer.i2(i2 [[IV_RES]], i2 [[NBITS_RES]], i2 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i2 [[IV_NEXT_RES]]), !dbg [[DBG787:![0-9]+]] ; LZCNT-NEXT: ret i2 [[IV_RES]], !dbg [[DBG788:![0-9]+]] ; @@ -2192,15 +2192,15 @@ define i3 @t29_addnsw_i3(i3 %val, i3 %start, i3 %extraoffset) mustprogress { ; NOLZCNT-NEXT: br label [[LOOP:%.*]], !dbg [[DBG801:![0-9]+]] ; NOLZCNT: loop: ; NOLZCNT-NEXT: [[IV:%.*]] = phi i3 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG802:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[IV]], metadata [[META791:![0-9]+]], metadata !DIExpression()), !dbg [[DBG802]] +; NOLZCNT-NEXT: #dbg_value(i3 [[IV]], [[META791:![0-9]+]], !DIExpression(), [[DBG802]]) ; NOLZCNT-NEXT: [[NBITS:%.*]] = add nsw i3 [[IV]], [[EXTRAOFFSET:%.*]], !dbg [[DBG803:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[NBITS]], metadata [[META792:![0-9]+]], metadata !DIExpression()), !dbg [[DBG803]] +; NOLZCNT-NEXT: #dbg_value(i3 [[NBITS]], [[META792:![0-9]+]], !DIExpression(), [[DBG803]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i3 [[VAL:%.*]], [[NBITS]], !dbg [[DBG804:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[VAL_SHIFTED]], metadata [[META793:![0-9]+]], metadata !DIExpression()), !dbg [[DBG804]] +; NOLZCNT-NEXT: #dbg_value(i3 [[VAL_SHIFTED]], [[META793:![0-9]+]], !DIExpression(), [[DBG804]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i3 [[VAL_SHIFTED]], 0, !dbg [[DBG805:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata [[META794:![0-9]+]], metadata !DIExpression()), !dbg [[DBG805]] +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META794:![0-9]+]], !DIExpression(), [[DBG805]]) ; NOLZCNT-NEXT: [[IV_NEXT]] = add i3 [[IV]], 1, !dbg [[DBG806:![0-9]+]] -; NOLZCNT-NEXT: call void 
@llvm.dbg.value(metadata i3 [[IV_NEXT]], metadata [[META795:![0-9]+]], metadata !DIExpression()), !dbg [[DBG806]] +; NOLZCNT-NEXT: #dbg_value(i3 [[IV_NEXT]], [[META795:![0-9]+]], !DIExpression(), [[DBG806]]) ; NOLZCNT-NEXT: call void @escape_inner.i3(i3 [[IV]], i3 [[NBITS]], i3 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i3 [[IV_NEXT]]), !dbg [[DBG807:![0-9]+]] ; NOLZCNT-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG808:![0-9]+]] ; NOLZCNT: end: @@ -2209,11 +2209,11 @@ define i3 @t29_addnsw_i3(i3 %val, i3 %start, i3 %extraoffset) mustprogress { ; NOLZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i3 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG811:![0-9]+]] ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG812:![0-9]+]] ; NOLZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i3 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG813:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[IV_RES]], metadata [[META796:![0-9]+]], metadata !DIExpression()), !dbg [[DBG809]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[NBITS_RES]], metadata [[META797:![0-9]+]], metadata !DIExpression()), !dbg [[DBG810]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[VAL_SHIFTED_RES]], metadata [[META798:![0-9]+]], metadata !DIExpression()), !dbg [[DBG811]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META799:![0-9]+]], metadata !DIExpression()), !dbg [[DBG812]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[IV_NEXT_RES]], metadata [[META800:![0-9]+]], metadata !DIExpression()), !dbg [[DBG813]] +; NOLZCNT-NEXT: #dbg_value(i3 [[IV_RES]], [[META796:![0-9]+]], !DIExpression(), [[DBG809]]) +; NOLZCNT-NEXT: #dbg_value(i3 [[NBITS_RES]], [[META797:![0-9]+]], !DIExpression(), [[DBG810]]) +; NOLZCNT-NEXT: #dbg_value(i3 [[VAL_SHIFTED_RES]], [[META798:![0-9]+]], !DIExpression(), [[DBG811]]) +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], 
[[META799:![0-9]+]], !DIExpression(), [[DBG812]]) +; NOLZCNT-NEXT: #dbg_value(i3 [[IV_NEXT_RES]], [[META800:![0-9]+]], !DIExpression(), [[DBG813]]) ; NOLZCNT-NEXT: call void @escape_outer.i3(i3 [[IV_RES]], i3 [[NBITS_RES]], i3 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i3 [[IV_NEXT_RES]]), !dbg [[DBG814:![0-9]+]] ; NOLZCNT-NEXT: ret i3 [[IV_RES]], !dbg [[DBG815:![0-9]+]] ; @@ -2232,14 +2232,14 @@ define i3 @t29_addnsw_i3(i3 %val, i3 %start, i3 %extraoffset) mustprogress { ; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i3 [[LOOP_IV]], 1, !dbg [[DBG803:![0-9]+]] ; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i3 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG803]] ; LZCNT-NEXT: [[IV:%.*]] = add nsw i3 [[LOOP_IV]], [[START]], !dbg [[DBG803]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[IV]], metadata [[META791:![0-9]+]], metadata !DIExpression()), !dbg [[DBG801]] +; LZCNT-NEXT: #dbg_value(i3 [[IV]], [[META791:![0-9]+]], !DIExpression(), [[DBG801]]) ; LZCNT-NEXT: [[NBITS:%.*]] = add nsw i3 [[IV]], [[EXTRAOFFSET]], !dbg [[DBG803]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[NBITS]], metadata [[META792:![0-9]+]], metadata !DIExpression()), !dbg [[DBG803]] +; LZCNT-NEXT: #dbg_value(i3 [[NBITS]], [[META792:![0-9]+]], !DIExpression(), [[DBG803]]) ; LZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i3 [[VAL]], [[NBITS]], !dbg [[DBG804:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[VAL_SHIFTED]], metadata [[META793:![0-9]+]], metadata !DIExpression()), !dbg [[DBG804]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[LOOP_IVCHECK]], metadata [[META794:![0-9]+]], metadata !DIExpression()), !dbg [[DBG805:![0-9]+]] +; LZCNT-NEXT: #dbg_value(i3 [[VAL_SHIFTED]], [[META793:![0-9]+]], !DIExpression(), [[DBG804]]) +; LZCNT-NEXT: #dbg_value(i1 [[LOOP_IVCHECK]], [[META794:![0-9]+]], !DIExpression(), [[META805:![0-9]+]]) ; LZCNT-NEXT: [[IV_NEXT:%.*]] = add i3 [[IV]], 1, !dbg [[DBG806:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i3 
[[IV_NEXT]], metadata [[META795:![0-9]+]], metadata !DIExpression()), !dbg [[DBG806]] +; LZCNT-NEXT: #dbg_value(i3 [[IV_NEXT]], [[META795:![0-9]+]], !DIExpression(), [[DBG806]]) ; LZCNT-NEXT: call void @escape_inner.i3(i3 [[IV]], i3 [[NBITS]], i3 [[VAL_SHIFTED]], i1 [[LOOP_IVCHECK]], i3 [[IV_NEXT]]), !dbg [[DBG807:![0-9]+]] ; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG808:![0-9]+]] ; LZCNT: end: @@ -2248,11 +2248,11 @@ define i3 @t29_addnsw_i3(i3 %val, i3 %start, i3 %extraoffset) mustprogress { ; LZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i3 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG811:![0-9]+]] ; LZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[LOOP_IVCHECK]], [[LOOP]] ], !dbg [[DBG812:![0-9]+]] ; LZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i3 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG813:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[IV_RES]], metadata [[META796:![0-9]+]], metadata !DIExpression()), !dbg [[DBG809]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[NBITS_RES]], metadata [[META797:![0-9]+]], metadata !DIExpression()), !dbg [[DBG810]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[VAL_SHIFTED_RES]], metadata [[META798:![0-9]+]], metadata !DIExpression()), !dbg [[DBG811]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META799:![0-9]+]], metadata !DIExpression()), !dbg [[DBG812]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[IV_NEXT_RES]], metadata [[META800:![0-9]+]], metadata !DIExpression()), !dbg [[DBG813]] +; LZCNT-NEXT: #dbg_value(i3 [[IV_RES]], [[META796:![0-9]+]], !DIExpression(), [[DBG809]]) +; LZCNT-NEXT: #dbg_value(i3 [[NBITS_RES]], [[META797:![0-9]+]], !DIExpression(), [[DBG810]]) +; LZCNT-NEXT: #dbg_value(i3 [[VAL_SHIFTED_RES]], [[META798:![0-9]+]], !DIExpression(), [[DBG811]]) +; LZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META799:![0-9]+]], !DIExpression(), [[DBG812]]) +; LZCNT-NEXT: #dbg_value(i3 
[[IV_NEXT_RES]], [[META800:![0-9]+]], !DIExpression(), [[DBG813]]) ; LZCNT-NEXT: call void @escape_outer.i3(i3 [[IV_RES]], i3 [[NBITS_RES]], i3 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i3 [[IV_NEXT_RES]]), !dbg [[DBG814:![0-9]+]] ; LZCNT-NEXT: ret i3 [[IV_RES]], !dbg [[DBG815:![0-9]+]] ; @@ -2288,15 +2288,15 @@ define i1 @t30_addnuw_i1(i1 %val, i1 %start, i1 %extraoffset) mustprogress { ; NOLZCNT-NEXT: br label [[LOOP:%.*]], !dbg [[DBG828:![0-9]+]] ; NOLZCNT: loop: ; NOLZCNT-NEXT: [[IV:%.*]] = phi i1 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG829:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[IV]], metadata [[META818:![0-9]+]], metadata !DIExpression()), !dbg [[DBG829]] +; NOLZCNT-NEXT: #dbg_value(i1 [[IV]], [[META818:![0-9]+]], !DIExpression(), [[DBG829]]) ; NOLZCNT-NEXT: [[NBITS:%.*]] = add nuw i1 [[IV]], [[EXTRAOFFSET:%.*]], !dbg [[DBG830:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[NBITS]], metadata [[META819:![0-9]+]], metadata !DIExpression()), !dbg [[DBG830]] +; NOLZCNT-NEXT: #dbg_value(i1 [[NBITS]], [[META819:![0-9]+]], !DIExpression(), [[DBG830]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i1 [[VAL:%.*]], [[NBITS]], !dbg [[DBG831:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED]], metadata [[META820:![0-9]+]], metadata !DIExpression()), !dbg [[DBG831]] +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED]], [[META820:![0-9]+]], !DIExpression(), [[DBG831]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i1 [[VAL_SHIFTED]], false, !dbg [[DBG832:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata [[META821:![0-9]+]], metadata !DIExpression()), !dbg [[DBG832]] +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META821:![0-9]+]], !DIExpression(), [[DBG832]]) ; NOLZCNT-NEXT: [[IV_NEXT]] = add i1 [[IV]], true, !dbg [[DBG833:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[IV_NEXT]], 
metadata [[META822:![0-9]+]], metadata !DIExpression()), !dbg [[DBG833]] +; NOLZCNT-NEXT: #dbg_value(i1 [[IV_NEXT]], [[META822:![0-9]+]], !DIExpression(), [[DBG833]]) ; NOLZCNT-NEXT: call void @escape_inner.i1(i1 [[IV]], i1 [[NBITS]], i1 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i1 [[IV_NEXT]]), !dbg [[DBG834:![0-9]+]] ; NOLZCNT-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG835:![0-9]+]] ; NOLZCNT: end: @@ -2305,11 +2305,11 @@ define i1 @t30_addnuw_i1(i1 %val, i1 %start, i1 %extraoffset) mustprogress { ; NOLZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i1 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG838:![0-9]+]] ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG839:![0-9]+]] ; NOLZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i1 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG840:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[IV_RES]], metadata [[META823:![0-9]+]], metadata !DIExpression()), !dbg [[DBG836]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[NBITS_RES]], metadata [[META824:![0-9]+]], metadata !DIExpression()), !dbg [[DBG837]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_RES]], metadata [[META825:![0-9]+]], metadata !DIExpression()), !dbg [[DBG838]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META826:![0-9]+]], metadata !DIExpression()), !dbg [[DBG839]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[IV_NEXT_RES]], metadata [[META827:![0-9]+]], metadata !DIExpression()), !dbg [[DBG840]] +; NOLZCNT-NEXT: #dbg_value(i1 [[IV_RES]], [[META823:![0-9]+]], !DIExpression(), [[DBG836]]) +; NOLZCNT-NEXT: #dbg_value(i1 [[NBITS_RES]], [[META824:![0-9]+]], !DIExpression(), [[DBG837]]) +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_RES]], [[META825:![0-9]+]], !DIExpression(), [[DBG838]]) +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META826:![0-9]+]], !DIExpression(), [[DBG839]]) +; 
NOLZCNT-NEXT: #dbg_value(i1 [[IV_NEXT_RES]], [[META827:![0-9]+]], !DIExpression(), [[DBG840]]) ; NOLZCNT-NEXT: call void @escape_outer.i1(i1 [[IV_RES]], i1 [[NBITS_RES]], i1 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i1 [[IV_NEXT_RES]]), !dbg [[DBG841:![0-9]+]] ; NOLZCNT-NEXT: ret i1 [[IV_RES]], !dbg [[DBG842:![0-9]+]] ; @@ -2327,14 +2327,14 @@ define i1 @t30_addnuw_i1(i1 %val, i1 %start, i1 %extraoffset) mustprogress { ; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i1 [[LOOP_IV]], true, !dbg [[DBG830:![0-9]+]] ; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i1 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG830]] ; LZCNT-NEXT: [[IV:%.*]] = add nsw i1 [[LOOP_IV]], [[START]], !dbg [[DBG830]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[IV]], metadata [[META818:![0-9]+]], metadata !DIExpression()), !dbg [[DBG828]] +; LZCNT-NEXT: #dbg_value(i1 [[IV]], [[META818:![0-9]+]], !DIExpression(), [[DBG828]]) ; LZCNT-NEXT: [[NBITS:%.*]] = add nuw i1 [[IV]], [[EXTRAOFFSET]], !dbg [[DBG830]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[NBITS]], metadata [[META819:![0-9]+]], metadata !DIExpression()), !dbg [[DBG830]] +; LZCNT-NEXT: #dbg_value(i1 [[NBITS]], [[META819:![0-9]+]], !DIExpression(), [[DBG830]]) ; LZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i1 [[VAL]], [[NBITS]], !dbg [[DBG831:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED]], metadata [[META820:![0-9]+]], metadata !DIExpression()), !dbg [[DBG831]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[LOOP_IVCHECK]], metadata [[META821:![0-9]+]], metadata !DIExpression()), !dbg [[DBG832:![0-9]+]] +; LZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED]], [[META820:![0-9]+]], !DIExpression(), [[DBG831]]) +; LZCNT-NEXT: #dbg_value(i1 [[LOOP_IVCHECK]], [[META821:![0-9]+]], !DIExpression(), [[META832:![0-9]+]]) ; LZCNT-NEXT: [[IV_NEXT:%.*]] = add i1 [[IV]], true, !dbg [[DBG833:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[IV_NEXT]], metadata [[META822:![0-9]+]], 
metadata !DIExpression()), !dbg [[DBG833]] +; LZCNT-NEXT: #dbg_value(i1 [[IV_NEXT]], [[META822:![0-9]+]], !DIExpression(), [[DBG833]]) ; LZCNT-NEXT: call void @escape_inner.i1(i1 [[IV]], i1 [[NBITS]], i1 [[VAL_SHIFTED]], i1 [[LOOP_IVCHECK]], i1 [[IV_NEXT]]), !dbg [[DBG834:![0-9]+]] ; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG835:![0-9]+]] ; LZCNT: end: @@ -2343,11 +2343,11 @@ define i1 @t30_addnuw_i1(i1 %val, i1 %start, i1 %extraoffset) mustprogress { ; LZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i1 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG838:![0-9]+]] ; LZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[LOOP_IVCHECK]], [[LOOP]] ], !dbg [[DBG839:![0-9]+]] ; LZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i1 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG840:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[IV_RES]], metadata [[META823:![0-9]+]], metadata !DIExpression()), !dbg [[DBG836]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[NBITS_RES]], metadata [[META824:![0-9]+]], metadata !DIExpression()), !dbg [[DBG837]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_RES]], metadata [[META825:![0-9]+]], metadata !DIExpression()), !dbg [[DBG838]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META826:![0-9]+]], metadata !DIExpression()), !dbg [[DBG839]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[IV_NEXT_RES]], metadata [[META827:![0-9]+]], metadata !DIExpression()), !dbg [[DBG840]] +; LZCNT-NEXT: #dbg_value(i1 [[IV_RES]], [[META823:![0-9]+]], !DIExpression(), [[DBG836]]) +; LZCNT-NEXT: #dbg_value(i1 [[NBITS_RES]], [[META824:![0-9]+]], !DIExpression(), [[DBG837]]) +; LZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_RES]], [[META825:![0-9]+]], !DIExpression(), [[DBG838]]) +; LZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META826:![0-9]+]], !DIExpression(), [[DBG839]]) +; LZCNT-NEXT: #dbg_value(i1 [[IV_NEXT_RES]], [[META827:![0-9]+]], !DIExpression(), 
[[DBG840]]) ; LZCNT-NEXT: call void @escape_outer.i1(i1 [[IV_RES]], i1 [[NBITS_RES]], i1 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i1 [[IV_NEXT_RES]]), !dbg [[DBG841:![0-9]+]] ; LZCNT-NEXT: ret i1 [[IV_RES]], !dbg [[DBG842:![0-9]+]] ; @@ -2382,15 +2382,15 @@ define i2 @t31_addnuw_i2(i2 %val, i2 %start, i2 %extraoffset) mustprogress { ; NOLZCNT-NEXT: br label [[LOOP:%.*]], !dbg [[DBG855:![0-9]+]] ; NOLZCNT: loop: ; NOLZCNT-NEXT: [[IV:%.*]] = phi i2 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG856:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[IV]], metadata [[META845:![0-9]+]], metadata !DIExpression()), !dbg [[DBG856]] +; NOLZCNT-NEXT: #dbg_value(i2 [[IV]], [[META845:![0-9]+]], !DIExpression(), [[DBG856]]) ; NOLZCNT-NEXT: [[NBITS:%.*]] = add nuw i2 [[IV]], [[EXTRAOFFSET:%.*]], !dbg [[DBG857:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[NBITS]], metadata [[META846:![0-9]+]], metadata !DIExpression()), !dbg [[DBG857]] +; NOLZCNT-NEXT: #dbg_value(i2 [[NBITS]], [[META846:![0-9]+]], !DIExpression(), [[DBG857]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i2 [[VAL:%.*]], [[NBITS]], !dbg [[DBG858:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[VAL_SHIFTED]], metadata [[META847:![0-9]+]], metadata !DIExpression()), !dbg [[DBG858]] +; NOLZCNT-NEXT: #dbg_value(i2 [[VAL_SHIFTED]], [[META847:![0-9]+]], !DIExpression(), [[DBG858]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i2 [[VAL_SHIFTED]], 0, !dbg [[DBG859:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata [[META848:![0-9]+]], metadata !DIExpression()), !dbg [[DBG859]] +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META848:![0-9]+]], !DIExpression(), [[DBG859]]) ; NOLZCNT-NEXT: [[IV_NEXT]] = add i2 [[IV]], 1, !dbg [[DBG860:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[IV_NEXT]], metadata [[META849:![0-9]+]], metadata !DIExpression()), !dbg 
[[DBG860]] +; NOLZCNT-NEXT: #dbg_value(i2 [[IV_NEXT]], [[META849:![0-9]+]], !DIExpression(), [[DBG860]]) ; NOLZCNT-NEXT: call void @escape_inner.i2(i2 [[IV]], i2 [[NBITS]], i2 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i2 [[IV_NEXT]]), !dbg [[DBG861:![0-9]+]] ; NOLZCNT-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG862:![0-9]+]] ; NOLZCNT: end: @@ -2399,11 +2399,11 @@ define i2 @t31_addnuw_i2(i2 %val, i2 %start, i2 %extraoffset) mustprogress { ; NOLZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i2 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG865:![0-9]+]] ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG866:![0-9]+]] ; NOLZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i2 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG867:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[IV_RES]], metadata [[META850:![0-9]+]], metadata !DIExpression()), !dbg [[DBG863]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[NBITS_RES]], metadata [[META851:![0-9]+]], metadata !DIExpression()), !dbg [[DBG864]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[VAL_SHIFTED_RES]], metadata [[META852:![0-9]+]], metadata !DIExpression()), !dbg [[DBG865]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META853:![0-9]+]], metadata !DIExpression()), !dbg [[DBG866]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[IV_NEXT_RES]], metadata [[META854:![0-9]+]], metadata !DIExpression()), !dbg [[DBG867]] +; NOLZCNT-NEXT: #dbg_value(i2 [[IV_RES]], [[META850:![0-9]+]], !DIExpression(), [[DBG863]]) +; NOLZCNT-NEXT: #dbg_value(i2 [[NBITS_RES]], [[META851:![0-9]+]], !DIExpression(), [[DBG864]]) +; NOLZCNT-NEXT: #dbg_value(i2 [[VAL_SHIFTED_RES]], [[META852:![0-9]+]], !DIExpression(), [[DBG865]]) +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META853:![0-9]+]], !DIExpression(), [[DBG866]]) +; NOLZCNT-NEXT: #dbg_value(i2 [[IV_NEXT_RES]], 
[[META854:![0-9]+]], !DIExpression(), [[DBG867]]) ; NOLZCNT-NEXT: call void @escape_outer.i2(i2 [[IV_RES]], i2 [[NBITS_RES]], i2 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i2 [[IV_NEXT_RES]]), !dbg [[DBG868:![0-9]+]] ; NOLZCNT-NEXT: ret i2 [[IV_RES]], !dbg [[DBG869:![0-9]+]] ; @@ -2422,14 +2422,14 @@ define i2 @t31_addnuw_i2(i2 %val, i2 %start, i2 %extraoffset) mustprogress { ; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw i2 [[LOOP_IV]], 1, !dbg [[DBG857:![0-9]+]] ; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i2 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG857]] ; LZCNT-NEXT: [[IV:%.*]] = add nsw i2 [[LOOP_IV]], [[START]], !dbg [[DBG857]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[IV]], metadata [[META845:![0-9]+]], metadata !DIExpression()), !dbg [[DBG855]] +; LZCNT-NEXT: #dbg_value(i2 [[IV]], [[META845:![0-9]+]], !DIExpression(), [[DBG855]]) ; LZCNT-NEXT: [[NBITS:%.*]] = add nuw i2 [[IV]], [[EXTRAOFFSET]], !dbg [[DBG857]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[NBITS]], metadata [[META846:![0-9]+]], metadata !DIExpression()), !dbg [[DBG857]] +; LZCNT-NEXT: #dbg_value(i2 [[NBITS]], [[META846:![0-9]+]], !DIExpression(), [[DBG857]]) ; LZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i2 [[VAL]], [[NBITS]], !dbg [[DBG858:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[VAL_SHIFTED]], metadata [[META847:![0-9]+]], metadata !DIExpression()), !dbg [[DBG858]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[LOOP_IVCHECK]], metadata [[META848:![0-9]+]], metadata !DIExpression()), !dbg [[DBG859:![0-9]+]] +; LZCNT-NEXT: #dbg_value(i2 [[VAL_SHIFTED]], [[META847:![0-9]+]], !DIExpression(), [[DBG858]]) +; LZCNT-NEXT: #dbg_value(i1 [[LOOP_IVCHECK]], [[META848:![0-9]+]], !DIExpression(), [[META859:![0-9]+]]) ; LZCNT-NEXT: [[IV_NEXT:%.*]] = add i2 [[IV]], 1, !dbg [[DBG860:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[IV_NEXT]], metadata [[META849:![0-9]+]], metadata !DIExpression()), !dbg [[DBG860]] +; LZCNT-NEXT: 
#dbg_value(i2 [[IV_NEXT]], [[META849:![0-9]+]], !DIExpression(), [[DBG860]]) ; LZCNT-NEXT: call void @escape_inner.i2(i2 [[IV]], i2 [[NBITS]], i2 [[VAL_SHIFTED]], i1 [[LOOP_IVCHECK]], i2 [[IV_NEXT]]), !dbg [[DBG861:![0-9]+]] ; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG862:![0-9]+]] ; LZCNT: end: @@ -2438,11 +2438,11 @@ define i2 @t31_addnuw_i2(i2 %val, i2 %start, i2 %extraoffset) mustprogress { ; LZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i2 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG865:![0-9]+]] ; LZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[LOOP_IVCHECK]], [[LOOP]] ], !dbg [[DBG866:![0-9]+]] ; LZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i2 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG867:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[IV_RES]], metadata [[META850:![0-9]+]], metadata !DIExpression()), !dbg [[DBG863]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[NBITS_RES]], metadata [[META851:![0-9]+]], metadata !DIExpression()), !dbg [[DBG864]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[VAL_SHIFTED_RES]], metadata [[META852:![0-9]+]], metadata !DIExpression()), !dbg [[DBG865]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META853:![0-9]+]], metadata !DIExpression()), !dbg [[DBG866]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[IV_NEXT_RES]], metadata [[META854:![0-9]+]], metadata !DIExpression()), !dbg [[DBG867]] +; LZCNT-NEXT: #dbg_value(i2 [[IV_RES]], [[META850:![0-9]+]], !DIExpression(), [[DBG863]]) +; LZCNT-NEXT: #dbg_value(i2 [[NBITS_RES]], [[META851:![0-9]+]], !DIExpression(), [[DBG864]]) +; LZCNT-NEXT: #dbg_value(i2 [[VAL_SHIFTED_RES]], [[META852:![0-9]+]], !DIExpression(), [[DBG865]]) +; LZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META853:![0-9]+]], !DIExpression(), [[DBG866]]) +; LZCNT-NEXT: #dbg_value(i2 [[IV_NEXT_RES]], [[META854:![0-9]+]], !DIExpression(), [[DBG867]]) ; LZCNT-NEXT: call void @escape_outer.i2(i2 
[[IV_RES]], i2 [[NBITS_RES]], i2 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i2 [[IV_NEXT_RES]]), !dbg [[DBG868:![0-9]+]] ; LZCNT-NEXT: ret i2 [[IV_RES]], !dbg [[DBG869:![0-9]+]] ; @@ -2477,15 +2477,15 @@ define i3 @t32_addnuw_i3(i3 %val, i3 %start, i3 %extraoffset) mustprogress { ; NOLZCNT-NEXT: br label [[LOOP:%.*]], !dbg [[DBG882:![0-9]+]] ; NOLZCNT: loop: ; NOLZCNT-NEXT: [[IV:%.*]] = phi i3 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG883:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[IV]], metadata [[META872:![0-9]+]], metadata !DIExpression()), !dbg [[DBG883]] +; NOLZCNT-NEXT: #dbg_value(i3 [[IV]], [[META872:![0-9]+]], !DIExpression(), [[DBG883]]) ; NOLZCNT-NEXT: [[NBITS:%.*]] = add nuw i3 [[IV]], [[EXTRAOFFSET:%.*]], !dbg [[DBG884:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[NBITS]], metadata [[META873:![0-9]+]], metadata !DIExpression()), !dbg [[DBG884]] +; NOLZCNT-NEXT: #dbg_value(i3 [[NBITS]], [[META873:![0-9]+]], !DIExpression(), [[DBG884]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i3 [[VAL:%.*]], [[NBITS]], !dbg [[DBG885:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[VAL_SHIFTED]], metadata [[META874:![0-9]+]], metadata !DIExpression()), !dbg [[DBG885]] +; NOLZCNT-NEXT: #dbg_value(i3 [[VAL_SHIFTED]], [[META874:![0-9]+]], !DIExpression(), [[DBG885]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i3 [[VAL_SHIFTED]], 0, !dbg [[DBG886:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata [[META875:![0-9]+]], metadata !DIExpression()), !dbg [[DBG886]] +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META875:![0-9]+]], !DIExpression(), [[DBG886]]) ; NOLZCNT-NEXT: [[IV_NEXT]] = add i3 [[IV]], 1, !dbg [[DBG887:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[IV_NEXT]], metadata [[META876:![0-9]+]], metadata !DIExpression()), !dbg [[DBG887]] +; NOLZCNT-NEXT: #dbg_value(i3 [[IV_NEXT]], 
[[META876:![0-9]+]], !DIExpression(), [[DBG887]]) ; NOLZCNT-NEXT: call void @escape_inner.i3(i3 [[IV]], i3 [[NBITS]], i3 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i3 [[IV_NEXT]]), !dbg [[DBG888:![0-9]+]] ; NOLZCNT-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG889:![0-9]+]] ; NOLZCNT: end: @@ -2494,11 +2494,11 @@ define i3 @t32_addnuw_i3(i3 %val, i3 %start, i3 %extraoffset) mustprogress { ; NOLZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i3 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG892:![0-9]+]] ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG893:![0-9]+]] ; NOLZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i3 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG894:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[IV_RES]], metadata [[META877:![0-9]+]], metadata !DIExpression()), !dbg [[DBG890]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[NBITS_RES]], metadata [[META878:![0-9]+]], metadata !DIExpression()), !dbg [[DBG891]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[VAL_SHIFTED_RES]], metadata [[META879:![0-9]+]], metadata !DIExpression()), !dbg [[DBG892]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META880:![0-9]+]], metadata !DIExpression()), !dbg [[DBG893]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[IV_NEXT_RES]], metadata [[META881:![0-9]+]], metadata !DIExpression()), !dbg [[DBG894]] +; NOLZCNT-NEXT: #dbg_value(i3 [[IV_RES]], [[META877:![0-9]+]], !DIExpression(), [[DBG890]]) +; NOLZCNT-NEXT: #dbg_value(i3 [[NBITS_RES]], [[META878:![0-9]+]], !DIExpression(), [[DBG891]]) +; NOLZCNT-NEXT: #dbg_value(i3 [[VAL_SHIFTED_RES]], [[META879:![0-9]+]], !DIExpression(), [[DBG892]]) +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META880:![0-9]+]], !DIExpression(), [[DBG893]]) +; NOLZCNT-NEXT: #dbg_value(i3 [[IV_NEXT_RES]], [[META881:![0-9]+]], !DIExpression(), [[DBG894]]) ; NOLZCNT-NEXT: call 
void @escape_outer.i3(i3 [[IV_RES]], i3 [[NBITS_RES]], i3 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i3 [[IV_NEXT_RES]]), !dbg [[DBG895:![0-9]+]] ; NOLZCNT-NEXT: ret i3 [[IV_RES]], !dbg [[DBG896:![0-9]+]] ; @@ -2517,14 +2517,14 @@ define i3 @t32_addnuw_i3(i3 %val, i3 %start, i3 %extraoffset) mustprogress { ; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i3 [[LOOP_IV]], 1, !dbg [[DBG884:![0-9]+]] ; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i3 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG884]] ; LZCNT-NEXT: [[IV:%.*]] = add nsw i3 [[LOOP_IV]], [[START]], !dbg [[DBG884]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[IV]], metadata [[META872:![0-9]+]], metadata !DIExpression()), !dbg [[DBG882]] +; LZCNT-NEXT: #dbg_value(i3 [[IV]], [[META872:![0-9]+]], !DIExpression(), [[DBG882]]) ; LZCNT-NEXT: [[NBITS:%.*]] = add nuw i3 [[IV]], [[EXTRAOFFSET]], !dbg [[DBG884]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[NBITS]], metadata [[META873:![0-9]+]], metadata !DIExpression()), !dbg [[DBG884]] +; LZCNT-NEXT: #dbg_value(i3 [[NBITS]], [[META873:![0-9]+]], !DIExpression(), [[DBG884]]) ; LZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i3 [[VAL]], [[NBITS]], !dbg [[DBG885:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[VAL_SHIFTED]], metadata [[META874:![0-9]+]], metadata !DIExpression()), !dbg [[DBG885]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[LOOP_IVCHECK]], metadata [[META875:![0-9]+]], metadata !DIExpression()), !dbg [[DBG886:![0-9]+]] +; LZCNT-NEXT: #dbg_value(i3 [[VAL_SHIFTED]], [[META874:![0-9]+]], !DIExpression(), [[DBG885]]) +; LZCNT-NEXT: #dbg_value(i1 [[LOOP_IVCHECK]], [[META875:![0-9]+]], !DIExpression(), [[META886:![0-9]+]]) ; LZCNT-NEXT: [[IV_NEXT:%.*]] = add i3 [[IV]], 1, !dbg [[DBG887:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[IV_NEXT]], metadata [[META876:![0-9]+]], metadata !DIExpression()), !dbg [[DBG887]] +; LZCNT-NEXT: #dbg_value(i3 [[IV_NEXT]], [[META876:![0-9]+]], !DIExpression(), 
[[DBG887]]) ; LZCNT-NEXT: call void @escape_inner.i3(i3 [[IV]], i3 [[NBITS]], i3 [[VAL_SHIFTED]], i1 [[LOOP_IVCHECK]], i3 [[IV_NEXT]]), !dbg [[DBG888:![0-9]+]] ; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG889:![0-9]+]] ; LZCNT: end: @@ -2533,11 +2533,11 @@ define i3 @t32_addnuw_i3(i3 %val, i3 %start, i3 %extraoffset) mustprogress { ; LZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i3 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG892:![0-9]+]] ; LZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[LOOP_IVCHECK]], [[LOOP]] ], !dbg [[DBG893:![0-9]+]] ; LZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i3 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG894:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[IV_RES]], metadata [[META877:![0-9]+]], metadata !DIExpression()), !dbg [[DBG890]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[NBITS_RES]], metadata [[META878:![0-9]+]], metadata !DIExpression()), !dbg [[DBG891]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[VAL_SHIFTED_RES]], metadata [[META879:![0-9]+]], metadata !DIExpression()), !dbg [[DBG892]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META880:![0-9]+]], metadata !DIExpression()), !dbg [[DBG893]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[IV_NEXT_RES]], metadata [[META881:![0-9]+]], metadata !DIExpression()), !dbg [[DBG894]] +; LZCNT-NEXT: #dbg_value(i3 [[IV_RES]], [[META877:![0-9]+]], !DIExpression(), [[DBG890]]) +; LZCNT-NEXT: #dbg_value(i3 [[NBITS_RES]], [[META878:![0-9]+]], !DIExpression(), [[DBG891]]) +; LZCNT-NEXT: #dbg_value(i3 [[VAL_SHIFTED_RES]], [[META879:![0-9]+]], !DIExpression(), [[DBG892]]) +; LZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META880:![0-9]+]], !DIExpression(), [[DBG893]]) +; LZCNT-NEXT: #dbg_value(i3 [[IV_NEXT_RES]], [[META881:![0-9]+]], !DIExpression(), [[DBG894]]) ; LZCNT-NEXT: call void @escape_outer.i3(i3 [[IV_RES]], i3 [[NBITS_RES]], i3 [[VAL_SHIFTED_RES]], i1 
[[VAL_SHIFTED_ISZERO_RES]], i3 [[IV_NEXT_RES]]), !dbg [[DBG895:![0-9]+]] ; LZCNT-NEXT: ret i3 [[IV_RES]], !dbg [[DBG896:![0-9]+]] ; @@ -2574,15 +2574,15 @@ define i1 @t33_subnsw_i1(i1 %val, i1 %start, i1 %extraoffset) mustprogress { ; NOLZCNT-NEXT: br label [[LOOP:%.*]], !dbg [[DBG909:![0-9]+]] ; NOLZCNT: loop: ; NOLZCNT-NEXT: [[IV:%.*]] = phi i1 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG910:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[IV]], metadata [[META899:![0-9]+]], metadata !DIExpression()), !dbg [[DBG910]] +; NOLZCNT-NEXT: #dbg_value(i1 [[IV]], [[META899:![0-9]+]], !DIExpression(), [[DBG910]]) ; NOLZCNT-NEXT: [[NBITS:%.*]] = sub nsw i1 [[IV]], [[EXTRAOFFSET:%.*]], !dbg [[DBG911:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[NBITS]], metadata [[META900:![0-9]+]], metadata !DIExpression()), !dbg [[DBG911]] +; NOLZCNT-NEXT: #dbg_value(i1 [[NBITS]], [[META900:![0-9]+]], !DIExpression(), [[DBG911]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i1 [[VAL:%.*]], [[NBITS]], !dbg [[DBG912:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED]], metadata [[META901:![0-9]+]], metadata !DIExpression()), !dbg [[DBG912]] +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED]], [[META901:![0-9]+]], !DIExpression(), [[DBG912]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i1 [[VAL_SHIFTED]], false, !dbg [[DBG913:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata [[META902:![0-9]+]], metadata !DIExpression()), !dbg [[DBG913]] +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META902:![0-9]+]], !DIExpression(), [[DBG913]]) ; NOLZCNT-NEXT: [[IV_NEXT]] = add i1 [[IV]], true, !dbg [[DBG914:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[IV_NEXT]], metadata [[META903:![0-9]+]], metadata !DIExpression()), !dbg [[DBG914]] +; NOLZCNT-NEXT: #dbg_value(i1 [[IV_NEXT]], [[META903:![0-9]+]], !DIExpression(), [[DBG914]]) ; 
NOLZCNT-NEXT: call void @escape_inner.i1(i1 [[IV]], i1 [[NBITS]], i1 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i1 [[IV_NEXT]]), !dbg [[DBG915:![0-9]+]] ; NOLZCNT-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG916:![0-9]+]] ; NOLZCNT: end: @@ -2591,11 +2591,11 @@ define i1 @t33_subnsw_i1(i1 %val, i1 %start, i1 %extraoffset) mustprogress { ; NOLZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i1 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG919:![0-9]+]] ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG920:![0-9]+]] ; NOLZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i1 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG921:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[IV_RES]], metadata [[META904:![0-9]+]], metadata !DIExpression()), !dbg [[DBG917]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[NBITS_RES]], metadata [[META905:![0-9]+]], metadata !DIExpression()), !dbg [[DBG918]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_RES]], metadata [[META906:![0-9]+]], metadata !DIExpression()), !dbg [[DBG919]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META907:![0-9]+]], metadata !DIExpression()), !dbg [[DBG920]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[IV_NEXT_RES]], metadata [[META908:![0-9]+]], metadata !DIExpression()), !dbg [[DBG921]] +; NOLZCNT-NEXT: #dbg_value(i1 [[IV_RES]], [[META904:![0-9]+]], !DIExpression(), [[DBG917]]) +; NOLZCNT-NEXT: #dbg_value(i1 [[NBITS_RES]], [[META905:![0-9]+]], !DIExpression(), [[DBG918]]) +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_RES]], [[META906:![0-9]+]], !DIExpression(), [[DBG919]]) +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META907:![0-9]+]], !DIExpression(), [[DBG920]]) +; NOLZCNT-NEXT: #dbg_value(i1 [[IV_NEXT_RES]], [[META908:![0-9]+]], !DIExpression(), [[DBG921]]) ; NOLZCNT-NEXT: call void @escape_outer.i1(i1 [[IV_RES]], i1 
[[NBITS_RES]], i1 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i1 [[IV_NEXT_RES]]), !dbg [[DBG922:![0-9]+]] ; NOLZCNT-NEXT: ret i1 [[IV_RES]], !dbg [[DBG923:![0-9]+]] ; @@ -2613,14 +2613,14 @@ define i1 @t33_subnsw_i1(i1 %val, i1 %start, i1 %extraoffset) mustprogress { ; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i1 [[LOOP_IV]], true, !dbg [[DBG911:![0-9]+]] ; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i1 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG911]] ; LZCNT-NEXT: [[IV:%.*]] = add nsw i1 [[LOOP_IV]], [[START]], !dbg [[DBG911]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[IV]], metadata [[META899:![0-9]+]], metadata !DIExpression()), !dbg [[DBG909]] +; LZCNT-NEXT: #dbg_value(i1 [[IV]], [[META899:![0-9]+]], !DIExpression(), [[DBG909]]) ; LZCNT-NEXT: [[NBITS:%.*]] = sub nsw i1 [[IV]], [[EXTRAOFFSET]], !dbg [[DBG911]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[NBITS]], metadata [[META900:![0-9]+]], metadata !DIExpression()), !dbg [[DBG911]] +; LZCNT-NEXT: #dbg_value(i1 [[NBITS]], [[META900:![0-9]+]], !DIExpression(), [[DBG911]]) ; LZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i1 [[VAL]], [[NBITS]], !dbg [[DBG912:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED]], metadata [[META901:![0-9]+]], metadata !DIExpression()), !dbg [[DBG912]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[LOOP_IVCHECK]], metadata [[META902:![0-9]+]], metadata !DIExpression()), !dbg [[DBG913:![0-9]+]] +; LZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED]], [[META901:![0-9]+]], !DIExpression(), [[DBG912]]) +; LZCNT-NEXT: #dbg_value(i1 [[LOOP_IVCHECK]], [[META902:![0-9]+]], !DIExpression(), [[META913:![0-9]+]]) ; LZCNT-NEXT: [[IV_NEXT:%.*]] = add i1 [[IV]], true, !dbg [[DBG914:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[IV_NEXT]], metadata [[META903:![0-9]+]], metadata !DIExpression()), !dbg [[DBG914]] +; LZCNT-NEXT: #dbg_value(i1 [[IV_NEXT]], [[META903:![0-9]+]], !DIExpression(), [[DBG914]]) ; LZCNT-NEXT: call void 
@escape_inner.i1(i1 [[IV]], i1 [[NBITS]], i1 [[VAL_SHIFTED]], i1 [[LOOP_IVCHECK]], i1 [[IV_NEXT]]), !dbg [[DBG915:![0-9]+]] ; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG916:![0-9]+]] ; LZCNT: end: @@ -2629,11 +2629,11 @@ define i1 @t33_subnsw_i1(i1 %val, i1 %start, i1 %extraoffset) mustprogress { ; LZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i1 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG919:![0-9]+]] ; LZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[LOOP_IVCHECK]], [[LOOP]] ], !dbg [[DBG920:![0-9]+]] ; LZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i1 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG921:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[IV_RES]], metadata [[META904:![0-9]+]], metadata !DIExpression()), !dbg [[DBG917]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[NBITS_RES]], metadata [[META905:![0-9]+]], metadata !DIExpression()), !dbg [[DBG918]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_RES]], metadata [[META906:![0-9]+]], metadata !DIExpression()), !dbg [[DBG919]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META907:![0-9]+]], metadata !DIExpression()), !dbg [[DBG920]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[IV_NEXT_RES]], metadata [[META908:![0-9]+]], metadata !DIExpression()), !dbg [[DBG921]] +; LZCNT-NEXT: #dbg_value(i1 [[IV_RES]], [[META904:![0-9]+]], !DIExpression(), [[DBG917]]) +; LZCNT-NEXT: #dbg_value(i1 [[NBITS_RES]], [[META905:![0-9]+]], !DIExpression(), [[DBG918]]) +; LZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_RES]], [[META906:![0-9]+]], !DIExpression(), [[DBG919]]) +; LZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META907:![0-9]+]], !DIExpression(), [[DBG920]]) +; LZCNT-NEXT: #dbg_value(i1 [[IV_NEXT_RES]], [[META908:![0-9]+]], !DIExpression(), [[DBG921]]) ; LZCNT-NEXT: call void @escape_outer.i1(i1 [[IV_RES]], i1 [[NBITS_RES]], i1 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i1 
[[IV_NEXT_RES]]), !dbg [[DBG922:![0-9]+]] ; LZCNT-NEXT: ret i1 [[IV_RES]], !dbg [[DBG923:![0-9]+]] ; @@ -2668,15 +2668,15 @@ define i2 @t34_addnuw_i2(i2 %val, i2 %start, i2 %extraoffset) mustprogress { ; NOLZCNT-NEXT: br label [[LOOP:%.*]], !dbg [[DBG936:![0-9]+]] ; NOLZCNT: loop: ; NOLZCNT-NEXT: [[IV:%.*]] = phi i2 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG937:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[IV]], metadata [[META926:![0-9]+]], metadata !DIExpression()), !dbg [[DBG937]] +; NOLZCNT-NEXT: #dbg_value(i2 [[IV]], [[META926:![0-9]+]], !DIExpression(), [[DBG937]]) ; NOLZCNT-NEXT: [[NBITS:%.*]] = sub nsw i2 [[IV]], [[EXTRAOFFSET:%.*]], !dbg [[DBG938:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[NBITS]], metadata [[META927:![0-9]+]], metadata !DIExpression()), !dbg [[DBG938]] +; NOLZCNT-NEXT: #dbg_value(i2 [[NBITS]], [[META927:![0-9]+]], !DIExpression(), [[DBG938]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i2 [[VAL:%.*]], [[NBITS]], !dbg [[DBG939:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[VAL_SHIFTED]], metadata [[META928:![0-9]+]], metadata !DIExpression()), !dbg [[DBG939]] +; NOLZCNT-NEXT: #dbg_value(i2 [[VAL_SHIFTED]], [[META928:![0-9]+]], !DIExpression(), [[DBG939]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i2 [[VAL_SHIFTED]], 0, !dbg [[DBG940:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata [[META929:![0-9]+]], metadata !DIExpression()), !dbg [[DBG940]] +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META929:![0-9]+]], !DIExpression(), [[DBG940]]) ; NOLZCNT-NEXT: [[IV_NEXT]] = add i2 [[IV]], 1, !dbg [[DBG941:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[IV_NEXT]], metadata [[META930:![0-9]+]], metadata !DIExpression()), !dbg [[DBG941]] +; NOLZCNT-NEXT: #dbg_value(i2 [[IV_NEXT]], [[META930:![0-9]+]], !DIExpression(), [[DBG941]]) ; NOLZCNT-NEXT: call void 
@escape_inner.i2(i2 [[IV]], i2 [[NBITS]], i2 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i2 [[IV_NEXT]]), !dbg [[DBG942:![0-9]+]] ; NOLZCNT-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG943:![0-9]+]] ; NOLZCNT: end: @@ -2685,11 +2685,11 @@ define i2 @t34_addnuw_i2(i2 %val, i2 %start, i2 %extraoffset) mustprogress { ; NOLZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i2 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG946:![0-9]+]] ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG947:![0-9]+]] ; NOLZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i2 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG948:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[IV_RES]], metadata [[META931:![0-9]+]], metadata !DIExpression()), !dbg [[DBG944]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[NBITS_RES]], metadata [[META932:![0-9]+]], metadata !DIExpression()), !dbg [[DBG945]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[VAL_SHIFTED_RES]], metadata [[META933:![0-9]+]], metadata !DIExpression()), !dbg [[DBG946]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META934:![0-9]+]], metadata !DIExpression()), !dbg [[DBG947]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[IV_NEXT_RES]], metadata [[META935:![0-9]+]], metadata !DIExpression()), !dbg [[DBG948]] +; NOLZCNT-NEXT: #dbg_value(i2 [[IV_RES]], [[META931:![0-9]+]], !DIExpression(), [[DBG944]]) +; NOLZCNT-NEXT: #dbg_value(i2 [[NBITS_RES]], [[META932:![0-9]+]], !DIExpression(), [[DBG945]]) +; NOLZCNT-NEXT: #dbg_value(i2 [[VAL_SHIFTED_RES]], [[META933:![0-9]+]], !DIExpression(), [[DBG946]]) +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META934:![0-9]+]], !DIExpression(), [[DBG947]]) +; NOLZCNT-NEXT: #dbg_value(i2 [[IV_NEXT_RES]], [[META935:![0-9]+]], !DIExpression(), [[DBG948]]) ; NOLZCNT-NEXT: call void @escape_outer.i2(i2 [[IV_RES]], i2 [[NBITS_RES]], i2 
[[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i2 [[IV_NEXT_RES]]), !dbg [[DBG949:![0-9]+]] ; NOLZCNT-NEXT: ret i2 [[IV_RES]], !dbg [[DBG950:![0-9]+]] ; @@ -2707,14 +2707,14 @@ define i2 @t34_addnuw_i2(i2 %val, i2 %start, i2 %extraoffset) mustprogress { ; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw i2 [[LOOP_IV]], 1, !dbg [[DBG938:![0-9]+]] ; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i2 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG938]] ; LZCNT-NEXT: [[IV:%.*]] = add nsw i2 [[LOOP_IV]], [[START]], !dbg [[DBG938]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[IV]], metadata [[META926:![0-9]+]], metadata !DIExpression()), !dbg [[DBG936]] +; LZCNT-NEXT: #dbg_value(i2 [[IV]], [[META926:![0-9]+]], !DIExpression(), [[DBG936]]) ; LZCNT-NEXT: [[NBITS:%.*]] = sub nsw i2 [[IV]], [[EXTRAOFFSET]], !dbg [[DBG938]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[NBITS]], metadata [[META927:![0-9]+]], metadata !DIExpression()), !dbg [[DBG938]] +; LZCNT-NEXT: #dbg_value(i2 [[NBITS]], [[META927:![0-9]+]], !DIExpression(), [[DBG938]]) ; LZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i2 [[VAL]], [[NBITS]], !dbg [[DBG939:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[VAL_SHIFTED]], metadata [[META928:![0-9]+]], metadata !DIExpression()), !dbg [[DBG939]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[LOOP_IVCHECK]], metadata [[META929:![0-9]+]], metadata !DIExpression()), !dbg [[DBG940:![0-9]+]] +; LZCNT-NEXT: #dbg_value(i2 [[VAL_SHIFTED]], [[META928:![0-9]+]], !DIExpression(), [[DBG939]]) +; LZCNT-NEXT: #dbg_value(i1 [[LOOP_IVCHECK]], [[META929:![0-9]+]], !DIExpression(), [[META940:![0-9]+]]) ; LZCNT-NEXT: [[IV_NEXT:%.*]] = add i2 [[IV]], 1, !dbg [[DBG941:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[IV_NEXT]], metadata [[META930:![0-9]+]], metadata !DIExpression()), !dbg [[DBG941]] +; LZCNT-NEXT: #dbg_value(i2 [[IV_NEXT]], [[META930:![0-9]+]], !DIExpression(), [[DBG941]]) ; LZCNT-NEXT: call void @escape_inner.i2(i2 [[IV]], i2 
[[NBITS]], i2 [[VAL_SHIFTED]], i1 [[LOOP_IVCHECK]], i2 [[IV_NEXT]]), !dbg [[DBG942:![0-9]+]] ; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG943:![0-9]+]] ; LZCNT: end: @@ -2723,11 +2723,11 @@ define i2 @t34_addnuw_i2(i2 %val, i2 %start, i2 %extraoffset) mustprogress { ; LZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i2 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG946:![0-9]+]] ; LZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[LOOP_IVCHECK]], [[LOOP]] ], !dbg [[DBG947:![0-9]+]] ; LZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i2 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG948:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[IV_RES]], metadata [[META931:![0-9]+]], metadata !DIExpression()), !dbg [[DBG944]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[NBITS_RES]], metadata [[META932:![0-9]+]], metadata !DIExpression()), !dbg [[DBG945]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[VAL_SHIFTED_RES]], metadata [[META933:![0-9]+]], metadata !DIExpression()), !dbg [[DBG946]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META934:![0-9]+]], metadata !DIExpression()), !dbg [[DBG947]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[IV_NEXT_RES]], metadata [[META935:![0-9]+]], metadata !DIExpression()), !dbg [[DBG948]] +; LZCNT-NEXT: #dbg_value(i2 [[IV_RES]], [[META931:![0-9]+]], !DIExpression(), [[DBG944]]) +; LZCNT-NEXT: #dbg_value(i2 [[NBITS_RES]], [[META932:![0-9]+]], !DIExpression(), [[DBG945]]) +; LZCNT-NEXT: #dbg_value(i2 [[VAL_SHIFTED_RES]], [[META933:![0-9]+]], !DIExpression(), [[DBG946]]) +; LZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META934:![0-9]+]], !DIExpression(), [[DBG947]]) +; LZCNT-NEXT: #dbg_value(i2 [[IV_NEXT_RES]], [[META935:![0-9]+]], !DIExpression(), [[DBG948]]) ; LZCNT-NEXT: call void @escape_outer.i2(i2 [[IV_RES]], i2 [[NBITS_RES]], i2 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i2 [[IV_NEXT_RES]]), !dbg [[DBG949:![0-9]+]] ; 
LZCNT-NEXT: ret i2 [[IV_RES]], !dbg [[DBG950:![0-9]+]] ; @@ -2762,15 +2762,15 @@ define i3 @t35_addnuw_i3(i3 %val, i3 %start, i3 %extraoffset) mustprogress { ; NOLZCNT-NEXT: br label [[LOOP:%.*]], !dbg [[DBG963:![0-9]+]] ; NOLZCNT: loop: ; NOLZCNT-NEXT: [[IV:%.*]] = phi i3 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG964:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[IV]], metadata [[META953:![0-9]+]], metadata !DIExpression()), !dbg [[DBG964]] +; NOLZCNT-NEXT: #dbg_value(i3 [[IV]], [[META953:![0-9]+]], !DIExpression(), [[DBG964]]) ; NOLZCNT-NEXT: [[NBITS:%.*]] = sub nsw i3 [[IV]], [[EXTRAOFFSET:%.*]], !dbg [[DBG965:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[NBITS]], metadata [[META954:![0-9]+]], metadata !DIExpression()), !dbg [[DBG965]] +; NOLZCNT-NEXT: #dbg_value(i3 [[NBITS]], [[META954:![0-9]+]], !DIExpression(), [[DBG965]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i3 [[VAL:%.*]], [[NBITS]], !dbg [[DBG966:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[VAL_SHIFTED]], metadata [[META955:![0-9]+]], metadata !DIExpression()), !dbg [[DBG966]] +; NOLZCNT-NEXT: #dbg_value(i3 [[VAL_SHIFTED]], [[META955:![0-9]+]], !DIExpression(), [[DBG966]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i3 [[VAL_SHIFTED]], 0, !dbg [[DBG967:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata [[META956:![0-9]+]], metadata !DIExpression()), !dbg [[DBG967]] +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META956:![0-9]+]], !DIExpression(), [[DBG967]]) ; NOLZCNT-NEXT: [[IV_NEXT]] = add i3 [[IV]], 1, !dbg [[DBG968:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[IV_NEXT]], metadata [[META957:![0-9]+]], metadata !DIExpression()), !dbg [[DBG968]] +; NOLZCNT-NEXT: #dbg_value(i3 [[IV_NEXT]], [[META957:![0-9]+]], !DIExpression(), [[DBG968]]) ; NOLZCNT-NEXT: call void @escape_inner.i3(i3 [[IV]], i3 [[NBITS]], i3 
[[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i3 [[IV_NEXT]]), !dbg [[DBG969:![0-9]+]] ; NOLZCNT-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG970:![0-9]+]] ; NOLZCNT: end: @@ -2779,11 +2779,11 @@ define i3 @t35_addnuw_i3(i3 %val, i3 %start, i3 %extraoffset) mustprogress { ; NOLZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i3 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG973:![0-9]+]] ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG974:![0-9]+]] ; NOLZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i3 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG975:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[IV_RES]], metadata [[META958:![0-9]+]], metadata !DIExpression()), !dbg [[DBG971]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[NBITS_RES]], metadata [[META959:![0-9]+]], metadata !DIExpression()), !dbg [[DBG972]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[VAL_SHIFTED_RES]], metadata [[META960:![0-9]+]], metadata !DIExpression()), !dbg [[DBG973]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META961:![0-9]+]], metadata !DIExpression()), !dbg [[DBG974]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[IV_NEXT_RES]], metadata [[META962:![0-9]+]], metadata !DIExpression()), !dbg [[DBG975]] +; NOLZCNT-NEXT: #dbg_value(i3 [[IV_RES]], [[META958:![0-9]+]], !DIExpression(), [[DBG971]]) +; NOLZCNT-NEXT: #dbg_value(i3 [[NBITS_RES]], [[META959:![0-9]+]], !DIExpression(), [[DBG972]]) +; NOLZCNT-NEXT: #dbg_value(i3 [[VAL_SHIFTED_RES]], [[META960:![0-9]+]], !DIExpression(), [[DBG973]]) +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META961:![0-9]+]], !DIExpression(), [[DBG974]]) +; NOLZCNT-NEXT: #dbg_value(i3 [[IV_NEXT_RES]], [[META962:![0-9]+]], !DIExpression(), [[DBG975]]) ; NOLZCNT-NEXT: call void @escape_outer.i3(i3 [[IV_RES]], i3 [[NBITS_RES]], i3 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i3 
[[IV_NEXT_RES]]), !dbg [[DBG976:![0-9]+]] ; NOLZCNT-NEXT: ret i3 [[IV_RES]], !dbg [[DBG977:![0-9]+]] ; @@ -2801,14 +2801,14 @@ define i3 @t35_addnuw_i3(i3 %val, i3 %start, i3 %extraoffset) mustprogress { ; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i3 [[LOOP_IV]], 1, !dbg [[DBG965:![0-9]+]] ; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i3 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG965]] ; LZCNT-NEXT: [[IV:%.*]] = add nsw i3 [[LOOP_IV]], [[START]], !dbg [[DBG965]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[IV]], metadata [[META953:![0-9]+]], metadata !DIExpression()), !dbg [[DBG963]] +; LZCNT-NEXT: #dbg_value(i3 [[IV]], [[META953:![0-9]+]], !DIExpression(), [[DBG963]]) ; LZCNT-NEXT: [[NBITS:%.*]] = sub nsw i3 [[IV]], [[EXTRAOFFSET]], !dbg [[DBG965]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[NBITS]], metadata [[META954:![0-9]+]], metadata !DIExpression()), !dbg [[DBG965]] +; LZCNT-NEXT: #dbg_value(i3 [[NBITS]], [[META954:![0-9]+]], !DIExpression(), [[DBG965]]) ; LZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i3 [[VAL]], [[NBITS]], !dbg [[DBG966:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[VAL_SHIFTED]], metadata [[META955:![0-9]+]], metadata !DIExpression()), !dbg [[DBG966]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[LOOP_IVCHECK]], metadata [[META956:![0-9]+]], metadata !DIExpression()), !dbg [[DBG967:![0-9]+]] +; LZCNT-NEXT: #dbg_value(i3 [[VAL_SHIFTED]], [[META955:![0-9]+]], !DIExpression(), [[DBG966]]) +; LZCNT-NEXT: #dbg_value(i1 [[LOOP_IVCHECK]], [[META956:![0-9]+]], !DIExpression(), [[META967:![0-9]+]]) ; LZCNT-NEXT: [[IV_NEXT:%.*]] = add i3 [[IV]], 1, !dbg [[DBG968:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[IV_NEXT]], metadata [[META957:![0-9]+]], metadata !DIExpression()), !dbg [[DBG968]] +; LZCNT-NEXT: #dbg_value(i3 [[IV_NEXT]], [[META957:![0-9]+]], !DIExpression(), [[DBG968]]) ; LZCNT-NEXT: call void @escape_inner.i3(i3 [[IV]], i3 [[NBITS]], i3 [[VAL_SHIFTED]], i1 [[LOOP_IVCHECK]], 
i3 [[IV_NEXT]]), !dbg [[DBG969:![0-9]+]] ; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG970:![0-9]+]] ; LZCNT: end: @@ -2817,11 +2817,11 @@ define i3 @t35_addnuw_i3(i3 %val, i3 %start, i3 %extraoffset) mustprogress { ; LZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i3 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG973:![0-9]+]] ; LZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[LOOP_IVCHECK]], [[LOOP]] ], !dbg [[DBG974:![0-9]+]] ; LZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i3 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG975:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[IV_RES]], metadata [[META958:![0-9]+]], metadata !DIExpression()), !dbg [[DBG971]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[NBITS_RES]], metadata [[META959:![0-9]+]], metadata !DIExpression()), !dbg [[DBG972]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[VAL_SHIFTED_RES]], metadata [[META960:![0-9]+]], metadata !DIExpression()), !dbg [[DBG973]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META961:![0-9]+]], metadata !DIExpression()), !dbg [[DBG974]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[IV_NEXT_RES]], metadata [[META962:![0-9]+]], metadata !DIExpression()), !dbg [[DBG975]] +; LZCNT-NEXT: #dbg_value(i3 [[IV_RES]], [[META958:![0-9]+]], !DIExpression(), [[DBG971]]) +; LZCNT-NEXT: #dbg_value(i3 [[NBITS_RES]], [[META959:![0-9]+]], !DIExpression(), [[DBG972]]) +; LZCNT-NEXT: #dbg_value(i3 [[VAL_SHIFTED_RES]], [[META960:![0-9]+]], !DIExpression(), [[DBG973]]) +; LZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META961:![0-9]+]], !DIExpression(), [[DBG974]]) +; LZCNT-NEXT: #dbg_value(i3 [[IV_NEXT_RES]], [[META962:![0-9]+]], !DIExpression(), [[DBG975]]) ; LZCNT-NEXT: call void @escape_outer.i3(i3 [[IV_RES]], i3 [[NBITS_RES]], i3 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i3 [[IV_NEXT_RES]]), !dbg [[DBG976:![0-9]+]] ; LZCNT-NEXT: ret i3 [[IV_RES]], !dbg 
[[DBG977:![0-9]+]] ; @@ -2858,15 +2858,15 @@ define i8 @n36(i8 %val, i8 %start, i8 %extraoffset) { ; CHECK-NEXT: br label [[LOOP:%.*]], !dbg [[DBG990:![0-9]+]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG991:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META980:![0-9]+]], metadata !DIExpression()), !dbg [[DBG991]] +; CHECK-NEXT: #dbg_value(i8 [[IV]], [[META980:![0-9]+]], !DIExpression(), [[DBG991]]) ; CHECK-NEXT: [[NBITS:%.*]] = add nsw i8 [[IV]], [[EXTRAOFFSET:%.*]], !dbg [[DBG992:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS]], metadata [[META981:![0-9]+]], metadata !DIExpression()), !dbg [[DBG992]] +; CHECK-NEXT: #dbg_value(i8 [[NBITS]], [[META981:![0-9]+]], !DIExpression(), [[DBG992]]) ; CHECK-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL:%.*]], [[NBITS]], !dbg [[DBG993:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META982:![0-9]+]], metadata !DIExpression()), !dbg [[DBG993]] +; CHECK-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META982:![0-9]+]], !DIExpression(), [[DBG993]]) ; CHECK-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i8 [[VAL_SHIFTED]], 0, !dbg [[DBG994:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata [[META983:![0-9]+]], metadata !DIExpression()), !dbg [[DBG994]] +; CHECK-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META983:![0-9]+]], !DIExpression(), [[DBG994]]) ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1, !dbg [[DBG995:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata [[META984:![0-9]+]], metadata !DIExpression()), !dbg [[DBG995]] +; CHECK-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META984:![0-9]+]], !DIExpression(), [[DBG995]]) ; CHECK-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i8 [[IV_NEXT]]), !dbg [[DBG996:![0-9]+]] ; CHECK-NEXT: br i1 
[[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG997:![0-9]+]] ; CHECK: end: @@ -2875,11 +2875,11 @@ define i8 @n36(i8 %val, i8 %start, i8 %extraoffset) { ; CHECK-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG1000:![0-9]+]] ; CHECK-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG1001:![0-9]+]] ; CHECK-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG1002:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META985:![0-9]+]], metadata !DIExpression()), !dbg [[DBG998]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS_RES]], metadata [[META986:![0-9]+]], metadata !DIExpression()), !dbg [[DBG999]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META987:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1000]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META988:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1001]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META989:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1002]] +; CHECK-NEXT: #dbg_value(i8 [[IV_RES]], [[META985:![0-9]+]], !DIExpression(), [[DBG998]]) +; CHECK-NEXT: #dbg_value(i8 [[NBITS_RES]], [[META986:![0-9]+]], !DIExpression(), [[DBG999]]) +; CHECK-NEXT: #dbg_value(i8 [[VAL_SHIFTED_RES]], [[META987:![0-9]+]], !DIExpression(), [[DBG1000]]) +; CHECK-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META988:![0-9]+]], !DIExpression(), [[DBG1001]]) +; CHECK-NEXT: #dbg_value(i8 [[IV_NEXT_RES]], [[META989:![0-9]+]], !DIExpression(), [[DBG1002]]) ; CHECK-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG1003:![0-9]+]] ; CHECK-NEXT: ret i8 [[IV_RES]], !dbg [[DBG1004:![0-9]+]] ; @@ -2915,15 +2915,15 @@ define i8 @p37(i8 %val, i8 %start, 
i8 %extraoffset) { ; NOLZCNT-NEXT: br label [[LOOP:%.*]], !dbg [[DBG1017:![0-9]+]] ; NOLZCNT: loop: ; NOLZCNT-NEXT: [[IV:%.*]] = phi i8 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG1018:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META1007:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1018]] +; NOLZCNT-NEXT: #dbg_value(i8 [[IV]], [[META1007:![0-9]+]], !DIExpression(), [[DBG1018]]) ; NOLZCNT-NEXT: [[NBITS:%.*]] = add nsw i8 [[IV]], [[EXTRAOFFSET:%.*]], !dbg [[DBG1019:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS]], metadata [[META1008:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1019]] +; NOLZCNT-NEXT: #dbg_value(i8 [[NBITS]], [[META1008:![0-9]+]], !DIExpression(), [[DBG1019]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL:%.*]], [[NBITS]], !dbg [[DBG1020:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META1009:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1020]] +; NOLZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META1009:![0-9]+]], !DIExpression(), [[DBG1020]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i8 [[VAL_SHIFTED]], 0, !dbg [[DBG1021:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata [[META1010:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1021]] +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META1010:![0-9]+]], !DIExpression(), [[DBG1021]]) ; NOLZCNT-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1, !dbg [[DBG1022:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata [[META1011:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1022]] +; NOLZCNT-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META1011:![0-9]+]], !DIExpression(), [[DBG1022]]) ; NOLZCNT-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i8 [[IV_NEXT]]), !dbg [[DBG1023:![0-9]+]] ; NOLZCNT-NEXT: br i1 
[[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG1024:![0-9]+]], !llvm.loop [[LOOP1025:![0-9]+]] ; NOLZCNT: end: @@ -2932,11 +2932,11 @@ define i8 @p37(i8 %val, i8 %start, i8 %extraoffset) { ; NOLZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG1029:![0-9]+]] ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG1030:![0-9]+]] ; NOLZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG1031:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META1012:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1027]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS_RES]], metadata [[META1013:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1028]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META1014:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1029]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META1015:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1030]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META1016:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1031]] +; NOLZCNT-NEXT: #dbg_value(i8 [[IV_RES]], [[META1012:![0-9]+]], !DIExpression(), [[DBG1027]]) +; NOLZCNT-NEXT: #dbg_value(i8 [[NBITS_RES]], [[META1013:![0-9]+]], !DIExpression(), [[DBG1028]]) +; NOLZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED_RES]], [[META1014:![0-9]+]], !DIExpression(), [[DBG1029]]) +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META1015:![0-9]+]], !DIExpression(), [[DBG1030]]) +; NOLZCNT-NEXT: #dbg_value(i8 [[IV_NEXT_RES]], [[META1016:![0-9]+]], !DIExpression(), [[DBG1031]]) ; NOLZCNT-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG1032:![0-9]+]] ; NOLZCNT-NEXT: ret i8 [[IV_RES]], !dbg 
[[DBG1033:![0-9]+]] ; @@ -2955,14 +2955,14 @@ define i8 @p37(i8 %val, i8 %start, i8 %extraoffset) { ; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i8 [[LOOP_IV]], 1, !dbg [[DBG1019:![0-9]+]] ; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i8 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG1019]] ; LZCNT-NEXT: [[IV:%.*]] = add nsw i8 [[LOOP_IV]], [[START]], !dbg [[DBG1019]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META1007:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1017]] +; LZCNT-NEXT: #dbg_value(i8 [[IV]], [[META1007:![0-9]+]], !DIExpression(), [[DBG1017]]) ; LZCNT-NEXT: [[NBITS:%.*]] = add nsw i8 [[IV]], [[EXTRAOFFSET]], !dbg [[DBG1019]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS]], metadata [[META1008:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1019]] +; LZCNT-NEXT: #dbg_value(i8 [[NBITS]], [[META1008:![0-9]+]], !DIExpression(), [[DBG1019]]) ; LZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL]], [[NBITS]], !dbg [[DBG1020:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META1009:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1020]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[LOOP_IVCHECK]], metadata [[META1010:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1021:![0-9]+]] +; LZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META1009:![0-9]+]], !DIExpression(), [[DBG1020]]) +; LZCNT-NEXT: #dbg_value(i1 [[LOOP_IVCHECK]], [[META1010:![0-9]+]], !DIExpression(), [[META1021:![0-9]+]]) ; LZCNT-NEXT: [[IV_NEXT:%.*]] = add i8 [[IV]], 1, !dbg [[DBG1022:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata [[META1011:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1022]] +; LZCNT-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META1011:![0-9]+]], !DIExpression(), [[DBG1022]]) ; LZCNT-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[LOOP_IVCHECK]], i8 [[IV_NEXT]]), !dbg [[DBG1023:![0-9]+]] ; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], 
label [[END:%.*]], label [[LOOP]], !dbg [[DBG1024:![0-9]+]] ; LZCNT: end: @@ -2971,11 +2971,11 @@ define i8 @p37(i8 %val, i8 %start, i8 %extraoffset) { ; LZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG1027:![0-9]+]] ; LZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[LOOP_IVCHECK]], [[LOOP]] ], !dbg [[DBG1028:![0-9]+]] ; LZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG1029:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META1012:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1025]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS_RES]], metadata [[META1013:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1026]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META1014:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1027]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META1015:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1028]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META1016:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1029]] +; LZCNT-NEXT: #dbg_value(i8 [[IV_RES]], [[META1012:![0-9]+]], !DIExpression(), [[DBG1025]]) +; LZCNT-NEXT: #dbg_value(i8 [[NBITS_RES]], [[META1013:![0-9]+]], !DIExpression(), [[DBG1026]]) +; LZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED_RES]], [[META1014:![0-9]+]], !DIExpression(), [[DBG1027]]) +; LZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META1015:![0-9]+]], !DIExpression(), [[DBG1028]]) +; LZCNT-NEXT: #dbg_value(i8 [[IV_NEXT_RES]], [[META1016:![0-9]+]], !DIExpression(), [[DBG1029]]) ; LZCNT-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG1030:![0-9]+]] ; LZCNT-NEXT: ret i8 [[IV_RES]], !dbg [[DBG1031:![0-9]+]] ; @@ -3011,19 +3011,19 @@ define i8 @p38(i8 %val.crude, i8 %start, i8 
%extraoffset) { ; NOLZCNT-LABEL: @p38( ; NOLZCNT-NEXT: entry: ; NOLZCNT-NEXT: [[VAL:%.*]] = and i8 [[VAL_CRUDE:%.*]], 127, !dbg [[DBG1047:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL]], metadata [[META1036:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1047]] +; NOLZCNT-NEXT: #dbg_value(i8 [[VAL]], [[META1036:![0-9]+]], !DIExpression(), [[DBG1047]]) ; NOLZCNT-NEXT: br label [[LOOP:%.*]], !dbg [[DBG1048:![0-9]+]] ; NOLZCNT: loop: ; NOLZCNT-NEXT: [[IV:%.*]] = phi i8 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG1049:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META1037:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1049]] +; NOLZCNT-NEXT: #dbg_value(i8 [[IV]], [[META1037:![0-9]+]], !DIExpression(), [[DBG1049]]) ; NOLZCNT-NEXT: [[NBITS:%.*]] = add nsw i8 [[IV]], [[EXTRAOFFSET:%.*]], !dbg [[DBG1050:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS]], metadata [[META1038:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1050]] +; NOLZCNT-NEXT: #dbg_value(i8 [[NBITS]], [[META1038:![0-9]+]], !DIExpression(), [[DBG1050]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL]], [[NBITS]], !dbg [[DBG1051:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META1039:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1051]] +; NOLZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META1039:![0-9]+]], !DIExpression(), [[DBG1051]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i8 [[VAL_SHIFTED]], 0, !dbg [[DBG1052:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata [[META1040:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1052]] +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META1040:![0-9]+]], !DIExpression(), [[DBG1052]]) ; NOLZCNT-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1, !dbg [[DBG1053:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata 
[[META1041:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1053]] +; NOLZCNT-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META1041:![0-9]+]], !DIExpression(), [[DBG1053]]) ; NOLZCNT-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i8 [[IV_NEXT]]), !dbg [[DBG1054:![0-9]+]] ; NOLZCNT-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG1055:![0-9]+]] ; NOLZCNT: end: @@ -3032,18 +3032,18 @@ define i8 @p38(i8 %val.crude, i8 %start, i8 %extraoffset) { ; NOLZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG1058:![0-9]+]] ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG1059:![0-9]+]] ; NOLZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG1060:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META1042:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1056]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS_RES]], metadata [[META1043:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1057]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META1044:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1058]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META1045:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1059]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META1046:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1060]] +; NOLZCNT-NEXT: #dbg_value(i8 [[IV_RES]], [[META1042:![0-9]+]], !DIExpression(), [[DBG1056]]) +; NOLZCNT-NEXT: #dbg_value(i8 [[NBITS_RES]], [[META1043:![0-9]+]], !DIExpression(), [[DBG1057]]) +; NOLZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED_RES]], [[META1044:![0-9]+]], !DIExpression(), [[DBG1058]]) +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META1045:![0-9]+]], !DIExpression(), [[DBG1059]]) +; 
NOLZCNT-NEXT: #dbg_value(i8 [[IV_NEXT_RES]], [[META1046:![0-9]+]], !DIExpression(), [[DBG1060]]) ; NOLZCNT-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG1061:![0-9]+]] ; NOLZCNT-NEXT: ret i8 [[IV_RES]], !dbg [[DBG1062:![0-9]+]] ; ; LZCNT-LABEL: @p38( ; LZCNT-NEXT: entry: ; LZCNT-NEXT: [[VAL:%.*]] = and i8 [[VAL_CRUDE:%.*]], 127, !dbg [[DBG1045:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL]], metadata [[META1034:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1045]] +; LZCNT-NEXT: #dbg_value(i8 [[VAL]], [[META1034:![0-9]+]], !DIExpression(), [[DBG1045]]) ; LZCNT-NEXT: [[VAL_NUMLEADINGZEROS:%.*]] = call i8 @llvm.ctlz.i8(i8 [[VAL]], i1 false), !dbg [[DBG1046:![0-9]+]] ; LZCNT-NEXT: [[VAL_NUMACTIVEBITS:%.*]] = sub nuw nsw i8 8, [[VAL_NUMLEADINGZEROS]], !dbg [[DBG1046]] ; LZCNT-NEXT: [[TMP0:%.*]] = sub i8 0, [[EXTRAOFFSET:%.*]], !dbg [[DBG1047:![0-9]+]] @@ -3057,14 +3057,14 @@ define i8 @p38(i8 %val.crude, i8 %start, i8 %extraoffset) { ; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i8 [[LOOP_IV]], 1, !dbg [[DBG1048:![0-9]+]] ; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i8 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG1048]] ; LZCNT-NEXT: [[IV:%.*]] = add nsw i8 [[LOOP_IV]], [[START]], !dbg [[DBG1048]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META1035:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1046]] +; LZCNT-NEXT: #dbg_value(i8 [[IV]], [[META1035:![0-9]+]], !DIExpression(), [[DBG1046]]) ; LZCNT-NEXT: [[NBITS:%.*]] = add nsw i8 [[IV]], [[EXTRAOFFSET]], !dbg [[DBG1048]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS]], metadata [[META1036:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1048]] +; LZCNT-NEXT: #dbg_value(i8 [[NBITS]], [[META1036:![0-9]+]], !DIExpression(), [[DBG1048]]) ; LZCNT-NEXT: [[VAL_SHIFTED:%.*]] = ashr i8 [[VAL]], [[NBITS]], !dbg [[DBG1049:![0-9]+]] -; LZCNT-NEXT: call void 
@llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META1037:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1049]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[LOOP_IVCHECK]], metadata [[META1038:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1050:![0-9]+]] +; LZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META1037:![0-9]+]], !DIExpression(), [[DBG1049]]) +; LZCNT-NEXT: #dbg_value(i1 [[LOOP_IVCHECK]], [[META1038:![0-9]+]], !DIExpression(), [[META1050:![0-9]+]]) ; LZCNT-NEXT: [[IV_NEXT:%.*]] = add i8 [[IV]], 1, !dbg [[DBG1051:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata [[META1039:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1051]] +; LZCNT-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META1039:![0-9]+]], !DIExpression(), [[DBG1051]]) ; LZCNT-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[LOOP_IVCHECK]], i8 [[IV_NEXT]]), !dbg [[DBG1052:![0-9]+]] ; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG1053:![0-9]+]] ; LZCNT: end: @@ -3073,11 +3073,11 @@ define i8 @p38(i8 %val.crude, i8 %start, i8 %extraoffset) { ; LZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG1056:![0-9]+]] ; LZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[LOOP_IVCHECK]], [[LOOP]] ], !dbg [[DBG1057:![0-9]+]] ; LZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG1058:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META1040:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1054]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS_RES]], metadata [[META1041:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1055]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META1042:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1056]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META1043:![0-9]+]], metadata 
!DIExpression()), !dbg [[DBG1057]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META1044:![0-9]+]], metadata !DIExpression()), !dbg [[DBG1058]] +; LZCNT-NEXT: #dbg_value(i8 [[IV_RES]], [[META1040:![0-9]+]], !DIExpression(), [[DBG1054]]) +; LZCNT-NEXT: #dbg_value(i8 [[NBITS_RES]], [[META1041:![0-9]+]], !DIExpression(), [[DBG1055]]) +; LZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED_RES]], [[META1042:![0-9]+]], !DIExpression(), [[DBG1056]]) +; LZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META1043:![0-9]+]], !DIExpression(), [[DBG1057]]) +; LZCNT-NEXT: #dbg_value(i8 [[IV_NEXT_RES]], [[META1044:![0-9]+]], !DIExpression(), [[DBG1058]]) ; LZCNT-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG1059:![0-9]+]] ; LZCNT-NEXT: ret i8 [[IV_RES]], !dbg [[DBG1060:![0-9]+]] ; diff --git a/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll b/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll index 734d3bf0391150..b5f595c8145e13 100644 --- a/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll +++ b/llvm/test/Transforms/LoopIdiom/X86/left-shift-until-bittest.ll @@ -20,7 +20,7 @@ define i32 @p0_i32(i32 %x, i32 %bit) { ; ALL-NEXT: entry: ; ALL-NEXT: [[BIT_FR:%.*]] = freeze i32 [[BIT:%.*]] ; ALL-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT_FR]], !dbg [[DBG16:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], metadata [[META9:![0-9]+]], metadata !DIExpression()), !dbg [[DBG16]] +; ALL-NEXT: #dbg_value(i32 [[BITMASK]], [[META9:![0-9]+]], !DIExpression(), [[DBG16]]) ; ALL-NEXT: [[BIT_FR_LOWBITMASK:%.*]] = add i32 [[BITMASK]], -1, !dbg [[DBG17:![0-9]+]] ; ALL-NEXT: [[BIT_FR_MASK:%.*]] = or i32 [[BIT_FR_LOWBITMASK]], [[BITMASK]], !dbg [[DBG17]] ; ALL-NEXT: [[X_MASKED:%.*]] = and i32 [[X:%.*]], [[BIT_FR_MASK]], !dbg [[DBG17]] @@ -35,13 +35,13 @@ define i32 @p0_i32(i32 %x, i32 %bit) { ; ALL: loop: ; 
ALL-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG17]] ; ALL-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], !dbg [[DBG17]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP0]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG17]] +; ALL-NEXT: #dbg_value(i32 [[TMP0]], [[META11:![0-9]+]], !DIExpression(), [[DBG17]]) ; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[TMP0]], [[BITMASK]], !dbg [[DBG19:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], metadata [[META12:![0-9]+]], metadata !DIExpression()), !dbg [[DBG19]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR_BITMASKED]], [[META12:![0-9]+]], !DIExpression(), [[DBG19]]) ; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, !dbg [[DBG20:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META13:![0-9]+]], metadata !DIExpression()), !dbg [[DBG20]] +; ALL-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META13:![0-9]+]], !DIExpression(), [[DBG20]]) ; ALL-NEXT: [[TMP1]] = shl i32 [[TMP0]], 1, !dbg [[DBG21:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP1]], metadata [[META15:![0-9]+]], metadata !DIExpression()), !dbg [[DBG21]] +; ALL-NEXT: #dbg_value(i32 [[TMP1]], [[META15:![0-9]+]], !DIExpression(), [[DBG21]]) ; ALL-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i32 [[LOOP_IV]], 1, !dbg [[DBG22:![0-9]+]] ; ALL-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i32 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG22]] ; ALL-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG22]] @@ -70,7 +70,7 @@ define i16 @p1_i16(i16 %x, i16 %bit) { ; LZCNT-NEXT: entry: ; LZCNT-NEXT: [[BIT_FR:%.*]] = freeze i16 [[BIT:%.*]] ; LZCNT-NEXT: [[BITMASK:%.*]] = shl i16 1, [[BIT_FR]], !dbg [[DBG32:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i16 [[BITMASK]], metadata [[META26:![0-9]+]], metadata !DIExpression()), 
!dbg [[DBG32]] +; LZCNT-NEXT: #dbg_value(i16 [[BITMASK]], [[META26:![0-9]+]], !DIExpression(), [[DBG32]]) ; LZCNT-NEXT: [[BIT_FR_LOWBITMASK:%.*]] = add i16 [[BITMASK]], -1, !dbg [[DBG33:![0-9]+]] ; LZCNT-NEXT: [[BIT_FR_MASK:%.*]] = or i16 [[BIT_FR_LOWBITMASK]], [[BITMASK]], !dbg [[DBG33]] ; LZCNT-NEXT: [[X_MASKED:%.*]] = and i16 [[X:%.*]], [[BIT_FR_MASK]], !dbg [[DBG33]] @@ -85,13 +85,13 @@ define i16 @p1_i16(i16 %x, i16 %bit) { ; LZCNT: loop: ; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG33]] ; LZCNT-NEXT: [[TMP0:%.*]] = phi i16 [ [[X]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], !dbg [[DBG33]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i16 [[TMP0]], metadata [[META28:![0-9]+]], metadata !DIExpression()), !dbg [[DBG33]] +; LZCNT-NEXT: #dbg_value(i16 [[TMP0]], [[META28:![0-9]+]], !DIExpression(), [[DBG33]]) ; LZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i16 [[TMP0]], [[BITMASK]], !dbg [[DBG35:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i16 [[X_CURR_BITMASKED]], metadata [[META29:![0-9]+]], metadata !DIExpression()), !dbg [[DBG35]] +; LZCNT-NEXT: #dbg_value(i16 [[X_CURR_BITMASKED]], [[META29:![0-9]+]], !DIExpression(), [[DBG35]]) ; LZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i16 [[X_CURR_BITMASKED]], 0, !dbg [[DBG36:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META30:![0-9]+]], metadata !DIExpression()), !dbg [[DBG36]] +; LZCNT-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META30:![0-9]+]], !DIExpression(), [[DBG36]]) ; LZCNT-NEXT: [[TMP1]] = shl i16 [[TMP0]], 1, !dbg [[DBG37:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i16 [[TMP1]], metadata [[META31:![0-9]+]], metadata !DIExpression()), !dbg [[DBG37]] +; LZCNT-NEXT: #dbg_value(i16 [[TMP1]], [[META31:![0-9]+]], !DIExpression(), [[DBG37]]) ; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i16 [[LOOP_IV]], 1, !dbg [[DBG38:![0-9]+]] ; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp 
eq i16 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG38]] ; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG38]] @@ -102,17 +102,17 @@ define i16 @p1_i16(i16 %x, i16 %bit) { ; NOLZCNT-LABEL: @p1_i16( ; NOLZCNT-NEXT: entry: ; NOLZCNT-NEXT: [[BITMASK:%.*]] = shl i16 1, [[BIT:%.*]], !dbg [[DBG32:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i16 [[BITMASK]], metadata [[META26:![0-9]+]], metadata !DIExpression()), !dbg [[DBG32]] +; NOLZCNT-NEXT: #dbg_value(i16 [[BITMASK]], [[META26:![0-9]+]], !DIExpression(), [[DBG32]]) ; NOLZCNT-NEXT: br label [[LOOP:%.*]], !dbg [[DBG33:![0-9]+]] ; NOLZCNT: loop: ; NOLZCNT-NEXT: [[X_CURR:%.*]] = phi i16 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG34:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i16 [[X_CURR]], metadata [[META28:![0-9]+]], metadata !DIExpression()), !dbg [[DBG34]] +; NOLZCNT-NEXT: #dbg_value(i16 [[X_CURR]], [[META28:![0-9]+]], !DIExpression(), [[DBG34]]) ; NOLZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i16 [[X_CURR]], [[BITMASK]], !dbg [[DBG35:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i16 [[X_CURR_BITMASKED]], metadata [[META29:![0-9]+]], metadata !DIExpression()), !dbg [[DBG35]] +; NOLZCNT-NEXT: #dbg_value(i16 [[X_CURR_BITMASKED]], [[META29:![0-9]+]], !DIExpression(), [[DBG35]]) ; NOLZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i16 [[X_CURR_BITMASKED]], 0, !dbg [[DBG36:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META30:![0-9]+]], metadata !DIExpression()), !dbg [[DBG36]] +; NOLZCNT-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META30:![0-9]+]], !DIExpression(), [[DBG36]]) ; NOLZCNT-NEXT: [[X_NEXT]] = shl i16 [[X_CURR]], 1, !dbg [[DBG37:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i16 [[X_NEXT]], metadata [[META31:![0-9]+]], metadata !DIExpression()), !dbg [[DBG37]] +; NOLZCNT-NEXT: #dbg_value(i16 [[X_NEXT]], [[META31:![0-9]+]], 
!DIExpression(), [[DBG37]]) ; NOLZCNT-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], !dbg [[DBG38:![0-9]+]] ; NOLZCNT: end: ; NOLZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i16 [ [[X_CURR]], [[LOOP]] ], !dbg [[DBG34]] @@ -139,7 +139,7 @@ define i32 @p2_different_liveout(i32 %x, i32 %bit) { ; ALL-NEXT: entry: ; ALL-NEXT: [[BIT_FR:%.*]] = freeze i32 [[BIT:%.*]] ; ALL-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT_FR]], !dbg [[DBG47:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], metadata [[META42:![0-9]+]], metadata !DIExpression()), !dbg [[DBG47]] +; ALL-NEXT: #dbg_value(i32 [[BITMASK]], [[META42:![0-9]+]], !DIExpression(), [[DBG47]]) ; ALL-NEXT: [[BIT_FR_LOWBITMASK:%.*]] = add i32 [[BITMASK]], -1, !dbg [[DBG48:![0-9]+]] ; ALL-NEXT: [[BIT_FR_MASK:%.*]] = or i32 [[BIT_FR_LOWBITMASK]], [[BITMASK]], !dbg [[DBG48]] ; ALL-NEXT: [[X_MASKED:%.*]] = and i32 [[X:%.*]], [[BIT_FR_MASK]], !dbg [[DBG48]] @@ -154,13 +154,13 @@ define i32 @p2_different_liveout(i32 %x, i32 %bit) { ; ALL: loop: ; ALL-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG48]] ; ALL-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], !dbg [[DBG48]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP0]], metadata [[META43:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48]] +; ALL-NEXT: #dbg_value(i32 [[TMP0]], [[META43:![0-9]+]], !DIExpression(), [[DBG48]]) ; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[TMP0]], [[BITMASK]], !dbg [[DBG50:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], metadata [[META44:![0-9]+]], metadata !DIExpression()), !dbg [[DBG50]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR_BITMASKED]], [[META44:![0-9]+]], !DIExpression(), [[DBG50]]) ; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, !dbg [[DBG51:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META45:![0-9]+]], 
metadata !DIExpression()), !dbg [[DBG51]] +; ALL-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META45:![0-9]+]], !DIExpression(), [[DBG51]]) ; ALL-NEXT: [[TMP1]] = shl i32 [[TMP0]], 1, !dbg [[DBG52:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP1]], metadata [[META46:![0-9]+]], metadata !DIExpression()), !dbg [[DBG52]] +; ALL-NEXT: #dbg_value(i32 [[TMP1]], [[META46:![0-9]+]], !DIExpression(), [[DBG52]]) ; ALL-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i32 [[LOOP_IV]], 1, !dbg [[DBG53:![0-9]+]] ; ALL-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i32 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG53]] ; ALL-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG53]] @@ -199,13 +199,13 @@ define void @p3_constant_mask_24thbit(i32 %x, ptr %p0, ptr %p1) { ; ALL: loop: ; ALL-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG61]] ; ALL-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], !dbg [[DBG61]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP0]], metadata [[META57:![0-9]+]], metadata !DIExpression()), !dbg [[DBG61]] +; ALL-NEXT: #dbg_value(i32 [[TMP0]], [[META57:![0-9]+]], !DIExpression(), [[DBG61]]) ; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[TMP0]], 16777216, !dbg [[DBG63:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], metadata [[META58:![0-9]+]], metadata !DIExpression()), !dbg [[DBG63]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR_BITMASKED]], [[META58:![0-9]+]], !DIExpression(), [[DBG63]]) ; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, !dbg [[DBG64:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META59:![0-9]+]], metadata !DIExpression()), !dbg [[DBG64]] +; ALL-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META59:![0-9]+]], !DIExpression(), [[DBG64]]) ; ALL-NEXT: [[TMP1]] = shl i32 [[TMP0]], 1, !dbg [[DBG65:![0-9]+]] -; ALL-NEXT: call void 
@llvm.dbg.value(metadata i32 [[TMP1]], metadata [[META60:![0-9]+]], metadata !DIExpression()), !dbg [[DBG65]] +; ALL-NEXT: #dbg_value(i32 [[TMP1]], [[META60:![0-9]+]], !DIExpression(), [[DBG65]]) ; ALL-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i32 [[LOOP_IV]], 1, !dbg [[DBG66:![0-9]+]] ; ALL-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i32 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG66]] ; ALL-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG66]] @@ -247,13 +247,13 @@ define void @p4_constant_mask_15thbit(i32 %x, ptr %p0, ptr %p1) { ; ALL: loop: ; ALL-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG76]] ; ALL-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], !dbg [[DBG76]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP0]], metadata [[META72:![0-9]+]], metadata !DIExpression()), !dbg [[DBG76]] +; ALL-NEXT: #dbg_value(i32 [[TMP0]], [[META72:![0-9]+]], !DIExpression(), [[DBG76]]) ; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[TMP0]], 32768, !dbg [[DBG78:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], metadata [[META73:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR_BITMASKED]], [[META73:![0-9]+]], !DIExpression(), [[DBG78]]) ; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, !dbg [[DBG79:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META74:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]] +; ALL-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META74:![0-9]+]], !DIExpression(), [[DBG79]]) ; ALL-NEXT: [[TMP1]] = shl i32 [[TMP0]], 1, !dbg [[DBG80:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP1]], metadata [[META75:![0-9]+]], metadata !DIExpression()), !dbg [[DBG80]] +; ALL-NEXT: #dbg_value(i32 [[TMP1]], [[META75:![0-9]+]], !DIExpression(), [[DBG80]]) ; ALL-NEXT: [[LOOP_IV_NEXT]] = add nuw 
nsw i32 [[LOOP_IV]], 1, !dbg [[DBG81:![0-9]+]] ; ALL-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i32 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG81]] ; ALL-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG81]] @@ -286,7 +286,7 @@ define void @p5_nuw(i32 %x, i32 %bit, ptr %p0, ptr %p1) { ; ALL-NEXT: entry: ; ALL-NEXT: [[BIT_FR:%.*]] = freeze i32 [[BIT:%.*]] ; ALL-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT_FR]], !dbg [[DBG92:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], metadata [[META87:![0-9]+]], metadata !DIExpression()), !dbg [[DBG92]] +; ALL-NEXT: #dbg_value(i32 [[BITMASK]], [[META87:![0-9]+]], !DIExpression(), [[DBG92]]) ; ALL-NEXT: [[BIT_FR_LOWBITMASK:%.*]] = add i32 [[BITMASK]], -1, !dbg [[DBG93:![0-9]+]] ; ALL-NEXT: [[BIT_FR_MASK:%.*]] = or i32 [[BIT_FR_LOWBITMASK]], [[BITMASK]], !dbg [[DBG93]] ; ALL-NEXT: [[X_MASKED:%.*]] = and i32 [[X:%.*]], [[BIT_FR_MASK]], !dbg [[DBG93]] @@ -301,13 +301,13 @@ define void @p5_nuw(i32 %x, i32 %bit, ptr %p0, ptr %p1) { ; ALL: loop: ; ALL-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG93]] ; ALL-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], !dbg [[DBG93]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP0]], metadata [[META88:![0-9]+]], metadata !DIExpression()), !dbg [[DBG93]] +; ALL-NEXT: #dbg_value(i32 [[TMP0]], [[META88:![0-9]+]], !DIExpression(), [[DBG93]]) ; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[TMP0]], [[BITMASK]], !dbg [[DBG95:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], metadata [[META89:![0-9]+]], metadata !DIExpression()), !dbg [[DBG95]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR_BITMASKED]], [[META89:![0-9]+]], !DIExpression(), [[DBG95]]) ; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, !dbg [[DBG96:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata 
[[META90:![0-9]+]], metadata !DIExpression()), !dbg [[DBG96]] +; ALL-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META90:![0-9]+]], !DIExpression(), [[DBG96]]) ; ALL-NEXT: [[TMP1]] = shl nuw i32 [[TMP0]], 1, !dbg [[DBG97:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP1]], metadata [[META91:![0-9]+]], metadata !DIExpression()), !dbg [[DBG97]] +; ALL-NEXT: #dbg_value(i32 [[TMP1]], [[META91:![0-9]+]], !DIExpression(), [[DBG97]]) ; ALL-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i32 [[LOOP_IV]], 1, !dbg [[DBG98:![0-9]+]] ; ALL-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i32 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG98]] ; ALL-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG98]] @@ -339,7 +339,7 @@ define void @p6_nsw(i32 %x, i32 %bit, ptr %p0, ptr %p1) { ; ALL-NEXT: entry: ; ALL-NEXT: [[BIT_FR:%.*]] = freeze i32 [[BIT:%.*]] ; ALL-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT_FR]], !dbg [[DBG109:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], metadata [[META104:![0-9]+]], metadata !DIExpression()), !dbg [[DBG109]] +; ALL-NEXT: #dbg_value(i32 [[BITMASK]], [[META104:![0-9]+]], !DIExpression(), [[DBG109]]) ; ALL-NEXT: [[BIT_FR_LOWBITMASK:%.*]] = add i32 [[BITMASK]], -1, !dbg [[DBG110:![0-9]+]] ; ALL-NEXT: [[BIT_FR_MASK:%.*]] = or i32 [[BIT_FR_LOWBITMASK]], [[BITMASK]], !dbg [[DBG110]] ; ALL-NEXT: [[X_MASKED:%.*]] = and i32 [[X:%.*]], [[BIT_FR_MASK]], !dbg [[DBG110]] @@ -354,13 +354,13 @@ define void @p6_nsw(i32 %x, i32 %bit, ptr %p0, ptr %p1) { ; ALL: loop: ; ALL-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG110]] ; ALL-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], !dbg [[DBG110]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP0]], metadata [[META105:![0-9]+]], metadata !DIExpression()), !dbg [[DBG110]] +; ALL-NEXT: #dbg_value(i32 [[TMP0]], [[META105:![0-9]+]], !DIExpression(), [[DBG110]]) ; ALL-NEXT: 
[[X_CURR_BITMASKED:%.*]] = and i32 [[TMP0]], [[BITMASK]], !dbg [[DBG112:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], metadata [[META106:![0-9]+]], metadata !DIExpression()), !dbg [[DBG112]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR_BITMASKED]], [[META106:![0-9]+]], !DIExpression(), [[DBG112]]) ; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, !dbg [[DBG113:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META107:![0-9]+]], metadata !DIExpression()), !dbg [[DBG113]] +; ALL-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META107:![0-9]+]], !DIExpression(), [[DBG113]]) ; ALL-NEXT: [[TMP1]] = shl nsw i32 [[TMP0]], 1, !dbg [[DBG114:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP1]], metadata [[META108:![0-9]+]], metadata !DIExpression()), !dbg [[DBG114]] +; ALL-NEXT: #dbg_value(i32 [[TMP1]], [[META108:![0-9]+]], !DIExpression(), [[DBG114]]) ; ALL-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i32 [[LOOP_IV]], 1, !dbg [[DBG115:![0-9]+]] ; ALL-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i32 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG115]] ; ALL-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG115]] @@ -392,7 +392,7 @@ define void @p7_nuwnsw(i32 %x, i32 %bit, ptr %p0, ptr %p1) { ; ALL-NEXT: entry: ; ALL-NEXT: [[BIT_FR:%.*]] = freeze i32 [[BIT:%.*]] ; ALL-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT_FR]], !dbg [[DBG126:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], metadata [[META121:![0-9]+]], metadata !DIExpression()), !dbg [[DBG126]] +; ALL-NEXT: #dbg_value(i32 [[BITMASK]], [[META121:![0-9]+]], !DIExpression(), [[DBG126]]) ; ALL-NEXT: [[BIT_FR_LOWBITMASK:%.*]] = add i32 [[BITMASK]], -1, !dbg [[DBG127:![0-9]+]] ; ALL-NEXT: [[BIT_FR_MASK:%.*]] = or i32 [[BIT_FR_LOWBITMASK]], [[BITMASK]], !dbg [[DBG127]] ; ALL-NEXT: [[X_MASKED:%.*]] = and i32 [[X:%.*]], [[BIT_FR_MASK]], !dbg [[DBG127]] @@ -407,13 +407,13 @@ define 
void @p7_nuwnsw(i32 %x, i32 %bit, ptr %p0, ptr %p1) { ; ALL: loop: ; ALL-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG127]] ; ALL-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], !dbg [[DBG127]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP0]], metadata [[META122:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]] +; ALL-NEXT: #dbg_value(i32 [[TMP0]], [[META122:![0-9]+]], !DIExpression(), [[DBG127]]) ; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[TMP0]], [[BITMASK]], !dbg [[DBG129:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], metadata [[META123:![0-9]+]], metadata !DIExpression()), !dbg [[DBG129]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR_BITMASKED]], [[META123:![0-9]+]], !DIExpression(), [[DBG129]]) ; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, !dbg [[DBG130:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META124:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130]] +; ALL-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META124:![0-9]+]], !DIExpression(), [[DBG130]]) ; ALL-NEXT: [[TMP1]] = shl nuw nsw i32 [[TMP0]], 1, !dbg [[DBG131:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP1]], metadata [[META125:![0-9]+]], metadata !DIExpression()), !dbg [[DBG131]] +; ALL-NEXT: #dbg_value(i32 [[TMP1]], [[META125:![0-9]+]], !DIExpression(), [[DBG131]]) ; ALL-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i32 [[LOOP_IV]], 1, !dbg [[DBG132:![0-9]+]] ; ALL-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i32 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG132]] ; ALL-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG132]] @@ -456,13 +456,13 @@ define void @p8_constant_mask_signbit_noncanonical(i32 %x, ptr %p0, ptr %p1) { ; ALL: loop: ; ALL-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG142]] ; 
ALL-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], !dbg [[DBG142]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP0]], metadata [[META138:![0-9]+]], metadata !DIExpression()), !dbg [[DBG142]] +; ALL-NEXT: #dbg_value(i32 [[TMP0]], [[META138:![0-9]+]], !DIExpression(), [[DBG142]]) ; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[TMP0]], -2147483648, !dbg [[DBG144:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], metadata [[META139:![0-9]+]], metadata !DIExpression()), !dbg [[DBG144]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR_BITMASKED]], [[META139:![0-9]+]], !DIExpression(), [[DBG144]]) ; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, !dbg [[DBG145:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META140:![0-9]+]], metadata !DIExpression()), !dbg [[DBG145]] +; ALL-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META140:![0-9]+]], !DIExpression(), [[DBG145]]) ; ALL-NEXT: [[TMP1]] = shl i32 [[TMP0]], 1, !dbg [[DBG146:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP1]], metadata [[META141:![0-9]+]], metadata !DIExpression()), !dbg [[DBG146]] +; ALL-NEXT: #dbg_value(i32 [[TMP1]], [[META141:![0-9]+]], !DIExpression(), [[DBG146]]) ; ALL-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i32 [[LOOP_IV]], 1, !dbg [[DBG147:![0-9]+]] ; ALL-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i32 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG147]] ; ALL-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG147]] @@ -504,11 +504,11 @@ define void @p9_constant_mask_signbit_canonical(i32 %x, ptr %p0, ptr %p1) { ; ALL: loop: ; ALL-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG156]] ; ALL-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], !dbg [[DBG156]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP0]], metadata [[META153:![0-9]+]], 
metadata !DIExpression()), !dbg [[DBG156]] +; ALL-NEXT: #dbg_value(i32 [[TMP0]], [[META153:![0-9]+]], !DIExpression(), [[DBG156]]) ; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp sgt i32 [[TMP0]], -1, !dbg [[DBG158:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META154:![0-9]+]], metadata !DIExpression()), !dbg [[DBG158]] +; ALL-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META154:![0-9]+]], !DIExpression(), [[DBG158]]) ; ALL-NEXT: [[TMP1]] = shl i32 [[TMP0]], 1, !dbg [[DBG159:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP1]], metadata [[META155:![0-9]+]], metadata !DIExpression()), !dbg [[DBG159]] +; ALL-NEXT: #dbg_value(i32 [[TMP1]], [[META155:![0-9]+]], !DIExpression(), [[DBG159]]) ; ALL-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i32 [[LOOP_IV]], 1, !dbg [[DBG160:![0-9]+]] ; ALL-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i32 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG160]] ; ALL-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG160]] @@ -539,7 +539,7 @@ define void @p10_x_is_not_one(i32 %bit, ptr %p0, ptr %p1) { ; ALL-NEXT: entry: ; ALL-NEXT: [[BIT_FR:%.*]] = freeze i32 [[BIT:%.*]] ; ALL-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT_FR]], !dbg [[DBG171:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], metadata [[META166:![0-9]+]], metadata !DIExpression()), !dbg [[DBG171]] +; ALL-NEXT: #dbg_value(i32 [[BITMASK]], [[META166:![0-9]+]], !DIExpression(), [[DBG171]]) ; ALL-NEXT: [[BIT_FR_LOWBITMASK:%.*]] = add i32 [[BITMASK]], -1, !dbg [[DBG172:![0-9]+]] ; ALL-NEXT: [[BIT_FR_MASK:%.*]] = or i32 [[BIT_FR_LOWBITMASK]], [[BITMASK]], !dbg [[DBG172]] ; ALL-NEXT: [[DOTMASKED:%.*]] = and i32 2, [[BIT_FR_MASK]], !dbg [[DBG172]] @@ -554,13 +554,13 @@ define void @p10_x_is_not_one(i32 %bit, ptr %p0, ptr %p1) { ; ALL: loop: ; ALL-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG172]] ; ALL-NEXT: [[TMP0:%.*]] = phi i32 [ 2, 
[[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], !dbg [[DBG172]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP0]], metadata [[META167:![0-9]+]], metadata !DIExpression()), !dbg [[DBG172]] +; ALL-NEXT: #dbg_value(i32 [[TMP0]], [[META167:![0-9]+]], !DIExpression(), [[DBG172]]) ; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[TMP0]], [[BITMASK]], !dbg [[DBG174:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], metadata [[META168:![0-9]+]], metadata !DIExpression()), !dbg [[DBG174]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR_BITMASKED]], [[META168:![0-9]+]], !DIExpression(), [[DBG174]]) ; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, !dbg [[DBG175:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META169:![0-9]+]], metadata !DIExpression()), !dbg [[DBG175]] +; ALL-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META169:![0-9]+]], !DIExpression(), [[DBG175]]) ; ALL-NEXT: [[TMP1]] = shl i32 [[TMP0]], 1, !dbg [[DBG176:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP1]], metadata [[META170:![0-9]+]], metadata !DIExpression()), !dbg [[DBG176]] +; ALL-NEXT: #dbg_value(i32 [[TMP1]], [[META170:![0-9]+]], !DIExpression(), [[DBG176]]) ; ALL-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i32 [[LOOP_IV]], 1, !dbg [[DBG177:![0-9]+]] ; ALL-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i32 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG177]] ; ALL-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG177]] @@ -594,7 +594,7 @@ define i32 @p11(i32 %x, i32 %bit) { ; ALL-NEXT: entry: ; ALL-NEXT: [[BIT_FR:%.*]] = freeze i32 [[BIT:%.*]] ; ALL-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT_FR]], !dbg [[DBG188:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], metadata [[META183:![0-9]+]], metadata !DIExpression()), !dbg [[DBG188]] +; ALL-NEXT: #dbg_value(i32 [[BITMASK]], [[META183:![0-9]+]], !DIExpression(), [[DBG188]]) ; ALL-NEXT: 
[[BIT_FR_LOWBITMASK:%.*]] = add i32 [[BITMASK]], -1, !dbg [[DBG189:![0-9]+]] ; ALL-NEXT: [[BIT_FR_MASK:%.*]] = or i32 [[BIT_FR_LOWBITMASK]], [[BITMASK]], !dbg [[DBG189]] ; ALL-NEXT: [[X_MASKED:%.*]] = and i32 [[X:%.*]], [[BIT_FR_MASK]], !dbg [[DBG189]] @@ -609,13 +609,13 @@ define i32 @p11(i32 %x, i32 %bit) { ; ALL: loop: ; ALL-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG189]] ; ALL-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], !dbg [[DBG189]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP0]], metadata [[META184:![0-9]+]], metadata !DIExpression()), !dbg [[DBG189]] +; ALL-NEXT: #dbg_value(i32 [[TMP0]], [[META184:![0-9]+]], !DIExpression(), [[DBG189]]) ; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[TMP0]], [[BITMASK]], !dbg [[DBG191:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], metadata [[META185:![0-9]+]], metadata !DIExpression()), !dbg [[DBG191]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR_BITMASKED]], [[META185:![0-9]+]], !DIExpression(), [[DBG191]]) ; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp ne i32 [[X_CURR_BITMASKED]], 0, !dbg [[DBG192:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META186:![0-9]+]], metadata !DIExpression()), !dbg [[DBG192]] +; ALL-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META186:![0-9]+]], !DIExpression(), [[DBG192]]) ; ALL-NEXT: [[TMP1]] = shl i32 [[TMP0]], 1, !dbg [[DBG193:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP1]], metadata [[META187:![0-9]+]], metadata !DIExpression()), !dbg [[DBG193]] +; ALL-NEXT: #dbg_value(i32 [[TMP1]], [[META187:![0-9]+]], !DIExpression(), [[DBG193]]) ; ALL-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i32 [[LOOP_IV]], 1, !dbg [[DBG194:![0-9]+]] ; ALL-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i32 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG194]] ; ALL-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], 
!dbg [[DBG194]] @@ -644,7 +644,7 @@ define i32 @p12(i32 %x, i32 %bit) { ; ALL-NEXT: entry: ; ALL-NEXT: [[BIT_FR:%.*]] = freeze i32 [[BIT:%.*]] ; ALL-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT_FR]], !dbg [[DBG203:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], metadata [[META198:![0-9]+]], metadata !DIExpression()), !dbg [[DBG203]] +; ALL-NEXT: #dbg_value(i32 [[BITMASK]], [[META198:![0-9]+]], !DIExpression(), [[DBG203]]) ; ALL-NEXT: [[BIT_FR_LOWBITMASK:%.*]] = add i32 [[BITMASK]], -1, !dbg [[DBG204:![0-9]+]] ; ALL-NEXT: [[BIT_FR_MASK:%.*]] = or i32 [[BIT_FR_LOWBITMASK]], [[BITMASK]], !dbg [[DBG204]] ; ALL-NEXT: [[X_MASKED:%.*]] = and i32 [[X:%.*]], [[BIT_FR_MASK]], !dbg [[DBG204]] @@ -659,13 +659,13 @@ define i32 @p12(i32 %x, i32 %bit) { ; ALL: loop: ; ALL-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG204]] ; ALL-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], !dbg [[DBG204]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP0]], metadata [[META199:![0-9]+]], metadata !DIExpression()), !dbg [[DBG204]] +; ALL-NEXT: #dbg_value(i32 [[TMP0]], [[META199:![0-9]+]], !DIExpression(), [[DBG204]]) ; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[BITMASK]], [[TMP0]], !dbg [[DBG206:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], metadata [[META200:![0-9]+]], metadata !DIExpression()), !dbg [[DBG206]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR_BITMASKED]], [[META200:![0-9]+]], !DIExpression(), [[DBG206]]) ; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, !dbg [[DBG207:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META201:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]] +; ALL-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META201:![0-9]+]], !DIExpression(), [[DBG207]]) ; ALL-NEXT: [[TMP1]] = shl i32 [[TMP0]], 1, !dbg [[DBG208:![0-9]+]] -; ALL-NEXT: call 
void @llvm.dbg.value(metadata i32 [[TMP1]], metadata [[META202:![0-9]+]], metadata !DIExpression()), !dbg [[DBG208]] +; ALL-NEXT: #dbg_value(i32 [[TMP1]], [[META202:![0-9]+]], !DIExpression(), [[DBG208]]) ; ALL-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i32 [[LOOP_IV]], 1, !dbg [[DBG209:![0-9]+]] ; ALL-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i32 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG209]] ; ALL-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG209]] @@ -695,7 +695,7 @@ define i32 @p13(i32 %x, i32 %bit) { ; ALL-NEXT: entry: ; ALL-NEXT: [[BIT_FR:%.*]] = freeze i32 [[BIT:%.*]] ; ALL-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT_FR]], !dbg [[DBG218:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], metadata [[META213:![0-9]+]], metadata !DIExpression()), !dbg [[DBG218]] +; ALL-NEXT: #dbg_value(i32 [[BITMASK]], [[META213:![0-9]+]], !DIExpression(), [[DBG218]]) ; ALL-NEXT: [[BIT_FR_LOWBITMASK:%.*]] = add i32 [[BITMASK]], -1, !dbg [[DBG219:![0-9]+]] ; ALL-NEXT: [[BIT_FR_MASK:%.*]] = or i32 [[BIT_FR_LOWBITMASK]], [[BITMASK]], !dbg [[DBG219]] ; ALL-NEXT: [[X_MASKED:%.*]] = and i32 [[X:%.*]], [[BIT_FR_MASK]], !dbg [[DBG219]] @@ -710,13 +710,13 @@ define i32 @p13(i32 %x, i32 %bit) { ; ALL: loop: ; ALL-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG219]] ; ALL-NEXT: [[TMP0:%.*]] = phi i32 [ [[TMP1:%.*]], [[LOOP]] ], [ [[X]], [[ENTRY]] ], !dbg [[DBG219]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP0]], metadata [[META214:![0-9]+]], metadata !DIExpression()), !dbg [[DBG219]] +; ALL-NEXT: #dbg_value(i32 [[TMP0]], [[META214:![0-9]+]], !DIExpression(), [[DBG219]]) ; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[TMP0]], [[BITMASK]], !dbg [[DBG221:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], metadata [[META215:![0-9]+]], metadata !DIExpression()), !dbg [[DBG221]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR_BITMASKED]], [[META215:![0-9]+]], 
!DIExpression(), [[DBG221]]) ; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, !dbg [[DBG222:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META216:![0-9]+]], metadata !DIExpression()), !dbg [[DBG222]] +; ALL-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META216:![0-9]+]], !DIExpression(), [[DBG222]]) ; ALL-NEXT: [[TMP1]] = shl i32 [[TMP0]], 1, !dbg [[DBG223:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP1]], metadata [[META217:![0-9]+]], metadata !DIExpression()), !dbg [[DBG223]] +; ALL-NEXT: #dbg_value(i32 [[TMP1]], [[META217:![0-9]+]], !DIExpression(), [[DBG223]]) ; ALL-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i32 [[LOOP_IV]], 1, !dbg [[DBG224:![0-9]+]] ; ALL-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i32 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG224]] ; ALL-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG224]] @@ -755,11 +755,11 @@ define i32 @p14(i32 %x) { ; ALL: loop: ; ALL-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG231]] ; ALL-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], !dbg [[DBG231]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP0]], metadata [[META228:![0-9]+]], metadata !DIExpression()), !dbg [[DBG231]] +; ALL-NEXT: #dbg_value(i32 [[TMP0]], [[META228:![0-9]+]], !DIExpression(), [[DBG231]]) ; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp slt i32 [[TMP0]], 0, !dbg [[DBG233:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META229:![0-9]+]], metadata !DIExpression()), !dbg [[DBG233]] +; ALL-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META229:![0-9]+]], !DIExpression(), [[DBG233]]) ; ALL-NEXT: [[TMP1]] = shl i32 [[TMP0]], 1, !dbg [[DBG234:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP1]], metadata [[META230:![0-9]+]], metadata !DIExpression()), !dbg [[DBG234]] +; ALL-NEXT: 
#dbg_value(i32 [[TMP1]], [[META230:![0-9]+]], !DIExpression(), [[DBG234]]) ; ALL-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i32 [[LOOP_IV]], 1, !dbg [[DBG235:![0-9]+]] ; ALL-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i32 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG235]] ; ALL-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG235]] @@ -790,15 +790,15 @@ define i32 @n15(i32 %x, i32 %bit) { ; ALL-NEXT: br label [[LOOP:%.*]], !dbg [[DBG244:![0-9]+]] ; ALL: loop: ; ALL-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG245:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], metadata [[META239:![0-9]+]], metadata !DIExpression()), !dbg [[DBG245]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR]], [[META239:![0-9]+]], !DIExpression(), [[DBG245]]) ; ALL-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], !dbg [[DBG246:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], metadata [[META240:![0-9]+]], metadata !DIExpression()), !dbg [[DBG246]] +; ALL-NEXT: #dbg_value(i32 [[BITMASK]], [[META240:![0-9]+]], !DIExpression(), [[DBG246]]) ; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], !dbg [[DBG247:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], metadata [[META241:![0-9]+]], metadata !DIExpression()), !dbg [[DBG247]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR_BITMASKED]], [[META241:![0-9]+]], !DIExpression(), [[DBG247]]) ; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, !dbg [[DBG248:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META242:![0-9]+]], metadata !DIExpression()), !dbg [[DBG248]] +; ALL-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META242:![0-9]+]], !DIExpression(), [[DBG248]]) ; ALL-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, !dbg [[DBG249:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], metadata [[META243:![0-9]+]], 
metadata !DIExpression()), !dbg [[DBG249]] +; ALL-NEXT: #dbg_value(i32 [[X_NEXT]], [[META243:![0-9]+]], !DIExpression(), [[DBG249]]) ; ALL-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], !dbg [[DBG250:![0-9]+]] ; ALL: end: ; ALL-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], !dbg [[DBG245]] @@ -826,15 +826,15 @@ define i32 @n16(i32 %x, i32 %bit) { ; ALL-NEXT: br label [[LOOP:%.*]], !dbg [[DBG259:![0-9]+]] ; ALL: loop: ; ALL-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG260:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], metadata [[META254:![0-9]+]], metadata !DIExpression()), !dbg [[DBG260]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR]], [[META254:![0-9]+]], !DIExpression(), [[DBG260]]) ; ALL-NEXT: [[BITMASK:%.*]] = call i32 @gen32(), !dbg [[DBG261:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], metadata [[META255:![0-9]+]], metadata !DIExpression()), !dbg [[DBG261]] +; ALL-NEXT: #dbg_value(i32 [[BITMASK]], [[META255:![0-9]+]], !DIExpression(), [[DBG261]]) ; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], !dbg [[DBG262:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], metadata [[META256:![0-9]+]], metadata !DIExpression()), !dbg [[DBG262]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR_BITMASKED]], [[META256:![0-9]+]], !DIExpression(), [[DBG262]]) ; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, !dbg [[DBG263:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META257:![0-9]+]], metadata !DIExpression()), !dbg [[DBG263]] +; ALL-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META257:![0-9]+]], !DIExpression(), [[DBG263]]) ; ALL-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, !dbg [[DBG264:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], metadata [[META258:![0-9]+]], metadata !DIExpression()), !dbg 
[[DBG264]] +; ALL-NEXT: #dbg_value(i32 [[X_NEXT]], [[META258:![0-9]+]], !DIExpression(), [[DBG264]]) ; ALL-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], !dbg [[DBG265:![0-9]+]] ; ALL: end: ; ALL-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], !dbg [[DBG260]] @@ -860,17 +860,17 @@ define i32 @n17(i32 %x, i32 %bit) { ; ALL-LABEL: @n17( ; ALL-NEXT: entry: ; ALL-NEXT: [[BITMASK:%.*]] = shl i32 2, [[BIT:%.*]], !dbg [[DBG274:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], metadata [[META269:![0-9]+]], metadata !DIExpression()), !dbg [[DBG274]] +; ALL-NEXT: #dbg_value(i32 [[BITMASK]], [[META269:![0-9]+]], !DIExpression(), [[DBG274]]) ; ALL-NEXT: br label [[LOOP:%.*]], !dbg [[DBG275:![0-9]+]] ; ALL: loop: ; ALL-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG276:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], metadata [[META270:![0-9]+]], metadata !DIExpression()), !dbg [[DBG276]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR]], [[META270:![0-9]+]], !DIExpression(), [[DBG276]]) ; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], !dbg [[DBG277:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], metadata [[META271:![0-9]+]], metadata !DIExpression()), !dbg [[DBG277]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR_BITMASKED]], [[META271:![0-9]+]], !DIExpression(), [[DBG277]]) ; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, !dbg [[DBG278:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META272:![0-9]+]], metadata !DIExpression()), !dbg [[DBG278]] +; ALL-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META272:![0-9]+]], !DIExpression(), [[DBG278]]) ; ALL-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, !dbg [[DBG279:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], metadata [[META273:![0-9]+]], metadata 
!DIExpression()), !dbg [[DBG279]] +; ALL-NEXT: #dbg_value(i32 [[X_NEXT]], [[META273:![0-9]+]], !DIExpression(), [[DBG279]]) ; ALL-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], !dbg [[DBG280:![0-9]+]] ; ALL: end: ; ALL-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], !dbg [[DBG276]] @@ -896,17 +896,17 @@ define i32 @n18(i32 %x, i32 %bit) { ; ALL-LABEL: @n18( ; ALL-NEXT: entry: ; ALL-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], !dbg [[DBG289:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], metadata [[META284:![0-9]+]], metadata !DIExpression()), !dbg [[DBG289]] +; ALL-NEXT: #dbg_value(i32 [[BITMASK]], [[META284:![0-9]+]], !DIExpression(), [[DBG289]]) ; ALL-NEXT: br label [[LOOP:%.*]], !dbg [[DBG290:![0-9]+]] ; ALL: loop: ; ALL-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG291:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], metadata [[META285:![0-9]+]], metadata !DIExpression()), !dbg [[DBG291]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR]], [[META285:![0-9]+]], !DIExpression(), [[DBG291]]) ; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], !dbg [[DBG292:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], metadata [[META286:![0-9]+]], metadata !DIExpression()), !dbg [[DBG292]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR_BITMASKED]], [[META286:![0-9]+]], !DIExpression(), [[DBG292]]) ; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, !dbg [[DBG293:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META287:![0-9]+]], metadata !DIExpression()), !dbg [[DBG293]] +; ALL-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META287:![0-9]+]], !DIExpression(), [[DBG293]]) ; ALL-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 2, !dbg [[DBG294:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], metadata 
[[META288:![0-9]+]], metadata !DIExpression()), !dbg [[DBG294]] +; ALL-NEXT: #dbg_value(i32 [[X_NEXT]], [[META288:![0-9]+]], !DIExpression(), [[DBG294]]) ; ALL-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], !dbg [[DBG295:![0-9]+]] ; ALL: end: ; ALL-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], !dbg [[DBG291]] @@ -932,17 +932,17 @@ define i32 @n19(i32 %x, i32 %bit) { ; ALL-LABEL: @n19( ; ALL-NEXT: entry: ; ALL-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], !dbg [[DBG304:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], metadata [[META299:![0-9]+]], metadata !DIExpression()), !dbg [[DBG304]] +; ALL-NEXT: #dbg_value(i32 [[BITMASK]], [[META299:![0-9]+]], !DIExpression(), [[DBG304]]) ; ALL-NEXT: br label [[LOOP:%.*]], !dbg [[DBG305:![0-9]+]] ; ALL: loop: ; ALL-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG306:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], metadata [[META300:![0-9]+]], metadata !DIExpression()), !dbg [[DBG306]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR]], [[META300:![0-9]+]], !DIExpression(), [[DBG306]]) ; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], !dbg [[DBG307:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], metadata [[META301:![0-9]+]], metadata !DIExpression()), !dbg [[DBG307]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR_BITMASKED]], [[META301:![0-9]+]], !DIExpression(), [[DBG307]]) ; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp ne i32 [[X_CURR_BITMASKED]], [[BITMASK]], !dbg [[DBG308:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META302:![0-9]+]], metadata !DIExpression()), !dbg [[DBG308]] +; ALL-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META302:![0-9]+]], !DIExpression(), [[DBG308]]) ; ALL-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, !dbg [[DBG309:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 
[[X_NEXT]], metadata [[META303:![0-9]+]], metadata !DIExpression()), !dbg [[DBG309]] +; ALL-NEXT: #dbg_value(i32 [[X_NEXT]], [[META303:![0-9]+]], !DIExpression(), [[DBG309]]) ; ALL-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], !dbg [[DBG310:![0-9]+]] ; ALL: end: ; ALL-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], !dbg [[DBG306]] @@ -968,17 +968,17 @@ define i32 @n20(i32 %x, i32 %bit) { ; ALL-LABEL: @n20( ; ALL-NEXT: entry: ; ALL-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], !dbg [[DBG319:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], metadata [[META314:![0-9]+]], metadata !DIExpression()), !dbg [[DBG319]] +; ALL-NEXT: #dbg_value(i32 [[BITMASK]], [[META314:![0-9]+]], !DIExpression(), [[DBG319]]) ; ALL-NEXT: br label [[LOOP:%.*]], !dbg [[DBG320:![0-9]+]] ; ALL: loop: ; ALL-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG321:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], metadata [[META315:![0-9]+]], metadata !DIExpression()), !dbg [[DBG321]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR]], [[META315:![0-9]+]], !DIExpression(), [[DBG321]]) ; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], !dbg [[DBG322:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], metadata [[META316:![0-9]+]], metadata !DIExpression()), !dbg [[DBG322]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR_BITMASKED]], [[META316:![0-9]+]], !DIExpression(), [[DBG322]]) ; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, !dbg [[DBG323:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META317:![0-9]+]], metadata !DIExpression()), !dbg [[DBG323]] +; ALL-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META317:![0-9]+]], !DIExpression(), [[DBG323]]) ; ALL-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, !dbg [[DBG324:![0-9]+]] -; ALL-NEXT: call void 
@llvm.dbg.value(metadata i32 [[X_NEXT]], metadata [[META318:![0-9]+]], metadata !DIExpression()), !dbg [[DBG324]] +; ALL-NEXT: #dbg_value(i32 [[X_NEXT]], [[META318:![0-9]+]], !DIExpression(), [[DBG324]]) ; ALL-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG325:![0-9]+]] ; ALL: end: ; ALL-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], !dbg [[DBG321]] @@ -1004,17 +1004,17 @@ define i32 @n21(i32 %x, i32 %bit) { ; ALL-LABEL: @n21( ; ALL-NEXT: entry: ; ALL-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], !dbg [[DBG334:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], metadata [[META329:![0-9]+]], metadata !DIExpression()), !dbg [[DBG334]] +; ALL-NEXT: #dbg_value(i32 [[BITMASK]], [[META329:![0-9]+]], !DIExpression(), [[DBG334]]) ; ALL-NEXT: br label [[LOOP:%.*]], !dbg [[DBG335:![0-9]+]] ; ALL: loop: ; ALL-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG336:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], metadata [[META330:![0-9]+]], metadata !DIExpression()), !dbg [[DBG336]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR]], [[META330:![0-9]+]], !DIExpression(), [[DBG336]]) ; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], !dbg [[DBG337:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], metadata [[META331:![0-9]+]], metadata !DIExpression()), !dbg [[DBG337]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR_BITMASKED]], [[META331:![0-9]+]], !DIExpression(), [[DBG337]]) ; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp ne i32 [[X_CURR_BITMASKED]], 0, !dbg [[DBG338:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META332:![0-9]+]], metadata !DIExpression()), !dbg [[DBG338]] +; ALL-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META332:![0-9]+]], !DIExpression(), [[DBG338]]) ; ALL-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, !dbg [[DBG339:![0-9]+]] -; 
ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], metadata [[META333:![0-9]+]], metadata !DIExpression()), !dbg [[DBG339]] +; ALL-NEXT: #dbg_value(i32 [[X_NEXT]], [[META333:![0-9]+]], !DIExpression(), [[DBG339]]) ; ALL-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], !dbg [[DBG340:![0-9]+]] ; ALL: end: ; ALL-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], !dbg [[DBG336]] @@ -1040,16 +1040,16 @@ define i32 @n22(i32 %x, i32 %bit) { ; ALL-LABEL: @n22( ; ALL-NEXT: entry: ; ALL-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], !dbg [[DBG349:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], metadata [[META344:![0-9]+]], metadata !DIExpression()), !dbg [[DBG349]] +; ALL-NEXT: #dbg_value(i32 [[BITMASK]], [[META344:![0-9]+]], !DIExpression(), [[DBG349]]) ; ALL-NEXT: br label [[LOOP:%.*]], !dbg [[DBG350:![0-9]+]] ; ALL: loop: -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X:%.*]], metadata [[META345:![0-9]+]], metadata !DIExpression()), !dbg [[DBG351:![0-9]+]] +; ALL-NEXT: #dbg_value(i32 [[X:%.*]], [[META345:![0-9]+]], !DIExpression(), [[META351:![0-9]+]]) ; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X]], [[BITMASK]], !dbg [[DBG352:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], metadata [[META346:![0-9]+]], metadata !DIExpression()), !dbg [[DBG352]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR_BITMASKED]], [[META346:![0-9]+]], !DIExpression(), [[DBG352]]) ; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, !dbg [[DBG353:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META347:![0-9]+]], metadata !DIExpression()), !dbg [[DBG353]] +; ALL-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META347:![0-9]+]], !DIExpression(), [[DBG353]]) ; ALL-NEXT: [[X_NEXT:%.*]] = shl i32 [[X]], 1, !dbg [[DBG354:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], metadata [[META348:![0-9]+]], metadata 
!DIExpression()), !dbg [[DBG354]] +; ALL-NEXT: #dbg_value(i32 [[X_NEXT]], [[META348:![0-9]+]], !DIExpression(), [[DBG354]]) ; ALL-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], !dbg [[DBG355:![0-9]+]] ; ALL: end: ; ALL-NEXT: ret i32 [[X]], !dbg [[DBG356:![0-9]+]] @@ -1074,17 +1074,17 @@ define i32 @n23(i32 %x, i32 %bit) { ; ALL-LABEL: @n23( ; ALL-NEXT: entry: ; ALL-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], !dbg [[DBG364:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], metadata [[META359:![0-9]+]], metadata !DIExpression()), !dbg [[DBG364]] +; ALL-NEXT: #dbg_value(i32 [[BITMASK]], [[META359:![0-9]+]], !DIExpression(), [[DBG364]]) ; ALL-NEXT: br label [[LOOP:%.*]], !dbg [[DBG365:![0-9]+]] ; ALL: loop: ; ALL-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG366:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], metadata [[META360:![0-9]+]], metadata !DIExpression()), !dbg [[DBG366]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR]], [[META360:![0-9]+]], !DIExpression(), [[DBG366]]) ; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X]], [[BITMASK]], !dbg [[DBG367:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], metadata [[META361:![0-9]+]], metadata !DIExpression()), !dbg [[DBG367]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR_BITMASKED]], [[META361:![0-9]+]], !DIExpression(), [[DBG367]]) ; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, !dbg [[DBG368:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META362:![0-9]+]], metadata !DIExpression()), !dbg [[DBG368]] +; ALL-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META362:![0-9]+]], !DIExpression(), [[DBG368]]) ; ALL-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, !dbg [[DBG369:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], metadata [[META363:![0-9]+]], metadata !DIExpression()), !dbg 
[[DBG369]] +; ALL-NEXT: #dbg_value(i32 [[X_NEXT]], [[META363:![0-9]+]], !DIExpression(), [[DBG369]]) ; ALL-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], !dbg [[DBG370:![0-9]+]] ; ALL: end: ; ALL-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], !dbg [[DBG366]] @@ -1110,17 +1110,17 @@ define i32 @n24(i32 %x, i32 %bit) { ; ALL-LABEL: @n24( ; ALL-NEXT: entry: ; ALL-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], !dbg [[DBG379:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], metadata [[META374:![0-9]+]], metadata !DIExpression()), !dbg [[DBG379]] +; ALL-NEXT: #dbg_value(i32 [[BITMASK]], [[META374:![0-9]+]], !DIExpression(), [[DBG379]]) ; ALL-NEXT: br label [[LOOP:%.*]], !dbg [[DBG380:![0-9]+]] ; ALL: loop: ; ALL-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG381:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], metadata [[META375:![0-9]+]], metadata !DIExpression()), !dbg [[DBG381]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR]], [[META375:![0-9]+]], !DIExpression(), [[DBG381]]) ; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], !dbg [[DBG382:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], metadata [[META376:![0-9]+]], metadata !DIExpression()), !dbg [[DBG382]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR_BITMASKED]], [[META376:![0-9]+]], !DIExpression(), [[DBG382]]) ; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR]], 0, !dbg [[DBG383:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META377:![0-9]+]], metadata !DIExpression()), !dbg [[DBG383]] +; ALL-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META377:![0-9]+]], !DIExpression(), [[DBG383]]) ; ALL-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, !dbg [[DBG384:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], metadata [[META378:![0-9]+]], metadata !DIExpression()), 
!dbg [[DBG384]] +; ALL-NEXT: #dbg_value(i32 [[X_NEXT]], [[META378:![0-9]+]], !DIExpression(), [[DBG384]]) ; ALL-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], !dbg [[DBG385:![0-9]+]] ; ALL: end: ; ALL-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], !dbg [[DBG381]] @@ -1146,17 +1146,17 @@ define i32 @n25(i32 %x, i32 %bit) { ; ALL-LABEL: @n25( ; ALL-NEXT: entry: ; ALL-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], !dbg [[DBG394:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], metadata [[META389:![0-9]+]], metadata !DIExpression()), !dbg [[DBG394]] +; ALL-NEXT: #dbg_value(i32 [[BITMASK]], [[META389:![0-9]+]], !DIExpression(), [[DBG394]]) ; ALL-NEXT: br label [[LOOP:%.*]], !dbg [[DBG395:![0-9]+]] ; ALL: loop: ; ALL-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG396:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], metadata [[META390:![0-9]+]], metadata !DIExpression()), !dbg [[DBG396]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR]], [[META390:![0-9]+]], !DIExpression(), [[DBG396]]) ; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], !dbg [[DBG397:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], metadata [[META391:![0-9]+]], metadata !DIExpression()), !dbg [[DBG397]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR_BITMASKED]], [[META391:![0-9]+]], !DIExpression(), [[DBG397]]) ; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, !dbg [[DBG398:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META392:![0-9]+]], metadata !DIExpression()), !dbg [[DBG398]] +; ALL-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META392:![0-9]+]], !DIExpression(), [[DBG398]]) ; ALL-NEXT: [[X_NEXT]] = shl i32 [[X]], 1, !dbg [[DBG399:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], metadata [[META393:![0-9]+]], metadata 
!DIExpression()), !dbg [[DBG399]] +; ALL-NEXT: #dbg_value(i32 [[X_NEXT]], [[META393:![0-9]+]], !DIExpression(), [[DBG399]]) ; ALL-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], !dbg [[DBG400:![0-9]+]] ; ALL: end: ; ALL-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], !dbg [[DBG396]] @@ -1184,13 +1184,13 @@ define i32 @n26(i32 %x) { ; ALL-NEXT: br label [[LOOP:%.*]], !dbg [[DBG408:![0-9]+]] ; ALL: loop: ; ALL-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG409:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], metadata [[META404:![0-9]+]], metadata !DIExpression()), !dbg [[DBG409]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR]], [[META404:![0-9]+]], !DIExpression(), [[DBG409]]) ; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], 16777215, !dbg [[DBG410:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], metadata [[META405:![0-9]+]], metadata !DIExpression()), !dbg [[DBG410]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR_BITMASKED]], [[META405:![0-9]+]], !DIExpression(), [[DBG410]]) ; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, !dbg [[DBG411:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META406:![0-9]+]], metadata !DIExpression()), !dbg [[DBG411]] +; ALL-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META406:![0-9]+]], !DIExpression(), [[DBG411]]) ; ALL-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, !dbg [[DBG412:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], metadata [[META407:![0-9]+]], metadata !DIExpression()), !dbg [[DBG412]] +; ALL-NEXT: #dbg_value(i32 [[X_NEXT]], [[META407:![0-9]+]], !DIExpression(), [[DBG412]]) ; ALL-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], !dbg [[DBG413:![0-9]+]] ; ALL: end: ; ALL-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], !dbg [[DBG409]] @@ -1217,13 +1217,13 @@ 
define i32 @n27(i32 %x) { ; ALL-NEXT: br label [[LOOP:%.*]], !dbg [[DBG421:![0-9]+]] ; ALL: loop: ; ALL-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG422:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], metadata [[META417:![0-9]+]], metadata !DIExpression()), !dbg [[DBG422]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR]], [[META417:![0-9]+]], !DIExpression(), [[DBG422]]) ; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], 384, !dbg [[DBG423:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], metadata [[META418:![0-9]+]], metadata !DIExpression()), !dbg [[DBG423]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR_BITMASKED]], [[META418:![0-9]+]], !DIExpression(), [[DBG423]]) ; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, !dbg [[DBG424:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META419:![0-9]+]], metadata !DIExpression()), !dbg [[DBG424]] +; ALL-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META419:![0-9]+]], !DIExpression(), [[DBG424]]) ; ALL-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, !dbg [[DBG425:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], metadata [[META420:![0-9]+]], metadata !DIExpression()), !dbg [[DBG425]] +; ALL-NEXT: #dbg_value(i32 [[X_NEXT]], [[META420:![0-9]+]], !DIExpression(), [[DBG425]]) ; ALL-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], !dbg [[DBG426:![0-9]+]] ; ALL: end: ; ALL-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], !dbg [[DBG422]] @@ -1250,13 +1250,13 @@ define i32 @n28(i32 %x) { ; ALL-NEXT: br label [[LOOP:%.*]], !dbg [[DBG434:![0-9]+]] ; ALL: loop: ; ALL-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG435:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], metadata [[META430:![0-9]+]], metadata !DIExpression()), !dbg 
[[DBG435]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR]], [[META430:![0-9]+]], !DIExpression(), [[DBG435]]) ; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], 32896, !dbg [[DBG436:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], metadata [[META431:![0-9]+]], metadata !DIExpression()), !dbg [[DBG436]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR_BITMASKED]], [[META431:![0-9]+]], !DIExpression(), [[DBG436]]) ; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, !dbg [[DBG437:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META432:![0-9]+]], metadata !DIExpression()), !dbg [[DBG437]] +; ALL-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META432:![0-9]+]], !DIExpression(), [[DBG437]]) ; ALL-NEXT: [[X_NEXT]] = shl i32 [[X_CURR]], 1, !dbg [[DBG438:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], metadata [[META433:![0-9]+]], metadata !DIExpression()), !dbg [[DBG438]] +; ALL-NEXT: #dbg_value(i32 [[X_NEXT]], [[META433:![0-9]+]], !DIExpression(), [[DBG438]]) ; ALL-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], !dbg [[DBG439:![0-9]+]] ; ALL: end: ; ALL-NEXT: [[X_CURR_LCSSA:%.*]] = phi i32 [ [[X_CURR]], [[LOOP]] ], !dbg [[DBG435]] @@ -1285,7 +1285,7 @@ define i32 @n29(i32 %x, i32 %bit) { ; ALL-NEXT: entry: ; ALL-NEXT: [[BIT_FR:%.*]] = freeze i32 [[BIT:%.*]] ; ALL-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT_FR]], !dbg [[DBG448:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], metadata [[META443:![0-9]+]], metadata !DIExpression()), !dbg [[DBG448]] +; ALL-NEXT: #dbg_value(i32 [[BITMASK]], [[META443:![0-9]+]], !DIExpression(), [[DBG448]]) ; ALL-NEXT: [[BIT_FR_LOWBITMASK:%.*]] = add i32 [[BITMASK]], -1, !dbg [[DBG449:![0-9]+]] ; ALL-NEXT: [[BIT_FR_MASK:%.*]] = or i32 [[BIT_FR_LOWBITMASK]], [[BITMASK]], !dbg [[DBG449]] ; ALL-NEXT: [[X_MASKED:%.*]] = and i32 [[X:%.*]], [[BIT_FR_MASK]], !dbg [[DBG449]] @@ -1300,14 
+1300,14 @@ define i32 @n29(i32 %x, i32 %bit) { ; ALL: loop: ; ALL-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG449]] ; ALL-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], !dbg [[DBG449]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP0]], metadata [[META444:![0-9]+]], metadata !DIExpression()), !dbg [[DBG449]] +; ALL-NEXT: #dbg_value(i32 [[TMP0]], [[META444:![0-9]+]], !DIExpression(), [[DBG449]]) ; ALL-NEXT: call void @external_side_effect(), !dbg [[DBG451:![0-9]+]] ; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[TMP0]], [[BITMASK]], !dbg [[DBG452:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], metadata [[META445:![0-9]+]], metadata !DIExpression()), !dbg [[DBG452]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR_BITMASKED]], [[META445:![0-9]+]], !DIExpression(), [[DBG452]]) ; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, !dbg [[DBG453:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META446:![0-9]+]], metadata !DIExpression()), !dbg [[DBG453]] +; ALL-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META446:![0-9]+]], !DIExpression(), [[DBG453]]) ; ALL-NEXT: [[TMP1]] = shl i32 [[TMP0]], 1, !dbg [[DBG454:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP1]], metadata [[META447:![0-9]+]], metadata !DIExpression()), !dbg [[DBG454]] +; ALL-NEXT: #dbg_value(i32 [[TMP1]], [[META447:![0-9]+]], !DIExpression(), [[DBG454]]) ; ALL-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i32 [[LOOP_IV]], 1, !dbg [[DBG455:![0-9]+]] ; ALL-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i32 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG455]] ; ALL-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG455]] @@ -1346,12 +1346,12 @@ define i32 @n30(i32 %x) { ; ALL: loop: ; ALL-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG462]] ; 
ALL-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], !dbg [[DBG462]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP0]], metadata [[META459:![0-9]+]], metadata !DIExpression()), !dbg [[DBG462]] +; ALL-NEXT: #dbg_value(i32 [[TMP0]], [[META459:![0-9]+]], !DIExpression(), [[DBG462]]) ; ALL-NEXT: call void @external_side_effect(), !dbg [[DBG464:![0-9]+]] ; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp sgt i32 [[TMP0]], -1, !dbg [[DBG465:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META460:![0-9]+]], metadata !DIExpression()), !dbg [[DBG465]] +; ALL-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META460:![0-9]+]], !DIExpression(), [[DBG465]]) ; ALL-NEXT: [[TMP1]] = shl i32 [[TMP0]], 1, !dbg [[DBG466:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP1]], metadata [[META461:![0-9]+]], metadata !DIExpression()), !dbg [[DBG466]] +; ALL-NEXT: #dbg_value(i32 [[TMP1]], [[META461:![0-9]+]], !DIExpression(), [[DBG466]]) ; ALL-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i32 [[LOOP_IV]], 1, !dbg [[DBG467:![0-9]+]] ; ALL-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i32 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG467]] ; ALL-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG467]] @@ -1379,7 +1379,7 @@ define i32 @n31(i32 %x, i32 %bit) { ; ALL-NEXT: entry: ; ALL-NEXT: [[BIT_FR:%.*]] = freeze i32 [[BIT:%.*]] ; ALL-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT_FR]], !dbg [[DBG476:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], metadata [[META471:![0-9]+]], metadata !DIExpression()), !dbg [[DBG476]] +; ALL-NEXT: #dbg_value(i32 [[BITMASK]], [[META471:![0-9]+]], !DIExpression(), [[DBG476]]) ; ALL-NEXT: [[BIT_FR_LOWBITMASK:%.*]] = add i32 [[BITMASK]], -1, !dbg [[DBG477:![0-9]+]] ; ALL-NEXT: [[BIT_FR_MASK:%.*]] = or i32 [[BIT_FR_LOWBITMASK]], [[BITMASK]], !dbg [[DBG477]] ; ALL-NEXT: [[X_MASKED:%.*]] = and i32 [[X:%.*]], [[BIT_FR_MASK]], !dbg [[DBG477]] @@ 
-1394,13 +1394,13 @@ define i32 @n31(i32 %x, i32 %bit) { ; ALL: loop: ; ALL-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG477]] ; ALL-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], !dbg [[DBG477]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP0]], metadata [[META472:![0-9]+]], metadata !DIExpression()), !dbg [[DBG477]] +; ALL-NEXT: #dbg_value(i32 [[TMP0]], [[META472:![0-9]+]], !DIExpression(), [[DBG477]]) ; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[TMP0]], [[BITMASK]], !dbg [[DBG479:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], metadata [[META473:![0-9]+]], metadata !DIExpression()), !dbg [[DBG479]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR_BITMASKED]], [[META473:![0-9]+]], !DIExpression(), [[DBG479]]) ; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, !dbg [[DBG480:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META474:![0-9]+]], metadata !DIExpression()), !dbg [[DBG480]] +; ALL-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META474:![0-9]+]], !DIExpression(), [[DBG480]]) ; ALL-NEXT: [[TMP1]] = shl i32 [[TMP0]], 1, !dbg [[DBG481:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP1]], metadata [[META475:![0-9]+]], metadata !DIExpression()), !dbg [[DBG481]] +; ALL-NEXT: #dbg_value(i32 [[TMP1]], [[META475:![0-9]+]], !DIExpression(), [[DBG481]]) ; ALL-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i32 [[LOOP_IV]], 1, !dbg [[DBG482:![0-9]+]] ; ALL-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i32 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG482]] ; ALL-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG482]] @@ -1430,7 +1430,7 @@ define i32 @n32(i32 %x, i32 %bit) { ; ALL-NEXT: entry: ; ALL-NEXT: [[BIT_FR:%.*]] = freeze i32 [[BIT:%.*]] ; ALL-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT_FR]], !dbg [[DBG492:![0-9]+]] -; ALL-NEXT: call void 
@llvm.dbg.value(metadata i32 [[BITMASK]], metadata [[META487:![0-9]+]], metadata !DIExpression()), !dbg [[DBG492]] +; ALL-NEXT: #dbg_value(i32 [[BITMASK]], [[META487:![0-9]+]], !DIExpression(), [[DBG492]]) ; ALL-NEXT: [[BIT_FR_LOWBITMASK:%.*]] = add i32 [[BITMASK]], -1, !dbg [[DBG493:![0-9]+]] ; ALL-NEXT: [[BIT_FR_MASK:%.*]] = or i32 [[BIT_FR_LOWBITMASK]], [[BITMASK]], !dbg [[DBG493]] ; ALL-NEXT: [[X_MASKED:%.*]] = and i32 [[X:%.*]], [[BIT_FR_MASK]], !dbg [[DBG493]] @@ -1445,13 +1445,13 @@ define i32 @n32(i32 %x, i32 %bit) { ; ALL: loop: ; ALL-NEXT: [[LOOP_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG493]] ; ALL-NEXT: [[TMP0:%.*]] = phi i32 [ [[X]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], !dbg [[DBG493]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP0]], metadata [[META488:![0-9]+]], metadata !DIExpression()), !dbg [[DBG493]] +; ALL-NEXT: #dbg_value(i32 [[TMP0]], [[META488:![0-9]+]], !DIExpression(), [[DBG493]]) ; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[TMP0]], [[BITMASK]], !dbg [[DBG495:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], metadata [[META489:![0-9]+]], metadata !DIExpression()), !dbg [[DBG495]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR_BITMASKED]], [[META489:![0-9]+]], !DIExpression(), [[DBG495]]) ; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, !dbg [[DBG496:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META490:![0-9]+]], metadata !DIExpression()), !dbg [[DBG496]] +; ALL-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META490:![0-9]+]], !DIExpression(), [[DBG496]]) ; ALL-NEXT: [[TMP1]] = shl i32 [[TMP0]], 1, !dbg [[DBG497:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP1]], metadata [[META491:![0-9]+]], metadata !DIExpression()), !dbg [[DBG497]] +; ALL-NEXT: #dbg_value(i32 [[TMP1]], [[META491:![0-9]+]], !DIExpression(), [[DBG497]]) ; ALL-NEXT: [[LOOP_IV_NEXT]] = add nuw 
nsw i32 [[LOOP_IV]], 1, !dbg [[DBG498:![0-9]+]] ; ALL-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i32 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG498]] ; ALL-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG498]] @@ -1482,15 +1482,15 @@ define i32 @n33(i32 %x, i32 %bit, i32 %x.curr) { ; ALL-LABEL: @n33( ; ALL-NEXT: entry: ; ALL-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], !dbg [[DBG507:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], metadata [[META503:![0-9]+]], metadata !DIExpression()), !dbg [[DBG507]] +; ALL-NEXT: #dbg_value(i32 [[BITMASK]], [[META503:![0-9]+]], !DIExpression(), [[DBG507]]) ; ALL-NEXT: br label [[LOOP:%.*]], !dbg [[DBG508:![0-9]+]] ; ALL: loop: ; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR:%.*]], [[BITMASK]], !dbg [[DBG509:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], metadata [[META504:![0-9]+]], metadata !DIExpression()), !dbg [[DBG509]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR_BITMASKED]], [[META504:![0-9]+]], !DIExpression(), [[DBG509]]) ; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, !dbg [[DBG510:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META505:![0-9]+]], metadata !DIExpression()), !dbg [[DBG510]] +; ALL-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META505:![0-9]+]], !DIExpression(), [[DBG510]]) ; ALL-NEXT: [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, !dbg [[DBG511:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], metadata [[META506:![0-9]+]], metadata !DIExpression()), !dbg [[DBG511]] +; ALL-NEXT: #dbg_value(i32 [[X_NEXT]], [[META506:![0-9]+]], !DIExpression(), [[DBG511]]) ; ALL-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], !dbg [[DBG512:![0-9]+]] ; ALL: end: ; ALL-NEXT: ret i32 [[X_CURR]], !dbg [[DBG513:![0-9]+]] @@ -1514,7 +1514,7 @@ define i32 @n34(i32 %bit, i1 %c, i32 %x0, i32 %x1) { ; ALL-LABEL: @n34( ; ALL-NEXT: 
entry: ; ALL-NEXT: [[BITMASK:%.*]] = shl i32 1, [[BIT:%.*]], !dbg [[DBG521:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[BITMASK]], metadata [[META516:![0-9]+]], metadata !DIExpression()), !dbg [[DBG521]] +; ALL-NEXT: #dbg_value(i32 [[BITMASK]], [[META516:![0-9]+]], !DIExpression(), [[DBG521]]) ; ALL-NEXT: br i1 [[C:%.*]], label [[BB0:%.*]], label [[BB1:%.*]], !dbg [[DBG522:![0-9]+]] ; ALL: bb0: ; ALL-NEXT: br label [[MERGE:%.*]], !dbg [[DBG523:![0-9]+]] @@ -1522,15 +1522,15 @@ define i32 @n34(i32 %bit, i1 %c, i32 %x0, i32 %x1) { ; ALL-NEXT: br label [[MERGE]], !dbg [[DBG524:![0-9]+]] ; ALL: merge: ; ALL-NEXT: [[X_CURR:%.*]] = phi i32 [ [[X0:%.*]], [[BB0]] ], [ [[X1:%.*]], [[BB1]] ], !dbg [[DBG525:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR]], metadata [[META517:![0-9]+]], metadata !DIExpression()), !dbg [[DBG525]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR]], [[META517:![0-9]+]], !DIExpression(), [[DBG525]]) ; ALL-NEXT: br label [[LOOP:%.*]], !dbg [[DBG526:![0-9]+]] ; ALL: loop: ; ALL-NEXT: [[X_CURR_BITMASKED:%.*]] = and i32 [[X_CURR]], [[BITMASK]], !dbg [[DBG527:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_CURR_BITMASKED]], metadata [[META518:![0-9]+]], metadata !DIExpression()), !dbg [[DBG527]] +; ALL-NEXT: #dbg_value(i32 [[X_CURR_BITMASKED]], [[META518:![0-9]+]], !DIExpression(), [[DBG527]]) ; ALL-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i32 [[X_CURR_BITMASKED]], 0, !dbg [[DBG528:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META519:![0-9]+]], metadata !DIExpression()), !dbg [[DBG528]] +; ALL-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META519:![0-9]+]], !DIExpression(), [[DBG528]]) ; ALL-NEXT: [[X_NEXT:%.*]] = shl i32 [[X_CURR]], 1, !dbg [[DBG529:![0-9]+]] -; ALL-NEXT: call void @llvm.dbg.value(metadata i32 [[X_NEXT]], metadata [[META520:![0-9]+]], metadata !DIExpression()), !dbg [[DBG529]] +; ALL-NEXT: #dbg_value(i32 [[X_NEXT]], [[META520:![0-9]+]], 
!DIExpression(), [[DBG529]]) ; ALL-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], !dbg [[DBG530:![0-9]+]] ; ALL: end: ; ALL-NEXT: ret i32 [[X_CURR]], !dbg [[DBG531:![0-9]+]] @@ -1564,7 +1564,7 @@ define void @t35_i1(i1 %x, i1 %bit, ptr %p0, ptr %p1) { ; LZCNT-NEXT: entry: ; LZCNT-NEXT: [[BIT_FR:%.*]] = freeze i1 [[BIT:%.*]] ; LZCNT-NEXT: [[BITMASK:%.*]] = shl i1 true, [[BIT_FR]], !dbg [[DBG539:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[BITMASK]], metadata [[META534:![0-9]+]], metadata !DIExpression()), !dbg [[DBG539]] +; LZCNT-NEXT: #dbg_value(i1 [[BITMASK]], [[META534:![0-9]+]], !DIExpression(), [[DBG539]]) ; LZCNT-NEXT: [[BIT_FR_LOWBITMASK:%.*]] = add i1 [[BITMASK]], true, !dbg [[DBG540:![0-9]+]] ; LZCNT-NEXT: [[BIT_FR_MASK:%.*]] = or i1 [[BIT_FR_LOWBITMASK]], [[BITMASK]], !dbg [[DBG540]] ; LZCNT-NEXT: [[X_MASKED:%.*]] = and i1 [[X:%.*]], [[BIT_FR_MASK]], !dbg [[DBG540]] @@ -1579,13 +1579,13 @@ define void @t35_i1(i1 %x, i1 %bit, ptr %p0, ptr %p1) { ; LZCNT: loop: ; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i1 [ false, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG540]] ; LZCNT-NEXT: [[TMP0:%.*]] = phi i1 [ [[X]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], !dbg [[DBG540]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[TMP0]], metadata [[META535:![0-9]+]], metadata !DIExpression()), !dbg [[DBG540]] +; LZCNT-NEXT: #dbg_value(i1 [[TMP0]], [[META535:![0-9]+]], !DIExpression(), [[DBG540]]) ; LZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i1 [[TMP0]], [[BITMASK]], !dbg [[DBG542:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_BITMASKED]], metadata [[META536:![0-9]+]], metadata !DIExpression()), !dbg [[DBG542]] +; LZCNT-NEXT: #dbg_value(i1 [[X_CURR_BITMASKED]], [[META536:![0-9]+]], !DIExpression(), [[DBG542]]) ; LZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i1 [[X_CURR_BITMASKED]], false, !dbg [[DBG543:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], 
metadata [[META537:![0-9]+]], metadata !DIExpression()), !dbg [[DBG543]] +; LZCNT-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META537:![0-9]+]], !DIExpression(), [[DBG543]]) ; LZCNT-NEXT: [[TMP1]] = shl i1 [[TMP0]], true, !dbg [[DBG544:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[TMP1]], metadata [[META538:![0-9]+]], metadata !DIExpression()), !dbg [[DBG544]] +; LZCNT-NEXT: #dbg_value(i1 [[TMP1]], [[META538:![0-9]+]], !DIExpression(), [[DBG544]]) ; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i1 [[LOOP_IV]], true, !dbg [[DBG545:![0-9]+]] ; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i1 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG545]] ; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG545]] @@ -1599,17 +1599,17 @@ define void @t35_i1(i1 %x, i1 %bit, ptr %p0, ptr %p1) { ; NOLZCNT-LABEL: @t35_i1( ; NOLZCNT-NEXT: entry: ; NOLZCNT-NEXT: [[BITMASK:%.*]] = shl i1 true, [[BIT:%.*]], !dbg [[DBG539:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[BITMASK]], metadata [[META534:![0-9]+]], metadata !DIExpression()), !dbg [[DBG539]] +; NOLZCNT-NEXT: #dbg_value(i1 [[BITMASK]], [[META534:![0-9]+]], !DIExpression(), [[DBG539]]) ; NOLZCNT-NEXT: br label [[LOOP:%.*]], !dbg [[DBG540:![0-9]+]] ; NOLZCNT: loop: ; NOLZCNT-NEXT: [[X_CURR:%.*]] = phi i1 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG541:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR]], metadata [[META535:![0-9]+]], metadata !DIExpression()), !dbg [[DBG541]] +; NOLZCNT-NEXT: #dbg_value(i1 [[X_CURR]], [[META535:![0-9]+]], !DIExpression(), [[DBG541]]) ; NOLZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i1 [[X_CURR]], [[BITMASK]], !dbg [[DBG542:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_BITMASKED]], metadata [[META536:![0-9]+]], metadata !DIExpression()), !dbg [[DBG542]] +; NOLZCNT-NEXT: #dbg_value(i1 [[X_CURR_BITMASKED]], [[META536:![0-9]+]], !DIExpression(), [[DBG542]]) ; 
NOLZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i1 [[X_CURR_BITMASKED]], false, !dbg [[DBG543:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META537:![0-9]+]], metadata !DIExpression()), !dbg [[DBG543]] +; NOLZCNT-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META537:![0-9]+]], !DIExpression(), [[DBG543]]) ; NOLZCNT-NEXT: [[X_NEXT]] = shl i1 [[X_CURR]], true, !dbg [[DBG544:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_NEXT]], metadata [[META538:![0-9]+]], metadata !DIExpression()), !dbg [[DBG544]] +; NOLZCNT-NEXT: #dbg_value(i1 [[X_NEXT]], [[META538:![0-9]+]], !DIExpression(), [[DBG544]]) ; NOLZCNT-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], !dbg [[DBG545:![0-9]+]] ; NOLZCNT: end: ; NOLZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i1 [ [[X_CURR]], [[LOOP]] ], !dbg [[DBG541]] @@ -1639,7 +1639,7 @@ define void @t36_i2(i2 %x, i2 %bit, ptr %p0, ptr %p1) { ; LZCNT-NEXT: entry: ; LZCNT-NEXT: [[BIT_FR:%.*]] = freeze i2 [[BIT:%.*]] ; LZCNT-NEXT: [[BITMASK:%.*]] = shl i2 1, [[BIT_FR]], !dbg [[DBG556:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[BITMASK]], metadata [[META551:![0-9]+]], metadata !DIExpression()), !dbg [[DBG556]] +; LZCNT-NEXT: #dbg_value(i2 [[BITMASK]], [[META551:![0-9]+]], !DIExpression(), [[DBG556]]) ; LZCNT-NEXT: [[BIT_FR_LOWBITMASK:%.*]] = add i2 [[BITMASK]], -1, !dbg [[DBG557:![0-9]+]] ; LZCNT-NEXT: [[BIT_FR_MASK:%.*]] = or i2 [[BIT_FR_LOWBITMASK]], [[BITMASK]], !dbg [[DBG557]] ; LZCNT-NEXT: [[X_MASKED:%.*]] = and i2 [[X:%.*]], [[BIT_FR_MASK]], !dbg [[DBG557]] @@ -1654,13 +1654,13 @@ define void @t36_i2(i2 %x, i2 %bit, ptr %p0, ptr %p1) { ; LZCNT: loop: ; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i2 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG557]] ; LZCNT-NEXT: [[TMP0:%.*]] = phi i2 [ [[X]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], !dbg [[DBG557]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[TMP0]], metadata 
[[META552:![0-9]+]], metadata !DIExpression()), !dbg [[DBG557]] +; LZCNT-NEXT: #dbg_value(i2 [[TMP0]], [[META552:![0-9]+]], !DIExpression(), [[DBG557]]) ; LZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i2 [[TMP0]], [[BITMASK]], !dbg [[DBG559:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[X_CURR_BITMASKED]], metadata [[META553:![0-9]+]], metadata !DIExpression()), !dbg [[DBG559]] +; LZCNT-NEXT: #dbg_value(i2 [[X_CURR_BITMASKED]], [[META553:![0-9]+]], !DIExpression(), [[DBG559]]) ; LZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i2 [[X_CURR_BITMASKED]], 0, !dbg [[DBG560:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META554:![0-9]+]], metadata !DIExpression()), !dbg [[DBG560]] +; LZCNT-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META554:![0-9]+]], !DIExpression(), [[DBG560]]) ; LZCNT-NEXT: [[TMP1]] = shl i2 [[TMP0]], 1, !dbg [[DBG561:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[TMP1]], metadata [[META555:![0-9]+]], metadata !DIExpression()), !dbg [[DBG561]] +; LZCNT-NEXT: #dbg_value(i2 [[TMP1]], [[META555:![0-9]+]], !DIExpression(), [[DBG561]]) ; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw i2 [[LOOP_IV]], 1, !dbg [[DBG562:![0-9]+]] ; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i2 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG562]] ; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG562]] @@ -1674,17 +1674,17 @@ define void @t36_i2(i2 %x, i2 %bit, ptr %p0, ptr %p1) { ; NOLZCNT-LABEL: @t36_i2( ; NOLZCNT-NEXT: entry: ; NOLZCNT-NEXT: [[BITMASK:%.*]] = shl i2 1, [[BIT:%.*]], !dbg [[DBG556:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[BITMASK]], metadata [[META551:![0-9]+]], metadata !DIExpression()), !dbg [[DBG556]] +; NOLZCNT-NEXT: #dbg_value(i2 [[BITMASK]], [[META551:![0-9]+]], !DIExpression(), [[DBG556]]) ; NOLZCNT-NEXT: br label [[LOOP:%.*]], !dbg [[DBG557:![0-9]+]] ; NOLZCNT: loop: ; NOLZCNT-NEXT: [[X_CURR:%.*]] = phi i2 [ [[X:%.*]], 
[[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG558:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[X_CURR]], metadata [[META552:![0-9]+]], metadata !DIExpression()), !dbg [[DBG558]] +; NOLZCNT-NEXT: #dbg_value(i2 [[X_CURR]], [[META552:![0-9]+]], !DIExpression(), [[DBG558]]) ; NOLZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i2 [[X_CURR]], [[BITMASK]], !dbg [[DBG559:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[X_CURR_BITMASKED]], metadata [[META553:![0-9]+]], metadata !DIExpression()), !dbg [[DBG559]] +; NOLZCNT-NEXT: #dbg_value(i2 [[X_CURR_BITMASKED]], [[META553:![0-9]+]], !DIExpression(), [[DBG559]]) ; NOLZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i2 [[X_CURR_BITMASKED]], 0, !dbg [[DBG560:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META554:![0-9]+]], metadata !DIExpression()), !dbg [[DBG560]] +; NOLZCNT-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META554:![0-9]+]], !DIExpression(), [[DBG560]]) ; NOLZCNT-NEXT: [[X_NEXT]] = shl i2 [[X_CURR]], 1, !dbg [[DBG561:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i2 [[X_NEXT]], metadata [[META555:![0-9]+]], metadata !DIExpression()), !dbg [[DBG561]] +; NOLZCNT-NEXT: #dbg_value(i2 [[X_NEXT]], [[META555:![0-9]+]], !DIExpression(), [[DBG561]]) ; NOLZCNT-NEXT: br i1 [[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], !dbg [[DBG562:![0-9]+]] ; NOLZCNT: end: ; NOLZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i2 [ [[X_CURR]], [[LOOP]] ], !dbg [[DBG558]] @@ -1714,7 +1714,7 @@ define void @t37_i3(i3 %x, i3 %bit, ptr %p0, ptr %p1) { ; LZCNT-NEXT: entry: ; LZCNT-NEXT: [[BIT_FR:%.*]] = freeze i3 [[BIT:%.*]] ; LZCNT-NEXT: [[BITMASK:%.*]] = shl i3 1, [[BIT_FR]], !dbg [[DBG573:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[BITMASK]], metadata [[META568:![0-9]+]], metadata !DIExpression()), !dbg [[DBG573]] +; LZCNT-NEXT: #dbg_value(i3 [[BITMASK]], [[META568:![0-9]+]], !DIExpression(), [[DBG573]]) ; 
LZCNT-NEXT: [[BIT_FR_LOWBITMASK:%.*]] = add i3 [[BITMASK]], -1, !dbg [[DBG574:![0-9]+]] ; LZCNT-NEXT: [[BIT_FR_MASK:%.*]] = or i3 [[BIT_FR_LOWBITMASK]], [[BITMASK]], !dbg [[DBG574]] ; LZCNT-NEXT: [[X_MASKED:%.*]] = and i3 [[X:%.*]], [[BIT_FR_MASK]], !dbg [[DBG574]] @@ -1729,13 +1729,13 @@ define void @t37_i3(i3 %x, i3 %bit, ptr %p0, ptr %p1) { ; LZCNT: loop: ; LZCNT-NEXT: [[LOOP_IV:%.*]] = phi i3 [ 0, [[ENTRY:%.*]] ], [ [[LOOP_IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG574]] ; LZCNT-NEXT: [[TMP0:%.*]] = phi i3 [ [[X]], [[ENTRY]] ], [ [[TMP1:%.*]], [[LOOP]] ], !dbg [[DBG574]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[TMP0]], metadata [[META569:![0-9]+]], metadata !DIExpression()), !dbg [[DBG574]] +; LZCNT-NEXT: #dbg_value(i3 [[TMP0]], [[META569:![0-9]+]], !DIExpression(), [[DBG574]]) ; LZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i3 [[TMP0]], [[BITMASK]], !dbg [[DBG576:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[X_CURR_BITMASKED]], metadata [[META570:![0-9]+]], metadata !DIExpression()), !dbg [[DBG576]] +; LZCNT-NEXT: #dbg_value(i3 [[X_CURR_BITMASKED]], [[META570:![0-9]+]], !DIExpression(), [[DBG576]]) ; LZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i3 [[X_CURR_BITMASKED]], 0, !dbg [[DBG577:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META571:![0-9]+]], metadata !DIExpression()), !dbg [[DBG577]] +; LZCNT-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META571:![0-9]+]], !DIExpression(), [[DBG577]]) ; LZCNT-NEXT: [[TMP1]] = shl i3 [[TMP0]], 1, !dbg [[DBG578:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[TMP1]], metadata [[META572:![0-9]+]], metadata !DIExpression()), !dbg [[DBG578]] +; LZCNT-NEXT: #dbg_value(i3 [[TMP1]], [[META572:![0-9]+]], !DIExpression(), [[DBG578]]) ; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i3 [[LOOP_IV]], 1, !dbg [[DBG579:![0-9]+]] ; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i3 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG579]] ; LZCNT-NEXT: br 
i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG579]] @@ -1749,17 +1749,17 @@ define void @t37_i3(i3 %x, i3 %bit, ptr %p0, ptr %p1) { ; NOLZCNT-LABEL: @t37_i3( ; NOLZCNT-NEXT: entry: ; NOLZCNT-NEXT: [[BITMASK:%.*]] = shl i3 1, [[BIT:%.*]], !dbg [[DBG573:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[BITMASK]], metadata [[META568:![0-9]+]], metadata !DIExpression()), !dbg [[DBG573]] +; NOLZCNT-NEXT: #dbg_value(i3 [[BITMASK]], [[META568:![0-9]+]], !DIExpression(), [[DBG573]]) ; NOLZCNT-NEXT: br label [[LOOP:%.*]], !dbg [[DBG574:![0-9]+]] ; NOLZCNT: loop: ; NOLZCNT-NEXT: [[X_CURR:%.*]] = phi i3 [ [[X:%.*]], [[ENTRY:%.*]] ], [ [[X_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG575:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[X_CURR]], metadata [[META569:![0-9]+]], metadata !DIExpression()), !dbg [[DBG575]] +; NOLZCNT-NEXT: #dbg_value(i3 [[X_CURR]], [[META569:![0-9]+]], !DIExpression(), [[DBG575]]) ; NOLZCNT-NEXT: [[X_CURR_BITMASKED:%.*]] = and i3 [[X_CURR]], [[BITMASK]], !dbg [[DBG576:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[X_CURR_BITMASKED]], metadata [[META570:![0-9]+]], metadata !DIExpression()), !dbg [[DBG576]] +; NOLZCNT-NEXT: #dbg_value(i3 [[X_CURR_BITMASKED]], [[META570:![0-9]+]], !DIExpression(), [[DBG576]]) ; NOLZCNT-NEXT: [[X_CURR_ISBITUNSET:%.*]] = icmp eq i3 [[X_CURR_BITMASKED]], 0, !dbg [[DBG577:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[X_CURR_ISBITUNSET]], metadata [[META571:![0-9]+]], metadata !DIExpression()), !dbg [[DBG577]] +; NOLZCNT-NEXT: #dbg_value(i1 [[X_CURR_ISBITUNSET]], [[META571:![0-9]+]], !DIExpression(), [[DBG577]]) ; NOLZCNT-NEXT: [[X_NEXT]] = shl i3 [[X_CURR]], 1, !dbg [[DBG578:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i3 [[X_NEXT]], metadata [[META572:![0-9]+]], metadata !DIExpression()), !dbg [[DBG578]] +; NOLZCNT-NEXT: #dbg_value(i3 [[X_NEXT]], [[META572:![0-9]+]], !DIExpression(), [[DBG578]]) ; NOLZCNT-NEXT: br i1 
[[X_CURR_ISBITUNSET]], label [[LOOP]], label [[END:%.*]], !dbg [[DBG579:![0-9]+]] ; NOLZCNT: end: ; NOLZCNT-NEXT: [[X_CURR_LCSSA:%.*]] = phi i3 [ [[X_CURR]], [[LOOP]] ], !dbg [[DBG575]] diff --git a/llvm/test/Transforms/LoopIdiom/X86/logical-right-shift-until-zero-debuginfo.ll b/llvm/test/Transforms/LoopIdiom/X86/logical-right-shift-until-zero-debuginfo.ll index 3c0c53c8773c34..04946ce9b4f80d 100644 --- a/llvm/test/Transforms/LoopIdiom/X86/logical-right-shift-until-zero-debuginfo.ll +++ b/llvm/test/Transforms/LoopIdiom/X86/logical-right-shift-until-zero-debuginfo.ll @@ -12,15 +12,15 @@ define i8 @p(i8 %val, i8 %start, i8 %extraoffset) { ; NOLZCNT-NEXT: br label [[LOOP:%.*]], !dbg [[DBG20:![0-9]+]] ; NOLZCNT: loop: ; NOLZCNT-NEXT: [[IV:%.*]] = phi i8 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ], !dbg [[DBG21:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META9:![0-9]+]], metadata !DIExpression()), !dbg [[DBG21]] +; NOLZCNT-NEXT: #dbg_value(i8 [[IV]], [[META9:![0-9]+]], !DIExpression(), [[DBG21]]) ; NOLZCNT-NEXT: [[NBITS:%.*]] = add nsw i8 [[IV]], [[EXTRAOFFSET:%.*]], !dbg [[DBG22:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG22]] +; NOLZCNT-NEXT: #dbg_value(i8 [[NBITS]], [[META11:![0-9]+]], !DIExpression(), [[DBG22]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED:%.*]] = lshr i8 [[VAL:%.*]], [[NBITS]], !dbg [[DBG23:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META12:![0-9]+]], metadata !DIExpression()), !dbg [[DBG23]] +; NOLZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META12:![0-9]+]], !DIExpression(), [[DBG23]]) ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO:%.*]] = icmp eq i8 [[VAL_SHIFTED]], 0, !dbg [[DBG24:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO]], metadata [[META13:![0-9]+]], metadata !DIExpression()), !dbg [[DBG24]] +; NOLZCNT-NEXT: 
#dbg_value(i1 [[VAL_SHIFTED_ISZERO]], [[META13:![0-9]+]], !DIExpression(), [[DBG24]]) ; NOLZCNT-NEXT: [[IV_NEXT]] = add i8 [[IV]], 1, !dbg [[DBG25:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata [[META14:![0-9]+]], metadata !DIExpression()), !dbg [[DBG25]] +; NOLZCNT-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META14:![0-9]+]], !DIExpression(), [[DBG25]]) ; NOLZCNT-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[VAL_SHIFTED_ISZERO]], i8 [[IV_NEXT]]), !dbg [[DBG26:![0-9]+]] ; NOLZCNT-NEXT: br i1 [[VAL_SHIFTED_ISZERO]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG27:![0-9]+]] ; NOLZCNT: end: @@ -29,11 +29,11 @@ define i8 @p(i8 %val, i8 %start, i8 %extraoffset) { ; NOLZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG30:![0-9]+]] ; NOLZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[VAL_SHIFTED_ISZERO]], [[LOOP]] ], !dbg [[DBG31:![0-9]+]] ; NOLZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG32:![0-9]+]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META15:![0-9]+]], metadata !DIExpression()), !dbg [[DBG28]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS_RES]], metadata [[META16:![0-9]+]], metadata !DIExpression()), !dbg [[DBG29]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META17:![0-9]+]], metadata !DIExpression()), !dbg [[DBG30]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META18:![0-9]+]], metadata !DIExpression()), !dbg [[DBG31]] -; NOLZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META19:![0-9]+]], metadata !DIExpression()), !dbg [[DBG32]] +; NOLZCNT-NEXT: #dbg_value(i8 [[IV_RES]], [[META15:![0-9]+]], !DIExpression(), [[DBG28]]) +; NOLZCNT-NEXT: #dbg_value(i8 [[NBITS_RES]], [[META16:![0-9]+]], !DIExpression(), [[DBG29]]) +; NOLZCNT-NEXT: #dbg_value(i8 
[[VAL_SHIFTED_RES]], [[META17:![0-9]+]], !DIExpression(), [[DBG30]]) +; NOLZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], [[META18:![0-9]+]], !DIExpression(), [[DBG31]]) +; NOLZCNT-NEXT: #dbg_value(i8 [[IV_NEXT_RES]], [[META19:![0-9]+]], !DIExpression(), [[DBG32]]) ; NOLZCNT-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG33:![0-9]+]] ; NOLZCNT-NEXT: ret i8 [[IV_RES]], !dbg [[DBG34:![0-9]+]] ; @@ -52,14 +52,14 @@ define i8 @p(i8 %val, i8 %start, i8 %extraoffset) { ; LZCNT-NEXT: [[LOOP_IV_NEXT]] = add nuw nsw i8 [[LOOP_IV]], 1, !dbg [[DBG22:![0-9]+]] ; LZCNT-NEXT: [[LOOP_IVCHECK:%.*]] = icmp eq i8 [[LOOP_IV_NEXT]], [[LOOP_TRIPCOUNT]], !dbg [[DBG22]] ; LZCNT-NEXT: [[IV:%.*]] = add nsw i8 [[LOOP_IV]], [[START]], !dbg [[DBG22]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV]], metadata [[META9:![0-9]+]], metadata !DIExpression()), !dbg [[DBG20]] -; LZCNT-NEXT: [[NBITS:%.*]] = add nsw i8 [[IV]], [[EXTRAOFFSET]], !dbg [[DBG22:]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG22]] +; LZCNT-NEXT: #dbg_value(i8 [[IV]], [[META9:![0-9]+]], !DIExpression(), [[DBG20]]) +; LZCNT-NEXT: [[NBITS:%.*]] = add nsw i8 [[IV]], [[EXTRAOFFSET]], !dbg [[DBG22]] +; LZCNT-NEXT: #dbg_value(i8 [[NBITS]], [[META11:![0-9]+]], !DIExpression(), [[DBG22]]) ; LZCNT-NEXT: [[VAL_SHIFTED:%.*]] = lshr i8 [[VAL]], [[NBITS]], !dbg [[DBG23:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED]], metadata [[META12:![0-9]+]], metadata !DIExpression()), !dbg [[DBG23]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[LOOP_IVCHECK]], metadata [[META13:![0-9]+]], metadata !DIExpression()), !dbg [[DBG24:![0-9]+]] +; LZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED]], [[META12:![0-9]+]], !DIExpression(), [[DBG23]]) +; LZCNT-NEXT: #dbg_value(i1 [[LOOP_IVCHECK]], [[META13:![0-9]+]], !DIExpression(), 
[[META24:![0-9]+]]) ; LZCNT-NEXT: [[IV_NEXT:%.*]] = add i8 [[IV]], 1, !dbg [[DBG25:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT]], metadata [[META14:![0-9]+]], metadata !DIExpression()), !dbg [[DBG25]] +; LZCNT-NEXT: #dbg_value(i8 [[IV_NEXT]], [[META14:![0-9]+]], !DIExpression(), [[DBG25]]) ; LZCNT-NEXT: call void @escape_inner(i8 [[IV]], i8 [[NBITS]], i8 [[VAL_SHIFTED]], i1 [[LOOP_IVCHECK]], i8 [[IV_NEXT]]), !dbg [[DBG26:![0-9]+]] ; LZCNT-NEXT: br i1 [[LOOP_IVCHECK]], label [[END:%.*]], label [[LOOP]], !dbg [[DBG27:![0-9]+]] ; LZCNT: end: @@ -68,11 +68,11 @@ define i8 @p(i8 %val, i8 %start, i8 %extraoffset) { ; LZCNT-NEXT: [[VAL_SHIFTED_RES:%.*]] = phi i8 [ [[VAL_SHIFTED]], [[LOOP]] ], !dbg [[DBG30:![0-9]+]] ; LZCNT-NEXT: [[VAL_SHIFTED_ISZERO_RES:%.*]] = phi i1 [ [[LOOP_IVCHECK]], [[LOOP]] ], !dbg [[DBG31:![0-9]+]] ; LZCNT-NEXT: [[IV_NEXT_RES:%.*]] = phi i8 [ [[IV_NEXT]], [[LOOP]] ], !dbg [[DBG32:![0-9]+]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_RES]], metadata [[META15:![0-9]+]], metadata !DIExpression()), !dbg [[DBG28]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[NBITS_RES]], metadata [[META16:![0-9]+]], metadata !DIExpression()), !dbg [[DBG29]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[VAL_SHIFTED_RES]], metadata [[META17:![0-9]+]], metadata !DIExpression()), !dbg [[DBG30]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i1 [[VAL_SHIFTED_ISZERO_RES]], metadata [[META18:![0-9]+]], metadata !DIExpression()), !dbg [[DBG31]] -; LZCNT-NEXT: call void @llvm.dbg.value(metadata i8 [[IV_NEXT_RES]], metadata [[META19:![0-9]+]], metadata !DIExpression()), !dbg [[DBG32]] +; LZCNT-NEXT: #dbg_value(i8 [[IV_RES]], [[META15:![0-9]+]], !DIExpression(), [[DBG28]]) +; LZCNT-NEXT: #dbg_value(i8 [[NBITS_RES]], [[META16:![0-9]+]], !DIExpression(), [[DBG29]]) +; LZCNT-NEXT: #dbg_value(i8 [[VAL_SHIFTED_RES]], [[META17:![0-9]+]], !DIExpression(), [[DBG30]]) +; LZCNT-NEXT: #dbg_value(i1 [[VAL_SHIFTED_ISZERO_RES]], 
[[META18:![0-9]+]], !DIExpression(), [[DBG31]]) +; LZCNT-NEXT: #dbg_value(i8 [[IV_NEXT_RES]], [[META19:![0-9]+]], !DIExpression(), [[DBG32]]) ; LZCNT-NEXT: call void @escape_outer(i8 [[IV_RES]], i8 [[NBITS_RES]], i8 [[VAL_SHIFTED_RES]], i1 [[VAL_SHIFTED_ISZERO_RES]], i8 [[IV_NEXT_RES]]), !dbg [[DBG33:![0-9]+]] ; LZCNT-NEXT: ret i8 [[IV_RES]], !dbg [[DBG34:![0-9]+]] ; diff --git a/llvm/test/Transforms/LoopIdiom/debug-line.ll b/llvm/test/Transforms/LoopIdiom/debug-line.ll index 017d63d0c14a50..eccfc38b9f8616 100644 --- a/llvm/test/Transforms/LoopIdiom/debug-line.ll +++ b/llvm/test/Transforms/LoopIdiom/debug-line.ll @@ -7,8 +7,8 @@ target triple = "x86_64-apple-darwin10.0.0" define void @foo(ptr nocapture %a) nounwind ssp !dbg !0 { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata ptr [[A:%.*]], metadata [[META7:![0-9]+]], metadata !DIExpression()), !dbg [[DBG10:![0-9]+]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 0, metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG15:![0-9]+]] +; CHECK-NEXT: #dbg_value(ptr [[A:%.*]], [[META7:![0-9]+]], !DIExpression(), [[META10:![0-9]+]]) +; CHECK-NEXT: #dbg_value(i32 0, [[META11:![0-9]+]], !DIExpression(), [[META15:![0-9]+]]) ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 [[A]], i8 0, i64 8000, i1 false), !dbg [[DBG16:![0-9]+]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: @@ -16,9 +16,9 @@ define void @foo(ptr nocapture %a) nounwind ssp !dbg !0 { ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr double, ptr [[A]], i64 [[INDVAR]] ; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVAR_NEXT]], 1000 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !dbg [[DBG15]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !dbg [[META15]] ; CHECK: for.end: -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata [[META3:![0-9]+]], metadata 
[[META11]], metadata !DIExpression()), !dbg [[DBG17:![0-9]+]] +; CHECK-NEXT: #dbg_value([[META3:![0-9]+]], [[META11]], !DIExpression(), [[META17:![0-9]+]]) ; CHECK-NEXT: ret void, !dbg [[DBG18:![0-9]+]] ; entry: diff --git a/llvm/test/Transforms/LoopIdiom/memcpy-debugify-remarks.ll b/llvm/test/Transforms/LoopIdiom/memcpy-debugify-remarks.ll index 3a48b178123c47..a9f8af22bbc392 100644 --- a/llvm/test/Transforms/LoopIdiom/memcpy-debugify-remarks.ll +++ b/llvm/test/Transforms/LoopIdiom/memcpy-debugify-remarks.ll @@ -38,17 +38,17 @@ define void @test6_dest_align(ptr noalias align 1 %Base, ptr noalias align 4 %De ; CHECK-NEXT: br label [[FOR_BODY:%.*]], !dbg [[DBG18]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[BB_NPH:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[FOR_BODY]] ], !dbg [[DBG20:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[INDVAR]], metadata [[META9:![0-9]+]], metadata !DIExpression()), !dbg [[DBG20]] +; CHECK-NEXT: #dbg_value(i64 [[INDVAR]], [[META9:![0-9]+]], !DIExpression(), [[DBG20]]) ; CHECK-NEXT: [[I_0_014:%.*]] = getelementptr i32, ptr [[BASE]], i64 [[INDVAR]], !dbg [[DBG21:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata ptr [[I_0_014]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG21]] +; CHECK-NEXT: #dbg_value(ptr [[I_0_014]], [[META11:![0-9]+]], !DIExpression(), [[DBG21]]) ; CHECK-NEXT: [[DESTI:%.*]] = getelementptr i32, ptr [[DEST]], i64 [[INDVAR]], !dbg [[DBG22:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata ptr [[DESTI]], metadata [[META12:![0-9]+]], metadata !DIExpression()), !dbg [[DBG22]] +; CHECK-NEXT: #dbg_value(ptr [[DESTI]], [[META12:![0-9]+]], !DIExpression(), [[DBG22]]) ; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[I_0_014]], align 1, !dbg [[DBG23:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[V]], metadata [[META13:![0-9]+]], metadata !DIExpression()), !dbg [[DBG23]] +; CHECK-NEXT: #dbg_value(i32 [[V]], [[META13:![0-9]+]], !DIExpression(), 
[[DBG23]]) ; CHECK-NEXT: [[INDVAR_NEXT]] = add i64 [[INDVAR]], 1, !dbg [[DBG24:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[INDVAR_NEXT]], metadata [[META15:![0-9]+]], metadata !DIExpression()), !dbg [[DBG24]] +; CHECK-NEXT: #dbg_value(i64 [[INDVAR_NEXT]], [[META15:![0-9]+]], !DIExpression(), [[DBG24]]) ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[SIZE]], !dbg [[DBG25:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[EXITCOND]], metadata [[META16:![0-9]+]], metadata !DIExpression()), !dbg [[DBG25]] +; CHECK-NEXT: #dbg_value(i1 [[EXITCOND]], [[META16:![0-9]+]], !DIExpression(), [[DBG25]]) ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !dbg [[DBG26:![0-9]+]] ; CHECK: for.end: ; CHECK-NEXT: ret void, !dbg [[DBG27:![0-9]+]] diff --git a/llvm/test/Transforms/LoopIdiom/memset-debugify-remarks.ll b/llvm/test/Transforms/LoopIdiom/memset-debugify-remarks.ll index 62c56d17c3baca..95859c0b49f468 100644 --- a/llvm/test/Transforms/LoopIdiom/memset-debugify-remarks.ll +++ b/llvm/test/Transforms/LoopIdiom/memset-debugify-remarks.ll @@ -39,7 +39,7 @@ define void @_Z15my_basic_memsetPcS_c(ptr %ptr, ptr %end, i8 %value) { ; CHECK-NEXT: [[PTR2:%.*]] = ptrtoint ptr [[PTR:%.*]] to i64, !dbg [[DBG15:![0-9]+]] ; CHECK-NEXT: [[END1:%.*]] = ptrtoint ptr [[END:%.*]] to i64, !dbg [[DBG15]] ; CHECK-NEXT: [[CMP3:%.*]] = icmp eq ptr [[PTR]], [[END]], !dbg [[DBG15]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[CMP3]], metadata [[META9:![0-9]+]], metadata !DIExpression()), !dbg [[DBG15]] +; CHECK-NEXT: #dbg_value(i1 [[CMP3]], [[META9:![0-9]+]], !DIExpression(), [[DBG15]]) ; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]], !dbg [[DBG16:![0-9]+]] ; CHECK: for.body.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[END1]], [[PTR2]], !dbg [[DBG17:![0-9]+]] @@ -47,11 +47,11 @@ define void @_Z15my_basic_memsetPcS_c(ptr %ptr, ptr %end, i8 %value) { ; CHECK-NEXT: br label 
[[FOR_BODY:%.*]], !dbg [[DBG17]] ; CHECK: for.body: ; CHECK-NEXT: [[PTR_ADDR_04:%.*]] = phi ptr [ [[INCDEC_PTR:%.*]], [[FOR_BODY]] ], [ [[PTR]], [[FOR_BODY_PREHEADER]] ], !dbg [[DBG19:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata ptr [[PTR_ADDR_04]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG19]] +; CHECK-NEXT: #dbg_value(ptr [[PTR_ADDR_04]], [[META11:![0-9]+]], !DIExpression(), [[DBG19]]) ; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds i8, ptr [[PTR_ADDR_04]], i64 1, !dbg [[DBG20:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata ptr [[INCDEC_PTR]], metadata [[META13:![0-9]+]], metadata !DIExpression()), !dbg [[DBG20]] +; CHECK-NEXT: #dbg_value(ptr [[INCDEC_PTR]], [[META13:![0-9]+]], !DIExpression(), [[DBG20]]) ; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[INCDEC_PTR]], [[END]], !dbg [[DBG21:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[CMP]], metadata [[META14:![0-9]+]], metadata !DIExpression()), !dbg [[DBG21]] +; CHECK-NEXT: #dbg_value(i1 [[CMP]], [[META14:![0-9]+]], !DIExpression(), [[DBG21]]) ; CHECK-NEXT: br i1 [[CMP]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]], !dbg [[DBG17]] ; CHECK: for.end.loopexit: ; CHECK-NEXT: br label [[FOR_END]], !dbg [[DBG22:![0-9]+]] diff --git a/llvm/test/Transforms/LoopRotate/call-prepare-for-lto.ll b/llvm/test/Transforms/LoopRotate/call-prepare-for-lto.ll index a7c957a41f79f5..8b030163d66ad4 100644 --- a/llvm/test/Transforms/LoopRotate/call-prepare-for-lto.ll +++ b/llvm/test/Transforms/LoopRotate/call-prepare-for-lto.ll @@ -46,13 +46,13 @@ define void @test_prepare_for_lto_intrinsic() !dbg !7 { ; FULL-LABEL: @test_prepare_for_lto_intrinsic( ; FULL-NEXT: entry: ; FULL-NEXT: %array = alloca [20 x i32], align 16 -; FULL-NEXT: call void @llvm.dbg.value(metadata i32 0, metadata !12, metadata !DIExpression()), !dbg !13 +; FULL-NEXT: #dbg_value(i32 0, !12, !DIExpression(), !13 ; FULL-NEXT: br label %for.body ; ; PREPARE-LABEL: @test_prepare_for_lto_intrinsic( 
; PREPARE-NEXT: entry: ; PREPARE-NEXT: %array = alloca [20 x i32], align 16 -; PREPARE-NEXT: call void @llvm.dbg.value(metadata i32 0, metadata !12, metadata !DIExpression()), !dbg !13 +; PREPARE-NEXT: #dbg_value(i32 0, !12, !DIExpression(), !13 ; PREPARE-NEXT: br label %for.body ; entry: diff --git a/llvm/test/Transforms/LoopRotate/dbg-value-duplicates-2.ll b/llvm/test/Transforms/LoopRotate/dbg-value-duplicates-2.ll index 25896ab5901ff7..dfb8aedf411717 100644 --- a/llvm/test/Transforms/LoopRotate/dbg-value-duplicates-2.ll +++ b/llvm/test/Transforms/LoopRotate/dbg-value-duplicates-2.ll @@ -4,22 +4,22 @@ define dso_local i16 @main() local_unnamed_addr #0 !dbg !7 { ; CHECK-LABEL: @main( ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @llvm.dbg.value(metadata i16 9, metadata !12, metadata !DIExpression()), !dbg !13 -; CHECK-NEXT: br label [[BB2:%.*]], !dbg !14 +; CHECK-NEXT: #dbg_value(i16 9, [[META12:![0-9]+]], !DIExpression(), [[META13:![0-9]+]]) +; CHECK-NEXT: br label [[BB2:%.*]], !dbg [[DBG14:![0-9]+]] ; CHECK: bb2: ; CHECK-NEXT: [[TMP1:%.*]] = phi i16 [ 9, [[ENTRY:%.*]] ], [ [[TMP5:%.*]], [[BB2]] ] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i16 [[TMP1]], metadata !12, metadata !DIExpression()), !dbg !13 -; CHECK-NEXT: [[TMP4:%.*]] = call i16 @wibble(i16 [[TMP1]]), !dbg !14 -; CHECK-NEXT: [[TMP5]] = add nsw i16 [[TMP4]], [[TMP1]], !dbg !14 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i16 [[TMP5]], metadata !12, metadata !DIExpression()), !dbg !13 -; CHECK-NEXT: [[TMP6:%.*]] = call i16 @wibble(i16 [[TMP4]]), !dbg !14 -; CHECK-NEXT: [[TMP7:%.*]] = mul nsw i16 [[TMP6]], 3, !dbg !14 -; CHECK-NEXT: [[TMP8:%.*]] = call i16 @wibble(i16 [[TMP7]]), !dbg !14 -; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i16 [[TMP5]], 17, !dbg !14 -; CHECK-NEXT: br i1 [[TMP2]], label [[BB2]], label [[BB3:%.*]], !dbg !14 +; CHECK-NEXT: #dbg_value(i16 [[TMP1]], [[META12]], !DIExpression(), [[META13]]) +; CHECK-NEXT: [[TMP4:%.*]] = call i16 @wibble(i16 [[TMP1]]), !dbg [[DBG14]] +; 
CHECK-NEXT: [[TMP5]] = add nsw i16 [[TMP4]], [[TMP1]], !dbg [[DBG14]] +; CHECK-NEXT: #dbg_value(i16 [[TMP5]], [[META12]], !DIExpression(), [[META13]]) +; CHECK-NEXT: [[TMP6:%.*]] = call i16 @wibble(i16 [[TMP4]]), !dbg [[DBG14]] +; CHECK-NEXT: [[TMP7:%.*]] = mul nsw i16 [[TMP6]], 3, !dbg [[DBG14]] +; CHECK-NEXT: [[TMP8:%.*]] = call i16 @wibble(i16 [[TMP7]]), !dbg [[DBG14]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i16 [[TMP5]], 17, !dbg [[DBG14]] +; CHECK-NEXT: br i1 [[TMP2]], label [[BB2]], label [[BB3:%.*]], !dbg [[DBG14]] ; CHECK: bb3: -; CHECK-NEXT: [[TMP10:%.*]] = call i16 @wibble(i16 8), !dbg !14 -; CHECK-NEXT: ret i16 [[TMP10]], !dbg !14 +; CHECK-NEXT: [[TMP10:%.*]] = call i16 @wibble(i16 8), !dbg [[DBG14]] +; CHECK-NEXT: ret i16 [[TMP10]], !dbg [[DBG14]] ; entry: call void @llvm.dbg.value(metadata i16 9, metadata !12, metadata !DIExpression()), !dbg !13 diff --git a/llvm/test/Transforms/LoopRotate/dbg-value-duplicates.ll b/llvm/test/Transforms/LoopRotate/dbg-value-duplicates.ll index 59a8328090834e..c864c3224e52cb 100644 --- a/llvm/test/Transforms/LoopRotate/dbg-value-duplicates.ll +++ b/llvm/test/Transforms/LoopRotate/dbg-value-duplicates.ll @@ -10,8 +10,8 @@ entry: call void @llvm.dbg.value(metadata i64 %n, metadata !16, metadata !DIExpression()), !dbg !21 call void @llvm.dbg.value(metadata i64 %s, metadata !17, metadata !DIExpression()), !dbg !22 call void @llvm.dbg.value(metadata i64 0, metadata !18, metadata !DIExpression()), !dbg !23 - ; CHECK: call void @llvm.dbg.value(metadata i64 0, metadata !18, metadata !DIExpression()), !dbg !23 - ; CHECK-NOT: call void @llvm.dbg.value(metadata i64 0, metadata !18, metadata !DIExpression()), !dbg !23 + ; CHECK: #dbg_value(i64 0, !18, !DIExpression(), !23 + ; CHECK-NOT: #dbg_value(i64 0, !18, !DIExpression(), !23 br label %for.cond, !dbg !24 for.cond: ; preds = %for.body, %entry @@ -30,8 +30,8 @@ for.body: ; preds = %for.cond call void @bar(float %0), !dbg !34 %add = add nsw i64 %i.0, %s, !dbg !35 call void 
@llvm.dbg.value(metadata i64 %add, metadata !18, metadata !DIExpression()), !dbg !23 - ; CHECK: call void @llvm.dbg.value(metadata i64 %add, metadata !18, metadata !DIExpression()), !dbg !23 - ; CHECK-NOT: call void @llvm.dbg.value(metadata i64 %add, metadata !18, metadata !DIExpression()), !dbg !23 + ; CHECK: #dbg_value(i64 %add, !18, !DIExpression(), !23 + ; CHECK-NOT: #dbg_value(i64 %add, !18, !DIExpression(), !23 br label %for.cond, !dbg !36, !llvm.loop !37 } diff --git a/llvm/test/Transforms/LoopRotate/dbgvalue.ll b/llvm/test/Transforms/LoopRotate/dbgvalue.ll index 9ecc31e1bd2d3b..b58d821fbca5d7 100644 --- a/llvm/test/Transforms/LoopRotate/dbgvalue.ll +++ b/llvm/test/Transforms/LoopRotate/dbgvalue.ll @@ -4,7 +4,6 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone declare void @llvm.dbg.value(metadata, metadata, metadata) nounwind readnone -; CHECK: declare void @llvm.dbg.value(metadata, ; This function rotates the exit conditon into the entry block, moving the ; dbg.values with it. 
Check that they resolve through the PHIs to the arguments @@ -15,23 +14,23 @@ declare void @llvm.dbg.value(metadata, metadata, metadata) nounwind readnone define i32 @tak(i32 %x, i32 %y, i32 %z) nounwind ssp !dbg !0 { ; CHECK-LABEL: define i32 @tak( ; CHECK: entry -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %x -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %y -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %z +; CHECK-NEXT: #dbg_value(i32 %x +; CHECK-NEXT: #dbg_value(i32 %y +; CHECK-NEXT: #dbg_value(i32 %z ; CHECK: if.then.lr.ph: ; CHECK: if.then: ; CHECK-NEXT: %z.tr4 = phi ; CHECK-NEXT: %y.tr3 = phi ; CHECK-NEXT: %x.tr2 = phi -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %z.tr4 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %y.tr3 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %x.tr2 +; CHECK-NEXT: #dbg_value(i32 %z.tr4 +; CHECK-NEXT: #dbg_value(i32 %y.tr3 +; CHECK-NEXT: #dbg_value(i32 %x.tr2 ; CHECK: %call = tail call i32 @tak(i32 ; CHECK: %call9 = tail call i32 @tak(i32 ; CHECK: %call14 = tail call i32 @tak(i32 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %call -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %call9 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %call14 +; CHECK-NEXT: #dbg_value(i32 %call +; CHECK-NEXT: #dbg_value(i32 %call9 +; CHECK-NEXT: #dbg_value(i32 %call14 entry: br label %tailrecurse @@ -70,19 +69,19 @@ return: ; preds = %if.end define i32 @tak_dup(i32 %x, i32 %y, i32 %z) nounwind ssp !dbg !50 { ; CHECK-LABEL: define i32 @tak_dup( ; CHECK: entry -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %x -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %y -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %z +; CHECK-NEXT: #dbg_value(i32 %x +; CHECK-NEXT: #dbg_value(i32 %y +; CHECK-NEXT: #dbg_value(i32 %z ; CHECK: if.then.lr.ph: ; CHECK: if.then: ; CHECK-NEXT: %z.tr4 = phi ; CHECK-NEXT: %y.tr3 = phi ; CHECK-NEXT: %x.tr2 = phi -; CHECK-NEXT: call void 
@llvm.dbg.value(metadata i32 %x.tr2 +; CHECK-NEXT: #dbg_value(i32 %x.tr2 ; CHECK: %call = tail call i32 @tak(i32 ; CHECK: %call9 = tail call i32 @tak(i32 ; CHECK: %call14 = tail call i32 @tak(i32 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %call14 +; CHECK-NEXT: #dbg_value(i32 %call14 entry: br label %tailrecurse @@ -131,17 +130,17 @@ define i32 @tak2(i32 %x, i32 %y, i32 %z) nounwind ssp !dbg !21 { ; CHECK-NEXT: %z.tr4 = phi i32 ; CHECK-NEXT: %y.tr3 = phi i32 ; CHECK-NEXT: %x.tr2 = phi i32 -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 %x.tr2 -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 %y.tr3 -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 %z.tr4 +; CHECK-NEXT: #dbg_value(i32 %x.tr2 +; CHECK-NEXT: #dbg_value(i32 %y.tr3 +; CHECK-NEXT: #dbg_value(i32 %z.tr4 ; CHECK: tail call i32 @tak(i32 ; CHECK: tail call i32 @tak(i32 ; CHECK: tail call i32 @tak(i32 ; CHECK: if.end: ; CHECK-NEXT: z.tr.lcssa = phi i32 -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 undef -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 undef -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 %z.tr.lcssa +; CHECK-NEXT: #dbg_value(i32 undef +; CHECK-NEXT: #dbg_value(i32 undef +; CHECK-NEXT: #dbg_value(i32 %z.tr.lcssa entry: br label %tailrecurse @@ -182,7 +181,7 @@ define void @FindFreeHorzSeg(i64 %startCol, i64 %row, ptr %rowStart) { ; body, even though it contains a debug intrinsic call. 
; CHECK-LABEL: define void @FindFreeHorzSeg( ; CHECK: %dec = add -; CHECK-NEXT: tail call void @llvm.dbg.value +; CHECK-NEXT: #dbg_value ; CHECK: %cmp = icmp ; CHECK: br i1 %cmp ; CHECK: phi i64 [ %{{[^,]*}}, %{{[^,]*}} ] @@ -228,14 +227,14 @@ define void @invariant_hoist() !dbg !70 { ; CHECK: entry: ; CHECK-NEXT: br label %L0.preheader ; CHECK: L0.preheader: -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 0, +; CHECK-NEXT: #dbg_value(i32 0, ; CHECK-NEXT: %cmp = icmp slt i32 0, 0, ; CHECK: L1.preheader: ; CHECK-NEXT: %spec.select3 = phi i32 ; CHECK-NEXT: %k.02 = phi i32 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %k.02, +; CHECK-NEXT: #dbg_value(i32 %k.02, ; CHECK: L0.latch: -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %spec.select3, +; CHECK-NEXT: #dbg_value(i32 %spec.select3, entry: br label %L0.preheader, !dbg !77 diff --git a/llvm/test/Transforms/LoopRotate/delete-dbg-values.ll b/llvm/test/Transforms/LoopRotate/delete-dbg-values.ll index bce5ed02b43bf9..909f9d8b182b70 100644 --- a/llvm/test/Transforms/LoopRotate/delete-dbg-values.ll +++ b/llvm/test/Transforms/LoopRotate/delete-dbg-values.ll @@ -10,17 +10,16 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -; CHECK: declare void @llvm.dbg.value(metadata, ; CHECK-LABEL: define void @_ZNK4llvm5APInt4sextEj(ptr ; CHECK-LABEL: entry: -; CHECK: call void @llvm.dbg.value(metadata i32 0, metadata ![[SRC:[0-9]+]], +; CHECK: #dbg_value(i32 0, ![[SRC:[0-9]+]], ; CHECK-NEXT: load -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 0, metadata ![[SINK:[0-9]+]], -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 0, metadata ![[SRC]], +; CHECK-NEXT: #dbg_value(i32 0, ![[SINK:[0-9]+]], +; CHECK-NEXT: #dbg_value(i32 0, ![[SRC]], ; CHECK-LABEL: for.body: -; CHECK: call void @llvm.dbg.value(metadata i32 0, metadata ![[SINK]], -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 0, metadata ![[SRC]], +; 
CHECK: #dbg_value(i32 0, ![[SINK]], +; CHECK-NEXT: #dbg_value(i32 0, ![[SRC]], declare void @llvm.dbg.value(metadata, metadata, metadata) diff --git a/llvm/test/Transforms/LoopRotate/phi-dbgvalue.ll b/llvm/test/Transforms/LoopRotate/phi-dbgvalue.ll index dfaaa37825c886..1ab5ee6b7aa7d8 100644 --- a/llvm/test/Transforms/LoopRotate/phi-dbgvalue.ll +++ b/llvm/test/Transforms/LoopRotate/phi-dbgvalue.ll @@ -2,11 +2,11 @@ ;CHECK-LABEL: func ;CHECK-LABEL: entry -;CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 %a -;CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 1, metadata ![[I_VAR:[0-9]+]], metadata !DIExpression()) +;CHECK-NEXT: #dbg_value(i32 %a +;CHECK-NEXT: #dbg_value(i32 1, ![[I_VAR:[0-9]+]], !DIExpression(), ;CHECK-LABEL: for.body: ;CHECK-NEXT: [[I:%.*]] = phi i32 [ 1, %entry ], [ %inc, %for.body ] -;CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 [[I]], metadata ![[I_VAR]], metadata !DIExpression()) +;CHECK-NEXT: #dbg_value(i32 [[I]], ![[I_VAR]], !DIExpression(), ; CHECK: ![[I_VAR]] = !DILocalVariable(name: "i",{{.*}}) diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/lsr-cond-dbg.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/lsr-cond-dbg.ll index 01c7950ab702db..fb753c9536b1f7 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/lsr-cond-dbg.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/X86/lsr-cond-dbg.ll @@ -12,7 +12,7 @@ ; CHECK-LABEL: i: ; CHECK-NOT: icmp ; CHECK: [[COND:%.*]] = icmp eq i8 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i1 [[COND]] +; CHECK-NEXT: #dbg_value(i1 [[COND]] ; CHECK-NEXT: br i1 [[COND]] diff --git a/llvm/test/Transforms/LoopStrengthReduce/dbg-preserve-0.ll b/llvm/test/Transforms/LoopStrengthReduce/dbg-preserve-0.ll index 204ea5d320f53d..8ebee5e270e6d0 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/dbg-preserve-0.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/dbg-preserve-0.ll @@ -23,12 +23,12 @@ for.body: ; preds = %entry, %for.body %p.addr.05 = phi ptr [ %p, %entry ], [ %add.ptr, 
%for.body ] call void @llvm.dbg.value(metadata i8 %i.06, metadata !14, metadata !DIExpression()), !dbg !17 call void @llvm.dbg.value(metadata ptr %p.addr.05, metadata !13, metadata !DIExpression()), !dbg !16 -; CHECK-NOT: call void @llvm.dbg.value(metadata ptr undef -; CHECK: all void @llvm.dbg.value(metadata ptr %lsr.iv, metadata ![[MID_p:[0-9]+]], metadata !DIExpression(DW_OP_constu, 3, DW_OP_minus, DW_OP_stack_value)) +; CHECK-NOT: #dbg_value(ptr undef +; CHECK: #dbg_value(ptr %lsr.iv, ![[MID_p:[0-9]+]], !DIExpression(DW_OP_constu, 3, DW_OP_minus, DW_OP_stack_value), %add.ptr = getelementptr inbounds i8, ptr %p.addr.05, i64 3, !dbg !20 call void @llvm.dbg.value(metadata ptr %add.ptr, metadata !13, metadata !DIExpression()), !dbg !16 -; CHECK-NOT: call void @llvm.dbg.value(metadata ptr undef -; CHECK: call void @llvm.dbg.value(metadata ptr %lsr.iv, metadata ![[MID_p]], metadata !DIExpression()) +; CHECK-NOT: #dbg_value(ptr undef +; CHECK: #dbg_value(ptr %lsr.iv, ![[MID_p]], !DIExpression(), store i8 %i.06, ptr %add.ptr, align 1, !dbg !23, !tbaa !24 %inc = add nuw nsw i8 %i.06, 1, !dbg !27 call void @llvm.dbg.value(metadata i8 %inc, metadata !14, metadata !DIExpression()), !dbg !17 diff --git a/llvm/test/Transforms/LoopStrengthReduce/dbg-preserve-1.ll b/llvm/test/Transforms/LoopStrengthReduce/dbg-preserve-1.ll index 8b857e1bb6845f..c087a85a352f14 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/dbg-preserve-1.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/dbg-preserve-1.ll @@ -4,7 +4,7 @@ ; Test that LSR avoids crashing on very large integer inputs. It should ; discard the variable location by creating an undef dbg.value. 
; -; CHECK: call void @llvm.dbg.value(metadata i128 poison, +; CHECK: #dbg_value(i128 poison, source_filename = "" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopStrengthReduce/dbg-preserve-2.ll b/llvm/test/Transforms/LoopStrengthReduce/dbg-preserve-2.ll index ec9c48628179a4..81cbe0cd7b949f 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/dbg-preserve-2.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/dbg-preserve-2.ll @@ -20,8 +20,8 @@ do.body: ; preds = %do.body, %entry ; CHECK-LABEL: do.body: %Result.addr.0 = phi i32 [ %Result, %entry ], [ %or, %do.body ] %Itr.0 = phi i32 [ 0, %entry ], [ %add, %do.body ], !dbg !17 -; CHECK-NOT: call void @llvm.dbg.value(metadata !DIArgList -; CHECK: call void @llvm.dbg.value(metadata !DIArgList(i32 %lsr.iv, i32 %Step), metadata ![[VAR_ITR:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_minus, DW_OP_LLVM_arg, 1, DW_OP_div, DW_OP_LLVM_arg, 1, DW_OP_mul, DW_OP_stack_value)) +; CHECK-NOT: #dbg_value(!DIArgList +; CHECK: #dbg_value(!DIArgList(i32 %lsr.iv, i32 %Step), ![[VAR_ITR:[0-9]+]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_minus, DW_OP_LLVM_arg, 1, DW_OP_div, DW_OP_LLVM_arg, 1, DW_OP_mul, DW_OP_stack_value), call void @llvm.dbg.value(metadata i32 %Itr.0, metadata !16, metadata !DIExpression()), !dbg !17 call void @llvm.dbg.value(metadata i32 %Result.addr.0, metadata !12, metadata !DIExpression()), !dbg !17 %add = add nsw i32 %Itr.0, %Step, !dbg !19 diff --git a/llvm/test/Transforms/LoopStrengthReduce/debuginfo-scev-salvage-0.ll b/llvm/test/Transforms/LoopStrengthReduce/debuginfo-scev-salvage-0.ll index 836f5254221734..30d8c33e29ec5e 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/debuginfo-scev-salvage-0.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/debuginfo-scev-salvage-0.ll @@ -17,9 +17,9 @@ ;; i++; // DexLabel('mul_pow_of_2_induction_increment') ;; } ;; } -; CHECK: call void 
@llvm.dbg.value(metadata i64 %lsr.iv, metadata ![[i:[0-9]+]], metadata !DIExpression(DW_OP_consts, 8, DW_OP_div, DW_OP_stack_value)) -; CHECK: call void @llvm.dbg.value(metadata i64 %lsr.iv, metadata ![[comp:[0-9]+]], metadata !DIExpression(DW_OP_consts, 8, DW_OP_div, DW_OP_consts, 8, DW_OP_mul, DW_OP_stack_value)) -; CHECK: call void @llvm.dbg.value(metadata i64 %lsr.iv, metadata ![[i]], metadata !DIExpression(DW_OP_consts, 8, DW_OP_div, DW_OP_consts, 1, DW_OP_plus, DW_OP_stack_value)) +; CHECK: #dbg_value(i64 %lsr.iv, ![[i:[0-9]+]], !DIExpression(DW_OP_consts, 8, DW_OP_div, DW_OP_stack_value), +; CHECK: #dbg_value(i64 %lsr.iv, ![[comp:[0-9]+]], !DIExpression(DW_OP_consts, 8, DW_OP_div, DW_OP_consts, 8, DW_OP_mul, DW_OP_stack_value), +; CHECK: #dbg_value(i64 %lsr.iv, ![[i]], !DIExpression(DW_OP_consts, 8, DW_OP_div, DW_OP_consts, 1, DW_OP_plus, DW_OP_stack_value), ; CHECK: ![[i]] = !DILocalVariable(name: "i" ; CHECK: ![[comp]] = !DILocalVariable(name: "comp" diff --git a/llvm/test/Transforms/LoopStrengthReduce/debuginfo-scev-salvage-1.ll b/llvm/test/Transforms/LoopStrengthReduce/debuginfo-scev-salvage-1.ll index d20b99283fbbca..d2e32d0b23aa2e 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/debuginfo-scev-salvage-1.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/debuginfo-scev-salvage-1.ll @@ -17,9 +17,9 @@ ;; i += 1; ;; } ;; } -; CHECK: call void @llvm.dbg.value(metadata i64 %lsr.iv, metadata ![[i:[0-9]+]], metadata !DIExpression()) -; CHECK: call void @llvm.dbg.value(metadata !DIArgList(i64 %lsr.iv, i32 %k), metadata ![[comp:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_consts, 4, DW_OP_mul, DW_OP_LLVM_arg, 1, DW_OP_plus, DW_OP_stack_value)) -; CHECK: call void @llvm.dbg.value(metadata i64 %lsr.iv, metadata ![[i]], metadata !DIExpression(DW_OP_consts, 1, DW_OP_plus, DW_OP_stack_value)) +; CHECK: #dbg_value(i64 %lsr.iv, ![[i:[0-9]+]], !DIExpression(), +; CHECK: #dbg_value(!DIArgList(i64 %lsr.iv, i32 %k), ![[comp:[0-9]+]], 
!DIExpression(DW_OP_LLVM_arg, 0, DW_OP_consts, 4, DW_OP_mul, DW_OP_LLVM_arg, 1, DW_OP_plus, DW_OP_stack_value), +; CHECK: #dbg_value(i64 %lsr.iv, ![[i]], !DIExpression(DW_OP_consts, 1, DW_OP_plus, DW_OP_stack_value), ; CHECK: ![[i]] = !DILocalVariable(name: "i" ; CHECK: ![[comp]] = !DILocalVariable(name: "comp" diff --git a/llvm/test/Transforms/LoopStrengthReduce/debuginfo-scev-salvage-2.ll b/llvm/test/Transforms/LoopStrengthReduce/debuginfo-scev-salvage-2.ll index 789f47c87d174f..3ddb3851cd04e8 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/debuginfo-scev-salvage-2.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/debuginfo-scev-salvage-2.ll @@ -18,9 +18,9 @@ ;; i++; ;; } ;; } -; CHECK: call void @llvm.dbg.value(metadata i64 %lsr.iv, metadata ![[i:[0-9]+]], metadata !DIExpression()) -; CHECK: call void @llvm.dbg.value(metadata !DIArgList(i64 %lsr.iv, i32 %k), metadata ![[comp:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_mul, DW_OP_stack_value)) -; CHECK: call void @llvm.dbg.value(metadata i64 %lsr.iv, metadata ![[i]], metadata !DIExpression(DW_OP_consts, 1, DW_OP_plus, DW_OP_stack_value)) +; CHECK: #dbg_value(i64 %lsr.iv, ![[i:[0-9]+]], !DIExpression(), +; CHECK: #dbg_value(!DIArgList(i64 %lsr.iv, i32 %k), ![[comp:[0-9]+]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_mul, DW_OP_stack_value), +; CHECK: #dbg_value(i64 %lsr.iv, ![[i]], !DIExpression(DW_OP_consts, 1, DW_OP_plus, DW_OP_stack_value), ; CHECK: ![[i]] = !DILocalVariable(name: "i" ; CHECK: ![[comp]] = !DILocalVariable(name: "comp" diff --git a/llvm/test/Transforms/LoopStrengthReduce/debuginfo-scev-salvage-3.ll b/llvm/test/Transforms/LoopStrengthReduce/debuginfo-scev-salvage-3.ll index 1c5dff9481693f..b402d92ac1421a 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/debuginfo-scev-salvage-3.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/debuginfo-scev-salvage-3.ll @@ -17,9 +17,9 @@ ;; i++; ;; } ;; } -; CHECK: call void @llvm.dbg.value(metadata i64 
%lsr.iv, metadata ![[i:[0-9]+]], metadata !DIExpression()) -; CHECK: call void @llvm.dbg.value(metadata !DIArgList(i64 %lsr.iv, i32 %multiplicand), metadata ![[comp:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_mul, DW_OP_stack_value)) -; CHECK: call void @llvm.dbg.value(metadata i64 %lsr.iv, metadata ![[i]], metadata !DIExpression(DW_OP_consts, 1, DW_OP_plus, DW_OP_stack_value)) +; CHECK: #dbg_value(i64 %lsr.iv, ![[i:[0-9]+]], !DIExpression(), +; CHECK: #dbg_value(!DIArgList(i64 %lsr.iv, i32 %multiplicand), ![[comp:[0-9]+]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_mul, DW_OP_stack_value), +; CHECK: #dbg_value(i64 %lsr.iv, ![[i]], !DIExpression(DW_OP_consts, 1, DW_OP_plus, DW_OP_stack_value), ; CHECK: ![[i]] = !DILocalVariable(name: "i" ; CHECK: ![[comp]] = !DILocalVariable(name: "comp" diff --git a/llvm/test/Transforms/LoopStrengthReduce/debuginfo-scev-salvage-4.ll b/llvm/test/Transforms/LoopStrengthReduce/debuginfo-scev-salvage-4.ll index 6456ed43aee64e..d7182708136205 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/debuginfo-scev-salvage-4.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/debuginfo-scev-salvage-4.ll @@ -19,9 +19,9 @@ ;; ++i; ;; } ;; } -; CHECK: call void @llvm.dbg.value(metadata i64 %lsr.iv, metadata ![[i:[0-9]+]], metadata !DIExpression()) -; CHECK: call void @llvm.dbg.value(metadata !DIArgList(i64 %lsr.iv, i16 %factor1, i32 %factor0), metadata ![[comp:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_consts, 18446744073709551612, DW_OP_LLVM_arg, 1, DW_OP_LLVM_convert, 32, DW_ATE_signed, DW_OP_mul, DW_OP_mul, DW_OP_LLVM_arg, 2, DW_OP_plus, DW_OP_stack_value)) -; CHECK: call void @llvm.dbg.value(metadata i64 %lsr.iv, metadata ![[i]], metadata !DIExpression(DW_OP_consts, 1, DW_OP_plus, DW_OP_stack_value)) +; CHECK: #dbg_value(i64 %lsr.iv, ![[i:[0-9]+]], !DIExpression(), +; CHECK: #dbg_value(!DIArgList(i64 %lsr.iv, i16 %factor1, i32 %factor0), ![[comp:[0-9]+]], 
!DIExpression(DW_OP_LLVM_arg, 0, DW_OP_consts, 18446744073709551612, DW_OP_LLVM_arg, 1, DW_OP_LLVM_convert, 32, DW_ATE_signed, DW_OP_mul, DW_OP_mul, DW_OP_LLVM_arg, 2, DW_OP_plus, DW_OP_stack_value), +; CHECK: #dbg_value(i64 %lsr.iv, ![[i]], !DIExpression(DW_OP_consts, 1, DW_OP_plus, DW_OP_stack_value), ; CHECK: ![[i]] = !DILocalVariable(name: "i" ; CHECK: ![[comp]] = !DILocalVariable(name: "comp" diff --git a/llvm/test/Transforms/LoopStrengthReduce/debuginfo-scev-salvage-5.ll b/llvm/test/Transforms/LoopStrengthReduce/debuginfo-scev-salvage-5.ll index b06757ccf91580..a6a8d070061654 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/debuginfo-scev-salvage-5.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/debuginfo-scev-salvage-5.ll @@ -24,10 +24,10 @@ ;; and the modifying the position of the optimised-out value in the location ;; list. -; CHECK: call void @llvm.dbg.value(metadata !DIArgList(i64 %lsr.iv, i32 %k), metadata ![[comp:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_consts, 4, DW_OP_div, DW_OP_consts, 4, DW_OP_mul, DW_OP_LLVM_arg, 1, DW_OP_plus, DW_OP_stack_value)) -; CHECK: call void @llvm.dbg.value(metadata !DIArgList(i64 %lsr.iv, i32 %l, i32 %k), metadata ![[comp2:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_consts, 4, DW_OP_div, DW_OP_consts, 4, DW_OP_mul, DW_OP_LLVM_arg, 2, DW_OP_plus, DW_OP_LLVM_arg, 1, DW_OP_mul, DW_OP_stack_value)) -; CHECK: call void @llvm.dbg.value(metadata !DIArgList(i64 %lsr.iv, i32 %m, i32 %l, i32 %k), metadata ![[comp3:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_consts, 4, DW_OP_div, DW_OP_consts, 4, DW_OP_mul, DW_OP_LLVM_arg, 3, DW_OP_plus, DW_OP_LLVM_arg, 2, DW_OP_mul, DW_OP_LLVM_arg, 1, DW_OP_shl, DW_OP_stack_value)) -; CHECK: call void @llvm.dbg.value(metadata !DIArgList(i64 %lsr.iv, i32 %m, i32 %l, i32 %k), metadata ![[comp3:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_consts, 4, DW_OP_div, DW_OP_consts, 4, DW_OP_mul, DW_OP_LLVM_arg, 3, DW_OP_plus, DW_OP_LLVM_arg, 2, 
DW_OP_mul, DW_OP_LLVM_arg, 1, DW_OP_shl, DW_OP_stack_value)) +; CHECK: #dbg_value(!DIArgList(i64 %lsr.iv, i32 %k), ![[comp:[0-9]+]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_consts, 4, DW_OP_div, DW_OP_consts, 4, DW_OP_mul, DW_OP_LLVM_arg, 1, DW_OP_plus, DW_OP_stack_value), +; CHECK: #dbg_value(!DIArgList(i64 %lsr.iv, i32 %l, i32 %k), ![[comp2:[0-9]+]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_consts, 4, DW_OP_div, DW_OP_consts, 4, DW_OP_mul, DW_OP_LLVM_arg, 2, DW_OP_plus, DW_OP_LLVM_arg, 1, DW_OP_mul, DW_OP_stack_value), +; CHECK: #dbg_value(!DIArgList(i64 %lsr.iv, i32 %m, i32 %l, i32 %k), ![[comp3:[0-9]+]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_consts, 4, DW_OP_div, DW_OP_consts, 4, DW_OP_mul, DW_OP_LLVM_arg, 3, DW_OP_plus, DW_OP_LLVM_arg, 2, DW_OP_mul, DW_OP_LLVM_arg, 1, DW_OP_shl, DW_OP_stack_value), +; CHECK: #dbg_value(!DIArgList(i64 %lsr.iv, i32 %m, i32 %l, i32 %k), ![[comp3:[0-9]+]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_consts, 4, DW_OP_div, DW_OP_consts, 4, DW_OP_mul, DW_OP_LLVM_arg, 3, DW_OP_plus, DW_OP_LLVM_arg, 2, DW_OP_mul, DW_OP_LLVM_arg, 1, DW_OP_shl, DW_OP_stack_value), ; CHECK: ![[comp]] = !DILocalVariable(name: "comp" ; CHECK: ![[comp2]] = !DILocalVariable(name: "comp2" ; CHECK: ![[comp3]] = !DILocalVariable(name: "comp3" diff --git a/llvm/test/Transforms/LoopStrengthReduce/pr51329.ll b/llvm/test/Transforms/LoopStrengthReduce/pr51329.ll index e4edab1346d3c5..0135d380108ce1 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/pr51329.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/pr51329.ll @@ -6,8 +6,8 @@ ; Also show that no salvaging attempt is made for dbg.value that are undef ; pre-LSR. 
; -; CHECK: call void @llvm.dbg.value(metadata i64 undef, metadata !{{[0-9]+}}, metadata !DIExpression(DW_OP_plus_uconst, 228, DW_OP_stack_value)) -; CHECK: call void @llvm.dbg.value(metadata i64 %var2, metadata !{{[0-9]+}}, metadata !DIExpression(DW_OP_plus_uconst, 228, DW_OP_stack_value)) +; CHECK: #dbg_value(i64 undef, !{{[0-9]+}}, !DIExpression(DW_OP_plus_uconst, 228, DW_OP_stack_value), +; CHECK: #dbg_value(i64 %var2, !{{[0-9]+}}, !DIExpression(DW_OP_plus_uconst, 228, DW_OP_stack_value), target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopStrengthReduce/pr51656.ll b/llvm/test/Transforms/LoopStrengthReduce/pr51656.ll index 1d5886c01c7377..0e8155332742a5 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/pr51656.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/pr51656.ll @@ -37,7 +37,7 @@ for.body2.preheader: ; preds = %entry for.body2: ; preds = %for.body2.preheader, %for.body2 %0 = phi i32 [ %sub, %for.body2 ], [ %.pr, %for.body2.preheader ] %sub = sub nsw i32 %0, %mul.9, !dbg !40 - ; CHECK: call void @llvm.dbg.value(metadata !DIArgList(i32 poison, i32 %mul.9), metadata ![[VAR_e:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_minus, DW_OP_LLVM_convert, 32, DW_ATE_signed, DW_OP_LLVM_convert, 64, DW_ATE_signed, DW_OP_stack_value)) + ; CHECK: #dbg_value(!DIArgList(i32 poison, i32 %mul.9), ![[VAR_e:[0-9]+]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_minus, DW_OP_LLVM_convert, 32, DW_ATE_signed, DW_OP_LLVM_convert, 64, DW_ATE_signed, DW_OP_stack_value), call void @llvm.dbg.value(metadata i32 %sub, metadata !20, metadata !DIExpression(DW_OP_LLVM_convert, 32, DW_ATE_signed, DW_OP_LLVM_convert, 64, DW_ATE_signed, DW_OP_stack_value)), !dbg !41 %tobool.not = icmp eq i32 %sub, 0, !dbg !39 br i1 %tobool.not, label %for.cond1.for.end3_crit_edge, label %for.body2, !dbg !39, !llvm.loop !42 diff --git 
a/llvm/test/Transforms/LoopStrengthReduce/pr52161.ll b/llvm/test/Transforms/LoopStrengthReduce/pr52161.ll index 1a679392a06009..449dfc1126c292 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/pr52161.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/pr52161.ll @@ -19,7 +19,7 @@ for.body: ; preds = %for.body, %entry m: ; preds = %m, %entry %0 = phi i16 [ 3, %m ], [ 6, %entry ] %gg = add i16 %0, 23 - ; CHECK: call void @llvm.dbg.value(metadata i16 poison, metadata !{{[0-9]+}}, metadata !DIExpression()), + ; CHECK: #dbg_value(i16 poison, !{{[0-9]+}}, !DIExpression(), call void @llvm.dbg.value(metadata i16 %0, metadata !14, metadata !DIExpression()), !dbg !19 br label %m } diff --git a/llvm/test/Transforms/LoopUnroll/debug-info.ll b/llvm/test/Transforms/LoopUnroll/debug-info.ll index 2188cea3b88114..2a1f679fd4b62e 100644 --- a/llvm/test/Transforms/LoopUnroll/debug-info.ll +++ b/llvm/test/Transforms/LoopUnroll/debug-info.ll @@ -24,10 +24,10 @@ for.body: ; preds = %entry, %for.body %shr = ashr i32 %i.04, 1, !dbg !18 ; The loop gets unrolled entirely. - ; CHECK: call void @llvm.dbg.value(metadata i32 1, metadata !12, metadata !DIExpression()), !dbg !15 - ; CHECK: call void @llvm.dbg.value(metadata i32 4, metadata !12, metadata !DIExpression()), !dbg !15 - ; CHECK: call void @llvm.dbg.value(metadata i32 16, metadata !12, metadata !DIExpression()), !dbg !15 - ; CHECK: call void @llvm.dbg.value(metadata i32 64, metadata !12, metadata !DIExpression()), !dbg !15 + ; CHECK: #dbg_value(i32 1, !12, !DIExpression(), !15 + ; CHECK: #dbg_value(i32 4, !12, !DIExpression(), !15 + ; CHECK: #dbg_value(i32 16, !12, !DIExpression(), !15 + ; CHECK: #dbg_value(i32 64, !12, !DIExpression(), !15 %call = tail call i32 (i32, ...) 
@bar(i32 %shr) #3, !dbg !20 %shl = shl i32 %i.04, 2, !dbg !21 diff --git a/llvm/test/Transforms/LoopUnroll/runtime-epilog-debuginfo.ll b/llvm/test/Transforms/LoopUnroll/runtime-epilog-debuginfo.ll index 4de90c3a00a69c..606bc9e574a52f 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-epilog-debuginfo.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-epilog-debuginfo.ll @@ -18,11 +18,11 @@ ; CHECK: %div.i.epil = sdiv i32 %t.08.i.epil, 2, ; CHECK-NEXT: %add.i.epil = add i32 %t.08.i.epil, %a, ; CHECK-NEXT: %add1.i.epil = add i32 %add.i.epil, %div.i.epil, -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %add1.i.epil, +; CHECK-NEXT: #dbg_value(i32 %add1.i.epil, ; CHECK-NEXT: %inc.i.epil = add nuw i32 %i.09.i.epil, 1, !dbg !36 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %inc.i.epil, -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %inc.i.epil, -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %add1.i.epil, +; CHECK-NEXT: #dbg_value(i32 %inc.i.epil, +; CHECK-NEXT: #dbg_value(i32 %inc.i.epil, +; CHECK-NEXT: #dbg_value(i32 %add1.i.epil, ; CHECK: lee1.exit.loopexit: ; CHECK: br label %lee1.exit, !dbg ![[EXIT_LOC:[0-9]+]] diff --git a/llvm/test/Transforms/LoopUnroll/unroll-remove-redundant-dbg.ll b/llvm/test/Transforms/LoopUnroll/unroll-remove-redundant-dbg.ll index 8e348281dc61c0..2a328e6cc37673 100644 --- a/llvm/test/Transforms/LoopUnroll/unroll-remove-redundant-dbg.ll +++ b/llvm/test/Transforms/LoopUnroll/unroll-remove-redundant-dbg.ll @@ -7,7 +7,7 @@ define i64 @d(i1 %tobool.not, i32 %add, i64 %conv23) !dbg !14{ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 0, metadata [[META16:![0-9]+]], metadata !DIExpression()), !dbg [[DBG17:![0-9]+]] +; CHECK-NEXT: #dbg_value(i32 0, [[META16:![0-9]+]], !DIExpression(), [[META17:![0-9]+]]) ; CHECK-NEXT: ret i64 5 ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll 
b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll index 2c665a417ab599..475042d3edfba3 100644 --- a/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll +++ b/llvm/test/Transforms/LoopVectorize/dbg-outer-loop-vect.ll @@ -7,7 +7,7 @@ define void @foo(ptr %h) !dbg !4 { ; CHECK-LABEL: define void @foo( ; CHECK-SAME: ptr [[H:%.*]]) !dbg [[DBG4:![0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i64 0, metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG20:![0-9]+]] +; CHECK-NEXT: #dbg_value(i64 0, [[META11:![0-9]+]], !DIExpression(), [[META20:![0-9]+]]) ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]], !dbg [[DBG21:![0-9]+]] ; CHECK: vector.ph: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]], !dbg [[DBG21]] @@ -43,7 +43,7 @@ define void @foo(ptr %h) !dbg !4 { ; CHECK-NEXT: br label [[FOR_COND1_PREHEADER:%.*]], !dbg [[DBG21]] ; CHECK: for.cond1.preheader: ; CHECK-NEXT: [[I_023:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC13:%.*]], [[FOR_COND_CLEANUP3:%.*]] ] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i64 [[I_023]], metadata [[META11]], metadata !DIExpression()), !dbg [[DBG20]] +; CHECK-NEXT: #dbg_value(i64 [[I_023]], [[META11]], !DIExpression(), [[META20]]) ; CHECK-NEXT: br label [[FOR_COND5_PREHEADER:%.*]], !dbg [[DBG26]] ; CHECK: for.cond5.preheader: ; CHECK-NEXT: [[L_022:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER]] ], [ [[INC10:%.*]], [[FOR_COND5_PREHEADER]] ] @@ -60,7 +60,7 @@ define void @foo(ptr %h) !dbg !4 { ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_COND5_PREHEADER]], !dbg [[DBG26]] ; CHECK: for.cond.cleanup3: ; CHECK-NEXT: [[INC13]] = add nuw nsw i64 [[I_023]], 1, !dbg [[DBG27]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i64 [[INC13]], metadata [[META11]], metadata !DIExpression()), !dbg [[DBG20]] +; CHECK-NEXT: #dbg_value(i64 [[INC13]], [[META11]], !DIExpression(), [[META20]]) ; CHECK-NEXT: 
[[EXITCOND24_NOT:%.*]] = icmp eq i64 [[INC13]], 23, !dbg [[DBG28]] ; CHECK-NEXT: br i1 [[EXITCOND24_NOT]], label [[EXIT]], label [[FOR_COND1_PREHEADER]], !dbg [[DBG21]], !llvm.loop [[LOOP34:![0-9]+]] ; CHECK: exit: @@ -157,7 +157,7 @@ declare void @llvm.dbg.value(metadata, metadata, metadata) ; CHECK: [[META17]] = !DILocalVariable(name: "j", scope: [[META18:![0-9]+]], file: [[META1]], line: 12, type: [[META13]]) ; CHECK: [[META18]] = distinct !DILexicalBlock(scope: [[META19:![0-9]+]], file: [[META1]], line: 12, column: 7) ; CHECK: [[META19]] = distinct !DILexicalBlock(scope: [[META15]], file: [[META1]], line: 11, column: 5) -; CHECK: [[DBG20]] = !DILocation(line: 0, scope: [[META12]]) +; CHECK: [[META20]] = !DILocation(line: 0, scope: [[META12]]) ; CHECK: [[DBG21]] = !DILocation(line: 10, column: 3, scope: [[META12]]) ; CHECK: [[DBG22]] = !DILocation(line: 13, column: 11, scope: [[META23:![0-9]+]]) ; CHECK: [[META23]] = distinct !DILexicalBlock(scope: [[META18]], file: [[META1]], line: 12, column: 7) diff --git a/llvm/test/Transforms/LoopVectorize/discriminator.ll b/llvm/test/Transforms/LoopVectorize/discriminator.ll index 0b12761ea69c38..5e56dcf0c434f8 100644 --- a/llvm/test/Transforms/LoopVectorize/discriminator.ll +++ b/llvm/test/Transforms/LoopVectorize/discriminator.ll @@ -32,7 +32,7 @@ define void @_Z3foov() local_unnamed_addr #0 !dbg !6 { %7 = load i32, ptr %6, align 4, !dbg !17, !tbaa !15 %8 = add nsw i32 %7, %5, !dbg !17 ;PSEUDO_PROBE-COUNT-5: call void @llvm.pseudoprobe(i64 6699318081062747564, i64 2, i32 0, i64 -1), !dbg ![[#PROBE:]] -;DBG_VALUE: call void @llvm.dbg.declare{{.*}}!dbg ![[DBG:[0-9]*]] +;DBG_VALUE: #dbg_declare{{.*}} ![[DBG:[0-9]*]] call void @llvm.dbg.declare(metadata i32 %8, metadata !22, metadata !DIExpression()), !dbg !17 store i32 %8, ptr %6, align 4, !dbg !17, !tbaa !15 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !18 diff --git a/llvm/test/Transforms/Mem2Reg/ConvertDebugInfo.ll 
b/llvm/test/Transforms/Mem2Reg/ConvertDebugInfo.ll index ac3bf8f8d78e4a..0b3c5a02f0bb26 100644 --- a/llvm/test/Transforms/Mem2Reg/ConvertDebugInfo.ll +++ b/llvm/test/Transforms/Mem2Reg/ConvertDebugInfo.ll @@ -9,8 +9,8 @@ entry: %0 = alloca double ; [#uses=2] %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] call void @llvm.dbg.declare(metadata ptr %i_addr, metadata !0, metadata !DIExpression()), !dbg !8 -; CHECK: call void @llvm.dbg.value(metadata i32 %i, metadata ![[IVAR:[0-9]*]], metadata {{.*}}) -; CHECK: call void @llvm.dbg.value(metadata double %j, metadata ![[JVAR:[0-9]*]], metadata {{.*}}) +; CHECK: #dbg_value(i32 %i, ![[IVAR:[0-9]*]], {{.*}}) +; CHECK: #dbg_value(double %j, ![[JVAR:[0-9]*]], {{.*}}) ; CHECK: ![[IVAR]] = !DILocalVariable(name: "i" ; CHECK: ![[JVAR]] = !DILocalVariable(name: "j" store i32 %i, ptr %i_addr diff --git a/llvm/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll b/llvm/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll index 97ff751bb73af3..b9dbb74b5b7135 100644 --- a/llvm/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll +++ b/llvm/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll @@ -8,10 +8,10 @@ declare void @foo(i32, i64, ptr) define void @baz(i32 %a) nounwind ssp !dbg !1 { ; CHECK-LABEL: entry: ; CHECK-NEXT: %"alloca point" = bitcast i32 0 to i32{{$}} -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %a,{{.*}}, !dbg -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %a,{{.*}}, !dbg -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 55,{{.*}}, !dbg -; CHECK-NEXT: call void @llvm.dbg.value(metadata ptr @baz,{{.*}}, !dbg +; CHECK-NEXT: #dbg_value(i32 %a,{{.*}}, +; CHECK-NEXT: #dbg_value(i32 %a,{{.*}}, +; CHECK-NEXT: #dbg_value(i64 55,{{.*}}, +; CHECK-NEXT: #dbg_value(ptr @baz,{{.*}}, ; CHECK-NEXT: call void @foo({{.*}}, !dbg ; CHECK-NEXT: br label %return, !dbg entry: diff --git a/llvm/test/Transforms/Mem2Reg/dbg_declare_to_value_conversions.ll b/llvm/test/Transforms/Mem2Reg/dbg_declare_to_value_conversions.ll index 
8012c2b97a6b67..721405bca09192 100644 --- a/llvm/test/Transforms/Mem2Reg/dbg_declare_to_value_conversions.ll +++ b/llvm/test/Transforms/Mem2Reg/dbg_declare_to_value_conversions.ll @@ -9,7 +9,7 @@ define i64 @foo0(i64 %arg) { call void @llvm.dbg.declare(metadata ptr %arg.addr, metadata !26, metadata !DIExpression()), !dbg !40 ; CHECK-LABEL: @foo0 ; CHECK-SAME: (i64 [[arg:%.*]]) - ; CHECK-NEXT: dbg.value(metadata i64 [[arg]], {{.*}}, metadata !DIExpression()) + ; CHECK-NEXT: #dbg_value(i64 [[arg]], {{.*}}, !DIExpression(), %val = load i64, ptr %arg.addr ret i64 %val } @@ -21,7 +21,7 @@ define i32 @foo1(ptr %arg) { call void @llvm.dbg.declare(metadata ptr %arg.indirect_addr, metadata !25, metadata !DIExpression(DW_OP_deref)), !dbg !40 ; CHECK-LABEL: @foo1 ; CHECK-SAME: (ptr [[arg:%.*]]) - ; CHECK-NEXT: dbg.value(metadata ptr [[arg]], {{.*}}, metadata !DIExpression(DW_OP_deref)) + ; CHECK-NEXT: #dbg_value(ptr [[arg]], {{.*}}, !DIExpression(DW_OP_deref), %val = load i32, ptr %arg ret i32 %val } @@ -33,7 +33,7 @@ define i32 @foo2(ptr %arg) { store ptr %arg, ptr %arg.indirect_addr call void @llvm.dbg.declare(metadata ptr %arg.indirect_addr, metadata !25, metadata !DIExpression(DW_OP_deref, DW_OP_plus_uconst, 2)), !dbg !40 ; CHECK-LABEL: @foo2 - ; CHECK-NEXT: dbg.value(metadata ptr undef, {{.*}}, metadata !DIExpression(DW_OP_deref, DW_OP_plus_uconst, 2)) + ; CHECK-NEXT: #dbg_value(ptr undef, {{.*}}, !DIExpression(DW_OP_deref, DW_OP_plus_uconst, 2), %val = load i32, ptr %arg ret i32 %val } diff --git a/llvm/test/Transforms/Mem2Reg/debug-alloca-phi-2.ll b/llvm/test/Transforms/Mem2Reg/debug-alloca-phi-2.ll index e4c638d39ccffd..2925c96ae18183 100644 --- a/llvm/test/Transforms/Mem2Reg/debug-alloca-phi-2.ll +++ b/llvm/test/Transforms/Mem2Reg/debug-alloca-phi-2.ll @@ -14,12 +14,12 @@ entry: for.cond: ; CHECK: %[[PHI:.*]] = phi i1 [ false, %entry ], [ %0, %for.cond ] %entryN = load i1, ptr %entry1, align 8, !dbg !20 -; CHECK: call void @llvm.dbg.value(metadata i1 %[[PHI]], -; 
CHECK-SAME: metadata !DIExpression()) +; CHECK: #dbg_value(i1 %[[PHI]], +; CHECK-SAME: !DIExpression(), %0 = add i1 %entryN, 1 ; CHECK: %0 = add i1 %[[PHI]], true -; CHECK: call void @llvm.dbg.value(metadata i1 %0, -; CHECK-SAME: metadata !DIExpression()) +; CHECK: #dbg_value(i1 %0, +; CHECK-SAME: !DIExpression(), store i1 %0, ptr %entry1, align 8, !dbg !20 br label %for.cond, !dbg !20 } diff --git a/llvm/test/Transforms/Mem2Reg/debug-alloca-phi.ll b/llvm/test/Transforms/Mem2Reg/debug-alloca-phi.ll index 01f45e9c081d39..32008dd33a85e5 100644 --- a/llvm/test/Transforms/Mem2Reg/debug-alloca-phi.ll +++ b/llvm/test/Transforms/Mem2Reg/debug-alloca-phi.ll @@ -14,12 +14,12 @@ entry: for.cond: ; CHECK: %[[PHI:.*]] = phi i8 [ 0, %entry ], [ %0, %for.cond ] %entryN = load i8, ptr %entry1, align 8, !dbg !20 -; CHECK: call void @llvm.dbg.value(metadata i8 %[[PHI]], -; CHECK-SAME: metadata !DIExpression()) +; CHECK: #dbg_value(i8 %[[PHI]], +; CHECK-SAME: !DIExpression(), %0 = add i8 %entryN, 1 ; CHECK: %0 = add i8 %[[PHI]], 1 -; CHECK: call void @llvm.dbg.value(metadata i8 %0, -; CHECK-SAME: metadata !DIExpression()) +; CHECK: #dbg_value(i8 %0, +; CHECK-SAME: !DIExpression(), store i8 %0, ptr %entry1, align 8, !dbg !20 br label %for.cond, !dbg !20 } diff --git a/llvm/test/Transforms/Mem2Reg/debug-alloca-vla-1.ll b/llvm/test/Transforms/Mem2Reg/debug-alloca-vla-1.ll index 9b29f981b26c1f..5cccb439ff42b9 100644 --- a/llvm/test/Transforms/Mem2Reg/debug-alloca-vla-1.ll +++ b/llvm/test/Transforms/Mem2Reg/debug-alloca-vla-1.ll @@ -21,9 +21,9 @@ define void @scan() #0 !dbg !4 { ; CHECK-NEXT: br label [[FOR_COND:%.*]], !dbg [[DBG10:![0-9]+]] ; CHECK: for.cond: ; CHECK-NEXT: [[VLA1_0:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[T0:%.*]], [[FOR_COND]] ] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[VLA1_0]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG19:![0-9]+]] +; CHECK-NEXT: #dbg_value(i32 [[VLA1_0]], [[META11:![0-9]+]], !DIExpression(), 
[[META19:![0-9]+]]) ; CHECK-NEXT: [[T0]] = add i32 [[VLA1_0]], 1 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[T0]], metadata [[META11]], metadata !DIExpression()), !dbg [[DBG19]] +; CHECK-NEXT: #dbg_value(i32 [[T0]], [[META11]], !DIExpression(), [[META19]]) ; CHECK-NEXT: br label [[FOR_COND]], !dbg [[DBG10]] ; entry: diff --git a/llvm/test/Transforms/Mem2Reg/debug-alloca-vla-2.ll b/llvm/test/Transforms/Mem2Reg/debug-alloca-vla-2.ll index 894e9555ee6ebb..a17fb26c4b7bc4 100644 --- a/llvm/test/Transforms/Mem2Reg/debug-alloca-vla-2.ll +++ b/llvm/test/Transforms/Mem2Reg/debug-alloca-vla-2.ll @@ -25,7 +25,7 @@ define void @scan(i32 %n) #0 !dbg !4 { ; CHECK: for.cond: ; CHECK-NEXT: [[VLA1_0:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[T0:%.*]], [[FOR_COND]] ] ; CHECK-NEXT: [[T0]] = add i32 [[VLA1_0]], 1 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 undef, metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG19:![0-9]+]] +; CHECK-NEXT: #dbg_value(i32 undef, [[META11:![0-9]+]], !DIExpression(), [[META19:![0-9]+]]) ; CHECK-NEXT: br label [[FOR_COND]], !dbg [[DBG10]] ; entry: diff --git a/llvm/test/Transforms/MemCpyOpt/pr37967.ll b/llvm/test/Transforms/MemCpyOpt/pr37967.ll index 3731345b022242..b6af060afb9abc 100644 --- a/llvm/test/Transforms/MemCpyOpt/pr37967.ll +++ b/llvm/test/Transforms/MemCpyOpt/pr37967.ll @@ -12,9 +12,9 @@ define dso_local void @_Z3bar3Foo(ptr byval(%struct.Foo) align 8 %0) { ; CHECK-LABEL: @_Z3bar3Foo( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_FOO:%.*]], align 8, !dbg [[DBG12:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata ptr [[AGG_TMP]], metadata [[META9:![0-9]+]], metadata !DIExpression()), !dbg [[DBG12]] +; CHECK-NEXT: #dbg_value(ptr [[AGG_TMP]], [[META9:![0-9]+]], !DIExpression(), [[DBG12]]) ; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr @a, align 8, !dbg [[DBG13:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata ptr [[TMP1]], metadata [[META11:![0-9]+]], metadata 
!DIExpression()), !dbg [[DBG13]] +; CHECK-NEXT: #dbg_value(ptr [[TMP1]], [[META11:![0-9]+]], !DIExpression(), [[DBG13]]) ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 8 dereferenceable(24) [[AGG_TMP]], ptr nonnull align 8 dereferenceable(24) [[TMP1]], i64 24, i1 false), !dbg [[DBG14:![0-9]+]] ; CHECK-NEXT: call void @_Z3bar3Foo(ptr nonnull byval([[STRUCT_FOO]]) align 8 [[TMP1]]), !dbg [[DBG15:![0-9]+]] ; CHECK-NEXT: ret void, !dbg [[DBG16:![0-9]+]] diff --git a/llvm/test/Transforms/MergeFunc/mergefunc-preserve-debug-info.ll b/llvm/test/Transforms/MergeFunc/mergefunc-preserve-debug-info.ll index 9208476a274297..331f608d027832 100644 --- a/llvm/test/Transforms/MergeFunc/mergefunc-preserve-debug-info.ll +++ b/llvm/test/Transforms/MergeFunc/mergefunc-preserve-debug-info.ll @@ -57,12 +57,12 @@ define i32 @maxA(i32 %x, i32 %y) !dbg !6 { ; OPTIMIZATION_LEVEL_0-NEXT: [[M:%.*]] = alloca i32, align 4 ; OPTIMIZATION_LEVEL_0-NEXT: [[J:%.*]] = alloca i32, align 4 ; OPTIMIZATION_LEVEL_0-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 -; OPTIMIZATION_LEVEL_0-NEXT: call void @llvm.dbg.declare(metadata ptr [[X_ADDR]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG12:![0-9]+]] +; OPTIMIZATION_LEVEL_0-NEXT: #dbg_declare(ptr [[X_ADDR]], [[META11:![0-9]+]], !DIExpression(), [[META12:![0-9]+]]) ; OPTIMIZATION_LEVEL_0-NEXT: store i32 [[Y]], ptr [[Y_ADDR]], align 4 -; OPTIMIZATION_LEVEL_0-NEXT: call void @llvm.dbg.declare(metadata ptr [[Y_ADDR]], metadata [[META13:![0-9]+]], metadata !DIExpression()), !dbg [[DBG14:![0-9]+]] -; OPTIMIZATION_LEVEL_0-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META15:![0-9]+]], metadata !DIExpression()), !dbg [[DBG16:![0-9]+]] -; OPTIMIZATION_LEVEL_0-NEXT: call void @llvm.dbg.declare(metadata ptr [[M]], metadata [[META17:![0-9]+]], metadata !DIExpression()), !dbg [[DBG18:![0-9]+]] -; OPTIMIZATION_LEVEL_0-NEXT: call void @llvm.dbg.declare(metadata ptr [[J]], metadata [[META19:![0-9]+]], metadata 
!DIExpression()), !dbg [[DBG20:![0-9]+]] +; OPTIMIZATION_LEVEL_0-NEXT: #dbg_declare(ptr [[Y_ADDR]], [[META13:![0-9]+]], !DIExpression(), [[META14:![0-9]+]]) +; OPTIMIZATION_LEVEL_0-NEXT: #dbg_declare(ptr [[I]], [[META15:![0-9]+]], !DIExpression(), [[META16:![0-9]+]]) +; OPTIMIZATION_LEVEL_0-NEXT: #dbg_declare(ptr [[M]], [[META17:![0-9]+]], !DIExpression(), [[META18:![0-9]+]]) +; OPTIMIZATION_LEVEL_0-NEXT: #dbg_declare(ptr [[J]], [[META19:![0-9]+]], !DIExpression(), [[META20:![0-9]+]]) ; OPTIMIZATION_LEVEL_0-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4, !dbg [[DBG21:![0-9]+]] ; OPTIMIZATION_LEVEL_0-NEXT: [[TMP1:%.*]] = load i32, ptr [[Y_ADDR]], align 4, !dbg [[DBG23:![0-9]+]] ; OPTIMIZATION_LEVEL_0-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP0]], [[TMP1]], !dbg [[DBG24:![0-9]+]] @@ -82,12 +82,12 @@ define i32 @maxA(i32 %x, i32 %y) !dbg !6 { ; OPTIMIZATION_LEVEL_2-LABEL: define i32 @maxA ; OPTIMIZATION_LEVEL_2-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !dbg [[DBG6:![0-9]+]] { ; OPTIMIZATION_LEVEL_2-NEXT: entry: -; OPTIMIZATION_LEVEL_2-NEXT: call void @llvm.dbg.value(metadata i32 [[X]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG12:![0-9]+]] -; OPTIMIZATION_LEVEL_2-NEXT: call void @llvm.dbg.value(metadata i32 [[Y]], metadata [[META13:![0-9]+]], metadata !DIExpression()), !dbg [[DBG12]] -; OPTIMIZATION_LEVEL_2-NEXT: call void @llvm.dbg.declare(metadata ptr undef, metadata [[META14:![0-9]+]], metadata !DIExpression()), !dbg [[DBG15:![0-9]+]] -; OPTIMIZATION_LEVEL_2-NEXT: call void @llvm.dbg.declare(metadata ptr undef, metadata [[META16:![0-9]+]], metadata !DIExpression()), !dbg [[DBG17:![0-9]+]] +; OPTIMIZATION_LEVEL_2-NEXT: #dbg_value(i32 [[X]], [[META11:![0-9]+]], !DIExpression(), [[META12:![0-9]+]]) +; OPTIMIZATION_LEVEL_2-NEXT: #dbg_value(i32 [[Y]], [[META13:![0-9]+]], !DIExpression(), [[META12]]) +; OPTIMIZATION_LEVEL_2-NEXT: #dbg_declare(ptr undef, [[META14:![0-9]+]], !DIExpression(), [[META15:![0-9]+]]) +; 
OPTIMIZATION_LEVEL_2-NEXT: #dbg_declare(ptr undef, [[META16:![0-9]+]], !DIExpression(), [[META17:![0-9]+]]) ; OPTIMIZATION_LEVEL_2-NEXT: [[X_Y:%.*]] = tail call i32 @llvm.smax.i32(i32 [[X]], i32 [[Y]]) -; OPTIMIZATION_LEVEL_2-NEXT: call void @llvm.dbg.value(metadata i32 [[X_Y]], metadata [[META18:![0-9]+]], metadata !DIExpression()), !dbg [[DBG12]] +; OPTIMIZATION_LEVEL_2-NEXT: #dbg_value(i32 [[X_Y]], [[META18:![0-9]+]], !DIExpression(), [[META12]]) ; OPTIMIZATION_LEVEL_2-NEXT: ret i32 [[X_Y]], !dbg [[DBG19:![0-9]+]] ; entry: @@ -134,17 +134,17 @@ define i32 @maxB(i32 %x, i32 %y) !dbg !34 { ; OPTIMIZATION_LEVEL_0-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 ; OPTIMIZATION_LEVEL_0-NEXT: [[Y_ADDR:%.*]] = alloca i32, align 4 ; OPTIMIZATION_LEVEL_0-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 -; OPTIMIZATION_LEVEL_0-NEXT: call void @llvm.dbg.declare(metadata ptr [[X_ADDR]], metadata [[META34:![0-9]+]], metadata !DIExpression()), !dbg [[DBG35:![0-9]+]] +; OPTIMIZATION_LEVEL_0-NEXT: #dbg_declare(ptr [[X_ADDR]], [[META34:![0-9]+]], !DIExpression(), [[META35:![0-9]+]]) ; OPTIMIZATION_LEVEL_0-NEXT: store i32 [[Y]], ptr [[Y_ADDR]], align 4 -; OPTIMIZATION_LEVEL_0-NEXT: call void @llvm.dbg.declare(metadata ptr [[Y_ADDR]], metadata [[META36:![0-9]+]], metadata !DIExpression()), !dbg [[DBG37:![0-9]+]] +; OPTIMIZATION_LEVEL_0-NEXT: #dbg_declare(ptr [[Y_ADDR]], [[META36:![0-9]+]], !DIExpression(), [[META37:![0-9]+]]) ; OPTIMIZATION_LEVEL_0-NEXT: [[TMP0:%.*]] = tail call i32 @maxA(i32 [[X]], i32 [[Y]]), !dbg [[DBG38:![0-9]+]] ; OPTIMIZATION_LEVEL_0-NEXT: ret i32 [[TMP0]], !dbg [[DBG38]] ; ; OPTIMIZATION_LEVEL_2-LABEL: define i32 @maxB ; OPTIMIZATION_LEVEL_2-SAME: (i32 [[X:%.*]], i32 [[Y:%.*]]) local_unnamed_addr #[[ATTR0]] !dbg [[DBG20:![0-9]+]] { ; OPTIMIZATION_LEVEL_2-NEXT: entry: -; OPTIMIZATION_LEVEL_2-NEXT: call void @llvm.dbg.value(metadata i32 [[X]], metadata [[META21:![0-9]+]], metadata !DIExpression()), !dbg [[DBG22:![0-9]+]] -; OPTIMIZATION_LEVEL_2-NEXT: call void 
@llvm.dbg.value(metadata i32 [[Y]], metadata [[META23:![0-9]+]], metadata !DIExpression()), !dbg [[DBG22]] +; OPTIMIZATION_LEVEL_2-NEXT: #dbg_value(i32 [[X]], [[META21:![0-9]+]], !DIExpression(), [[META22:![0-9]+]]) +; OPTIMIZATION_LEVEL_2-NEXT: #dbg_value(i32 [[Y]], [[META23:![0-9]+]], !DIExpression(), [[META22]]) ; OPTIMIZATION_LEVEL_2-NEXT: [[TMP0:%.*]] = tail call i32 @maxA(i32 [[X]], i32 [[Y]]) #[[ATTR0]], !dbg [[DBG24:![0-9]+]] ; OPTIMIZATION_LEVEL_2-NEXT: ret i32 [[TMP0]], !dbg [[DBG24]] ; @@ -193,7 +193,7 @@ define void @f() !dbg !57 { ; OPTIMIZATION_LEVEL_0-NEXT: ret void, !dbg [[DBG44:![0-9]+]] ; ; OPTIMIZATION_LEVEL_2-LABEL: define void @f -; OPTIMIZATION_LEVEL_2-SAME: () local_unnamed_addr #[[ATTR2:[0-9]+]] !dbg [[DBG25:![0-9]+]] { +; OPTIMIZATION_LEVEL_2-SAME: () local_unnamed_addr #[[ATTR0]] !dbg [[DBG25:![0-9]+]] { ; OPTIMIZATION_LEVEL_2-NEXT: entry: ; OPTIMIZATION_LEVEL_2-NEXT: ret void, !dbg [[DBG28:![0-9]+]] ; diff --git a/llvm/test/Transforms/MergeFunc/no-merge-debug-thunks.ll b/llvm/test/Transforms/MergeFunc/no-merge-debug-thunks.ll index 80467c7aee407f..4d6f7561471e27 100644 --- a/llvm/test/Transforms/MergeFunc/no-merge-debug-thunks.ll +++ b/llvm/test/Transforms/MergeFunc/no-merge-debug-thunks.ll @@ -9,8 +9,8 @@ define hidden i32 @f(i32 %t) { ; CHECK-LABEL: define hidden i32 @f ; CHECK-SAME: (i32 [[T:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[T]], metadata [[META6:![0-9]+]], metadata !DIExpression()), !dbg [[DBG12:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[T]], metadata [[META6]], metadata !DIExpression()), !dbg [[DBG12]] +; CHECK-NEXT: #dbg_value(i32 [[T]], [[META6:![0-9]+]], !DIExpression(), [[META12:![0-9]+]]) +; CHECK-NEXT: #dbg_value(i32 [[T]], [[META6]], !DIExpression(), [[META12]]) ; CHECK-NEXT: ret i32 0 ; entry: @@ -24,8 +24,8 @@ define hidden i32 @f_thunk(i32 %t) { ; CHECK-LABEL: define hidden i32 @f_thunk ; CHECK-SAME: (i32 [[T:%.*]]) { ; CHECK-NEXT: entry: -; 
CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[T]], metadata [[META6]], metadata !DIExpression()), !dbg [[DBG12]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[T]], metadata [[META6]], metadata !DIExpression()), !dbg [[DBG12]] +; CHECK-NEXT: #dbg_value(i32 [[T]], [[META6]], !DIExpression(), [[META12]]) +; CHECK-NEXT: #dbg_value(i32 [[T]], [[META6]], !DIExpression(), [[META12]]) ; CHECK-NEXT: ret i32 0 ; entry: diff --git a/llvm/test/Transforms/ObjCARC/basic.ll b/llvm/test/Transforms/ObjCARC/basic.ll index 51382145a8e6cf..0ee59dc8ba6ab7 100644 --- a/llvm/test/Transforms/ObjCARC/basic.ll +++ b/llvm/test/Transforms/ObjCARC/basic.ll @@ -2953,5 +2953,4 @@ define void @test68(ptr %a, ptr %b) { !5 = !{i32 2, !"Debug Info Version", i32 3} ; CHECK: attributes [[NUW]] = { nounwind } -; CHECK: attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ; CHECK: ![[RELEASE]] = !{} diff --git a/llvm/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll b/llvm/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll index e4c123e02540a1..22d14f52e96647 100644 --- a/llvm/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll +++ b/llvm/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll @@ -103,10 +103,9 @@ declare void @NSLog(ptr, ...) 
declare void @llvm.dbg.value(metadata, metadata, metadata) nounwind readnone ; CHECK: attributes #0 = { ssp uwtable } -; CHECK: attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -; CHECK: attributes #2 = { nonlazybind } +; CHECK: attributes #1 = { nonlazybind } ; CHECK: attributes [[NUW]] = { nounwind } -; CHECK: attributes #4 = { noinline ssp uwtable } +; CHECK: attributes #3 = { noinline ssp uwtable } !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!33, !34, !35, !36, !61} diff --git a/llvm/test/Transforms/Reassociate/matching-binops.ll b/llvm/test/Transforms/Reassociate/matching-binops.ll index 31f38708907b79..f9f96da04378b6 100644 --- a/llvm/test/Transforms/Reassociate/matching-binops.ll +++ b/llvm/test/Transforms/Reassociate/matching-binops.ll @@ -288,19 +288,19 @@ define float @fmul_fdiv(float %x, float %y, float %z, float %m) { define i32 @and_shl_dbg(i32 %x, i32 %y, i32 %z, i32 %shamt) { ; CHECK-LABEL: @and_shl_dbg( -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[X:%.*]], metadata !7, metadata !DIExpression()), !dbg !20 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[Y:%.*]], metadata !13, metadata !DIExpression()), !dbg !21 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[Z:%.*]], metadata !14, metadata !DIExpression()), !dbg !22 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[SHAMT:%.*]], metadata !15, metadata !DIExpression()), !dbg !23 -; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X]], [[SHAMT]], !dbg !24 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[SHL]], metadata !16, metadata !DIExpression()), !dbg !25 -; CHECK-NEXT: [[SHL1:%.*]] = shl i32 [[Y]], [[SHAMT]], !dbg !26 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[SHL1]], metadata !17, metadata !DIExpression()), !dbg !27 -; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL]], [[Z]], !dbg !28 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[AND]], metadata !18, metadata !DIExpression()), !dbg !29 -; CHECK-NEXT: [[AND2:%.*]] = 
and i32 [[AND]], [[SHL1]], !dbg !30 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[AND2]], metadata !19, metadata !DIExpression()), !dbg !31 -; CHECK-NEXT: ret i32 [[AND2]], !dbg !32 +; CHECK-NEXT: #dbg_value(i32 [[X:%.*]], [[META7:![0-9]+]], !DIExpression(), [[META20:![0-9]+]]) +; CHECK-NEXT: #dbg_value(i32 [[Y:%.*]], [[META13:![0-9]+]], !DIExpression(), [[META21:![0-9]+]]) +; CHECK-NEXT: #dbg_value(i32 [[Z:%.*]], [[META14:![0-9]+]], !DIExpression(), [[META22:![0-9]+]]) +; CHECK-NEXT: #dbg_value(i32 [[SHAMT:%.*]], [[META15:![0-9]+]], !DIExpression(), [[META23:![0-9]+]]) +; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[X]], [[SHAMT]], !dbg [[DBG24:![0-9]+]] +; CHECK-NEXT: #dbg_value(i32 [[SHL]], [[META16:![0-9]+]], !DIExpression(), [[META25:![0-9]+]]) +; CHECK-NEXT: [[SHL1:%.*]] = shl i32 [[Y]], [[SHAMT]], !dbg [[DBG26:![0-9]+]] +; CHECK-NEXT: #dbg_value(i32 [[SHL1]], [[META17:![0-9]+]], !DIExpression(), [[META27:![0-9]+]]) +; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL]], [[Z]], !dbg [[DBG28:![0-9]+]] +; CHECK-NEXT: #dbg_value(i32 [[AND]], [[META18:![0-9]+]], !DIExpression(), [[META29:![0-9]+]]) +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND]], [[SHL1]], !dbg [[DBG30:![0-9]+]] +; CHECK-NEXT: #dbg_value(i32 [[AND2]], [[META19:![0-9]+]], !DIExpression(), [[META31:![0-9]+]]) +; CHECK-NEXT: ret i32 [[AND2]], !dbg [[DBG32:![0-9]+]] ; call void @llvm.dbg.value(metadata i32 %x, metadata !13, metadata !DIExpression()), !dbg !21 call void @llvm.dbg.value(metadata i32 %y, metadata !14, metadata !DIExpression()), !dbg !22 diff --git a/llvm/test/Transforms/Reassociate/reassociate_dbgvalue_discard.ll b/llvm/test/Transforms/Reassociate/reassociate_dbgvalue_discard.ll index 4fb4f5801a2101..7f617d145b840b 100644 --- a/llvm/test/Transforms/Reassociate/reassociate_dbgvalue_discard.ll +++ b/llvm/test/Transforms/Reassociate/reassociate_dbgvalue_discard.ll @@ -11,16 +11,16 @@ target triple = "x86_64-unknown-linux-gnu" define dso_local i32 @test1(i32 %a, i32 %b, i32 %c, i32 %d) 
local_unnamed_addr #0 !dbg !7 { ; CHECK-LABEL: @test1( ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 poison, metadata !16, metadata !DIExpression()), !dbg !20 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 poison, metadata !17, metadata !DIExpression()), !dbg !21 -; CHECK-NEXT: [[M1:%.*]] = mul i32 [[D:%.*]], [[C:%.*]], !dbg !22 -; CHECK-NEXT: [[M3:%.*]] = mul i32 [[M1]], [[A:%.*]], !dbg !23 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[M3]], metadata !18, metadata !DIExpression()), !dbg !24 -; CHECK-NEXT: [[M2:%.*]] = mul i32 [[D]], [[C]], !dbg !25 -; CHECK-NEXT: [[M4:%.*]] = mul i32 [[M2]], [[B:%.*]], !dbg !26 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[M4]], metadata !19, metadata !DIExpression()), !dbg !27 +; CHECK-NEXT: #dbg_value(i32 poison, [[META16:![0-9]+]], !DIExpression(), [[META20:![0-9]+]]) +; CHECK-NEXT: #dbg_value(i32 poison, [[META17:![0-9]+]], !DIExpression(), [[META21:![0-9]+]]) +; CHECK-NEXT: [[M1:%.*]] = mul i32 [[D:%.*]], [[C:%.*]], !dbg [[DBG22:![0-9]+]] +; CHECK-NEXT: [[M3:%.*]] = mul i32 [[M1]], [[A:%.*]], !dbg [[DBG23:![0-9]+]] +; CHECK-NEXT: #dbg_value(i32 [[M3]], [[META18:![0-9]+]], !DIExpression(), [[META24:![0-9]+]]) +; CHECK-NEXT: [[M2:%.*]] = mul i32 [[D]], [[C]], !dbg [[DBG25:![0-9]+]] +; CHECK-NEXT: [[M4:%.*]] = mul i32 [[M2]], [[B:%.*]], !dbg [[DBG26:![0-9]+]] +; CHECK-NEXT: #dbg_value(i32 [[M4]], [[META19:![0-9]+]], !DIExpression(), [[META27:![0-9]+]]) ; CHECK-NEXT: [[RES:%.*]] = xor i32 [[M3]], [[M4]] -; CHECK-NEXT: ret i32 [[RES]], !dbg !28 +; CHECK-NEXT: ret i32 [[RES]], !dbg [[DBG28:![0-9]+]] ; entry: %m1 = mul i32 %c, %a, !dbg !24 diff --git a/llvm/test/Transforms/Reassociate/reassociate_salvages_debug_info.ll b/llvm/test/Transforms/Reassociate/reassociate_salvages_debug_info.ll index 2e29ecfdd3ca91..03b59f7c0a387e 100644 --- a/llvm/test/Transforms/Reassociate/reassociate_salvages_debug_info.ll +++ b/llvm/test/Transforms/Reassociate/reassociate_salvages_debug_info.ll 
@@ -4,7 +4,7 @@ define hidden i32 @main(i32 %argc, ptr %argv) { entry: - ; CHECK: call void @llvm.dbg.value(metadata i32 %argc, metadata [[VAR_B:![0-9]+]], metadata !DIExpression(DW_OP_plus_uconst, 1, DW_OP_stack_value)) + ; CHECK: #dbg_value(i32 %argc, [[VAR_B:![0-9]+]], !DIExpression(DW_OP_plus_uconst, 1, DW_OP_stack_value), %add = add nsw i32 %argc, 1, !dbg !26 call void @llvm.dbg.value(metadata i32 %add, metadata !22, metadata !DIExpression()), !dbg !25 %add1 = add nsw i32 %argc, %add, !dbg !27 diff --git a/llvm/test/Transforms/Reassociate/undef_intrinsics_when_deleting_instructions.ll b/llvm/test/Transforms/Reassociate/undef_intrinsics_when_deleting_instructions.ll index a98bdb5a88e40d..3843f4143547b9 100644 --- a/llvm/test/Transforms/Reassociate/undef_intrinsics_when_deleting_instructions.ll +++ b/llvm/test/Transforms/Reassociate/undef_intrinsics_when_deleting_instructions.ll @@ -5,11 +5,11 @@ ; that gets dropped and cannot be salvaged. ; CHECK-NOT: %add = fadd fast float %a, %b -; CHECK: call void @llvm.dbg.value(metadata float poison, metadata [[VAR_X:![0-9]+]], metadata !DIExpression()) +; CHECK: #dbg_value(float poison, [[VAR_X:![0-9]+]], !DIExpression(), ; CHECK-LABEL: if.then: ; CHECK-NOT: %add1 = fadd fast float %add, %c -; CHECK: call void @llvm.dbg.value(metadata float poison, metadata [[VAR_Y:![0-9]+]], metadata !DIExpression()) +; CHECK: #dbg_value(float poison, [[VAR_Y:![0-9]+]], !DIExpression(), ; CHECK-LABEL: !0 = ; CHECK-DAG: [[VAR_Y]] = !DILocalVariable(name: "y" ; CHECK-DAG: [[VAR_X]] = !DILocalVariable(name: "x" diff --git a/llvm/test/Transforms/SCCP/loadtest.ll b/llvm/test/Transforms/SCCP/loadtest.ll index 5674cfd7472a7d..7ad68228e77282 100644 --- a/llvm/test/Transforms/SCCP/loadtest.ll +++ b/llvm/test/Transforms/SCCP/loadtest.ll @@ -11,7 +11,7 @@ define i32 @test1() { ; CHECK-LABEL: @test1( -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 42 +; CHECK-NEXT: #dbg_value(i32 42 ; CHECK-NEXT: ret %B = load i32, ptr @X ; [#uses=1] ret i32 
%B @@ -19,8 +19,8 @@ define i32 @test1() { define float @test2() { ; CHECK-LABEL: @test2( -; CHECK-NEXT: call void @llvm.dbg.value(metadata ptr getelementptr -; CHECK-NEXT: call void @llvm.dbg.value(metadata float 0x3FF3B2FEC0000000 +; CHECK-NEXT: #dbg_value(ptr getelementptr +; CHECK-NEXT: #dbg_value(float 0x3FF3B2FEC0000000 ; CHECK-NEXT: ret %A = getelementptr [2 x { i32, float }], ptr @Y, i64 0, i64 1, i32 1 ; [#uses=1] %B = load float, ptr %A ; [#uses=1] @@ -29,7 +29,7 @@ define float @test2() { define i32 @test3() { ; CHECK-LABEL: @test3( -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 12 +; CHECK-NEXT: #dbg_value(i32 12 ; CHECK-NEXT: ret %B = load i32, ptr @Y ret i32 %B @@ -37,7 +37,7 @@ define i32 @test3() { define i8 @test4() { ; CHECK-LABEL: @test4( -; CHECK-NEXT: call void @llvm.dbg.value(metadata i8 +; CHECK-NEXT: #dbg_value(i8 ; CHECK-NEXT: ret %B = load i8, ptr @X ret i8 %B diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll index 834d706a22d2b6..5232ae76fa8870 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/spillcost-di.ll @@ -9,12 +9,12 @@ target triple = "aarch64" define void @patatino(i64 %n, i64 %i, ptr %p) !dbg !7 { ; CHECK-LABEL: @patatino( ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[N:%.*]], metadata [[META18:![0-9]+]], metadata !DIExpression()), !dbg [[DBG23:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[I:%.*]], metadata [[META19:![0-9]+]], metadata !DIExpression()), !dbg [[DBG24:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata ptr [[P:%.*]], metadata [[META20:![0-9]+]], metadata !DIExpression()), !dbg [[DBG25:![0-9]+]] +; CHECK-NEXT: #dbg_value(i64 [[N:%.*]], [[META18:![0-9]+]], !DIExpression(), [[META23:![0-9]+]]) +; CHECK-NEXT: #dbg_value(i64 [[I:%.*]], [[META19:![0-9]+]], !DIExpression(), [[META24:![0-9]+]]) +; 
CHECK-NEXT: #dbg_value(ptr [[P:%.*]], [[META20:![0-9]+]], !DIExpression(), [[META25:![0-9]+]]) ; CHECK-NEXT: [[X1:%.*]] = getelementptr inbounds [[STRUCT_S:%.*]], ptr [[P]], i64 [[N]], i32 0, !dbg [[DBG26:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 undef, metadata [[META21:![0-9]+]], metadata !DIExpression()), !dbg [[DBG27:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 undef, metadata [[META22:![0-9]+]], metadata !DIExpression()), !dbg [[DBG28:![0-9]+]] +; CHECK-NEXT: #dbg_value(i64 undef, [[META21:![0-9]+]], !DIExpression(), [[META27:![0-9]+]]) +; CHECK-NEXT: #dbg_value(i64 undef, [[META22:![0-9]+]], !DIExpression(), [[META28:![0-9]+]]) ; CHECK-NEXT: [[X5:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[P]], i64 [[I]], i32 0, !dbg [[DBG29:![0-9]+]] ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[X1]], align 8, !dbg [[DBG26]], !tbaa [[TBAA30:![0-9]+]] ; CHECK-NEXT: store <2 x i64> [[TMP0]], ptr [[X5]], align 8, !dbg [[DBG34:![0-9]+]], !tbaa [[TBAA30]] diff --git a/llvm/test/Transforms/SLPVectorizer/X86/debug_info.ll b/llvm/test/Transforms/SLPVectorizer/X86/debug_info.ll index 8f44574e07b1a5..f98a569d679fed 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/debug_info.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/debug_info.ll @@ -16,21 +16,21 @@ target triple = "x86_64-apple-macosx10.7.0" define i32 @depth(ptr nocapture %A, i32 %m) #0 !dbg !4 { ; CHECK-LABEL: @depth( ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @llvm.dbg.value(metadata ptr [[A:%.*]], metadata [[META12:![0-9]+]], metadata !DIExpression()), !dbg [[DBG18:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[M:%.*]], metadata [[META13:![0-9]+]], metadata !DIExpression()), !dbg [[DBG18]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata double 0.000000e+00, metadata [[META14:![0-9]+]], metadata !DIExpression()), !dbg [[DBG19:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata double 2.000000e-01, metadata [[META15:![0-9]+]], metadata 
!DIExpression()), !dbg [[DBG19]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 0, metadata [[META16:![0-9]+]], metadata !DIExpression()), !dbg [[DBG20:![0-9]+]] -; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[M]], 0, !dbg [[DBG20]] -; CHECK-NEXT: br i1 [[CMP8]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]], !dbg [[DBG20]] +; CHECK-NEXT: #dbg_value(ptr [[A:%.*]], [[META12:![0-9]+]], !DIExpression(), [[META18:![0-9]+]]) +; CHECK-NEXT: #dbg_value(i32 [[M:%.*]], [[META13:![0-9]+]], !DIExpression(), [[META18]]) +; CHECK-NEXT: #dbg_value(double 0.000000e+00, [[META14:![0-9]+]], !DIExpression(), [[META19:![0-9]+]]) +; CHECK-NEXT: #dbg_value(double 2.000000e-01, [[META15:![0-9]+]], !DIExpression(), [[META19]]) +; CHECK-NEXT: #dbg_value(i32 0, [[META16:![0-9]+]], !DIExpression(), [[META20:![0-9]+]]) +; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[M]], 0, !dbg [[META20]] +; CHECK-NEXT: br i1 [[CMP8]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]], !dbg [[META20]] ; CHECK: for.body.lr.ph: ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[A]], i64 4, !dbg [[DBG21:![0-9]+]] -; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 8, !dbg [[DBG21]] -; CHECK-NEXT: br label [[FOR_END]], !dbg [[DBG20]] +; CHECK-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[ARRAYIDX]], align 8, !dbg [[DBG21]] +; CHECK-NEXT: br label [[FOR_END]], !dbg [[META20]] ; CHECK: for.end: -; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x double> [ [[TMP1]], [[FOR_BODY_LR_PH]] ], [ , [[ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x double> [ [[TMP0]], [[FOR_BODY_LR_PH]] ], [ , [[ENTRY:%.*]] ] ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[A]], i64 8, !dbg [[DBG23:![0-9]+]] -; CHECK-NEXT: store <2 x double> [[TMP2]], ptr [[ARRAYIDX2]], align 8, !dbg [[DBG23]] +; CHECK-NEXT: store <2 x double> [[TMP1]], ptr [[ARRAYIDX2]], align 8, !dbg [[DBG23]] ; CHECK-NEXT: ret i32 undef, !dbg [[DBG24:![0-9]+]] ; entry: diff --git 
a/llvm/test/Transforms/SLPVectorizer/X86/schedule_budget_debug_info.ll b/llvm/test/Transforms/SLPVectorizer/X86/schedule_budget_debug_info.ll index 2255e38f29128e..d45054b6bebce7 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/schedule_budget_debug_info.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/schedule_budget_debug_info.ll @@ -43,14 +43,14 @@ define void @test(ptr %a, ptr %b, ptr %c, ptr %d) { ; VECTOR_DBG-NEXT: call void @unknown() ; VECTOR_DBG-NEXT: call void @unknown() ; VECTOR_DBG-NEXT: call void @unknown() -; VECTOR_DBG-NEXT: call void @llvm.dbg.value(metadata i16 1, metadata [[META3:![0-9]+]], metadata !DIExpression()), !dbg [[DBG5:![0-9]+]] -; VECTOR_DBG-NEXT: call void @llvm.dbg.value(metadata i16 1, metadata [[META3]], metadata !DIExpression()), !dbg [[DBG5]] -; VECTOR_DBG-NEXT: call void @llvm.dbg.value(metadata i16 1, metadata [[META3]], metadata !DIExpression()), !dbg [[DBG5]] -; VECTOR_DBG-NEXT: call void @llvm.dbg.value(metadata i16 1, metadata [[META3]], metadata !DIExpression()), !dbg [[DBG5]] -; VECTOR_DBG-NEXT: call void @llvm.dbg.value(metadata i16 1, metadata [[META3]], metadata !DIExpression()), !dbg [[DBG5]] -; VECTOR_DBG-NEXT: call void @llvm.dbg.value(metadata i16 1, metadata [[META3]], metadata !DIExpression()), !dbg [[DBG5]] -; VECTOR_DBG-NEXT: call void @llvm.dbg.value(metadata i16 1, metadata [[META3]], metadata !DIExpression()), !dbg [[DBG5]] -; VECTOR_DBG-NEXT: call void @llvm.dbg.value(metadata i16 1, metadata [[META3]], metadata !DIExpression()), !dbg [[DBG5]] +; VECTOR_DBG-NEXT: #dbg_value(i16 1, [[META3:![0-9]+]], !DIExpression(), [[META5:![0-9]+]]) +; VECTOR_DBG-NEXT: #dbg_value(i16 1, [[META3]], !DIExpression(), [[META5]]) +; VECTOR_DBG-NEXT: #dbg_value(i16 1, [[META3]], !DIExpression(), [[META5]]) +; VECTOR_DBG-NEXT: #dbg_value(i16 1, [[META3]], !DIExpression(), [[META5]]) +; VECTOR_DBG-NEXT: #dbg_value(i16 1, [[META3]], !DIExpression(), [[META5]]) +; VECTOR_DBG-NEXT: #dbg_value(i16 1, [[META3]], !DIExpression(), 
[[META5]]) +; VECTOR_DBG-NEXT: #dbg_value(i16 1, [[META3]], !DIExpression(), [[META5]]) +; VECTOR_DBG-NEXT: #dbg_value(i16 1, [[META3]], !DIExpression(), [[META5]]) ; VECTOR_DBG-NEXT: store <4 x float> [[TMP0]], ptr [[B:%.*]], align 4 ; VECTOR_DBG-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[C:%.*]], align 4 ; VECTOR_DBG-NEXT: store <4 x float> [[TMP1]], ptr [[D:%.*]], align 4 diff --git a/llvm/test/Transforms/SROA/alignment.ll b/llvm/test/Transforms/SROA/alignment.ll index 12de1fa3c5bc76..98be495e5eb354 100644 --- a/llvm/test/Transforms/SROA/alignment.ll +++ b/llvm/test/Transforms/SROA/alignment.ll @@ -23,12 +23,12 @@ define void @test1(ptr %a, ptr %b) { ; ; CHECK-DEBUGLOC-LABEL: @test1( ; CHECK-DEBUGLOC-NEXT: entry: -; CHECK-DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META9:![0-9]+]], metadata !DIExpression()), !dbg [[DBG14:![0-9]+]] +; CHECK-DEBUGLOC-NEXT: #dbg_value(ptr undef, [[META9:![0-9]+]], !DIExpression(), [[META14:![0-9]+]]) ; CHECK-DEBUGLOC-NEXT: [[GEP_A:%.*]] = getelementptr { i8, i8 }, ptr [[A:%.*]], i32 0, i32 0, !dbg [[DBG15:![0-9]+]] -; CHECK-DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr [[GEP_A]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG15]] -; CHECK-DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META12:![0-9]+]], metadata !DIExpression()), !dbg [[DBG16:![0-9]+]] +; CHECK-DEBUGLOC-NEXT: #dbg_value(ptr [[GEP_A]], [[META11:![0-9]+]], !DIExpression(), [[DBG15]]) +; CHECK-DEBUGLOC-NEXT: #dbg_value(ptr undef, [[META12:![0-9]+]], !DIExpression(), [[META16:![0-9]+]]) ; CHECK-DEBUGLOC-NEXT: [[GEP_B:%.*]] = getelementptr { i8, i8 }, ptr [[B:%.*]], i32 0, i32 0, !dbg [[DBG17:![0-9]+]] -; CHECK-DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr [[GEP_B]], metadata [[META13:![0-9]+]], metadata !DIExpression()), !dbg [[DBG17]] +; CHECK-DEBUGLOC-NEXT: #dbg_value(ptr [[GEP_B]], [[META13:![0-9]+]], !DIExpression(), [[DBG17]]) ; CHECK-DEBUGLOC-NEXT: 
[[ALLOCA_SROA_0_0_COPYLOAD:%.*]] = load i8, ptr [[GEP_A]], align 16, !dbg [[DBG18:![0-9]+]] ; CHECK-DEBUGLOC-NEXT: [[ALLOCA_SROA_3_0_GEP_A_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[GEP_A]], i64 1, !dbg [[DBG18]] ; CHECK-DEBUGLOC-NEXT: [[ALLOCA_SROA_3_0_COPYLOAD:%.*]] = load i8, ptr [[ALLOCA_SROA_3_0_GEP_A_SROA_IDX]], align 1, !dbg [[DBG18]] @@ -57,24 +57,24 @@ define void @test2() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca i16, align 2 ; CHECK-NEXT: store volatile i16 0, ptr [[A_SROA_0]], align 2 -; CHECK-NEXT: [[A_SROA_0_1_GEP2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_0]], i64 1 -; CHECK-NEXT: [[A_SROA_0_1_A_SROA_0_2_RESULT:%.*]] = load i8, ptr [[A_SROA_0_1_GEP2_SROA_IDX]], align 1 -; CHECK-NEXT: [[A_SROA_0_1_GEP2_SROA_IDX2:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_0]], i64 1 -; CHECK-NEXT: store i8 42, ptr [[A_SROA_0_1_GEP2_SROA_IDX2]], align 1 +; CHECK-NEXT: [[A_SROA_0_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_0]], i64 1 +; CHECK-NEXT: [[A_SROA_0_1_A_SROA_0_2_RESULT:%.*]] = load i8, ptr [[A_SROA_0_1_SROA_IDX]], align 1 +; CHECK-NEXT: [[A_SROA_0_1_SROA_IDX2:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_0]], i64 1 +; CHECK-NEXT: store i8 42, ptr [[A_SROA_0_1_SROA_IDX2]], align 1 ; CHECK-NEXT: ret void ; ; CHECK-DEBUGLOC-LABEL: @test2( ; CHECK-DEBUGLOC-NEXT: entry: ; CHECK-DEBUGLOC-NEXT: [[A_SROA_0:%.*]] = alloca i16, align 2, !dbg [[DBG28:![0-9]+]] -; CHECK-DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META23:![0-9]+]], metadata !DIExpression()), !dbg [[DBG28]] -; CHECK-DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META24:![0-9]+]], metadata !DIExpression()), !dbg [[DBG29:![0-9]+]] +; CHECK-DEBUGLOC-NEXT: #dbg_value(ptr undef, [[META23:![0-9]+]], !DIExpression(), [[DBG28]]) +; CHECK-DEBUGLOC-NEXT: #dbg_value(ptr undef, [[META24:![0-9]+]], !DIExpression(), [[META29:![0-9]+]]) ; CHECK-DEBUGLOC-NEXT: store volatile i16 0, ptr [[A_SROA_0]], align 2, 
!dbg [[DBG30:![0-9]+]] -; CHECK-DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META25:![0-9]+]], metadata !DIExpression()), !dbg [[DBG31:![0-9]+]] -; CHECK-DEBUGLOC-NEXT: [[A_SROA_0_1_GEP2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_0]], i64 1, !dbg [[DBG32:![0-9]+]] -; CHECK-DEBUGLOC-NEXT: [[A_SROA_0_1_A_SROA_0_2_RESULT:%.*]] = load i8, ptr [[A_SROA_0_1_GEP2_SROA_IDX]], align 1, !dbg [[DBG32]] -; CHECK-DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i8 [[A_SROA_0_1_A_SROA_0_2_RESULT]], metadata [[META26:![0-9]+]], metadata !DIExpression()), !dbg [[DBG32]] -; CHECK-DEBUGLOC-NEXT: [[A_SROA_0_1_GEP2_SROA_IDX2:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_0]], i64 1, !dbg [[DBG33:![0-9]+]] -; CHECK-DEBUGLOC-NEXT: store i8 42, ptr [[A_SROA_0_1_GEP2_SROA_IDX2]], align 1, !dbg [[DBG33]] +; CHECK-DEBUGLOC-NEXT: #dbg_value(ptr undef, [[META25:![0-9]+]], !DIExpression(), [[META31:![0-9]+]]) +; CHECK-DEBUGLOC-NEXT: [[A_SROA_0_1_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_0]], i64 1, !dbg [[DBG32:![0-9]+]] +; CHECK-DEBUGLOC-NEXT: [[A_SROA_0_1_A_SROA_0_2_RESULT:%.*]] = load i8, ptr [[A_SROA_0_1_SROA_IDX]], align 1, !dbg [[DBG32]] +; CHECK-DEBUGLOC-NEXT: #dbg_value(i8 [[A_SROA_0_1_A_SROA_0_2_RESULT]], [[META26:![0-9]+]], !DIExpression(), [[DBG32]]) +; CHECK-DEBUGLOC-NEXT: [[A_SROA_0_1_SROA_IDX2:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_0]], i64 1, !dbg [[DBG33:![0-9]+]] +; CHECK-DEBUGLOC-NEXT: store i8 42, ptr [[A_SROA_0_1_SROA_IDX2]], align 1, !dbg [[DBG33]] ; CHECK-DEBUGLOC-NEXT: ret void, !dbg [[DBG34:![0-9]+]] ; entry: @@ -99,7 +99,7 @@ define void @PR13920(ptr %a, ptr %b) { ; ; CHECK-DEBUGLOC-LABEL: @PR13920( ; CHECK-DEBUGLOC-NEXT: entry: -; CHECK-DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META37:![0-9]+]], metadata !DIExpression()), !dbg [[DBG38:![0-9]+]] +; CHECK-DEBUGLOC-NEXT: #dbg_value(ptr undef, [[META37:![0-9]+]], !DIExpression(), [[META38:![0-9]+]]) ; CHECK-DEBUGLOC-NEXT: 
[[AA_0_COPYLOAD:%.*]] = load <2 x i64>, ptr [[A:%.*]], align 2, !dbg [[DBG39:![0-9]+]] ; CHECK-DEBUGLOC-NEXT: store <2 x i64> [[AA_0_COPYLOAD]], ptr [[B:%.*]], align 2, !dbg [[DBG40:![0-9]+]] ; CHECK-DEBUGLOC-NEXT: ret void, !dbg [[DBG41:![0-9]+]] @@ -129,11 +129,11 @@ define void @test3(ptr %x) { ; CHECK-DEBUGLOC-LABEL: @test3( ; CHECK-DEBUGLOC-NEXT: entry: ; CHECK-DEBUGLOC-NEXT: [[A_SROA_0:%.*]] = alloca [22 x i8], align 8, !dbg [[DBG47:![0-9]+]] -; CHECK-DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META44:![0-9]+]], metadata !DIExpression()), !dbg [[DBG47]] +; CHECK-DEBUGLOC-NEXT: #dbg_value(ptr undef, [[META44:![0-9]+]], !DIExpression(), [[DBG47]]) ; CHECK-DEBUGLOC-NEXT: [[B_SROA_0:%.*]] = alloca [18 x i8], align 2, !dbg [[DBG48:![0-9]+]] -; CHECK-DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META45:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48]] +; CHECK-DEBUGLOC-NEXT: #dbg_value(ptr undef, [[META45:![0-9]+]], !DIExpression(), [[DBG48]]) ; CHECK-DEBUGLOC-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[A_SROA_0]], ptr align 8 [[X:%.*]], i32 22, i1 false), !dbg [[DBG49:![0-9]+]] -; CHECK-DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META46:![0-9]+]], metadata !DIExpression()), !dbg [[DBG50:![0-9]+]] +; CHECK-DEBUGLOC-NEXT: #dbg_value(ptr undef, [[META46:![0-9]+]], !DIExpression(), [[META50:![0-9]+]]) ; CHECK-DEBUGLOC-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[B_SROA_0]], ptr align 2 [[X]], i32 18, i1 false), !dbg [[DBG51:![0-9]+]] ; CHECK-DEBUGLOC-NEXT: ret void, !dbg [[DBG52:![0-9]+]] ; @@ -158,32 +158,32 @@ define void @test5() { ; CHECK-NEXT: [[A_SROA_0:%.*]] = alloca [9 x i8], align 1 ; CHECK-NEXT: [[A_SROA_3:%.*]] = alloca [9 x i8], align 1 ; CHECK-NEXT: store volatile double 0.000000e+00, ptr [[A_SROA_0]], align 1 -; CHECK-NEXT: [[A_SROA_0_7_WEIRD_GEP1_SROA_IDX1:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_0]], i64 7 -; CHECK-NEXT: 
[[A_SROA_0_7_A_SROA_0_7_WEIRD_LOAD1:%.*]] = load volatile i16, ptr [[A_SROA_0_7_WEIRD_GEP1_SROA_IDX1]], align 1 +; CHECK-NEXT: [[A_SROA_0_7_SROA_IDX1:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_0]], i64 7 +; CHECK-NEXT: [[A_SROA_0_7_A_SROA_0_7_WEIRD_LOAD1:%.*]] = load volatile i16, ptr [[A_SROA_0_7_SROA_IDX1]], align 1 ; CHECK-NEXT: [[A_SROA_0_0_A_SROA_0_0_D1:%.*]] = load double, ptr [[A_SROA_0]], align 1 ; CHECK-NEXT: store volatile double [[A_SROA_0_0_A_SROA_0_0_D1]], ptr [[A_SROA_3]], align 1 -; CHECK-NEXT: [[A_SROA_3_7_WEIRD_GEP2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_3]], i64 7 -; CHECK-NEXT: [[A_SROA_3_7_A_SROA_3_16_WEIRD_LOAD2:%.*]] = load volatile i16, ptr [[A_SROA_3_7_WEIRD_GEP2_SROA_IDX]], align 1 +; CHECK-NEXT: [[A_SROA_3_7_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_3]], i64 7 +; CHECK-NEXT: [[A_SROA_3_7_A_SROA_3_16_WEIRD_LOAD2:%.*]] = load volatile i16, ptr [[A_SROA_3_7_SROA_IDX]], align 1 ; CHECK-NEXT: ret void ; ; CHECK-DEBUGLOC-LABEL: @test5( ; CHECK-DEBUGLOC-NEXT: entry: ; CHECK-DEBUGLOC-NEXT: [[A_SROA_0:%.*]] = alloca [9 x i8], align 1, !dbg [[DBG63:![0-9]+]] ; CHECK-DEBUGLOC-NEXT: [[A_SROA_3:%.*]] = alloca [9 x i8], align 1, !dbg [[DBG63]] -; CHECK-DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META55:![0-9]+]], metadata !DIExpression()), !dbg [[DBG63]] +; CHECK-DEBUGLOC-NEXT: #dbg_value(ptr undef, [[META55:![0-9]+]], !DIExpression(), [[DBG63]]) ; CHECK-DEBUGLOC-NEXT: store volatile double 0.000000e+00, ptr [[A_SROA_0]], align 1, !dbg [[DBG64:![0-9]+]] -; CHECK-DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META56:![0-9]+]], metadata !DIExpression()), !dbg [[DBG65:![0-9]+]] -; CHECK-DEBUGLOC-NEXT: [[A_SROA_0_7_WEIRD_GEP1_SROA_IDX1:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_0]], i64 7, !dbg [[DBG66:![0-9]+]] -; CHECK-DEBUGLOC-NEXT: [[A_SROA_0_7_A_SROA_0_7_WEIRD_LOAD1:%.*]] = load volatile i16, ptr [[A_SROA_0_7_WEIRD_GEP1_SROA_IDX1]], align 1, !dbg [[DBG66]] 
-; CHECK-DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i16 [[A_SROA_0_7_A_SROA_0_7_WEIRD_LOAD1]], metadata [[META57:![0-9]+]], metadata !DIExpression()), !dbg [[DBG66]] -; CHECK-DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META59:![0-9]+]], metadata !DIExpression()), !dbg [[DBG67:![0-9]+]] +; CHECK-DEBUGLOC-NEXT: #dbg_value(ptr undef, [[META56:![0-9]+]], !DIExpression(), [[META65:![0-9]+]]) +; CHECK-DEBUGLOC-NEXT: [[A_SROA_0_7_SROA_IDX1:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_0]], i64 7, !dbg [[DBG66:![0-9]+]] +; CHECK-DEBUGLOC-NEXT: [[A_SROA_0_7_A_SROA_0_7_WEIRD_LOAD1:%.*]] = load volatile i16, ptr [[A_SROA_0_7_SROA_IDX1]], align 1, !dbg [[DBG66]] +; CHECK-DEBUGLOC-NEXT: #dbg_value(i16 [[A_SROA_0_7_A_SROA_0_7_WEIRD_LOAD1]], [[META57:![0-9]+]], !DIExpression(), [[DBG66]]) +; CHECK-DEBUGLOC-NEXT: #dbg_value(ptr undef, [[META59:![0-9]+]], !DIExpression(), [[META67:![0-9]+]]) ; CHECK-DEBUGLOC-NEXT: [[A_SROA_0_0_A_SROA_0_0_D1:%.*]] = load double, ptr [[A_SROA_0]], align 1, !dbg [[DBG68:![0-9]+]] -; CHECK-DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata double [[A_SROA_0_0_A_SROA_0_0_D1]], metadata [[META60:![0-9]+]], metadata !DIExpression()), !dbg [[DBG68]] +; CHECK-DEBUGLOC-NEXT: #dbg_value(double [[A_SROA_0_0_A_SROA_0_0_D1]], [[META60:![0-9]+]], !DIExpression(), [[DBG68]]) ; CHECK-DEBUGLOC-NEXT: store volatile double [[A_SROA_0_0_A_SROA_0_0_D1]], ptr [[A_SROA_3]], align 1, !dbg [[DBG69:![0-9]+]] -; CHECK-DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META61:![0-9]+]], metadata !DIExpression()), !dbg [[DBG70:![0-9]+]] -; CHECK-DEBUGLOC-NEXT: [[A_SROA_3_7_WEIRD_GEP2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_3]], i64 7, !dbg [[DBG71:![0-9]+]] -; CHECK-DEBUGLOC-NEXT: [[A_SROA_3_7_A_SROA_3_16_WEIRD_LOAD2:%.*]] = load volatile i16, ptr [[A_SROA_3_7_WEIRD_GEP2_SROA_IDX]], align 1, !dbg [[DBG71]] -; CHECK-DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i16 
[[A_SROA_3_7_A_SROA_3_16_WEIRD_LOAD2]], metadata [[META62:![0-9]+]], metadata !DIExpression()), !dbg [[DBG71]] +; CHECK-DEBUGLOC-NEXT: #dbg_value(ptr undef, [[META61:![0-9]+]], !DIExpression(), [[META70:![0-9]+]]) +; CHECK-DEBUGLOC-NEXT: [[A_SROA_3_7_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[A_SROA_3]], i64 7, !dbg [[DBG71:![0-9]+]] +; CHECK-DEBUGLOC-NEXT: [[A_SROA_3_7_A_SROA_3_16_WEIRD_LOAD2:%.*]] = load volatile i16, ptr [[A_SROA_3_7_SROA_IDX]], align 1, !dbg [[DBG71]] +; CHECK-DEBUGLOC-NEXT: #dbg_value(i16 [[A_SROA_3_7_A_SROA_3_16_WEIRD_LOAD2]], [[META62:![0-9]+]], !DIExpression(), [[DBG71]]) ; CHECK-DEBUGLOC-NEXT: ret void, !dbg [[DBG72:![0-9]+]] ; entry: @@ -219,11 +219,11 @@ define void @test6() { ; CHECK-DEBUGLOC-NEXT: entry: ; CHECK-DEBUGLOC-NEXT: [[A_SROA_0:%.*]] = alloca double, align 8, !dbg [[DBG78:![0-9]+]] ; CHECK-DEBUGLOC-NEXT: [[A_SROA_2:%.*]] = alloca double, align 8, !dbg [[DBG78]] -; CHECK-DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META75:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78]] +; CHECK-DEBUGLOC-NEXT: #dbg_value(ptr undef, [[META75:![0-9]+]], !DIExpression(), [[DBG78]]) ; CHECK-DEBUGLOC-NEXT: store volatile double 0.000000e+00, ptr [[A_SROA_0]], align 8, !dbg [[DBG79:![0-9]+]] -; CHECK-DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META76:![0-9]+]], metadata !DIExpression()), !dbg [[DBG80:![0-9]+]] +; CHECK-DEBUGLOC-NEXT: #dbg_value(ptr undef, [[META76:![0-9]+]], !DIExpression(), [[META80:![0-9]+]]) ; CHECK-DEBUGLOC-NEXT: [[A_SROA_0_0_A_SROA_0_0_VAL:%.*]] = load double, ptr [[A_SROA_0]], align 8, !dbg [[DBG81:![0-9]+]] -; CHECK-DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata double [[A_SROA_0_0_A_SROA_0_0_VAL]], metadata [[META77:![0-9]+]], metadata !DIExpression()), !dbg [[DBG81]] +; CHECK-DEBUGLOC-NEXT: #dbg_value(double [[A_SROA_0_0_A_SROA_0_0_VAL]], [[META77:![0-9]+]], !DIExpression(), [[DBG81]]) ; CHECK-DEBUGLOC-NEXT: store volatile double 
[[A_SROA_0_0_A_SROA_0_0_VAL]], ptr [[A_SROA_2]], align 8, !dbg [[DBG82:![0-9]+]] ; CHECK-DEBUGLOC-NEXT: ret void, !dbg [[DBG83:![0-9]+]] ; @@ -255,13 +255,13 @@ define void @test7(ptr %out) { ; ; CHECK-DEBUGLOC-LABEL: @test7( ; CHECK-DEBUGLOC-NEXT: entry: -; CHECK-DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META86:![0-9]+]], metadata !DIExpression()), !dbg [[DBG90:![0-9]+]] -; CHECK-DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META87:![0-9]+]], metadata !DIExpression()), !dbg [[DBG91:![0-9]+]] +; CHECK-DEBUGLOC-NEXT: #dbg_value(ptr undef, [[META86:![0-9]+]], !DIExpression(), [[META90:![0-9]+]]) +; CHECK-DEBUGLOC-NEXT: #dbg_value(ptr undef, [[META87:![0-9]+]], !DIExpression(), [[META91:![0-9]+]]) ; CHECK-DEBUGLOC-NEXT: [[A_SROA_0_0_COPYLOAD:%.*]] = load double, ptr [[OUT:%.*]], align 1, !dbg [[DBG92:![0-9]+]] ; CHECK-DEBUGLOC-NEXT: [[A_SROA_4_0_OUT_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[OUT]], i64 8, !dbg [[DBG92]] ; CHECK-DEBUGLOC-NEXT: [[A_SROA_4_0_COPYLOAD:%.*]] = load double, ptr [[A_SROA_4_0_OUT_SROA_IDX]], align 1, !dbg [[DBG92]] -; CHECK-DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata double [[A_SROA_4_0_COPYLOAD]], metadata [[META88:![0-9]+]], metadata !DIExpression()), !dbg [[DBG93:![0-9]+]] -; CHECK-DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata double [[A_SROA_0_0_COPYLOAD]], metadata [[META89:![0-9]+]], metadata !DIExpression()), !dbg [[DBG94:![0-9]+]] +; CHECK-DEBUGLOC-NEXT: #dbg_value(double [[A_SROA_4_0_COPYLOAD]], [[META88:![0-9]+]], !DIExpression(), [[META93:![0-9]+]]) +; CHECK-DEBUGLOC-NEXT: #dbg_value(double [[A_SROA_0_0_COPYLOAD]], [[META89:![0-9]+]], !DIExpression(), [[META94:![0-9]+]]) ; CHECK-DEBUGLOC-NEXT: store double [[A_SROA_4_0_COPYLOAD]], ptr [[OUT]], align 1, !dbg [[DBG95:![0-9]+]] ; CHECK-DEBUGLOC-NEXT: [[A_SROA_4_0_OUT_SROA_IDX2:%.*]] = getelementptr inbounds i8, ptr [[OUT]], i64 8, !dbg [[DBG95]] ; CHECK-DEBUGLOC-NEXT: store double [[A_SROA_0_0_COPYLOAD]], ptr 
[[A_SROA_4_0_OUT_SROA_IDX2]], align 1, !dbg [[DBG95]] @@ -307,7 +307,7 @@ define void @test8() { ; ; CHECK-DEBUGLOC-LABEL: @test8( ; CHECK-DEBUGLOC-NEXT: [[PTR:%.*]] = alloca [5 x i32], align 1, !dbg [[DBG102:![0-9]+]] -; CHECK-DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr [[PTR]], metadata [[META99:![0-9]+]], metadata !DIExpression()), !dbg [[DBG102]] +; CHECK-DEBUGLOC-NEXT: #dbg_value(ptr [[PTR]], [[META99:![0-9]+]], !DIExpression(), [[DBG102]]) ; CHECK-DEBUGLOC-NEXT: call void @populate(ptr [[PTR]]), !dbg [[DBG103:![0-9]+]] ; CHECK-DEBUGLOC-NEXT: [[VAL_FCA_0_GEP:%.*]] = getelementptr inbounds [5 x i32], ptr [[PTR]], i32 0, i32 0, !dbg [[DBG104:![0-9]+]] ; CHECK-DEBUGLOC-NEXT: [[VAL_FCA_0_LOAD:%.*]] = load i32, ptr [[VAL_FCA_0_GEP]], align 1, !dbg [[DBG104]] @@ -324,7 +324,7 @@ define void @test8() { ; CHECK-DEBUGLOC-NEXT: [[VAL_FCA_4_GEP:%.*]] = getelementptr inbounds [5 x i32], ptr [[PTR]], i32 0, i32 4, !dbg [[DBG104]] ; CHECK-DEBUGLOC-NEXT: [[VAL_FCA_4_LOAD:%.*]] = load i32, ptr [[VAL_FCA_4_GEP]], align 1, !dbg [[DBG104]] ; CHECK-DEBUGLOC-NEXT: [[VAL_FCA_4_INSERT:%.*]] = insertvalue [5 x i32] [[VAL_FCA_3_INSERT]], i32 [[VAL_FCA_4_LOAD]], 4, !dbg [[DBG104]] -; CHECK-DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata [5 x i32] [[VAL_FCA_4_INSERT]], metadata [[META100:![0-9]+]], metadata !DIExpression()), !dbg [[DBG104]] +; CHECK-DEBUGLOC-NEXT: #dbg_value([5 x i32] [[VAL_FCA_4_INSERT]], [[META100:![0-9]+]], !DIExpression(), [[DBG104]]) ; CHECK-DEBUGLOC-NEXT: ret void, !dbg [[DBG105:![0-9]+]] ; %ptr = alloca [5 x i32], align 1 @@ -356,7 +356,7 @@ define void @test9() { ; ; CHECK-DEBUGLOC-LABEL: @test9( ; CHECK-DEBUGLOC-NEXT: [[PTR:%.*]] = alloca [5 x i32], align 8, !dbg [[DBG110:![0-9]+]] -; CHECK-DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr [[PTR]], metadata [[META108:![0-9]+]], metadata !DIExpression()), !dbg [[DBG110]] +; CHECK-DEBUGLOC-NEXT: #dbg_value(ptr [[PTR]], [[META108:![0-9]+]], !DIExpression(), [[DBG110]]) ; CHECK-DEBUGLOC-NEXT: call 
void @populate(ptr [[PTR]]), !dbg [[DBG111:![0-9]+]] ; CHECK-DEBUGLOC-NEXT: [[VAL_FCA_0_GEP:%.*]] = getelementptr inbounds [5 x i32], ptr [[PTR]], i32 0, i32 0, !dbg [[DBG112:![0-9]+]] ; CHECK-DEBUGLOC-NEXT: [[VAL_FCA_0_LOAD:%.*]] = load i32, ptr [[VAL_FCA_0_GEP]], align 8, !dbg [[DBG112]] @@ -373,7 +373,7 @@ define void @test9() { ; CHECK-DEBUGLOC-NEXT: [[VAL_FCA_4_GEP:%.*]] = getelementptr inbounds [5 x i32], ptr [[PTR]], i32 0, i32 4, !dbg [[DBG112]] ; CHECK-DEBUGLOC-NEXT: [[VAL_FCA_4_LOAD:%.*]] = load i32, ptr [[VAL_FCA_4_GEP]], align 8, !dbg [[DBG112]] ; CHECK-DEBUGLOC-NEXT: [[VAL_FCA_4_INSERT:%.*]] = insertvalue [5 x i32] [[VAL_FCA_3_INSERT]], i32 [[VAL_FCA_4_LOAD]], 4, !dbg [[DBG112]] -; CHECK-DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata [5 x i32] [[VAL_FCA_4_INSERT]], metadata [[META109:![0-9]+]], metadata !DIExpression()), !dbg [[DBG112]] +; CHECK-DEBUGLOC-NEXT: #dbg_value([5 x i32] [[VAL_FCA_4_INSERT]], [[META109:![0-9]+]], !DIExpression(), [[DBG112]]) ; CHECK-DEBUGLOC-NEXT: ret void, !dbg [[DBG113:![0-9]+]] ; %ptr = alloca [5 x i32], align 8 @@ -405,7 +405,7 @@ define void @test10() { ; ; CHECK-DEBUGLOC-LABEL: @test10( ; CHECK-DEBUGLOC-NEXT: [[PTR:%.*]] = alloca { i32, i8, i8, { i8, i16 } }, align 2, !dbg [[DBG119:![0-9]+]] -; CHECK-DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr [[PTR]], metadata [[META116:![0-9]+]], metadata !DIExpression()), !dbg [[DBG119]] +; CHECK-DEBUGLOC-NEXT: #dbg_value(ptr [[PTR]], [[META116:![0-9]+]], !DIExpression(), [[DBG119]]) ; CHECK-DEBUGLOC-NEXT: call void @populate(ptr [[PTR]]), !dbg [[DBG120:![0-9]+]] ; CHECK-DEBUGLOC-NEXT: [[VAL_FCA_0_GEP:%.*]] = getelementptr inbounds { i32, i8, i8, { i8, i16 } }, ptr [[PTR]], i32 0, i32 0, !dbg [[DBG121:![0-9]+]] ; CHECK-DEBUGLOC-NEXT: [[VAL_FCA_0_LOAD:%.*]] = load i32, ptr [[VAL_FCA_0_GEP]], align 2, !dbg [[DBG121]] @@ -422,7 +422,7 @@ define void @test10() { ; CHECK-DEBUGLOC-NEXT: [[VAL_FCA_3_1_GEP:%.*]] = getelementptr inbounds { i32, i8, i8, { i8, i16 } }, ptr [[PTR]], 
i32 0, i32 3, i32 1, !dbg [[DBG121]] ; CHECK-DEBUGLOC-NEXT: [[VAL_FCA_3_1_LOAD:%.*]] = load i16, ptr [[VAL_FCA_3_1_GEP]], align 2, !dbg [[DBG121]] ; CHECK-DEBUGLOC-NEXT: [[VAL_FCA_3_1_INSERT:%.*]] = insertvalue { i32, i8, i8, { i8, i16 } } [[VAL_FCA_3_0_INSERT]], i16 [[VAL_FCA_3_1_LOAD]], 3, 1, !dbg [[DBG121]] -; CHECK-DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata { i32, i8, i8, { i8, i16 } } [[VAL_FCA_3_1_INSERT]], metadata [[META117:![0-9]+]], metadata !DIExpression()), !dbg [[DBG121]] +; CHECK-DEBUGLOC-NEXT: #dbg_value({ i32, i8, i8, { i8, i16 } } [[VAL_FCA_3_1_INSERT]], [[META117:![0-9]+]], !DIExpression(), [[DBG121]]) ; CHECK-DEBUGLOC-NEXT: ret void, !dbg [[DBG122:![0-9]+]] ; %ptr = alloca {i32, i8, i8, {i8, i16}}, align 2 @@ -442,12 +442,12 @@ define dso_local i32 @pr45010(ptr %A) { ; ; CHECK-DEBUGLOC-LABEL: @pr45010( ; CHECK-DEBUGLOC-NEXT: [[B_SROA_0:%.*]] = alloca i32, align 4, !dbg [[DBG129:![0-9]+]] -; CHECK-DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META125:![0-9]+]], metadata !DIExpression()), !dbg [[DBG129]] +; CHECK-DEBUGLOC-NEXT: #dbg_value(ptr undef, [[META125:![0-9]+]], !DIExpression(), [[DBG129]]) ; CHECK-DEBUGLOC-NEXT: [[TMP1:%.*]] = load i32, ptr [[A:%.*]], align 4, !dbg [[DBG130:![0-9]+]] -; CHECK-DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i32 [[TMP1]], metadata [[META126:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130]] +; CHECK-DEBUGLOC-NEXT: #dbg_value(i32 [[TMP1]], [[META126:![0-9]+]], !DIExpression(), [[DBG130]]) ; CHECK-DEBUGLOC-NEXT: store atomic volatile i32 [[TMP1]], ptr [[B_SROA_0]] release, align 4, !dbg [[DBG131:![0-9]+]] ; CHECK-DEBUGLOC-NEXT: [[B_SROA_0_0_B_SROA_0_0_X:%.*]] = load atomic volatile i32, ptr [[B_SROA_0]] acquire, align 4, !dbg [[DBG132:![0-9]+]] -; CHECK-DEBUGLOC-NEXT: call void @llvm.dbg.value(metadata i32 [[B_SROA_0_0_B_SROA_0_0_X]], metadata [[META128:![0-9]+]], metadata !DIExpression()), !dbg [[DBG132]] +; CHECK-DEBUGLOC-NEXT: #dbg_value(i32 [[B_SROA_0_0_B_SROA_0_0_X]], 
[[META128:![0-9]+]], !DIExpression(), [[DBG132]]) ; CHECK-DEBUGLOC-NEXT: ret i32 [[B_SROA_0_0_B_SROA_0_0_X]], !dbg [[DBG133:![0-9]+]] ; %B = alloca %struct, align 4 diff --git a/llvm/test/Transforms/SROA/dbg-inline.ll b/llvm/test/Transforms/SROA/dbg-inline.ll index 454ca13230bfa6..ace956d14e0227 100644 --- a/llvm/test/Transforms/SROA/dbg-inline.ll +++ b/llvm/test/Transforms/SROA/dbg-inline.ll @@ -19,10 +19,10 @@ target triple = "x86_64-apple-macosx10.15.0" define i64 @_Z1g4pair(i64 %p.coerce0, i64 %p.coerce1) #0 !dbg !8 { ; CHECK-LABEL: @_Z1g4pair( ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[P_COERCE0:%.*]], metadata [[META16:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)), !dbg [[DBG17:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[P_COERCE0]], metadata [[META18:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 64)), !dbg [[DBG20:![0-9]+]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[P_COERCE1:%.*]], metadata [[META16]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)), !dbg [[DBG17]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 [[P_COERCE1]], metadata [[META18]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)), !dbg [[DBG20]] +; CHECK-NEXT: #dbg_value(i64 [[P_COERCE0:%.*]], [[META16:![0-9]+]], !DIExpression(DW_OP_LLVM_fragment, 0, 64), [[META17:![0-9]+]]) +; CHECK-NEXT: #dbg_value(i64 [[P_COERCE0]], [[META18:![0-9]+]], !DIExpression(DW_OP_LLVM_fragment, 0, 64), [[META20:![0-9]+]]) +; CHECK-NEXT: #dbg_value(i64 [[P_COERCE1:%.*]], [[META16]], !DIExpression(DW_OP_LLVM_fragment, 64, 64), [[META17]]) +; CHECK-NEXT: #dbg_value(i64 [[P_COERCE1]], [[META18]], !DIExpression(DW_OP_LLVM_fragment, 64, 64), [[META20]]) ; CHECK-NEXT: ret i64 [[P_COERCE0]], !dbg [[DBG22:![0-9]+]] ; entry: @@ -77,32 +77,31 @@ attributes #2 = { argmemonly nounwind willreturn } !26 = !DILocation(line: 10, column: 3, scope: !8) ;. 
; CHECK: attributes #[[ATTR0:[0-9]+]] = { noinline ssp uwtable } -; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } -; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } ;. -; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 12.0.0 (git@github.com:llvm/llvm-project 5110fd0343c2d06c8ae538741fbef13ece5e68de)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None, sysroot: "/") -; CHECK: [[META1:![0-9]+]] = !DIFile(filename: "/tmp/inlinesplit.cpp", directory: "/Volumes/Data/llvm-project") -; CHECK: [[META2:![0-9]+]] = !{} +; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: [[META1:![0-9]+]], producer: "{{.*}}clang version {{.*}}", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: [[META2:![0-9]+]], nameTableKind: None, sysroot: "/") +; CHECK: [[META1]] = !DIFile(filename: "/tmp/inlinesplit.cpp", directory: {{.*}}) +; CHECK: [[META2]] = !{} ; CHECK: [[META3:![0-9]+]] = !{i32 7, !"Dwarf Version", i32 4} ; CHECK: [[META4:![0-9]+]] = !{i32 2, !"Debug Info Version", i32 3} ; CHECK: [[META5:![0-9]+]] = !{i32 1, !"wchar_size", i32 4} ; CHECK: [[META6:![0-9]+]] = !{i32 8, !"PIC Level", i32 2} -; CHECK: [[META7:![0-9]+]] = distinct !DISubprogram(name: "g", linkageName: "_Z1g4pair", scope: !8, file: !8, line: 9, type: !9, scopeLine: 9, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) -; CHECK: [[META8:![0-9]+]] = !DIFile(filename: "/tmp/inlinesplit.cpp", directory: "") -; CHECK: [[META9:![0-9]+]] = !DISubroutineType(types: !10) -; CHECK: [[META10:![0-9]+]] = !{!11, !12} -; CHECK: [[META11:![0-9]+]] = !DIBasicType(name: "long long unsigned int", 
size: 64, encoding: DW_ATE_unsigned) -; CHECK: [[META12:![0-9]+]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "pair", file: !8, line: 1, size: 128, flags: DIFlagTypePassByValue, elements: !13, identifier: "_ZTS4pair") -; CHECK: [[META13:![0-9]+]] = !{!14, !15} -; CHECK: [[META14:![0-9]+]] = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !12, file: !8, line: 1, baseType: !11, size: 64) -; CHECK: [[META15:![0-9]+]] = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !12, file: !8, line: 1, baseType: !11, size: 64, offset: 64) -; CHECK: [[META16]] = !DILocalVariable(name: "p", arg: 1, scope: !7, file: !8, line: 9, type: !12) -; CHECK: [[DBG17]] = !DILocation(line: 0, scope: !7) -; CHECK: [[META18]] = !DILocalVariable(name: "p", arg: 1, scope: !19, file: !8, line: 5, type: !12) -; CHECK: [[META19:![0-9]+]] = distinct !DISubprogram(name: "f", linkageName: "_ZL1f4pair", scope: !8, file: !8, line: 5, type: !9, scopeLine: 5, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !0, retainedNodes: !2) -; CHECK: [[DBG20]] = !DILocation(line: 0, scope: !19, inlinedAt: !21) -; CHECK: [[META21:![0-9]+]] = distinct !DILocation(line: 10, column: 10, scope: !7) -; CHECK: [[DBG22]] = !DILocation(line: 10, column: 3, scope: !7) +; CHECK: [[META7:![0-9]+]] = distinct !DISubprogram(name: "g", linkageName: "_Z1g4pair", scope: [[META8:![0-9]+]], file: [[META8]], line: 9, type: [[META9:![0-9]+]], scopeLine: 9, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: [[META0]], retainedNodes: [[META2]]) +; CHECK: [[META8]] = !DIFile(filename: "/tmp/inlinesplit.cpp", directory: "") +; CHECK: [[META9]] = !DISubroutineType(types: [[META10:![0-9]+]]) +; CHECK: [[META10]] = !{[[META11:![0-9]+]], [[META12:![0-9]+]]} +; CHECK: [[META11]] = !DIBasicType(name: "long long unsigned int", size: 64, encoding: DW_ATE_unsigned) +; CHECK: [[META12]] = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "pair", file: [[META8]], line: 1, 
size: 128, flags: DIFlagTypePassByValue, elements: [[META13:![0-9]+]], identifier: "_ZTS4pair") +; CHECK: [[META13]] = !{[[META14:![0-9]+]], [[META15:![0-9]+]]} +; CHECK: [[META14]] = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: [[META12]], file: [[META8]], line: 1, baseType: [[META11]], size: 64) +; CHECK: [[META15]] = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: [[META12]], file: [[META8]], line: 1, baseType: [[META11]], size: 64, offset: 64) +; CHECK: [[META16]] = !DILocalVariable(name: "p", arg: 1, scope: [[META7]], file: [[META8]], line: 9, type: [[META12]]) +; CHECK: [[META17]] = !DILocation(line: 0, scope: [[META7]]) +; CHECK: [[META18]] = !DILocalVariable(name: "p", arg: 1, scope: [[META19:![0-9]+]], file: [[META8]], line: 5, type: [[META12]]) +; CHECK: [[META19]] = distinct !DISubprogram(name: "f", linkageName: "_ZL1f4pair", scope: [[META8]], file: [[META8]], line: 5, type: [[META9]], scopeLine: 5, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: [[META0]], retainedNodes: [[META2]]) +; CHECK: [[META20]] = !DILocation(line: 0, scope: [[META19]], inlinedAt: [[META21:![0-9]+]]) +; CHECK: [[META21]] = distinct !DILocation(line: 10, column: 10, scope: [[META7]]) +; CHECK: [[DBG22]] = !DILocation(line: 10, column: 3, scope: [[META7]]) ;. ;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: ; CHECK-MODIFY-CFG: {{.*}} diff --git a/llvm/test/Transforms/SROA/dbg-single-piece.ll b/llvm/test/Transforms/SROA/dbg-single-piece.ll index 9df1a835b42e55..6e9e8c060fd9f9 100644 --- a/llvm/test/Transforms/SROA/dbg-single-piece.ll +++ b/llvm/test/Transforms/SROA/dbg-single-piece.ll @@ -14,7 +14,7 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #0 define void @_ZL18findInsertLocationPN4llvm17MachineBasicBlockENS_9SlotIndexERNS_13LiveIntervalsE() { ; CHECK-LABEL: @_ZL18findInsertLocationPN4llvm17MachineBasicBlockENS_9SlotIndexERNS_13LiveIntervalsE( ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @llvm.dbg.value(metadata ptr poison, metadata [[META3:![0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 64, 64)), !dbg [[DBG8:![0-9]+]] +; CHECK-NEXT: #dbg_value(ptr poison, [[META3:![0-9]+]], !DIExpression(DW_OP_LLVM_fragment, 64, 64), [[META8:![0-9]+]]) ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SROA/vector-promotion.ll b/llvm/test/Transforms/SROA/vector-promotion.ll index 91700671225884..8624ab27ed3cc9 100644 --- a/llvm/test/Transforms/SROA/vector-promotion.ll +++ b/llvm/test/Transforms/SROA/vector-promotion.ll @@ -22,21 +22,21 @@ define i32 @test1(<4 x i32> %x, <4 x i32> %y) { ; ; DEBUG-LABEL: @test1( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META9:![0-9]+]], metadata !DIExpression()), !dbg [[DBG21:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG22:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META12:![0-9]+]], metadata !DIExpression()), !dbg [[DBG23:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META9:![0-9]+]], !DIExpression(), [[META21:![0-9]+]]) +; DEBUG-NEXT: #dbg_value(ptr undef, [[META11:![0-9]+]], !DIExpression(), [[META22:![0-9]+]]) +; DEBUG-NEXT: #dbg_value(ptr undef, [[META12:![0-9]+]], 
!DIExpression(), [[META23:![0-9]+]]) ; DEBUG-NEXT: [[A_SROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2, !dbg [[DBG24:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_0_8_VEC_EXTRACT]], metadata [[META13:![0-9]+]], metadata !DIExpression()), !dbg [[DBG24]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META15:![0-9]+]], metadata !DIExpression()), !dbg [[DBG25:![0-9]+]] +; DEBUG-NEXT: #dbg_value(i32 [[A_SROA_0_8_VEC_EXTRACT]], [[META13:![0-9]+]], !DIExpression(), [[DBG24]]) +; DEBUG-NEXT: #dbg_value(ptr undef, [[META15:![0-9]+]], !DIExpression(), [[META25:![0-9]+]]) ; DEBUG-NEXT: [[A_SROA_2_28_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[Y:%.*]], i32 3, !dbg [[DBG26:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_2_28_VEC_EXTRACT]], metadata [[META16:![0-9]+]], metadata !DIExpression()), !dbg [[DBG26]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META17:![0-9]+]], metadata !DIExpression()), !dbg [[DBG27:![0-9]+]] +; DEBUG-NEXT: #dbg_value(i32 [[A_SROA_2_28_VEC_EXTRACT]], [[META16:![0-9]+]], !DIExpression(), [[DBG26]]) +; DEBUG-NEXT: #dbg_value(ptr undef, [[META17:![0-9]+]], !DIExpression(), [[META27:![0-9]+]]) ; DEBUG-NEXT: [[A_SROA_2_16_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[Y]], i32 0, !dbg [[DBG28:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_2_16_VEC_EXTRACT]], metadata [[META18:![0-9]+]], metadata !DIExpression()), !dbg [[DBG28]] +; DEBUG-NEXT: #dbg_value(i32 [[A_SROA_2_16_VEC_EXTRACT]], [[META18:![0-9]+]], !DIExpression(), [[DBG28]]) ; DEBUG-NEXT: [[TMP4:%.*]] = add i32 [[A_SROA_0_8_VEC_EXTRACT]], [[A_SROA_2_28_VEC_EXTRACT]], !dbg [[DBG29:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[TMP4]], metadata [[META19:![0-9]+]], metadata !DIExpression()), !dbg [[DBG29]] +; DEBUG-NEXT: #dbg_value(i32 [[TMP4]], [[META19:![0-9]+]], !DIExpression(), [[DBG29]]) ; 
DEBUG-NEXT: [[TMP5:%.*]] = add i32 [[A_SROA_2_16_VEC_EXTRACT]], [[TMP4]], !dbg [[DBG30:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[TMP5]], metadata [[META20:![0-9]+]], metadata !DIExpression()), !dbg [[DBG30]] +; DEBUG-NEXT: #dbg_value(i32 [[TMP5]], [[META20:![0-9]+]], !DIExpression(), [[DBG30]]) ; DEBUG-NEXT: ret i32 [[TMP5]], !dbg [[DBG31:![0-9]+]] ; entry: @@ -71,23 +71,23 @@ define i32 @test2(<4 x i32> %x, <4 x i32> %y) { ; ; DEBUG-LABEL: @test2( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META34:![0-9]+]], metadata !DIExpression()), !dbg [[DBG45:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META35:![0-9]+]], metadata !DIExpression()), !dbg [[DBG46:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META36:![0-9]+]], metadata !DIExpression()), !dbg [[DBG47:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META34:![0-9]+]], !DIExpression(), [[META45:![0-9]+]]) +; DEBUG-NEXT: #dbg_value(ptr undef, [[META35:![0-9]+]], !DIExpression(), [[META46:![0-9]+]]) +; DEBUG-NEXT: #dbg_value(ptr undef, [[META36:![0-9]+]], !DIExpression(), [[META47:![0-9]+]]) ; DEBUG-NEXT: [[A_SROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2, !dbg [[DBG48:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_0_8_VEC_EXTRACT]], metadata [[META37:![0-9]+]], metadata !DIExpression()), !dbg [[DBG48]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META38:![0-9]+]], metadata !DIExpression()), !dbg [[DBG49:![0-9]+]] +; DEBUG-NEXT: #dbg_value(i32 [[A_SROA_0_8_VEC_EXTRACT]], [[META37:![0-9]+]], !DIExpression(), [[DBG48]]) +; DEBUG-NEXT: #dbg_value(ptr undef, [[META38:![0-9]+]], !DIExpression(), [[META49:![0-9]+]]) ; DEBUG-NEXT: [[A_SROA_2_28_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[Y:%.*]], i32 3, !dbg [[DBG50:![0-9]+]] -; DEBUG-NEXT: tail call void 
@llvm.dbg.value(metadata i32 [[A_SROA_2_28_VEC_EXTRACT]], metadata [[META39:![0-9]+]], metadata !DIExpression()), !dbg [[DBG50]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META40:![0-9]+]], metadata !DIExpression()), !dbg [[DBG51:![0-9]+]] +; DEBUG-NEXT: #dbg_value(i32 [[A_SROA_2_28_VEC_EXTRACT]], [[META39:![0-9]+]], !DIExpression(), [[DBG50]]) +; DEBUG-NEXT: #dbg_value(ptr undef, [[META40:![0-9]+]], !DIExpression(), [[META51:![0-9]+]]) ; DEBUG-NEXT: [[A_SROA_2_16_VEC_EXTRACT:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> poison, <2 x i32> , !dbg [[DBG52:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <2 x i32> [[A_SROA_2_16_VEC_EXTRACT]], metadata [[META41:![0-9]+]], metadata !DIExpression()), !dbg [[DBG52]] +; DEBUG-NEXT: #dbg_value(<2 x i32> [[A_SROA_2_16_VEC_EXTRACT]], [[META41:![0-9]+]], !DIExpression(), [[DBG52]]) ; DEBUG-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[A_SROA_2_16_VEC_EXTRACT]], i32 0, !dbg [[DBG53:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[TMP3]], metadata [[META42:![0-9]+]], metadata !DIExpression()), !dbg [[DBG53]] +; DEBUG-NEXT: #dbg_value(i32 [[TMP3]], [[META42:![0-9]+]], !DIExpression(), [[DBG53]]) ; DEBUG-NEXT: [[TMP4:%.*]] = add i32 [[A_SROA_0_8_VEC_EXTRACT]], [[A_SROA_2_28_VEC_EXTRACT]], !dbg [[DBG54:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[TMP4]], metadata [[META43:![0-9]+]], metadata !DIExpression()), !dbg [[DBG54]] +; DEBUG-NEXT: #dbg_value(i32 [[TMP4]], [[META43:![0-9]+]], !DIExpression(), [[DBG54]]) ; DEBUG-NEXT: [[TMP5:%.*]] = add i32 [[TMP3]], [[TMP4]], !dbg [[DBG55:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[TMP5]], metadata [[META44:![0-9]+]], metadata !DIExpression()), !dbg [[DBG55]] +; DEBUG-NEXT: #dbg_value(i32 [[TMP5]], [[META44:![0-9]+]], !DIExpression(), [[DBG55]]) ; DEBUG-NEXT: ret i32 [[TMP5]], !dbg [[DBG56:![0-9]+]] ; entry: @@ -123,22 +123,22 @@ define i32 @test3(<4 x i32> %x, <4 
x i32> %y) { ; ; DEBUG-LABEL: @test3( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META59:![0-9]+]], metadata !DIExpression()), !dbg [[DBG69:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META60:![0-9]+]], metadata !DIExpression()), !dbg [[DBG70:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META61:![0-9]+]], metadata !DIExpression()), !dbg [[DBG71:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META59:![0-9]+]], !DIExpression(), [[META69:![0-9]+]]) +; DEBUG-NEXT: #dbg_value(ptr undef, [[META60:![0-9]+]], !DIExpression(), [[META70:![0-9]+]]) +; DEBUG-NEXT: #dbg_value(ptr undef, [[META61:![0-9]+]], !DIExpression(), [[META71:![0-9]+]]) ; DEBUG-NEXT: [[A_SROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x i32> [[X:%.*]], i32 -1, i32 2, !dbg [[DBG72:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[A_SROA_0_8_VEC_INSERT]], i32 2, !dbg [[DBG73:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_0_8_VEC_EXTRACT]], metadata [[META62:![0-9]+]], metadata !DIExpression()), !dbg [[DBG73]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META63:![0-9]+]], metadata !DIExpression()), !dbg [[DBG74:![0-9]+]] +; DEBUG-NEXT: #dbg_value(i32 [[A_SROA_0_8_VEC_EXTRACT]], [[META62:![0-9]+]], !DIExpression(), [[DBG73]]) +; DEBUG-NEXT: #dbg_value(ptr undef, [[META63:![0-9]+]], !DIExpression(), [[META74:![0-9]+]]) ; DEBUG-NEXT: [[A_SROA_3_28_VEC_EXTRACT:%.*]] = extractelement <4 x i32> zeroinitializer, i32 3, !dbg [[DBG75:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_3_28_VEC_EXTRACT]], metadata [[META64:![0-9]+]], metadata !DIExpression()), !dbg [[DBG75]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META65:![0-9]+]], metadata !DIExpression()), !dbg [[DBG76:![0-9]+]] +; DEBUG-NEXT: #dbg_value(i32 
[[A_SROA_3_28_VEC_EXTRACT]], [[META64:![0-9]+]], !DIExpression(), [[DBG75]]) +; DEBUG-NEXT: #dbg_value(ptr undef, [[META65:![0-9]+]], !DIExpression(), [[META76:![0-9]+]]) ; DEBUG-NEXT: [[A_SROA_3_16_VEC_EXTRACT:%.*]] = extractelement <4 x i32> zeroinitializer, i32 0, !dbg [[DBG77:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_3_16_VEC_EXTRACT]], metadata [[META66:![0-9]+]], metadata !DIExpression()), !dbg [[DBG77]] +; DEBUG-NEXT: #dbg_value(i32 [[A_SROA_3_16_VEC_EXTRACT]], [[META66:![0-9]+]], !DIExpression(), [[DBG77]]) ; DEBUG-NEXT: [[TMP4:%.*]] = add i32 [[A_SROA_0_8_VEC_EXTRACT]], [[A_SROA_3_28_VEC_EXTRACT]], !dbg [[DBG78:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[TMP4]], metadata [[META67:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78]] +; DEBUG-NEXT: #dbg_value(i32 [[TMP4]], [[META67:![0-9]+]], !DIExpression(), [[DBG78]]) ; DEBUG-NEXT: [[TMP5:%.*]] = add i32 [[A_SROA_3_16_VEC_EXTRACT]], [[TMP4]], !dbg [[DBG79:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[TMP5]], metadata [[META68:![0-9]+]], metadata !DIExpression()), !dbg [[DBG79]] +; DEBUG-NEXT: #dbg_value(i32 [[TMP5]], [[META68:![0-9]+]], !DIExpression(), [[DBG79]]) ; DEBUG-NEXT: ret i32 [[TMP5]], !dbg [[DBG80:![0-9]+]] ; entry: @@ -179,26 +179,26 @@ define i32 @test4(<4 x i32> %x, <4 x i32> %y, ptr %z) { ; ; DEBUG-LABEL: @test4( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META83:![0-9]+]], metadata !DIExpression()), !dbg [[DBG94:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META84:![0-9]+]], metadata !DIExpression()), !dbg [[DBG95:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META83:![0-9]+]], !DIExpression(), [[META94:![0-9]+]]) +; DEBUG-NEXT: #dbg_value(ptr undef, [[META84:![0-9]+]], !DIExpression(), [[META95:![0-9]+]]) ; DEBUG-NEXT: [[A_SROA_3_16_COPYLOAD:%.*]] = load <4 x i32>, ptr [[Z:%.*]], align 1, !dbg 
[[DBG96:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META85:![0-9]+]], metadata !DIExpression()), !dbg [[DBG97:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META85:![0-9]+]], !DIExpression(), [[META97:![0-9]+]]) ; DEBUG-NEXT: [[Z_TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[Z]], i64 0, i64 2, !dbg [[DBG98:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr [[Z_TMP1]], metadata [[META86:![0-9]+]], metadata !DIExpression()), !dbg [[DBG98]] +; DEBUG-NEXT: #dbg_value(ptr [[Z_TMP1]], [[META86:![0-9]+]], !DIExpression(), [[DBG98]]) ; DEBUG-NEXT: [[A_SROA_0_8_COPYLOAD:%.*]] = load i32, ptr [[Z_TMP1]], align 1, !dbg [[DBG99:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x i32> [[X:%.*]], i32 [[A_SROA_0_8_COPYLOAD]], i32 2, !dbg [[DBG99]] ; DEBUG-NEXT: [[A_SROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[A_SROA_0_8_VEC_INSERT]], i32 2, !dbg [[DBG100:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_0_8_VEC_EXTRACT]], metadata [[META87:![0-9]+]], metadata !DIExpression()), !dbg [[DBG100]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META88:![0-9]+]], metadata !DIExpression()), !dbg [[DBG101:![0-9]+]] +; DEBUG-NEXT: #dbg_value(i32 [[A_SROA_0_8_VEC_EXTRACT]], [[META87:![0-9]+]], !DIExpression(), [[DBG100]]) +; DEBUG-NEXT: #dbg_value(ptr undef, [[META88:![0-9]+]], !DIExpression(), [[META101:![0-9]+]]) ; DEBUG-NEXT: [[A_SROA_3_28_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[A_SROA_3_16_COPYLOAD]], i32 3, !dbg [[DBG102:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_3_28_VEC_EXTRACT]], metadata [[META89:![0-9]+]], metadata !DIExpression()), !dbg [[DBG102]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META90:![0-9]+]], metadata !DIExpression()), !dbg [[DBG103:![0-9]+]] +; DEBUG-NEXT: #dbg_value(i32 [[A_SROA_3_28_VEC_EXTRACT]], [[META89:![0-9]+]], 
!DIExpression(), [[DBG102]]) +; DEBUG-NEXT: #dbg_value(ptr undef, [[META90:![0-9]+]], !DIExpression(), [[META103:![0-9]+]]) ; DEBUG-NEXT: [[A_SROA_3_16_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[A_SROA_3_16_COPYLOAD]], i32 0, !dbg [[DBG104:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_3_16_VEC_EXTRACT]], metadata [[META91:![0-9]+]], metadata !DIExpression()), !dbg [[DBG104]] +; DEBUG-NEXT: #dbg_value(i32 [[A_SROA_3_16_VEC_EXTRACT]], [[META91:![0-9]+]], !DIExpression(), [[DBG104]]) ; DEBUG-NEXT: [[TMP4:%.*]] = add i32 [[A_SROA_0_8_VEC_EXTRACT]], [[A_SROA_3_28_VEC_EXTRACT]], !dbg [[DBG105:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[TMP4]], metadata [[META92:![0-9]+]], metadata !DIExpression()), !dbg [[DBG105]] +; DEBUG-NEXT: #dbg_value(i32 [[TMP4]], [[META92:![0-9]+]], !DIExpression(), [[DBG105]]) ; DEBUG-NEXT: [[TMP5:%.*]] = add i32 [[A_SROA_3_16_VEC_EXTRACT]], [[TMP4]], !dbg [[DBG106:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[TMP5]], metadata [[META93:![0-9]+]], metadata !DIExpression()), !dbg [[DBG106]] +; DEBUG-NEXT: #dbg_value(i32 [[TMP5]], [[META93:![0-9]+]], !DIExpression(), [[DBG106]]) ; DEBUG-NEXT: ret i32 [[TMP5]], !dbg [[DBG107:![0-9]+]] ; entry: @@ -243,26 +243,26 @@ define i32 @test4_as1(<4 x i32> %x, <4 x i32> %y, ptr addrspace(1) %z) { ; ; DEBUG-LABEL: @test4_as1( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META110:![0-9]+]], metadata !DIExpression()), !dbg [[DBG121:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META111:![0-9]+]], metadata !DIExpression()), !dbg [[DBG122:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META110:![0-9]+]], !DIExpression(), [[META121:![0-9]+]]) +; DEBUG-NEXT: #dbg_value(ptr undef, [[META111:![0-9]+]], !DIExpression(), [[META122:![0-9]+]]) ; DEBUG-NEXT: [[A_SROA_3_16_COPYLOAD:%.*]] = load <4 x i32>, ptr addrspace(1) [[Z:%.*]], align 1, !dbg 
[[DBG123:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META112:![0-9]+]], metadata !DIExpression()), !dbg [[DBG124:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META112:![0-9]+]], !DIExpression(), [[META124:![0-9]+]]) ; DEBUG-NEXT: [[Z_TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr addrspace(1) [[Z]], i16 0, i16 2, !dbg [[DBG125:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr addrspace(1) [[Z_TMP1]], metadata [[META113:![0-9]+]], metadata !DIExpression()), !dbg [[DBG125]] +; DEBUG-NEXT: #dbg_value(ptr addrspace(1) [[Z_TMP1]], [[META113:![0-9]+]], !DIExpression(), [[DBG125]]) ; DEBUG-NEXT: [[A_SROA_0_8_COPYLOAD:%.*]] = load i32, ptr addrspace(1) [[Z_TMP1]], align 1, !dbg [[DBG126:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x i32> [[X:%.*]], i32 [[A_SROA_0_8_COPYLOAD]], i32 2, !dbg [[DBG126]] ; DEBUG-NEXT: [[A_SROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[A_SROA_0_8_VEC_INSERT]], i32 2, !dbg [[DBG127:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_0_8_VEC_EXTRACT]], metadata [[META114:![0-9]+]], metadata !DIExpression()), !dbg [[DBG127]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META115:![0-9]+]], metadata !DIExpression()), !dbg [[DBG128:![0-9]+]] +; DEBUG-NEXT: #dbg_value(i32 [[A_SROA_0_8_VEC_EXTRACT]], [[META114:![0-9]+]], !DIExpression(), [[DBG127]]) +; DEBUG-NEXT: #dbg_value(ptr undef, [[META115:![0-9]+]], !DIExpression(), [[META128:![0-9]+]]) ; DEBUG-NEXT: [[A_SROA_3_28_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[A_SROA_3_16_COPYLOAD]], i32 3, !dbg [[DBG129:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_3_28_VEC_EXTRACT]], metadata [[META116:![0-9]+]], metadata !DIExpression()), !dbg [[DBG129]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META117:![0-9]+]], metadata !DIExpression()), !dbg [[DBG130:![0-9]+]] +; DEBUG-NEXT: 
#dbg_value(i32 [[A_SROA_3_28_VEC_EXTRACT]], [[META116:![0-9]+]], !DIExpression(), [[DBG129]]) +; DEBUG-NEXT: #dbg_value(ptr undef, [[META117:![0-9]+]], !DIExpression(), [[META130:![0-9]+]]) ; DEBUG-NEXT: [[A_SROA_3_16_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[A_SROA_3_16_COPYLOAD]], i32 0, !dbg [[DBG131:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_3_16_VEC_EXTRACT]], metadata [[META118:![0-9]+]], metadata !DIExpression()), !dbg [[DBG131]] +; DEBUG-NEXT: #dbg_value(i32 [[A_SROA_3_16_VEC_EXTRACT]], [[META118:![0-9]+]], !DIExpression(), [[DBG131]]) ; DEBUG-NEXT: [[TMP4:%.*]] = add i32 [[A_SROA_0_8_VEC_EXTRACT]], [[A_SROA_3_28_VEC_EXTRACT]], !dbg [[DBG132:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[TMP4]], metadata [[META119:![0-9]+]], metadata !DIExpression()), !dbg [[DBG132]] +; DEBUG-NEXT: #dbg_value(i32 [[TMP4]], [[META119:![0-9]+]], !DIExpression(), [[DBG132]]) ; DEBUG-NEXT: [[TMP5:%.*]] = add i32 [[A_SROA_3_16_VEC_EXTRACT]], [[TMP4]], !dbg [[DBG133:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[TMP5]], metadata [[META120:![0-9]+]], metadata !DIExpression()), !dbg [[DBG133]] +; DEBUG-NEXT: #dbg_value(i32 [[TMP5]], [[META120:![0-9]+]], !DIExpression(), [[DBG133]]) ; DEBUG-NEXT: ret i32 [[TMP5]], !dbg [[DBG134:![0-9]+]] ; entry: @@ -305,25 +305,25 @@ define i32 @test5(<4 x i32> %x, <4 x i32> %y, ptr %z) { ; ; DEBUG-LABEL: @test5( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META137:![0-9]+]], metadata !DIExpression()), !dbg [[DBG148:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META138:![0-9]+]], metadata !DIExpression()), !dbg [[DBG149:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META139:![0-9]+]], metadata !DIExpression()), !dbg [[DBG150:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META137:![0-9]+]], !DIExpression(), [[META148:![0-9]+]]) 
+; DEBUG-NEXT: #dbg_value(ptr undef, [[META138:![0-9]+]], !DIExpression(), [[META149:![0-9]+]]) +; DEBUG-NEXT: #dbg_value(ptr undef, [[META139:![0-9]+]], !DIExpression(), [[META150:![0-9]+]]) ; DEBUG-NEXT: [[Z_TMP1:%.*]] = getelementptr inbounds <4 x i32>, ptr [[Z:%.*]], i64 0, i64 2, !dbg [[DBG151:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr [[Z_TMP1]], metadata [[META140:![0-9]+]], metadata !DIExpression()), !dbg [[DBG151]] +; DEBUG-NEXT: #dbg_value(ptr [[Z_TMP1]], [[META140:![0-9]+]], !DIExpression(), [[DBG151]]) ; DEBUG-NEXT: [[A_SROA_0_8_VEC_EXTRACT3:%.*]] = extractelement <4 x i32> [[Y:%.*]], i32 2, !dbg [[DBG152:![0-9]+]] ; DEBUG-NEXT: store i32 [[A_SROA_0_8_VEC_EXTRACT3]], ptr [[Z_TMP1]], align 1, !dbg [[DBG152]] ; DEBUG-NEXT: [[A_SROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[Y]], i32 2, !dbg [[DBG153:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_0_8_VEC_EXTRACT]], metadata [[META141:![0-9]+]], metadata !DIExpression()), !dbg [[DBG153]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META142:![0-9]+]], metadata !DIExpression()), !dbg [[DBG154:![0-9]+]] +; DEBUG-NEXT: #dbg_value(i32 [[A_SROA_0_8_VEC_EXTRACT]], [[META141:![0-9]+]], !DIExpression(), [[DBG153]]) +; DEBUG-NEXT: #dbg_value(ptr undef, [[META142:![0-9]+]], !DIExpression(), [[META154:![0-9]+]]) ; DEBUG-NEXT: [[A_SROA_4_12_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[Y]], i32 3, !dbg [[DBG155:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_4_12_VEC_EXTRACT]], metadata [[META143:![0-9]+]], metadata !DIExpression()), !dbg [[DBG155]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG156:![0-9]+]] +; DEBUG-NEXT: #dbg_value(i32 [[A_SROA_4_12_VEC_EXTRACT]], [[META143:![0-9]+]], !DIExpression(), [[DBG155]]) +; DEBUG-NEXT: #dbg_value(ptr undef, [[META144:![0-9]+]], !DIExpression(), [[META156:![0-9]+]]) 
; DEBUG-NEXT: [[A_SROA_4_0_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[Y]], i32 0, !dbg [[DBG157:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_4_0_VEC_EXTRACT]], metadata [[META145:![0-9]+]], metadata !DIExpression()), !dbg [[DBG157]] +; DEBUG-NEXT: #dbg_value(i32 [[A_SROA_4_0_VEC_EXTRACT]], [[META145:![0-9]+]], !DIExpression(), [[DBG157]]) ; DEBUG-NEXT: [[TMP4:%.*]] = add i32 [[A_SROA_0_8_VEC_EXTRACT]], [[A_SROA_4_12_VEC_EXTRACT]], !dbg [[DBG158:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[TMP4]], metadata [[META146:![0-9]+]], metadata !DIExpression()), !dbg [[DBG158]] +; DEBUG-NEXT: #dbg_value(i32 [[TMP4]], [[META146:![0-9]+]], !DIExpression(), [[DBG158]]) ; DEBUG-NEXT: [[TMP5:%.*]] = add i32 [[A_SROA_4_0_VEC_EXTRACT]], [[TMP4]], !dbg [[DBG159:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[TMP5]], metadata [[META147:![0-9]+]], metadata !DIExpression()), !dbg [[DBG159]] +; DEBUG-NEXT: #dbg_value(i32 [[TMP5]], [[META147:![0-9]+]], !DIExpression(), [[DBG159]]) ; DEBUG-NEXT: ret i32 [[TMP5]], !dbg [[DBG160:![0-9]+]] ; entry: @@ -367,17 +367,17 @@ define i64 @test6(<4 x i64> %x, <4 x i64> %y, i64 %n) { ; ; DEBUG-LABEL: @test6( ; DEBUG-NEXT: [[TMP:%.*]] = alloca { <4 x i64>, <4 x i64> }, align 32, !dbg [[DBG168:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr [[TMP]], metadata [[META163:![0-9]+]], metadata !DIExpression()), !dbg [[DBG168]] +; DEBUG-NEXT: #dbg_value(ptr [[TMP]], [[META163:![0-9]+]], !DIExpression(), [[DBG168]]) ; DEBUG-NEXT: [[P0:%.*]] = getelementptr inbounds { <4 x i64>, <4 x i64> }, ptr [[TMP]], i32 0, i32 0, !dbg [[DBG169:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr [[P0]], metadata [[META164:![0-9]+]], metadata !DIExpression()), !dbg [[DBG169]] +; DEBUG-NEXT: #dbg_value(ptr [[P0]], [[META164:![0-9]+]], !DIExpression(), [[DBG169]]) ; DEBUG-NEXT: store <4 x i64> [[X:%.*]], ptr [[P0]], align 32, !dbg [[DBG170:![0-9]+]] ; 
DEBUG-NEXT: [[P1:%.*]] = getelementptr inbounds { <4 x i64>, <4 x i64> }, ptr [[TMP]], i32 0, i32 1, !dbg [[DBG171:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr [[P1]], metadata [[META165:![0-9]+]], metadata !DIExpression()), !dbg [[DBG171]] +; DEBUG-NEXT: #dbg_value(ptr [[P1]], [[META165:![0-9]+]], !DIExpression(), [[DBG171]]) ; DEBUG-NEXT: store <4 x i64> [[Y:%.*]], ptr [[P1]], align 32, !dbg [[DBG172:![0-9]+]] ; DEBUG-NEXT: [[ADDR:%.*]] = getelementptr inbounds { <4 x i64>, <4 x i64> }, ptr [[TMP]], i32 0, i32 0, i64 [[N:%.*]], !dbg [[DBG173:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr [[ADDR]], metadata [[META166:![0-9]+]], metadata !DIExpression()), !dbg [[DBG173]] +; DEBUG-NEXT: #dbg_value(ptr [[ADDR]], [[META166:![0-9]+]], !DIExpression(), [[DBG173]]) ; DEBUG-NEXT: [[RES:%.*]] = load i64, ptr [[ADDR]], align 4, !dbg [[DBG174:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i64 [[RES]], metadata [[META167:![0-9]+]], metadata !DIExpression()), !dbg [[DBG174]] +; DEBUG-NEXT: #dbg_value(i64 [[RES]], [[META167:![0-9]+]], !DIExpression(), [[DBG174]]) ; DEBUG-NEXT: ret i64 [[RES]], !dbg [[DBG175:![0-9]+]] ; %tmp = alloca { <4 x i64>, <4 x i64> } @@ -401,15 +401,15 @@ define <4 x i32> @test_subvec_store() { ; ; DEBUG-LABEL: @test_subvec_store( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META178:![0-9]+]], metadata !DIExpression()), !dbg [[DBG184:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META178:![0-9]+]], !DIExpression(), [[META184:![0-9]+]]) ; DEBUG-NEXT: [[A_0_VECBLEND:%.*]] = select <4 x i1> , <4 x i32> , <4 x i32> undef, !dbg [[DBG185:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META179:![0-9]+]], metadata !DIExpression()), !dbg [[DBG186:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META179:![0-9]+]], !DIExpression(), [[META186:![0-9]+]]) ; DEBUG-NEXT: [[A_4_VECBLEND:%.*]] = select <4 x i1> , <4 
x i32> , <4 x i32> [[A_0_VECBLEND]], !dbg [[DBG187:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META180:![0-9]+]], metadata !DIExpression()), !dbg [[DBG188:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META180:![0-9]+]], !DIExpression(), [[META188:![0-9]+]]) ; DEBUG-NEXT: [[A_8_VECBLEND:%.*]] = select <4 x i1> , <4 x i32> , <4 x i32> [[A_4_VECBLEND]], !dbg [[DBG189:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META181:![0-9]+]], metadata !DIExpression()), !dbg [[DBG190:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META181:![0-9]+]], !DIExpression(), [[META190:![0-9]+]]) ; DEBUG-NEXT: [[A_12_VEC_INSERT:%.*]] = insertelement <4 x i32> [[A_8_VECBLEND]], i32 3, i32 3, !dbg [[DBG191:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <4 x i32> [[A_12_VEC_INSERT]], metadata [[META182:![0-9]+]], metadata !DIExpression()), !dbg [[DBG192:![0-9]+]] +; DEBUG-NEXT: #dbg_value(<4 x i32> [[A_12_VEC_INSERT]], [[META182:![0-9]+]], !DIExpression(), [[META192:![0-9]+]]) ; DEBUG-NEXT: ret <4 x i32> [[A_12_VEC_INSERT]], !dbg [[DBG193:![0-9]+]] ; entry: @@ -443,19 +443,19 @@ define <4 x i32> @test_subvec_load() { ; ; DEBUG-LABEL: @test_subvec_load( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META196:![0-9]+]], metadata !DIExpression()), !dbg [[DBG204:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META196:![0-9]+]], !DIExpression(), [[META204:![0-9]+]]) ; DEBUG-NEXT: [[A_0_VEC_EXTRACT:%.*]] = shufflevector <4 x i32> , <4 x i32> poison, <2 x i32> , !dbg [[DBG205:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <2 x i32> [[A_0_VEC_EXTRACT]], metadata [[META197:![0-9]+]], metadata !DIExpression()), !dbg [[DBG205]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META198:![0-9]+]], metadata !DIExpression()), !dbg [[DBG206:![0-9]+]] +; DEBUG-NEXT: #dbg_value(<2 x i32> [[A_0_VEC_EXTRACT]], 
[[META197:![0-9]+]], !DIExpression(), [[DBG205]]) +; DEBUG-NEXT: #dbg_value(ptr undef, [[META198:![0-9]+]], !DIExpression(), [[META206:![0-9]+]]) ; DEBUG-NEXT: [[A_4_VEC_EXTRACT:%.*]] = shufflevector <4 x i32> , <4 x i32> poison, <2 x i32> , !dbg [[DBG207:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <2 x i32> [[A_4_VEC_EXTRACT]], metadata [[META199:![0-9]+]], metadata !DIExpression()), !dbg [[DBG207]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META200:![0-9]+]], metadata !DIExpression()), !dbg [[DBG208:![0-9]+]] +; DEBUG-NEXT: #dbg_value(<2 x i32> [[A_4_VEC_EXTRACT]], [[META199:![0-9]+]], !DIExpression(), [[DBG207]]) +; DEBUG-NEXT: #dbg_value(ptr undef, [[META200:![0-9]+]], !DIExpression(), [[META208:![0-9]+]]) ; DEBUG-NEXT: [[A_8_VEC_EXTRACT:%.*]] = shufflevector <4 x i32> , <4 x i32> poison, <2 x i32> , !dbg [[DBG209:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <2 x i32> [[A_8_VEC_EXTRACT]], metadata [[META201:![0-9]+]], metadata !DIExpression()), !dbg [[DBG209]] +; DEBUG-NEXT: #dbg_value(<2 x i32> [[A_8_VEC_EXTRACT]], [[META201:![0-9]+]], !DIExpression(), [[DBG209]]) ; DEBUG-NEXT: [[TMP:%.*]] = shufflevector <2 x i32> [[A_0_VEC_EXTRACT]], <2 x i32> [[A_4_VEC_EXTRACT]], <2 x i32> , !dbg [[DBG210:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <2 x i32> [[TMP]], metadata [[META202:![0-9]+]], metadata !DIExpression()), !dbg [[DBG210]] +; DEBUG-NEXT: #dbg_value(<2 x i32> [[TMP]], [[META202:![0-9]+]], !DIExpression(), [[DBG210]]) ; DEBUG-NEXT: [[RET:%.*]] = shufflevector <2 x i32> [[TMP]], <2 x i32> [[A_8_VEC_EXTRACT]], <4 x i32> , !dbg [[DBG211:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <4 x i32> [[RET]], metadata [[META203:![0-9]+]], metadata !DIExpression()), !dbg [[DBG211]] +; DEBUG-NEXT: #dbg_value(<4 x i32> [[RET]], [[META203:![0-9]+]], !DIExpression(), [[DBG211]]) ; DEBUG-NEXT: ret <4 x i32> [[RET]], !dbg [[DBG212:![0-9]+]] ; entry: @@ -488,15 
+488,15 @@ define <4 x float> @test_subvec_memset() { ; ; DEBUG-LABEL: @test_subvec_memset( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META215:![0-9]+]], metadata !DIExpression()), !dbg [[DBG220:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META215:![0-9]+]], !DIExpression(), [[META220:![0-9]+]]) ; DEBUG-NEXT: [[A_0_VECBLEND:%.*]] = select <4 x i1> , <4 x float> , <4 x float> undef, !dbg [[DBG221:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META216:![0-9]+]], metadata !DIExpression()), !dbg [[DBG222:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META216:![0-9]+]], !DIExpression(), [[META222:![0-9]+]]) ; DEBUG-NEXT: [[A_4_VECBLEND:%.*]] = select <4 x i1> , <4 x float> , <4 x float> [[A_0_VECBLEND]], !dbg [[DBG223:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META217:![0-9]+]], metadata !DIExpression()), !dbg [[DBG224:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META217:![0-9]+]], !DIExpression(), [[META224:![0-9]+]]) ; DEBUG-NEXT: [[A_8_VECBLEND:%.*]] = select <4 x i1> , <4 x float> , <4 x float> [[A_4_VECBLEND]], !dbg [[DBG225:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META218:![0-9]+]], metadata !DIExpression()), !dbg [[DBG226:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META218:![0-9]+]], !DIExpression(), [[META226:![0-9]+]]) ; DEBUG-NEXT: [[A_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[A_8_VECBLEND]], float 0x38E0E0E0E0000000, i32 3, !dbg [[DBG227:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <4 x float> [[A_12_VEC_INSERT]], metadata [[META219:![0-9]+]], metadata !DIExpression()), !dbg [[DBG228:![0-9]+]] +; DEBUG-NEXT: #dbg_value(<4 x float> [[A_12_VEC_INSERT]], [[META219:![0-9]+]], !DIExpression(), [[META228:![0-9]+]]) ; DEBUG-NEXT: ret <4 x float> [[A_12_VEC_INSERT]], !dbg [[DBG229:![0-9]+]] ; entry: @@ -538,24 +538,24 @@ define <4 x float> 
@test_subvec_memcpy(ptr %x, ptr %y, ptr %z, ptr %f, ptr %out) ; ; DEBUG-LABEL: @test_subvec_memcpy( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META232:![0-9]+]], metadata !DIExpression()), !dbg [[DBG237:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META232:![0-9]+]], !DIExpression(), [[META237:![0-9]+]]) ; DEBUG-NEXT: [[A_0_COPYLOAD:%.*]] = load <2 x float>, ptr [[X:%.*]], align 1, !dbg [[DBG238:![0-9]+]] ; DEBUG-NEXT: [[A_0_VEC_EXPAND:%.*]] = shufflevector <2 x float> [[A_0_COPYLOAD]], <2 x float> poison, <4 x i32> , !dbg [[DBG238]] ; DEBUG-NEXT: [[A_0_VECBLEND:%.*]] = select <4 x i1> , <4 x float> [[A_0_VEC_EXPAND]], <4 x float> undef, !dbg [[DBG238]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META233:![0-9]+]], metadata !DIExpression()), !dbg [[DBG239:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META233:![0-9]+]], !DIExpression(), [[META239:![0-9]+]]) ; DEBUG-NEXT: [[A_4_COPYLOAD:%.*]] = load <2 x float>, ptr [[Y:%.*]], align 1, !dbg [[DBG240:![0-9]+]] ; DEBUG-NEXT: [[A_4_VEC_EXPAND:%.*]] = shufflevector <2 x float> [[A_4_COPYLOAD]], <2 x float> poison, <4 x i32> , !dbg [[DBG240]] ; DEBUG-NEXT: [[A_4_VECBLEND:%.*]] = select <4 x i1> , <4 x float> [[A_4_VEC_EXPAND]], <4 x float> [[A_0_VECBLEND]], !dbg [[DBG240]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META234:![0-9]+]], metadata !DIExpression()), !dbg [[DBG241:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META234:![0-9]+]], !DIExpression(), [[META241:![0-9]+]]) ; DEBUG-NEXT: [[A_8_COPYLOAD:%.*]] = load <2 x float>, ptr [[Z:%.*]], align 1, !dbg [[DBG242:![0-9]+]] ; DEBUG-NEXT: [[A_8_VEC_EXPAND:%.*]] = shufflevector <2 x float> [[A_8_COPYLOAD]], <2 x float> poison, <4 x i32> , !dbg [[DBG242]] ; DEBUG-NEXT: [[A_8_VECBLEND:%.*]] = select <4 x i1> , <4 x float> [[A_8_VEC_EXPAND]], <4 x float> [[A_4_VECBLEND]], !dbg [[DBG242]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr 
undef, metadata [[META235:![0-9]+]], metadata !DIExpression()), !dbg [[DBG243:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META235:![0-9]+]], !DIExpression(), [[META243:![0-9]+]]) ; DEBUG-NEXT: [[A_12_COPYLOAD:%.*]] = load float, ptr [[F:%.*]], align 1, !dbg [[DBG244:![0-9]+]] ; DEBUG-NEXT: [[A_12_VEC_INSERT:%.*]] = insertelement <4 x float> [[A_8_VECBLEND]], float [[A_12_COPYLOAD]], i32 3, !dbg [[DBG244]] ; DEBUG-NEXT: [[A_8_VEC_EXTRACT:%.*]] = shufflevector <4 x float> [[A_12_VEC_INSERT]], <4 x float> poison, <2 x i32> , !dbg [[DBG245:![0-9]+]] ; DEBUG-NEXT: store <2 x float> [[A_8_VEC_EXTRACT]], ptr [[OUT:%.*]], align 1, !dbg [[DBG245]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <4 x float> [[A_12_VEC_INSERT]], metadata [[META236:![0-9]+]], metadata !DIExpression()), !dbg [[DBG246:![0-9]+]] +; DEBUG-NEXT: #dbg_value(<4 x float> [[A_12_VEC_INSERT]], [[META236:![0-9]+]], !DIExpression(), [[META246:![0-9]+]]) ; DEBUG-NEXT: ret <4 x float> [[A_12_VEC_INSERT]], !dbg [[DBG247:![0-9]+]] ; entry: @@ -596,7 +596,7 @@ define i32 @PR14212(<3 x i8> %val) { ; ; DEBUG-LABEL: @PR14212( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META250:![0-9]+]], metadata !DIExpression()), !dbg [[DBG252:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META250:![0-9]+]], !DIExpression(), [[META252:![0-9]+]]) ; DEBUG-NEXT: [[TMP0:%.*]] = bitcast <3 x i8> [[VAL:%.*]] to i24, !dbg [[DBG253:![0-9]+]] ; DEBUG-NEXT: [[RETVAL_SROA_2_0_INSERT_EXT:%.*]] = zext i8 undef to i32, !dbg [[DBG254:![0-9]+]] ; DEBUG-NEXT: [[RETVAL_SROA_2_0_INSERT_SHIFT:%.*]] = shl i32 [[RETVAL_SROA_2_0_INSERT_EXT]], 24, !dbg [[DBG254]] @@ -605,7 +605,7 @@ define i32 @PR14212(<3 x i8> %val) { ; DEBUG-NEXT: [[RETVAL_0_INSERT_EXT:%.*]] = zext i24 [[TMP0]] to i32, !dbg [[DBG254]] ; DEBUG-NEXT: [[RETVAL_0_INSERT_MASK:%.*]] = and i32 [[RETVAL_SROA_2_0_INSERT_INSERT]], -16777216, !dbg [[DBG254]] ; DEBUG-NEXT: [[RETVAL_0_INSERT_INSERT:%.*]] = or i32 
[[RETVAL_0_INSERT_MASK]], [[RETVAL_0_INSERT_EXT]], !dbg [[DBG254]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[RETVAL_0_INSERT_INSERT]], metadata [[META251:![0-9]+]], metadata !DIExpression()), !dbg [[DBG253]] +; DEBUG-NEXT: #dbg_value(i32 [[RETVAL_0_INSERT_INSERT]], [[META251:![0-9]+]], !DIExpression(), [[DBG253]]) ; DEBUG-NEXT: ret i32 [[RETVAL_0_INSERT_INSERT]], !dbg [[DBG254]] ; entry: @@ -630,12 +630,12 @@ define <2 x i8> @PR14349.1(i32 %x) { ; ; DEBUG-LABEL: @PR14349.1( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META257:![0-9]+]], metadata !DIExpression()), !dbg [[DBG260:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META257:![0-9]+]], !DIExpression(), [[META260:![0-9]+]]) ; DEBUG-NEXT: [[A_SROA_0_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[X:%.*]] to i16, !dbg [[DBG261:![0-9]+]] ; DEBUG-NEXT: [[TMP0:%.*]] = bitcast i16 [[A_SROA_0_0_EXTRACT_TRUNC]] to <2 x i8>, !dbg [[DBG261]] ; DEBUG-NEXT: [[A_SROA_2_0_EXTRACT_SHIFT:%.*]] = lshr i32 [[X]], 16, !dbg [[DBG261]] ; DEBUG-NEXT: [[A_SROA_2_0_EXTRACT_TRUNC:%.*]] = trunc i32 [[A_SROA_2_0_EXTRACT_SHIFT]] to i16, !dbg [[DBG261]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <2 x i8> [[TMP0]], metadata [[META258:![0-9]+]], metadata !DIExpression()), !dbg [[DBG262:![0-9]+]] +; DEBUG-NEXT: #dbg_value(<2 x i8> [[TMP0]], [[META258:![0-9]+]], !DIExpression(), [[META262:![0-9]+]]) ; DEBUG-NEXT: ret <2 x i8> [[TMP0]], !dbg [[DBG263:![0-9]+]] ; entry: @@ -666,7 +666,7 @@ define i32 @PR14349.2(<2 x i8> %x) { ; ; DEBUG-LABEL: @PR14349.2( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META266:![0-9]+]], metadata !DIExpression()), !dbg [[DBG268:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META266:![0-9]+]], !DIExpression(), [[META268:![0-9]+]]) ; DEBUG-NEXT: [[TMP0:%.*]] = bitcast <2 x i8> [[X:%.*]] to i16, !dbg [[DBG269:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_2_0_INSERT_EXT:%.*]] = zext i16 undef to i32, !dbg 
[[DBG270:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_2_0_INSERT_SHIFT:%.*]] = shl i32 [[A_SROA_2_0_INSERT_EXT]], 16, !dbg [[DBG270]] @@ -675,7 +675,7 @@ define i32 @PR14349.2(<2 x i8> %x) { ; DEBUG-NEXT: [[A_SROA_0_0_INSERT_EXT:%.*]] = zext i16 [[TMP0]] to i32, !dbg [[DBG270]] ; DEBUG-NEXT: [[A_SROA_0_0_INSERT_MASK:%.*]] = and i32 [[A_SROA_2_0_INSERT_INSERT]], -65536, !dbg [[DBG270]] ; DEBUG-NEXT: [[A_SROA_0_0_INSERT_INSERT:%.*]] = or i32 [[A_SROA_0_0_INSERT_MASK]], [[A_SROA_0_0_INSERT_EXT]], !dbg [[DBG270]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_0_0_INSERT_INSERT]], metadata [[META267:![0-9]+]], metadata !DIExpression()), !dbg [[DBG269]] +; DEBUG-NEXT: #dbg_value(i32 [[A_SROA_0_0_INSERT_INSERT]], [[META267:![0-9]+]], !DIExpression(), [[DBG269]]) ; DEBUG-NEXT: ret i32 [[A_SROA_0_0_INSERT_INSERT]], !dbg [[DBG270]] ; entry: @@ -702,21 +702,21 @@ define i32 @test7(<2 x i32> %x, <2 x i32> %y) { ; ; DEBUG-LABEL: @test7( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META273:![0-9]+]], metadata !DIExpression()), !dbg [[DBG283:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META274:![0-9]+]], metadata !DIExpression()), !dbg [[DBG284:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META275:![0-9]+]], metadata !DIExpression()), !dbg [[DBG285:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META273:![0-9]+]], !DIExpression(), [[META283:![0-9]+]]) +; DEBUG-NEXT: #dbg_value(ptr undef, [[META274:![0-9]+]], !DIExpression(), [[META284:![0-9]+]]) +; DEBUG-NEXT: #dbg_value(ptr undef, [[META275:![0-9]+]], !DIExpression(), [[META285:![0-9]+]]) ; DEBUG-NEXT: [[A_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x i32> [[X:%.*]], i32 1, !dbg [[DBG286:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_0_4_VEC_EXTRACT]], metadata [[META276:![0-9]+]], metadata !DIExpression()), !dbg [[DBG286]] -; DEBUG-NEXT: tail call 
void @llvm.dbg.value(metadata ptr undef, metadata [[META277:![0-9]+]], metadata !DIExpression()), !dbg [[DBG287:![0-9]+]] +; DEBUG-NEXT: #dbg_value(i32 [[A_SROA_0_4_VEC_EXTRACT]], [[META276:![0-9]+]], !DIExpression(), [[DBG286]]) +; DEBUG-NEXT: #dbg_value(ptr undef, [[META277:![0-9]+]], !DIExpression(), [[META287:![0-9]+]]) ; DEBUG-NEXT: [[A_SROA_2_12_VEC_EXTRACT:%.*]] = extractelement <2 x i32> [[Y:%.*]], i32 1, !dbg [[DBG288:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_2_12_VEC_EXTRACT]], metadata [[META278:![0-9]+]], metadata !DIExpression()), !dbg [[DBG288]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META279:![0-9]+]], metadata !DIExpression()), !dbg [[DBG289:![0-9]+]] +; DEBUG-NEXT: #dbg_value(i32 [[A_SROA_2_12_VEC_EXTRACT]], [[META278:![0-9]+]], !DIExpression(), [[DBG288]]) +; DEBUG-NEXT: #dbg_value(ptr undef, [[META279:![0-9]+]], !DIExpression(), [[META289:![0-9]+]]) ; DEBUG-NEXT: [[A_SROA_2_8_VEC_EXTRACT:%.*]] = extractelement <2 x i32> [[Y]], i32 0, !dbg [[DBG290:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_2_8_VEC_EXTRACT]], metadata [[META280:![0-9]+]], metadata !DIExpression()), !dbg [[DBG290]] +; DEBUG-NEXT: #dbg_value(i32 [[A_SROA_2_8_VEC_EXTRACT]], [[META280:![0-9]+]], !DIExpression(), [[DBG290]]) ; DEBUG-NEXT: [[TMP4:%.*]] = add i32 [[A_SROA_0_4_VEC_EXTRACT]], [[A_SROA_2_12_VEC_EXTRACT]], !dbg [[DBG291:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[TMP4]], metadata [[META281:![0-9]+]], metadata !DIExpression()), !dbg [[DBG291]] +; DEBUG-NEXT: #dbg_value(i32 [[TMP4]], [[META281:![0-9]+]], !DIExpression(), [[DBG291]]) ; DEBUG-NEXT: [[TMP5:%.*]] = add i32 [[A_SROA_2_8_VEC_EXTRACT]], [[TMP4]], !dbg [[DBG292:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[TMP5]], metadata [[META282:![0-9]+]], metadata !DIExpression()), !dbg [[DBG292]] +; DEBUG-NEXT: #dbg_value(i32 [[TMP5]], [[META282:![0-9]+]], 
!DIExpression(), [[DBG292]]) ; DEBUG-NEXT: ret i32 [[TMP5]], !dbg [[DBG293:![0-9]+]] ; entry: @@ -751,14 +751,14 @@ define i32 @test8(<2 x i32> %x) { ; ; DEBUG-LABEL: @test8( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META296:![0-9]+]], metadata !DIExpression()), !dbg [[DBG301:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META296:![0-9]+]], !DIExpression(), [[META301:![0-9]+]]) ; DEBUG-NEXT: [[A_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x i32> [[X:%.*]], i32 0, !dbg [[DBG302:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_0_0_VEC_EXTRACT]], metadata [[META297:![0-9]+]], metadata !DIExpression()), !dbg [[DBG302]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META298:![0-9]+]], metadata !DIExpression()), !dbg [[DBG303:![0-9]+]] +; DEBUG-NEXT: #dbg_value(i32 [[A_SROA_0_0_VEC_EXTRACT]], [[META297:![0-9]+]], !DIExpression(), [[DBG302]]) +; DEBUG-NEXT: #dbg_value(ptr undef, [[META298:![0-9]+]], !DIExpression(), [[META303:![0-9]+]]) ; DEBUG-NEXT: [[A_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x i32> [[X]], i32 1, !dbg [[DBG304:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[A_SROA_0_4_VEC_EXTRACT]], metadata [[META299:![0-9]+]], metadata !DIExpression()), !dbg [[DBG304]] +; DEBUG-NEXT: #dbg_value(i32 [[A_SROA_0_4_VEC_EXTRACT]], [[META299:![0-9]+]], !DIExpression(), [[DBG304]]) ; DEBUG-NEXT: [[TMP4:%.*]] = add i32 [[A_SROA_0_0_VEC_EXTRACT]], [[A_SROA_0_4_VEC_EXTRACT]], !dbg [[DBG305:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[TMP4]], metadata [[META300:![0-9]+]], metadata !DIExpression()), !dbg [[DBG305]] +; DEBUG-NEXT: #dbg_value(i32 [[TMP4]], [[META300:![0-9]+]], !DIExpression(), [[DBG305]]) ; DEBUG-NEXT: ret i32 [[TMP4]], !dbg [[DBG306:![0-9]+]] ; entry: @@ -786,11 +786,11 @@ define <2 x i32> @test9(i32 %x, i32 %y) { ; ; DEBUG-LABEL: @test9( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: tail call void 
@llvm.dbg.value(metadata ptr undef, metadata [[META309:![0-9]+]], metadata !DIExpression()), !dbg [[DBG312:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META309:![0-9]+]], !DIExpression(), [[META312:![0-9]+]]) ; DEBUG-NEXT: [[A_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i32> undef, i32 [[X:%.*]], i32 0, !dbg [[DBG313:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META310:![0-9]+]], metadata !DIExpression()), !dbg [[DBG314:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META310:![0-9]+]], !DIExpression(), [[META314:![0-9]+]]) ; DEBUG-NEXT: [[A_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x i32> [[A_SROA_0_0_VEC_INSERT]], i32 [[Y:%.*]], i32 1, !dbg [[DBG315:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <2 x i32> [[A_SROA_0_4_VEC_INSERT]], metadata [[META311:![0-9]+]], metadata !DIExpression()), !dbg [[DBG316:![0-9]+]] +; DEBUG-NEXT: #dbg_value(<2 x i32> [[A_SROA_0_4_VEC_INSERT]], [[META311:![0-9]+]], !DIExpression(), [[META316:![0-9]+]]) ; DEBUG-NEXT: ret <2 x i32> [[A_SROA_0_4_VEC_INSERT]], !dbg [[DBG317:![0-9]+]] ; entry: @@ -817,11 +817,11 @@ define <2 x i32> @test10(<4 x i16> %x, i32 %y) { ; ; DEBUG-LABEL: @test10( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META320:![0-9]+]], metadata !DIExpression()), !dbg [[DBG323:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META320:![0-9]+]], !DIExpression(), [[META323:![0-9]+]]) ; DEBUG-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[X:%.*]] to <2 x i32>, !dbg [[DBG324:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META321:![0-9]+]], metadata !DIExpression()), !dbg [[DBG325:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META321:![0-9]+]], !DIExpression(), [[META325:![0-9]+]]) ; DEBUG-NEXT: [[A_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[Y:%.*]], i32 1, !dbg [[DBG326:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <2 x i32> 
[[A_SROA_0_4_VEC_INSERT]], metadata [[META322:![0-9]+]], metadata !DIExpression()), !dbg [[DBG327:![0-9]+]] +; DEBUG-NEXT: #dbg_value(<2 x i32> [[A_SROA_0_4_VEC_INSERT]], [[META322:![0-9]+]], !DIExpression(), [[META327:![0-9]+]]) ; DEBUG-NEXT: ret <2 x i32> [[A_SROA_0_4_VEC_INSERT]], !dbg [[DBG328:![0-9]+]] ; entry: @@ -850,12 +850,12 @@ define <2 x float> @test11(<4 x i16> %x, i32 %y) { ; ; DEBUG-LABEL: @test11( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META331:![0-9]+]], metadata !DIExpression()), !dbg [[DBG334:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META331:![0-9]+]], !DIExpression(), [[META334:![0-9]+]]) ; DEBUG-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[X:%.*]] to <2 x i32>, !dbg [[DBG335:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META332:![0-9]+]], metadata !DIExpression()), !dbg [[DBG336:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META332:![0-9]+]], !DIExpression(), [[META336:![0-9]+]]) ; DEBUG-NEXT: [[A_SROA_0_4_VEC_INSERT:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[Y:%.*]], i32 1, !dbg [[DBG337:![0-9]+]] ; DEBUG-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[A_SROA_0_4_VEC_INSERT]] to <2 x float>, !dbg [[DBG338:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <2 x float> [[TMP1]], metadata [[META333:![0-9]+]], metadata !DIExpression()), !dbg [[DBG338]] +; DEBUG-NEXT: #dbg_value(<2 x float> [[TMP1]], [[META333:![0-9]+]], !DIExpression(), [[DBG338]]) ; DEBUG-NEXT: ret <2 x float> [[TMP1]], !dbg [[DBG339:![0-9]+]] ; entry: @@ -876,9 +876,9 @@ define <4 x float> @test12(<4 x i32> %val) { ; CHECK-NEXT: ret <4 x float> [[TMP1]] ; ; DEBUG-LABEL: @test12( -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META342:![0-9]+]], metadata !DIExpression()), !dbg [[DBG344:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META342:![0-9]+]], !DIExpression(), [[META344:![0-9]+]]) ; DEBUG-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> 
[[VAL:%.*]] to <4 x float>, !dbg [[DBG345:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <4 x float> [[TMP1]], metadata [[META343:![0-9]+]], metadata !DIExpression()), !dbg [[DBG345]] +; DEBUG-NEXT: #dbg_value(<4 x float> [[TMP1]], [[META343:![0-9]+]], !DIExpression(), [[DBG345]]) ; DEBUG-NEXT: ret <4 x float> [[TMP1]], !dbg [[DBG346:![0-9]+]] ; %a = alloca <3 x i32>, align 16 @@ -904,16 +904,16 @@ define <2 x i64> @test13(i32 %a, i32 %b, i32 %c, i32 %d) { ; ; DEBUG-LABEL: @test13( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META349:![0-9]+]], metadata !DIExpression()), !dbg [[DBG354:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META349:![0-9]+]], !DIExpression(), [[META354:![0-9]+]]) ; DEBUG-NEXT: [[X_SROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x i32> undef, i32 [[A:%.*]], i32 0, !dbg [[DBG355:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META350:![0-9]+]], metadata !DIExpression()), !dbg [[DBG356:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META350:![0-9]+]], !DIExpression(), [[META356:![0-9]+]]) ; DEBUG-NEXT: [[X_SROA_0_4_VEC_INSERT:%.*]] = insertelement <4 x i32> [[X_SROA_0_0_VEC_INSERT]], i32 [[B:%.*]], i32 1, !dbg [[DBG357:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META351:![0-9]+]], metadata !DIExpression()), !dbg [[DBG358:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META351:![0-9]+]], !DIExpression(), [[META358:![0-9]+]]) ; DEBUG-NEXT: [[X_SROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x i32> [[X_SROA_0_4_VEC_INSERT]], i32 [[C:%.*]], i32 2, !dbg [[DBG359:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META352:![0-9]+]], metadata !DIExpression()), !dbg [[DBG360:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META352:![0-9]+]], !DIExpression(), [[META360:![0-9]+]]) ; DEBUG-NEXT: [[X_SROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x i32> 
[[X_SROA_0_8_VEC_INSERT]], i32 [[D:%.*]], i32 3, !dbg [[DBG361:![0-9]+]] ; DEBUG-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[X_SROA_0_12_VEC_INSERT]] to <2 x i64>, !dbg [[DBG362:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <2 x i64> [[TMP0]], metadata [[META353:![0-9]+]], metadata !DIExpression()), !dbg [[DBG362]] +; DEBUG-NEXT: #dbg_value(<2 x i64> [[TMP0]], [[META353:![0-9]+]], !DIExpression(), [[DBG362]]) ; DEBUG-NEXT: ret <2 x i64> [[TMP0]], !dbg [[DBG363:![0-9]+]] ; entry: @@ -946,26 +946,26 @@ define i32 @test14(<2 x i64> %x) { ; ; DEBUG-LABEL: @test14( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META366:![0-9]+]], metadata !DIExpression()), !dbg [[DBG378:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META366:![0-9]+]], !DIExpression(), [[META378:![0-9]+]]) ; DEBUG-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[X:%.*]] to <4 x i32>, !dbg [[DBG379:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META367:![0-9]+]], metadata !DIExpression()), !dbg [[DBG380:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META367:![0-9]+]], !DIExpression(), [[META380:![0-9]+]]) ; DEBUG-NEXT: [[X_ADDR_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 0, !dbg [[DBG381:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[X_ADDR_SROA_0_0_VEC_EXTRACT]], metadata [[META368:![0-9]+]], metadata !DIExpression()), !dbg [[DBG381]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META369:![0-9]+]], metadata !DIExpression()), !dbg [[DBG382:![0-9]+]] +; DEBUG-NEXT: #dbg_value(i32 [[X_ADDR_SROA_0_0_VEC_EXTRACT]], [[META368:![0-9]+]], !DIExpression(), [[DBG381]]) +; DEBUG-NEXT: #dbg_value(ptr undef, [[META369:![0-9]+]], !DIExpression(), [[META382:![0-9]+]]) ; DEBUG-NEXT: [[X_ADDR_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 1, !dbg [[DBG383:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata 
i32 [[X_ADDR_SROA_0_4_VEC_EXTRACT]], metadata [[META370:![0-9]+]], metadata !DIExpression()), !dbg [[DBG383]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META371:![0-9]+]], metadata !DIExpression()), !dbg [[DBG384:![0-9]+]] +; DEBUG-NEXT: #dbg_value(i32 [[X_ADDR_SROA_0_4_VEC_EXTRACT]], [[META370:![0-9]+]], !DIExpression(), [[DBG383]]) +; DEBUG-NEXT: #dbg_value(ptr undef, [[META371:![0-9]+]], !DIExpression(), [[META384:![0-9]+]]) ; DEBUG-NEXT: [[X_ADDR_SROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 2, !dbg [[DBG385:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[X_ADDR_SROA_0_8_VEC_EXTRACT]], metadata [[META372:![0-9]+]], metadata !DIExpression()), !dbg [[DBG385]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META373:![0-9]+]], metadata !DIExpression()), !dbg [[DBG386:![0-9]+]] +; DEBUG-NEXT: #dbg_value(i32 [[X_ADDR_SROA_0_8_VEC_EXTRACT]], [[META372:![0-9]+]], !DIExpression(), [[DBG385]]) +; DEBUG-NEXT: #dbg_value(ptr undef, [[META373:![0-9]+]], !DIExpression(), [[META386:![0-9]+]]) ; DEBUG-NEXT: [[X_ADDR_SROA_0_12_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3, !dbg [[DBG387:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[X_ADDR_SROA_0_12_VEC_EXTRACT]], metadata [[META374:![0-9]+]], metadata !DIExpression()), !dbg [[DBG387]] +; DEBUG-NEXT: #dbg_value(i32 [[X_ADDR_SROA_0_12_VEC_EXTRACT]], [[META374:![0-9]+]], !DIExpression(), [[DBG387]]) ; DEBUG-NEXT: [[ADD:%.*]] = add i32 [[X_ADDR_SROA_0_0_VEC_EXTRACT]], [[X_ADDR_SROA_0_4_VEC_EXTRACT]], !dbg [[DBG388:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[ADD]], metadata [[META375:![0-9]+]], metadata !DIExpression()), !dbg [[DBG388]] +; DEBUG-NEXT: #dbg_value(i32 [[ADD]], [[META375:![0-9]+]], !DIExpression(), [[DBG388]]) ; DEBUG-NEXT: [[ADD1:%.*]] = add i32 [[X_ADDR_SROA_0_8_VEC_EXTRACT]], [[X_ADDR_SROA_0_12_VEC_EXTRACT]], !dbg [[DBG389:![0-9]+]] -; 
DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[ADD1]], metadata [[META376:![0-9]+]], metadata !DIExpression()), !dbg [[DBG389]] +; DEBUG-NEXT: #dbg_value(i32 [[ADD1]], [[META376:![0-9]+]], !DIExpression(), [[DBG389]]) ; DEBUG-NEXT: [[ADD2:%.*]] = add i32 [[ADD]], [[ADD1]], !dbg [[DBG390:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i32 [[ADD2]], metadata [[META377:![0-9]+]], metadata !DIExpression()), !dbg [[DBG390]] +; DEBUG-NEXT: #dbg_value(i32 [[ADD2]], [[META377:![0-9]+]], !DIExpression(), [[DBG390]]) ; DEBUG-NEXT: ret i32 [[ADD2]], !dbg [[DBG391:![0-9]+]] ; entry: @@ -990,31 +990,31 @@ define <4 x ptr> @test15(i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[X_SROA_0:%.*]] = alloca <4 x ptr>, align 32 ; CHECK-NEXT: store i32 [[A:%.*]], ptr [[X_SROA_0]], align 32 -; CHECK-NEXT: [[X_SROA_0_4_X_TMP2_SROA_IDX1:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 4 -; CHECK-NEXT: store i32 [[B:%.*]], ptr [[X_SROA_0_4_X_TMP2_SROA_IDX1]], align 4 -; CHECK-NEXT: [[X_SROA_0_8_X_TMP3_SROA_IDX2:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 8 -; CHECK-NEXT: store i32 [[C:%.*]], ptr [[X_SROA_0_8_X_TMP3_SROA_IDX2]], align 8 -; CHECK-NEXT: [[X_SROA_0_12_X_TMP4_SROA_IDX3:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 12 -; CHECK-NEXT: store i32 [[D:%.*]], ptr [[X_SROA_0_12_X_TMP4_SROA_IDX3]], align 4 +; CHECK-NEXT: [[X_SROA_0_4_SROA_IDX1:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 4 +; CHECK-NEXT: store i32 [[B:%.*]], ptr [[X_SROA_0_4_SROA_IDX1]], align 4 +; CHECK-NEXT: [[X_SROA_0_8_SROA_IDX2:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 8 +; CHECK-NEXT: store i32 [[C:%.*]], ptr [[X_SROA_0_8_SROA_IDX2]], align 8 +; CHECK-NEXT: [[X_SROA_0_12_SROA_IDX3:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 12 +; CHECK-NEXT: store i32 [[D:%.*]], ptr [[X_SROA_0_12_SROA_IDX3]], align 4 ; CHECK-NEXT: [[X_SROA_0_0_X_SROA_0_0_RESULT:%.*]] = load <4 x ptr>, ptr [[X_SROA_0]], align 32 
; CHECK-NEXT: ret <4 x ptr> [[X_SROA_0_0_X_SROA_0_0_RESULT]] ; ; DEBUG-LABEL: @test15( ; DEBUG-NEXT: entry: ; DEBUG-NEXT: [[X_SROA_0:%.*]] = alloca <4 x ptr>, align 32, !dbg [[DBG400:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META394:![0-9]+]], metadata !DIExpression()), !dbg [[DBG400]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META394:![0-9]+]], !DIExpression(), [[DBG400]]) ; DEBUG-NEXT: store i32 [[A:%.*]], ptr [[X_SROA_0]], align 32, !dbg [[DBG401:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META395:![0-9]+]], metadata !DIExpression()), !dbg [[DBG402:![0-9]+]] -; DEBUG-NEXT: [[X_SROA_0_4_X_TMP2_SROA_IDX1:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 4, !dbg [[DBG403:![0-9]+]] -; DEBUG-NEXT: store i32 [[B:%.*]], ptr [[X_SROA_0_4_X_TMP2_SROA_IDX1]], align 4, !dbg [[DBG403]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META396:![0-9]+]], metadata !DIExpression()), !dbg [[DBG404:![0-9]+]] -; DEBUG-NEXT: [[X_SROA_0_8_X_TMP3_SROA_IDX2:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 8, !dbg [[DBG405:![0-9]+]] -; DEBUG-NEXT: store i32 [[C:%.*]], ptr [[X_SROA_0_8_X_TMP3_SROA_IDX2]], align 8, !dbg [[DBG405]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META397:![0-9]+]], metadata !DIExpression()), !dbg [[DBG406:![0-9]+]] -; DEBUG-NEXT: [[X_SROA_0_12_X_TMP4_SROA_IDX3:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 12, !dbg [[DBG407:![0-9]+]] -; DEBUG-NEXT: store i32 [[D:%.*]], ptr [[X_SROA_0_12_X_TMP4_SROA_IDX3]], align 4, !dbg [[DBG407]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META395:![0-9]+]], !DIExpression(), [[META402:![0-9]+]]) +; DEBUG-NEXT: [[X_SROA_0_4_SROA_IDX1:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 4, !dbg [[DBG403:![0-9]+]] +; DEBUG-NEXT: store i32 [[B:%.*]], ptr [[X_SROA_0_4_SROA_IDX1]], align 4, !dbg [[DBG403]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META396:![0-9]+]], 
!DIExpression(), [[META404:![0-9]+]]) +; DEBUG-NEXT: [[X_SROA_0_8_SROA_IDX2:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 8, !dbg [[DBG405:![0-9]+]] +; DEBUG-NEXT: store i32 [[C:%.*]], ptr [[X_SROA_0_8_SROA_IDX2]], align 8, !dbg [[DBG405]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META397:![0-9]+]], !DIExpression(), [[META406:![0-9]+]]) +; DEBUG-NEXT: [[X_SROA_0_12_SROA_IDX3:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 12, !dbg [[DBG407:![0-9]+]] +; DEBUG-NEXT: store i32 [[D:%.*]], ptr [[X_SROA_0_12_SROA_IDX3]], align 4, !dbg [[DBG407]] ; DEBUG-NEXT: [[X_SROA_0_0_X_SROA_0_0_RESULT:%.*]] = load <4 x ptr>, ptr [[X_SROA_0]], align 32, !dbg [[DBG408:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <4 x ptr> [[X_SROA_0_0_X_SROA_0_0_RESULT]], metadata [[META398:![0-9]+]], metadata !DIExpression()), !dbg [[DBG408]] +; DEBUG-NEXT: #dbg_value(<4 x ptr> [[X_SROA_0_0_X_SROA_0_0_RESULT]], [[META398:![0-9]+]], !DIExpression(), [[DBG408]]) ; DEBUG-NEXT: ret <4 x ptr> [[X_SROA_0_0_X_SROA_0_0_RESULT]], !dbg [[DBG409:![0-9]+]] ; entry: @@ -1045,19 +1045,19 @@ define <4 x ptr> @test16(i64 %a, i64 %b, i64 %c, i64 %d) { ; ; DEBUG-LABEL: @test16( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META412:![0-9]+]], metadata !DIExpression()), !dbg [[DBG417:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META412:![0-9]+]], !DIExpression(), [[META417:![0-9]+]]) ; DEBUG-NEXT: [[TMP0:%.*]] = inttoptr i64 [[A:%.*]] to ptr, !dbg [[DBG418:![0-9]+]] ; DEBUG-NEXT: [[X_SROA_0_0_VEC_INSERT:%.*]] = insertelement <4 x ptr> undef, ptr [[TMP0]], i32 0, !dbg [[DBG418]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META413:![0-9]+]], metadata !DIExpression()), !dbg [[DBG419:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META413:![0-9]+]], !DIExpression(), [[META419:![0-9]+]]) ; DEBUG-NEXT: [[TMP1:%.*]] = inttoptr i64 [[B:%.*]] to ptr, !dbg [[DBG420:![0-9]+]] ; DEBUG-NEXT: 
[[X_SROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x ptr> [[X_SROA_0_0_VEC_INSERT]], ptr [[TMP1]], i32 1, !dbg [[DBG420]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META414:![0-9]+]], metadata !DIExpression()), !dbg [[DBG421:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META414:![0-9]+]], !DIExpression(), [[META421:![0-9]+]]) ; DEBUG-NEXT: [[TMP2:%.*]] = inttoptr i64 [[C:%.*]] to ptr, !dbg [[DBG422:![0-9]+]] ; DEBUG-NEXT: [[X_SROA_0_16_VEC_INSERT:%.*]] = insertelement <4 x ptr> [[X_SROA_0_8_VEC_INSERT]], ptr [[TMP2]], i32 2, !dbg [[DBG422]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META415:![0-9]+]], metadata !DIExpression()), !dbg [[DBG423:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META415:![0-9]+]], !DIExpression(), [[META423:![0-9]+]]) ; DEBUG-NEXT: [[TMP3:%.*]] = inttoptr i64 [[D:%.*]] to ptr, !dbg [[DBG424:![0-9]+]] ; DEBUG-NEXT: [[X_SROA_0_24_VEC_INSERT:%.*]] = insertelement <4 x ptr> [[X_SROA_0_16_VEC_INSERT]], ptr [[TMP3]], i32 3, !dbg [[DBG424]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <4 x ptr> [[X_SROA_0_24_VEC_INSERT]], metadata [[META416:![0-9]+]], metadata !DIExpression()), !dbg [[DBG425:![0-9]+]] +; DEBUG-NEXT: #dbg_value(<4 x ptr> [[X_SROA_0_24_VEC_INSERT]], [[META416:![0-9]+]], !DIExpression(), [[META425:![0-9]+]]) ; DEBUG-NEXT: ret <4 x ptr> [[X_SROA_0_24_VEC_INSERT]], !dbg [[DBG426:![0-9]+]] ; entry: @@ -1078,31 +1078,31 @@ define <4 x ptr> @test17(i32 %a, i32 %b, i64 %c, i64 %d) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[X_SROA_0:%.*]] = alloca <4 x ptr>, align 32 ; CHECK-NEXT: store i32 [[A:%.*]], ptr [[X_SROA_0]], align 32 -; CHECK-NEXT: [[X_SROA_0_4_X_TMP2_SROA_IDX1:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 4 -; CHECK-NEXT: store i32 [[B:%.*]], ptr [[X_SROA_0_4_X_TMP2_SROA_IDX1]], align 4 -; CHECK-NEXT: [[X_SROA_0_16_X_TMP3_SROA_IDX2:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 16 -; CHECK-NEXT: store i64 [[C:%.*]], ptr 
[[X_SROA_0_16_X_TMP3_SROA_IDX2]], align 16 -; CHECK-NEXT: [[X_SROA_0_24_X_TMP4_SROA_IDX3:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 24 -; CHECK-NEXT: store i64 [[D:%.*]], ptr [[X_SROA_0_24_X_TMP4_SROA_IDX3]], align 8 +; CHECK-NEXT: [[X_SROA_0_4_SROA_IDX1:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 4 +; CHECK-NEXT: store i32 [[B:%.*]], ptr [[X_SROA_0_4_SROA_IDX1]], align 4 +; CHECK-NEXT: [[X_SROA_0_16_SROA_IDX2:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 16 +; CHECK-NEXT: store i64 [[C:%.*]], ptr [[X_SROA_0_16_SROA_IDX2]], align 16 +; CHECK-NEXT: [[X_SROA_0_24_SROA_IDX3:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 24 +; CHECK-NEXT: store i64 [[D:%.*]], ptr [[X_SROA_0_24_SROA_IDX3]], align 8 ; CHECK-NEXT: [[X_SROA_0_0_X_SROA_0_0_RESULT:%.*]] = load <4 x ptr>, ptr [[X_SROA_0]], align 32 ; CHECK-NEXT: ret <4 x ptr> [[X_SROA_0_0_X_SROA_0_0_RESULT]] ; ; DEBUG-LABEL: @test17( ; DEBUG-NEXT: entry: ; DEBUG-NEXT: [[X_SROA_0:%.*]] = alloca <4 x ptr>, align 32, !dbg [[DBG434:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META429:![0-9]+]], metadata !DIExpression()), !dbg [[DBG434]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META429:![0-9]+]], !DIExpression(), [[DBG434]]) ; DEBUG-NEXT: store i32 [[A:%.*]], ptr [[X_SROA_0]], align 32, !dbg [[DBG435:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META430:![0-9]+]], metadata !DIExpression()), !dbg [[DBG436:![0-9]+]] -; DEBUG-NEXT: [[X_SROA_0_4_X_TMP2_SROA_IDX1:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 4, !dbg [[DBG437:![0-9]+]] -; DEBUG-NEXT: store i32 [[B:%.*]], ptr [[X_SROA_0_4_X_TMP2_SROA_IDX1]], align 4, !dbg [[DBG437]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META431:![0-9]+]], metadata !DIExpression()), !dbg [[DBG438:![0-9]+]] -; DEBUG-NEXT: [[X_SROA_0_16_X_TMP3_SROA_IDX2:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 16, !dbg 
[[DBG439:![0-9]+]] -; DEBUG-NEXT: store i64 [[C:%.*]], ptr [[X_SROA_0_16_X_TMP3_SROA_IDX2]], align 16, !dbg [[DBG439]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META432:![0-9]+]], metadata !DIExpression()), !dbg [[DBG440:![0-9]+]] -; DEBUG-NEXT: [[X_SROA_0_24_X_TMP4_SROA_IDX3:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 24, !dbg [[DBG441:![0-9]+]] -; DEBUG-NEXT: store i64 [[D:%.*]], ptr [[X_SROA_0_24_X_TMP4_SROA_IDX3]], align 8, !dbg [[DBG441]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META430:![0-9]+]], !DIExpression(), [[META436:![0-9]+]]) +; DEBUG-NEXT: [[X_SROA_0_4_SROA_IDX1:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 4, !dbg [[DBG437:![0-9]+]] +; DEBUG-NEXT: store i32 [[B:%.*]], ptr [[X_SROA_0_4_SROA_IDX1]], align 4, !dbg [[DBG437]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META431:![0-9]+]], !DIExpression(), [[META438:![0-9]+]]) +; DEBUG-NEXT: [[X_SROA_0_16_SROA_IDX2:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 16, !dbg [[DBG439:![0-9]+]] +; DEBUG-NEXT: store i64 [[C:%.*]], ptr [[X_SROA_0_16_SROA_IDX2]], align 16, !dbg [[DBG439]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META432:![0-9]+]], !DIExpression(), [[META440:![0-9]+]]) +; DEBUG-NEXT: [[X_SROA_0_24_SROA_IDX3:%.*]] = getelementptr inbounds i8, ptr [[X_SROA_0]], i64 24, !dbg [[DBG441:![0-9]+]] +; DEBUG-NEXT: store i64 [[D:%.*]], ptr [[X_SROA_0_24_SROA_IDX3]], align 8, !dbg [[DBG441]] ; DEBUG-NEXT: [[X_SROA_0_0_X_SROA_0_0_RESULT:%.*]] = load <4 x ptr>, ptr [[X_SROA_0]], align 32, !dbg [[DBG442:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <4 x ptr> [[X_SROA_0_0_X_SROA_0_0_RESULT]], metadata [[META433:![0-9]+]], metadata !DIExpression()), !dbg [[DBG442]] +; DEBUG-NEXT: #dbg_value(<4 x ptr> [[X_SROA_0_0_X_SROA_0_0_RESULT]], [[META433:![0-9]+]], !DIExpression(), [[DBG442]]) ; DEBUG-NEXT: ret <4 x ptr> [[X_SROA_0_0_X_SROA_0_0_RESULT]], !dbg [[DBG443:![0-9]+]] ; entry: @@ -1129,10 +1129,10 @@ define i1 @test18() { ; ; 
DEBUG-LABEL: @test18( ; DEBUG-NEXT: [[A_SROA_0:%.*]] = alloca <2 x i64>, align 32, !dbg [[DBG449:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META446:![0-9]+]], metadata !DIExpression()), !dbg [[DBG449]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META446:![0-9]+]], !DIExpression(), [[DBG449]]) ; DEBUG-NEXT: store <2 x i64> , ptr [[A_SROA_0]], align 32, !dbg [[DBG450:![0-9]+]] ; DEBUG-NEXT: [[A_SROA_0_0_A_SROA_0_0_L:%.*]] = load i1, ptr [[A_SROA_0]], align 32, !dbg [[DBG451:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata i1 [[A_SROA_0_0_A_SROA_0_0_L]], metadata [[META447:![0-9]+]], metadata !DIExpression()), !dbg [[DBG451]] +; DEBUG-NEXT: #dbg_value(i1 [[A_SROA_0_0_A_SROA_0_0_L]], [[META447:![0-9]+]], !DIExpression(), [[DBG451]]) ; DEBUG-NEXT: ret i1 [[A_SROA_0_0_A_SROA_0_0_L]], !dbg [[DBG452:![0-9]+]] ; %a = alloca <8 x i32> @@ -1149,7 +1149,7 @@ define void @swap-8bytes(ptr %x, ptr %y) { ; CHECK-NEXT: ret void ; ; DEBUG-LABEL: @swap-8bytes( -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META455:![0-9]+]], metadata !DIExpression()), !dbg [[DBG456:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META455:![0-9]+]], !DIExpression(), [[META456:![0-9]+]]) ; DEBUG-NEXT: [[TMP_SROA_0_0_COPYLOAD:%.*]] = load i64, ptr [[X:%.*]], align 1, !dbg [[DBG457:![0-9]+]] ; DEBUG-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[X]], ptr [[Y:%.*]], i64 8, i1 false), !dbg [[DBG458:![0-9]+]] ; DEBUG-NEXT: store i64 [[TMP_SROA_0_0_COPYLOAD]], ptr [[Y]], align 1, !dbg [[DBG459:![0-9]+]] @@ -1172,7 +1172,7 @@ define void @swap-7bytes(ptr %x, ptr %y) { ; ; DEBUG-LABEL: @swap-7bytes( ; DEBUG-NEXT: [[TMP:%.*]] = alloca [7 x i8], align 1, !dbg [[DBG464:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr [[TMP]], metadata [[META463:![0-9]+]], metadata !DIExpression()), !dbg [[DBG464]] +; DEBUG-NEXT: #dbg_value(ptr [[TMP]], [[META463:![0-9]+]], !DIExpression(), [[DBG464]]) ; DEBUG-NEXT: call 
void @llvm.memcpy.p0.p0.i64(ptr [[TMP]], ptr [[X:%.*]], i64 7, i1 false), !dbg [[DBG465:![0-9]+]] ; DEBUG-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[X]], ptr [[Y:%.*]], i64 7, i1 false), !dbg [[DBG466:![0-9]+]] ; DEBUG-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[Y]], ptr [[TMP]], i64 7, i1 false), !dbg [[DBG467:![0-9]+]] @@ -1195,7 +1195,7 @@ define void @swap-16bytes(ptr %x, ptr %y) { ; ; DEBUG-LABEL: @swap-16bytes( ; DEBUG-NEXT: [[TMP:%.*]] = alloca [2 x i64], align 8, !dbg [[DBG472:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr [[TMP]], metadata [[META471:![0-9]+]], metadata !DIExpression()), !dbg [[DBG472]] +; DEBUG-NEXT: #dbg_value(ptr [[TMP]], [[META471:![0-9]+]], !DIExpression(), [[DBG472]]) ; DEBUG-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[TMP]], ptr [[X:%.*]], i64 16, i1 false), !dbg [[DBG473:![0-9]+]] ; DEBUG-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[X]], ptr [[Y:%.*]], i64 16, i1 false), !dbg [[DBG474:![0-9]+]] ; DEBUG-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[Y]], ptr [[TMP]], i64 16, i1 false), !dbg [[DBG475:![0-9]+]] @@ -1218,7 +1218,7 @@ define void @swap-15bytes(ptr %x, ptr %y) { ; ; DEBUG-LABEL: @swap-15bytes( ; DEBUG-NEXT: [[TMP:%.*]] = alloca [15 x i8], align 1, !dbg [[DBG480:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr [[TMP]], metadata [[META479:![0-9]+]], metadata !DIExpression()), !dbg [[DBG480]] +; DEBUG-NEXT: #dbg_value(ptr [[TMP]], [[META479:![0-9]+]], !DIExpression(), [[DBG480]]) ; DEBUG-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[TMP]], ptr [[X:%.*]], i64 15, i1 false), !dbg [[DBG481:![0-9]+]] ; DEBUG-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[X]], ptr [[Y:%.*]], i64 15, i1 false), !dbg [[DBG482:![0-9]+]] ; DEBUG-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[Y]], ptr [[TMP]], i64 15, i1 false), !dbg [[DBG483:![0-9]+]] @@ -1245,17 +1245,17 @@ define <4 x i32> @ptrLoadStoreTys(ptr %init, i32 %val2) { ; ; DEBUG-LABEL: @ptrLoadStoreTys( ; DEBUG-NEXT: [[VAL0:%.*]] = 
load ptr, ptr [[INIT:%.*]], align 8, !dbg [[DBG492:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr [[VAL0]], metadata [[META487:![0-9]+]], metadata !DIExpression()), !dbg [[DBG492]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META488:![0-9]+]], metadata !DIExpression()), !dbg [[DBG493:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr [[VAL0]], [[META487:![0-9]+]], !DIExpression(), [[DBG492]]) +; DEBUG-NEXT: #dbg_value(ptr undef, [[META488:![0-9]+]], !DIExpression(), [[META493:![0-9]+]]) ; DEBUG-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[VAL0]] to i64, !dbg [[DBG494:![0-9]+]] ; DEBUG-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to <2 x i32>, !dbg [[DBG494]] ; DEBUG-NEXT: [[OBJ_0_VEC_EXPAND:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> , !dbg [[DBG494]] ; DEBUG-NEXT: [[OBJ_0_VECBLEND:%.*]] = select <4 x i1> , <4 x i32> [[OBJ_0_VEC_EXPAND]], <4 x i32> zeroinitializer, !dbg [[DBG494]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META489:![0-9]+]], metadata !DIExpression()), !dbg [[DBG495:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META489:![0-9]+]], !DIExpression(), [[META495:![0-9]+]]) ; DEBUG-NEXT: [[OBJ_8_VEC_INSERT:%.*]] = insertelement <4 x i32> [[OBJ_0_VECBLEND]], i32 [[VAL2:%.*]], i32 2, !dbg [[DBG496:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META490:![0-9]+]], metadata !DIExpression()), !dbg [[DBG497:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META490:![0-9]+]], !DIExpression(), [[META497:![0-9]+]]) ; DEBUG-NEXT: [[OBJ_12_VEC_INSERT:%.*]] = insertelement <4 x i32> [[OBJ_8_VEC_INSERT]], i32 131072, i32 3, !dbg [[DBG498:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <4 x i32> [[OBJ_12_VEC_INSERT]], metadata [[META491:![0-9]+]], metadata !DIExpression()), !dbg [[DBG499:![0-9]+]] +; DEBUG-NEXT: #dbg_value(<4 x i32> [[OBJ_12_VEC_INSERT]], [[META491:![0-9]+]], !DIExpression(), [[META499:![0-9]+]]) ; 
DEBUG-NEXT: ret <4 x i32> [[OBJ_12_VEC_INSERT]], !dbg [[DBG500:![0-9]+]] ; %val0 = load ptr, ptr %init, align 8 @@ -1276,28 +1276,28 @@ define <4 x float> @ptrLoadStoreTysFloat(ptr %init, float %val2) { ; CHECK-NEXT: [[OBJ:%.*]] = alloca <4 x float>, align 16 ; CHECK-NEXT: store <4 x float> zeroinitializer, ptr [[OBJ]], align 16 ; CHECK-NEXT: store ptr [[VAL0]], ptr [[OBJ]], align 16 -; CHECK-NEXT: [[OBJ_8_PTR2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 8 -; CHECK-NEXT: store float [[VAL2:%.*]], ptr [[OBJ_8_PTR2_SROA_IDX]], align 8 -; CHECK-NEXT: [[OBJ_12_PTR3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 12 -; CHECK-NEXT: store float 1.310720e+05, ptr [[OBJ_12_PTR3_SROA_IDX]], align 4 +; CHECK-NEXT: [[OBJ_8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 8 +; CHECK-NEXT: store float [[VAL2:%.*]], ptr [[OBJ_8_SROA_IDX]], align 8 +; CHECK-NEXT: [[OBJ_12_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 12 +; CHECK-NEXT: store float 1.310720e+05, ptr [[OBJ_12_SROA_IDX]], align 4 ; CHECK-NEXT: [[OBJ_0_SROAVAL:%.*]] = load <4 x float>, ptr [[OBJ]], align 16 ; CHECK-NEXT: ret <4 x float> [[OBJ_0_SROAVAL]] ; ; DEBUG-LABEL: @ptrLoadStoreTysFloat( ; DEBUG-NEXT: [[VAL0:%.*]] = load ptr, ptr [[INIT:%.*]], align 8, !dbg [[DBG508:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr [[VAL0]], metadata [[META503:![0-9]+]], metadata !DIExpression()), !dbg [[DBG508]] +; DEBUG-NEXT: #dbg_value(ptr [[VAL0]], [[META503:![0-9]+]], !DIExpression(), [[DBG508]]) ; DEBUG-NEXT: [[OBJ:%.*]] = alloca <4 x float>, align 16, !dbg [[DBG509:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr [[OBJ]], metadata [[META504:![0-9]+]], metadata !DIExpression()), !dbg [[DBG509]] +; DEBUG-NEXT: #dbg_value(ptr [[OBJ]], [[META504:![0-9]+]], !DIExpression(), [[DBG509]]) ; DEBUG-NEXT: store <4 x float> zeroinitializer, ptr [[OBJ]], align 16, !dbg [[DBG510:![0-9]+]] ; DEBUG-NEXT: store ptr [[VAL0]], ptr [[OBJ]], align 
16, !dbg [[DBG511:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META505:![0-9]+]], metadata !DIExpression()), !dbg [[DBG512:![0-9]+]] -; DEBUG-NEXT: [[OBJ_8_PTR2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 8, !dbg [[DBG513:![0-9]+]] -; DEBUG-NEXT: store float [[VAL2:%.*]], ptr [[OBJ_8_PTR2_SROA_IDX]], align 8, !dbg [[DBG513]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META506:![0-9]+]], metadata !DIExpression()), !dbg [[DBG514:![0-9]+]] -; DEBUG-NEXT: [[OBJ_12_PTR3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 12, !dbg [[DBG515:![0-9]+]] -; DEBUG-NEXT: store float 1.310720e+05, ptr [[OBJ_12_PTR3_SROA_IDX]], align 4, !dbg [[DBG515]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META505:![0-9]+]], !DIExpression(), [[META512:![0-9]+]]) +; DEBUG-NEXT: [[OBJ_8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 8, !dbg [[DBG513:![0-9]+]] +; DEBUG-NEXT: store float [[VAL2:%.*]], ptr [[OBJ_8_SROA_IDX]], align 8, !dbg [[DBG513]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META506:![0-9]+]], !DIExpression(), [[META514:![0-9]+]]) +; DEBUG-NEXT: [[OBJ_12_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 12, !dbg [[DBG515:![0-9]+]] +; DEBUG-NEXT: store float 1.310720e+05, ptr [[OBJ_12_SROA_IDX]], align 4, !dbg [[DBG515]] ; DEBUG-NEXT: [[OBJ_0_SROAVAL:%.*]] = load <4 x float>, ptr [[OBJ]], align 16, !dbg [[DBG516:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <4 x float> [[OBJ_0_SROAVAL]], metadata [[META507:![0-9]+]], metadata !DIExpression()), !dbg [[DBG516]] +; DEBUG-NEXT: #dbg_value(<4 x float> [[OBJ_0_SROAVAL]], [[META507:![0-9]+]], !DIExpression(), [[DBG516]]) ; DEBUG-NEXT: ret <4 x float> [[OBJ_0_SROAVAL]], !dbg [[DBG517:![0-9]+]] ; %val0 = load ptr, ptr %init, align 8 @@ -1325,17 +1325,17 @@ define <4 x i32> @ptrLoadStoreTysAS3(ptr %init, i32 %val2) { ; ; DEBUG-LABEL: @ptrLoadStoreTysAS3( ; DEBUG-NEXT: [[VAL0:%.*]] = load ptr addrspace(3), 
ptr [[INIT:%.*]], align 8, !dbg [[DBG525:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr addrspace(3) [[VAL0]], metadata [[META520:![0-9]+]], metadata !DIExpression()), !dbg [[DBG525]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META521:![0-9]+]], metadata !DIExpression()), !dbg [[DBG526:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr addrspace(3) [[VAL0]], [[META520:![0-9]+]], !DIExpression(), [[DBG525]]) +; DEBUG-NEXT: #dbg_value(ptr undef, [[META521:![0-9]+]], !DIExpression(), [[META526:![0-9]+]]) ; DEBUG-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[VAL0]] to i64, !dbg [[DBG527:![0-9]+]] ; DEBUG-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to <2 x i32>, !dbg [[DBG527]] ; DEBUG-NEXT: [[OBJ_0_VEC_EXPAND:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> , !dbg [[DBG527]] ; DEBUG-NEXT: [[OBJ_0_VECBLEND:%.*]] = select <4 x i1> , <4 x i32> [[OBJ_0_VEC_EXPAND]], <4 x i32> zeroinitializer, !dbg [[DBG527]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META522:![0-9]+]], metadata !DIExpression()), !dbg [[DBG528:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META522:![0-9]+]], !DIExpression(), [[META528:![0-9]+]]) ; DEBUG-NEXT: [[OBJ_8_VEC_INSERT:%.*]] = insertelement <4 x i32> [[OBJ_0_VECBLEND]], i32 [[VAL2:%.*]], i32 2, !dbg [[DBG529:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META523:![0-9]+]], metadata !DIExpression()), !dbg [[DBG530:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META523:![0-9]+]], !DIExpression(), [[META530:![0-9]+]]) ; DEBUG-NEXT: [[OBJ_12_VEC_INSERT:%.*]] = insertelement <4 x i32> [[OBJ_8_VEC_INSERT]], i32 131072, i32 3, !dbg [[DBG531:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <4 x i32> [[OBJ_12_VEC_INSERT]], metadata [[META524:![0-9]+]], metadata !DIExpression()), !dbg [[DBG532:![0-9]+]] +; DEBUG-NEXT: #dbg_value(<4 x i32> [[OBJ_12_VEC_INSERT]], [[META524:![0-9]+]], !DIExpression(), 
[[META532:![0-9]+]]) ; DEBUG-NEXT: ret <4 x i32> [[OBJ_12_VEC_INSERT]], !dbg [[DBG533:![0-9]+]] ; %val0 = load ptr addrspace(3), ptr %init, align 8 @@ -1356,28 +1356,28 @@ define <4 x ptr> @ptrLoadStoreTysPtr(ptr %init, i64 %val2) { ; CHECK-NEXT: [[OBJ:%.*]] = alloca <4 x ptr>, align 16 ; CHECK-NEXT: store <4 x ptr> zeroinitializer, ptr [[OBJ]], align 16 ; CHECK-NEXT: store ptr [[VAL0]], ptr [[OBJ]], align 16 -; CHECK-NEXT: [[OBJ_8_PTR2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 8 -; CHECK-NEXT: store i64 [[VAL2:%.*]], ptr [[OBJ_8_PTR2_SROA_IDX]], align 8 -; CHECK-NEXT: [[OBJ_12_PTR3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 12 -; CHECK-NEXT: store i64 131072, ptr [[OBJ_12_PTR3_SROA_IDX]], align 4 +; CHECK-NEXT: [[OBJ_8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 8 +; CHECK-NEXT: store i64 [[VAL2:%.*]], ptr [[OBJ_8_SROA_IDX]], align 8 +; CHECK-NEXT: [[OBJ_12_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 12 +; CHECK-NEXT: store i64 131072, ptr [[OBJ_12_SROA_IDX]], align 4 ; CHECK-NEXT: [[OBJ_0_SROAVAL:%.*]] = load <4 x ptr>, ptr [[OBJ]], align 16 ; CHECK-NEXT: ret <4 x ptr> [[OBJ_0_SROAVAL]] ; ; DEBUG-LABEL: @ptrLoadStoreTysPtr( ; DEBUG-NEXT: [[VAL0:%.*]] = load ptr, ptr [[INIT:%.*]], align 8, !dbg [[DBG541:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr [[VAL0]], metadata [[META536:![0-9]+]], metadata !DIExpression()), !dbg [[DBG541]] +; DEBUG-NEXT: #dbg_value(ptr [[VAL0]], [[META536:![0-9]+]], !DIExpression(), [[DBG541]]) ; DEBUG-NEXT: [[OBJ:%.*]] = alloca <4 x ptr>, align 16, !dbg [[DBG542:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr [[OBJ]], metadata [[META537:![0-9]+]], metadata !DIExpression()), !dbg [[DBG542]] +; DEBUG-NEXT: #dbg_value(ptr [[OBJ]], [[META537:![0-9]+]], !DIExpression(), [[DBG542]]) ; DEBUG-NEXT: store <4 x ptr> zeroinitializer, ptr [[OBJ]], align 16, !dbg [[DBG543:![0-9]+]] ; DEBUG-NEXT: store ptr [[VAL0]], ptr [[OBJ]], align 16, 
!dbg [[DBG544:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META538:![0-9]+]], metadata !DIExpression()), !dbg [[DBG545:![0-9]+]] -; DEBUG-NEXT: [[OBJ_8_PTR2_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 8, !dbg [[DBG546:![0-9]+]] -; DEBUG-NEXT: store i64 [[VAL2:%.*]], ptr [[OBJ_8_PTR2_SROA_IDX]], align 8, !dbg [[DBG546]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata ptr undef, metadata [[META539:![0-9]+]], metadata !DIExpression()), !dbg [[DBG547:![0-9]+]] -; DEBUG-NEXT: [[OBJ_12_PTR3_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 12, !dbg [[DBG548:![0-9]+]] -; DEBUG-NEXT: store i64 131072, ptr [[OBJ_12_PTR3_SROA_IDX]], align 4, !dbg [[DBG548]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META538:![0-9]+]], !DIExpression(), [[META545:![0-9]+]]) +; DEBUG-NEXT: [[OBJ_8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 8, !dbg [[DBG546:![0-9]+]] +; DEBUG-NEXT: store i64 [[VAL2:%.*]], ptr [[OBJ_8_SROA_IDX]], align 8, !dbg [[DBG546]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META539:![0-9]+]], !DIExpression(), [[META547:![0-9]+]]) +; DEBUG-NEXT: [[OBJ_12_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[OBJ]], i64 12, !dbg [[DBG548:![0-9]+]] +; DEBUG-NEXT: store i64 131072, ptr [[OBJ_12_SROA_IDX]], align 4, !dbg [[DBG548]] ; DEBUG-NEXT: [[OBJ_0_SROAVAL:%.*]] = load <4 x ptr>, ptr [[OBJ]], align 16, !dbg [[DBG549:![0-9]+]] -; DEBUG-NEXT: tail call void @llvm.dbg.value(metadata <4 x ptr> [[OBJ_0_SROAVAL]], metadata [[META540:![0-9]+]], metadata !DIExpression()), !dbg [[DBG549]] +; DEBUG-NEXT: #dbg_value(<4 x ptr> [[OBJ_0_SROAVAL]], [[META540:![0-9]+]], !DIExpression(), [[DBG549]]) ; DEBUG-NEXT: ret <4 x ptr> [[OBJ_0_SROAVAL]], !dbg [[DBG550:![0-9]+]] ; %val0 = load ptr, ptr %init, align 8 @@ -1404,15 +1404,15 @@ define <4 x i32> @validLoadStoreTy([2 x i64] %cond.coerce) { ; ; DEBUG-LABEL: @validLoadStoreTy( ; DEBUG-NEXT: entry: -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, 
metadata [[META553:![0-9]+]], metadata !DIExpression()), !dbg [[DBG557:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META554:![0-9]+]], metadata !DIExpression()), !dbg [[DBG558:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META553:![0-9]+]], !DIExpression(), [[META557:![0-9]+]]) +; DEBUG-NEXT: #dbg_value(ptr undef, [[META554:![0-9]+]], !DIExpression(), [[META558:![0-9]+]]) ; DEBUG-NEXT: [[COND_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[COND_COERCE:%.*]], 0, !dbg [[DBG559:![0-9]+]] ; DEBUG-NEXT: [[COND_SROA_0_0_VEC_INSERT:%.*]] = insertelement <2 x i64> undef, i64 [[COND_COERCE_FCA_0_EXTRACT]], i32 0, !dbg [[DBG559]] ; DEBUG-NEXT: [[COND_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[COND_COERCE]], 1, !dbg [[DBG559]] ; DEBUG-NEXT: [[COND_SROA_0_8_VEC_INSERT:%.*]] = insertelement <2 x i64> [[COND_SROA_0_0_VEC_INSERT]], i64 [[COND_COERCE_FCA_1_EXTRACT]], i32 1, !dbg [[DBG559]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META555:![0-9]+]], metadata !DIExpression()), !dbg [[DBG560:![0-9]+]] +; DEBUG-NEXT: #dbg_value(ptr undef, [[META555:![0-9]+]], !DIExpression(), [[META560:![0-9]+]]) ; DEBUG-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[COND_SROA_0_8_VEC_INSERT]] to <4 x i32>, !dbg [[DBG561:![0-9]+]] -; DEBUG-NEXT: call void @llvm.dbg.value(metadata <4 x i32> [[TMP0]], metadata [[META556:![0-9]+]], metadata !DIExpression()), !dbg [[DBG561]] +; DEBUG-NEXT: #dbg_value(<4 x i32> [[TMP0]], [[META556:![0-9]+]], !DIExpression(), [[DBG561]]) ; DEBUG-NEXT: ret <4 x i32> [[TMP0]], !dbg [[DBG562:![0-9]+]] ; entry: @@ -1427,6 +1427,63 @@ entry: ; The following test should not crash the compiler ; (calls to CheckCandidateType from createAndCheckVectorTypesForPromotion may change the memory to hold CandidateTys.data()) define noundef zeroext i1 @CandidateTysRealloc() personality ptr null { +; CHECK-LABEL: @CandidateTysRealloc( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[BB_1:%.*]] +; CHECK: bb.1: +; 
CHECK-NEXT: br label [[BB_1]] +; CHECK: bb.2: +; CHECK-NEXT: [[ALLOCA_SROA_0_0_LOAD1:%.*]] = load <2 x i64>, ptr poison, align 16 +; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[ALLOCA_SROA_0_0_LOAD1]] to <4 x i32> +; CHECK-NEXT: [[ALLOCA_SROA_0_0_LOAD2:%.*]] = load <2 x i64>, ptr poison, align 16 +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr poison, align 16 +; CHECK-NEXT: [[ALLOCA_SROA_0_0_LOAD3:%.*]] = load <2 x i64>, ptr poison, align 16 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[ALLOCA_SROA_0_0_LOAD3]] to <4 x i32> +; CHECK-NEXT: br label [[BB_3:%.*]] +; CHECK: bb.3: +; CHECK-NEXT: br label [[BB_3]] +; CHECK: bb.4: +; CHECK-NEXT: [[ALLOCA_SROA_0_0_LOAD6:%.*]] = load <2 x i64>, ptr poison, align 16 +; CHECK-NEXT: [[ALLOCA_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x i64> [[ALLOCA_SROA_0_0_LOAD6]], i32 0 +; CHECK-NEXT: [[ALLOCA_SROA_0_0_LOAD4:%.*]] = load <2 x i64>, ptr poison, align 16 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[ALLOCA_SROA_0_0_LOAD4]] to <4 x i32> +; CHECK-NEXT: [[ALLOCA_SROA_0_0_LOAD5:%.*]] = load <2 x i64>, ptr poison, align 16 +; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr poison, align 16 +; CHECK-NEXT: br label [[BB_5:%.*]] +; CHECK: bb.5: +; CHECK-NEXT: br label [[BB_5]] +; +; DEBUG-LABEL: @CandidateTysRealloc( +; DEBUG-NEXT: entry: +; DEBUG-NEXT: #dbg_value(ptr undef, [[META565:![0-9]+]], !DIExpression(), [[META570:![0-9]+]]) +; DEBUG-NEXT: br label [[BB_1:%.*]], !dbg [[DBG571:![0-9]+]] +; DEBUG: bb.1: +; DEBUG-NEXT: br label [[BB_1]], !dbg [[DBG572:![0-9]+]] +; DEBUG: bb.2: +; DEBUG-NEXT: [[ALLOCA_SROA_0_0_LOAD1:%.*]] = load <2 x i64>, ptr poison, align 16, !dbg [[DBG573:![0-9]+]] +; DEBUG-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[ALLOCA_SROA_0_0_LOAD1]] to <4 x i32>, !dbg [[DBG573]] +; DEBUG-NEXT: #dbg_value(<4 x i32> [[TMP0]], [[META566:![0-9]+]], !DIExpression(), [[DBG573]]) +; DEBUG-NEXT: [[ALLOCA_SROA_0_0_LOAD2:%.*]] = load <2 x i64>, ptr poison, align 16, !dbg [[DBG574:![0-9]+]] +; DEBUG-NEXT: store 
<2 x i64> zeroinitializer, ptr poison, align 16, !dbg [[DBG574]] +; DEBUG-NEXT: [[ALLOCA_SROA_0_0_LOAD3:%.*]] = load <2 x i64>, ptr poison, align 16, !dbg [[DBG575:![0-9]+]] +; DEBUG-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[ALLOCA_SROA_0_0_LOAD3]] to <4 x i32>, !dbg [[DBG575]] +; DEBUG-NEXT: #dbg_value(<4 x i32> [[TMP1]], [[META567:![0-9]+]], !DIExpression(), [[DBG575]]) +; DEBUG-NEXT: br label [[BB_3:%.*]], !dbg [[DBG576:![0-9]+]] +; DEBUG: bb.3: +; DEBUG-NEXT: br label [[BB_3]], !dbg [[DBG577:![0-9]+]] +; DEBUG: bb.4: +; DEBUG-NEXT: [[ALLOCA_SROA_0_0_LOAD6:%.*]] = load <2 x i64>, ptr poison, align 16, !dbg [[DBG578:![0-9]+]] +; DEBUG-NEXT: [[ALLOCA_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x i64> [[ALLOCA_SROA_0_0_LOAD6]], i32 0, !dbg [[DBG578]] +; DEBUG-NEXT: #dbg_value(i64 [[ALLOCA_SROA_0_0_VEC_EXTRACT]], [[META568:![0-9]+]], !DIExpression(), [[DBG578]]) +; DEBUG-NEXT: [[ALLOCA_SROA_0_0_LOAD4:%.*]] = load <2 x i64>, ptr poison, align 16, !dbg [[DBG579:![0-9]+]] +; DEBUG-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[ALLOCA_SROA_0_0_LOAD4]] to <4 x i32>, !dbg [[DBG579]] +; DEBUG-NEXT: #dbg_value(<4 x i32> [[TMP2]], [[META569:![0-9]+]], !DIExpression(), [[DBG579]]) +; DEBUG-NEXT: [[ALLOCA_SROA_0_0_LOAD5:%.*]] = load <2 x i64>, ptr poison, align 16, !dbg [[DBG580:![0-9]+]] +; DEBUG-NEXT: store <2 x i64> zeroinitializer, ptr poison, align 16, !dbg [[DBG580]] +; DEBUG-NEXT: br label [[BB_5:%.*]], !dbg [[DBG581:![0-9]+]] +; DEBUG: bb.5: +; DEBUG-NEXT: br label [[BB_5]], !dbg [[DBG582:![0-9]+]] +; entry: %alloca = alloca <4x i32>, align 16 store <4 x i32> , ptr %alloca, align 16 diff --git a/llvm/test/Transforms/SafeStack/X86/debug-loc-dynamic.ll b/llvm/test/Transforms/SafeStack/X86/debug-loc-dynamic.ll index 42d2aa91307f02..715224d6e1009e 100644 --- a/llvm/test/Transforms/SafeStack/X86/debug-loc-dynamic.ll +++ b/llvm/test/Transforms/SafeStack/X86/debug-loc-dynamic.ll @@ -17,7 +17,7 @@ entry: %0 = zext i32 %n to i64, !dbg !16 ; CHECK: store ptr %[[VLA:.*]], ptr 
@__safestack_unsafe_stack_ptr -; CHECK: tail call void @llvm.dbg.value(metadata ptr %[[VLA]], metadata ![[TYPE:.*]], metadata !DIExpression(DW_OP_deref)) +; CHECK: #dbg_value(ptr %[[VLA]], ![[TYPE:.*]], !DIExpression(DW_OP_deref), ; CHECK: call void @capture({{.*}} %[[VLA]]) %vla = alloca i8, i64 %0, align 16, !dbg !16 diff --git a/llvm/test/Transforms/SafeStack/X86/debug-loc.ll b/llvm/test/Transforms/SafeStack/X86/debug-loc.ll index 9a4df89f37b0ab..d237890b25ff0b 100644 --- a/llvm/test/Transforms/SafeStack/X86/debug-loc.ll +++ b/llvm/test/Transforms/SafeStack/X86/debug-loc.ll @@ -22,11 +22,11 @@ entry: call void @llvm.dbg.declare(metadata ptr %xxx, metadata !21, metadata !19), !dbg !22 ; dbg.declare for %zzz and %xxx are gone; replaced with dbg.declare based off the unsafe stack pointer -; CHECK-NOT: call void @llvm.dbg.declare -; CHECK: call void @llvm.dbg.declare(metadata ptr %[[USP]], metadata ![[VAR_ARG:.*]], metadata !DIExpression(DW_OP_constu, 104, DW_OP_minus)) -; CHECK-NOT: call void @llvm.dbg.declare -; CHECK: call void @llvm.dbg.declare(metadata ptr %[[USP]], metadata ![[VAR_LOCAL:.*]], metadata !DIExpression(DW_OP_constu, 208, DW_OP_minus)) -; CHECK-NOT: call void @llvm.dbg.declare +; CHECK-NOT: #dbg_declare +; CHECK: #dbg_declare(ptr %[[USP]], ![[VAR_ARG:.*]], !DIExpression(DW_OP_constu, 104, DW_OP_minus), +; CHECK-NOT: #dbg_declare +; CHECK: #dbg_declare(ptr %[[USP]], ![[VAR_LOCAL:.*]], !DIExpression(DW_OP_constu, 208, DW_OP_minus), +; CHECK-NOT: #dbg_declare call void @Capture(ptr %zzz), !dbg !23 call void @Capture(ptr %xxx), !dbg !24 diff --git a/llvm/test/Transforms/SafeStack/X86/debug-loc2.ll b/llvm/test/Transforms/SafeStack/X86/debug-loc2.ll index 915126bc3bbe30..7629f945623059 100644 --- a/llvm/test/Transforms/SafeStack/X86/debug-loc2.ll +++ b/llvm/test/Transforms/SafeStack/X86/debug-loc2.ll @@ -18,19 +18,19 @@ entry: %x2 = alloca i32, align 4 ; Unhandled dbg.value: expression does not start with OP_DW_deref -; CHECK: call void 
@llvm.dbg.value(metadata ptr undef, metadata !{{.*}}, metadata !{{.*}}) +; CHECK: #dbg_value(ptr undef, !{{.*}}, !{{.*}}) tail call void @llvm.dbg.value(metadata ptr %x1, metadata !10, metadata !23), !dbg !16 ; Unhandled dbg.value: expression does not start with OP_DW_deref -; CHECK: call void @llvm.dbg.value(metadata ptr undef, metadata !{{.*}}, metadata !{{.*}}) +; CHECK: #dbg_value(ptr undef, !{{.*}}, !{{.*}}) tail call void @llvm.dbg.value(metadata ptr %x1, metadata !10, metadata !24), !dbg !16 ; Supported dbg.value: rewritted based on the [[USP]] value. -; CHECK: call void @llvm.dbg.value(metadata ptr %[[USP]], metadata ![[X1:.*]], metadata !DIExpression(DW_OP_constu, 4, DW_OP_minus, DW_OP_deref, DW_OP_LLVM_fragment, 0, 4)) +; CHECK: #dbg_value(ptr %[[USP]], ![[X1:.*]], !DIExpression(DW_OP_constu, 4, DW_OP_minus, DW_OP_deref, DW_OP_LLVM_fragment, 0, 4), tail call void @llvm.dbg.value(metadata ptr %x1, metadata !10, metadata !25), !dbg !16 ; Supported dbg.value: rewritted based on the [[USP]] value. 
-; CHECK: call void @llvm.dbg.value(metadata ptr %[[USP]], metadata ![[X1:.*]], metadata !DIExpression(DW_OP_constu, 4, DW_OP_minus, DW_OP_deref)) +; CHECK: #dbg_value(ptr %[[USP]], ![[X1:.*]], !DIExpression(DW_OP_constu, 4, DW_OP_minus, DW_OP_deref), tail call void @llvm.dbg.value(metadata ptr %x1, metadata !10, metadata !15), !dbg !16 call void @capture(ptr nonnull %x1), !dbg !17 @@ -38,7 +38,7 @@ entry: ; CHECK: call void @llvm.random.metadata.use(metadata ptr undef call void @llvm.random.metadata.use(metadata ptr %x2) -; CHECK: call void @llvm.dbg.value(metadata ptr %[[USP]], metadata ![[X2:.*]], metadata !DIExpression(DW_OP_constu, 8, DW_OP_minus, DW_OP_deref)) +; CHECK: #dbg_value(ptr %[[USP]], ![[X2:.*]], !DIExpression(DW_OP_constu, 8, DW_OP_minus, DW_OP_deref), call void @llvm.dbg.value(metadata ptr %x2, metadata !12, metadata !15), !dbg !18 call void @capture(ptr nonnull %x2), !dbg !19 ret void, !dbg !20 diff --git a/llvm/test/Transforms/Scalarizer/dbginfo.ll b/llvm/test/Transforms/Scalarizer/dbginfo.ll index 3b48915b605d75..310b5aae02cf45 100644 --- a/llvm/test/Transforms/Scalarizer/dbginfo.ll +++ b/llvm/test/Transforms/Scalarizer/dbginfo.ll @@ -21,9 +21,9 @@ define void @f1(ptr nocapture %a, ptr nocapture readonly %b, ptr nocapture reado ; CHECK: %b.i1 = getelementptr i32, ptr %b, i32 1 ; CHECK: %b.i2 = getelementptr i32, ptr %b, i32 2 ; CHECK: %b.i3 = getelementptr i32, ptr %b, i32 3 -; CHECK: tail call void @llvm.dbg.value(metadata ptr %a, metadata !{{[0-9]+}}, metadata {{.*}}), !dbg !{{[0-9]+}} -; CHECK: tail call void @llvm.dbg.value(metadata ptr %b, metadata !{{[0-9]+}}, metadata {{.*}}), !dbg !{{[0-9]+}} -; CHECK: tail call void @llvm.dbg.value(metadata ptr %c, metadata !{{[0-9]+}}, metadata {{.*}}), !dbg !{{[0-9]+}} +; CHECK: #dbg_value(ptr %a, !{{[0-9]+}}, {{.*}}, !{{[0-9]+}} +; CHECK: #dbg_value(ptr %b, !{{[0-9]+}}, {{.*}}, !{{[0-9]+}} +; CHECK: #dbg_value(ptr %c, !{{[0-9]+}}, {{.*}}, !{{[0-9]+}} ; CHECK: %bval.i0 = load i32, ptr %b, align 16, 
!dbg ![[TAG1:[0-9]+]], !tbaa ![[TAG2:[0-9]+]] ; CHECK: %bval.i1 = load i32, ptr %b.i1, align 4, !dbg ![[TAG1]], !tbaa ![[TAG2]] ; CHECK: %bval.i2 = load i32, ptr %b.i2, align 8, !dbg ![[TAG1]], !tbaa ![[TAG2]] diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/debuginfo.ll b/llvm/test/Transforms/SimpleLoopUnswitch/debuginfo.ll index ca3691dff9d183..bc53bfd643e5e8 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/debuginfo.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/debuginfo.ll @@ -22,7 +22,7 @@ define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) { ; CHECK-NEXT: br label [[LOOP_HEADER_US:%.*]] ; CHECK: loop.header.us: ; CHECK-NEXT: [[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[LOOP_LATCH_US:%.*]] ] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 poison, metadata [[META3:![0-9]+]], metadata !DIExpression()), !dbg [[DBG8:![0-9]+]] +; CHECK-NEXT: #dbg_value(i32 poison, [[META3:![0-9]+]], !DIExpression(), [[META8:![0-9]+]]) ; CHECK-NEXT: br label [[NOCLOBBER_US:%.*]] ; CHECK: noclobber.us: ; CHECK-NEXT: br label [[LOOP_LATCH_US]] @@ -37,7 +37,7 @@ define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) { ; CHECK: loop.header: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] ; CHECK-NEXT: [[LV:%.*]] = load i32, ptr [[PTR]], align 4 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[LV]], metadata [[META3]], metadata !DIExpression()), !dbg [[DBG8]] +; CHECK-NEXT: #dbg_value(i32 [[LV]], [[META3]], !DIExpression(), [[META8]]) ; CHECK-NEXT: [[SC:%.*]] = icmp eq i32 [[LV]], 100 ; CHECK-NEXT: br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]] ; CHECK: noclobber: diff --git a/llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad-debuginfo.ll b/llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad-debuginfo.ll index a2ffb6e519ebfd..29cfebaed0507e 100644 --- 
a/llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad-debuginfo.ll +++ b/llvm/test/Transforms/SimplifyCFG/X86/merge-compatible-invokes-of-landingpad-debuginfo.ll @@ -11,7 +11,7 @@ define void @t1_mergeable_invoke() personality ptr @__gxx_personality_v0 { ; CHECK-LABEL: @t1_mergeable_invoke( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C0:%.*]] = call i1 @cond(), !dbg [[DBG12:![0-9]+]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i1 [[C0]], metadata [[META9:![0-9]+]], metadata !DIExpression()), !dbg [[DBG12]] +; CHECK-NEXT: #dbg_value(i1 [[C0]], [[META9:![0-9]+]], !DIExpression(), [[DBG12]]) ; CHECK-NEXT: br i1 [[C0]], label [[IF_THEN1_INVOKE:%.*]], label [[IF_ELSE:%.*]], !dbg [[DBG13:![0-9]+]] ; CHECK: lpad: ; CHECK-NEXT: [[EH:%.*]] = landingpad { ptr, i32 } @@ -20,7 +20,7 @@ define void @t1_mergeable_invoke() personality ptr @__gxx_personality_v0 { ; CHECK-NEXT: resume { ptr, i32 } [[EH]], !dbg [[DBG16:![0-9]+]] ; CHECK: if.else: ; CHECK-NEXT: [[C1:%.*]] = call i1 @cond(), !dbg [[DBG17:![0-9]+]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i1 [[C1]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG17]] +; CHECK-NEXT: #dbg_value(i1 [[C1]], [[META11:![0-9]+]], !DIExpression(), [[DBG17]]) ; CHECK-NEXT: br i1 [[C1]], label [[IF_THEN1_INVOKE]], label [[IF_END:%.*]], !dbg [[DBG18:![0-9]+]] ; CHECK: if.then1.invoke: ; CHECK-NEXT: invoke void @simple_throw() @@ -69,7 +69,6 @@ declare void @destructor() declare dso_local i32 @__gxx_personality_v0(...) ;. ; CHECK: attributes #[[ATTR0:[0-9]+]] = { noreturn } -; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } ;. 
; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C, file: [[META1:![0-9]+]], producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug) ; CHECK: [[META1]] = !DIFile(filename: "", directory: {{.*}}) diff --git a/llvm/test/Transforms/SimplifyCFG/X86/pr39187-g.ll b/llvm/test/Transforms/SimplifyCFG/X86/pr39187-g.ll index 8cc466ff82e5d9..84259398f2e8de 100644 --- a/llvm/test/Transforms/SimplifyCFG/X86/pr39187-g.ll +++ b/llvm/test/Transforms/SimplifyCFG/X86/pr39187-g.ll @@ -38,8 +38,8 @@ ; CHECK: %foo.0. = load volatile i32, ptr %foo, align 4, !dbg !16 ; CHECK: %cmp = icmp eq i32 %foo.0., 4, !dbg !16 ; CHECK: %frombool = zext i1 %cmp to i8, !dbg !16 -; CHECK: call void @llvm.dbg.value(metadata i8 %frombool, metadata !13, metadata !DIExpression()), !dbg !16 -; CHECK: call void @llvm.dbg.value(metadata i32 0, metadata !15, metadata !DIExpression()), !dbg !17 +; CHECK: #dbg_value(i8 %frombool, !13, !DIExpression(), !16 +; CHECK: #dbg_value(i32 0, !15, !DIExpression(), !17 ; CHECK: %. 
= select i1 %cmp, i32 8, i32 4, !dbg ![[MERGEDLOC:[0-9]+]] ; CHECK: ![[MERGEDLOC]] = !DILocation(line: 0, scope: !7) diff --git a/llvm/test/Transforms/SimplifyCFG/branch-fold-dbg.ll b/llvm/test/Transforms/SimplifyCFG/branch-fold-dbg.ll index 3ab2d113e85156..c8a2070131ba72 100644 --- a/llvm/test/Transforms/SimplifyCFG/branch-fold-dbg.ll +++ b/llvm/test/Transforms/SimplifyCFG/branch-fold-dbg.ll @@ -17,11 +17,11 @@ define i1 @foo(i32) nounwind ssp !dbg !0 { ; CHECK-NEXT: [[TMP3:%.*]] = shl i32 1, [[TMP0]], !dbg [[DBG7]] ; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 31, !dbg [[DBG7]] ; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0, !dbg [[DBG7]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata ptr null, metadata [[META8:![0-9]+]], metadata !DIExpression()), !dbg [[DBG13:![0-9]+]] +; CHECK-NEXT: #dbg_value(ptr null, [[META8:![0-9]+]], !DIExpression(), [[META13:![0-9]+]]) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [5 x %0], ptr @[[GLOB0:[0-9]+]], i32 0, i32 [[TMP0]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata ptr [[TMP6]], metadata [[META8]], metadata !DIExpression()), !dbg [[DBG13]] +; CHECK-NEXT: #dbg_value(ptr [[TMP6]], [[META8]], !DIExpression(), [[META13]]) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq ptr [[TMP6]], null -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata ptr [[TMP6]], metadata [[META8]], metadata !DIExpression()), !dbg [[DBG13]] +; CHECK-NEXT: #dbg_value(ptr [[TMP6]], [[META8]], !DIExpression(), [[META13]]) ; CHECK-NEXT: [[OR_COND2:%.*]] = select i1 [[TMP5]], i1 true, i1 [[TMP7]], !dbg [[DBG7]] ; CHECK-NEXT: [[TMP8:%.*]] = icmp slt i32 [[TMP0]], 0 ; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[OR_COND2]], i1 false, i1 [[TMP8]], !dbg [[DBG7]] diff --git a/llvm/test/Transforms/SimplifyCFG/hoist-dbgvalue-inlined.ll b/llvm/test/Transforms/SimplifyCFG/hoist-dbgvalue-inlined.ll index e00d1daf71de58..a584e06cfa2c8d 100644 --- a/llvm/test/Transforms/SimplifyCFG/hoist-dbgvalue-inlined.ll +++ 
b/llvm/test/Transforms/SimplifyCFG/hoist-dbgvalue-inlined.ll @@ -8,8 +8,8 @@ init: br i1 %v9, label %a, label %b ; CHECK: %vala = load i64, ptr %ptr -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 %vala, metadata [[MD:![0-9]*]] -; CHECK-NEXT: call void @llvm.dbg.value(metadata i64 %vala, metadata [[MD]] +; CHECK-NEXT: #dbg_value(i64 %vala, [[MD:![0-9]*]] +; CHECK-NEXT: #dbg_value(i64 %vala, [[MD]] ; CHECK-NEXT: %valbmasked = and i64 %vala, 1 a: ; preds = %init diff --git a/llvm/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll b/llvm/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll index 5e52e8425c8b5d..1b115d64a048bf 100644 --- a/llvm/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll +++ b/llvm/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll @@ -5,17 +5,17 @@ define i32 @foo(i32 %i) nounwind ssp !dbg !0 { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 [[I:%.*]], metadata [[META7:![0-9]+]], metadata !DIExpression()), !dbg [[DBG8:![0-9]+]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 0, metadata [[META9:![0-9]+]], metadata !DIExpression()), !dbg [[DBG11:![0-9]+]] +; CHECK-NEXT: #dbg_value(i32 [[I:%.*]], [[META7:![0-9]+]], !DIExpression(), [[META8:![0-9]+]]) +; CHECK-NEXT: #dbg_value(i32 0, [[META9:![0-9]+]], !DIExpression(), [[META11:![0-9]+]]) ; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[I]], 0, !dbg [[DBG12:![0-9]+]] ; CHECK-NEXT: br i1 [[COND]], label [[THEN:%.*]], label [[ELSE:%.*]], !dbg [[DBG12]] ; CHECK: then: ; CHECK-NEXT: [[CALL_1:%.*]] = call i32 (...) @bar(), !dbg [[DBG13:![0-9]+]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 [[CALL_1]], metadata [[META9]], metadata !DIExpression()), !dbg [[DBG13]] +; CHECK-NEXT: #dbg_value(i32 [[CALL_1]], [[META9]], !DIExpression(), [[DBG13]]) ; CHECK-NEXT: br label [[EXIT:%.*]], !dbg [[DBG15:![0-9]+]] ; CHECK: else: ; CHECK-NEXT: [[CALL_2:%.*]] = call i32 (...) 
@bar(), !dbg [[DBG16:![0-9]+]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 [[CALL_2]], metadata [[META9]], metadata !DIExpression()), !dbg [[DBG16]] +; CHECK-NEXT: #dbg_value(i32 [[CALL_2]], [[META9]], !DIExpression(), [[DBG16]]) ; CHECK-NEXT: br label [[EXIT]], !dbg [[DBG18:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: [[K_0:%.*]] = phi i32 [ [[CALL_1]], [[THEN]] ], [ [[CALL_2]], [[ELSE]] ] @@ -46,8 +46,8 @@ define i1 @hoist_with_debug2(i32 %x) !dbg !22 { ; CHECK-LABEL: @hoist_with_debug2( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp ugt i32 [[X:%.*]], 2 -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 [[X]], metadata [[META21:![0-9]+]], metadata !DIExpression()), !dbg [[DBG23:![0-9]+]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 [[X]], metadata [[META21]], metadata !DIExpression()), !dbg [[DBG23]] +; CHECK-NEXT: #dbg_value(i32 [[X]], [[META21:![0-9]+]], !DIExpression(), [[META23:![0-9]+]]) +; CHECK-NEXT: #dbg_value(i32 [[X]], [[META21]], !DIExpression(), [[META23]]) ; CHECK-NEXT: [[DOT:%.*]] = select i1 [[TOBOOL_NOT]], i1 false, i1 true ; CHECK-NEXT: ret i1 [[DOT]] ; diff --git a/llvm/test/Transforms/SimplifyCFG/jump-threading-debuginfo.ll b/llvm/test/Transforms/SimplifyCFG/jump-threading-debuginfo.ll index e45757b82b75f1..a7e13bd69e19df 100644 --- a/llvm/test/Transforms/SimplifyCFG/jump-threading-debuginfo.ll +++ b/llvm/test/Transforms/SimplifyCFG/jump-threading-debuginfo.ll @@ -16,28 +16,28 @@ define void @test_phi_extra_use(i1 %c) { ; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[ELSE:%.*]] ; CHECK: if: ; CHECK-NEXT: call void @foo() -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 0, metadata [[META7:![0-9]+]], metadata !DIExpression()), !dbg [[DBG13:![0-9]+]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 1, metadata [[META7]], metadata !DIExpression()), !dbg [[DBG13]] +; CHECK-NEXT: #dbg_value(i32 0, [[META7:![0-9]+]], !DIExpression(), [[META13:![0-9]+]]) +; CHECK-NEXT: 
#dbg_value(i32 1, [[META7]], !DIExpression(), [[META13]]) ; CHECK-NEXT: call void @use.i1(i1 true) -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 2, metadata [[META7]], metadata !DIExpression()), !dbg [[DBG13]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 3, metadata [[META7]], metadata !DIExpression()), !dbg [[DBG13]] +; CHECK-NEXT: #dbg_value(i32 2, [[META7]], !DIExpression(), [[META13]]) +; CHECK-NEXT: #dbg_value(i32 3, [[META7]], !DIExpression(), [[META13]]) ; CHECK-NEXT: call void @foo() ; CHECK-NEXT: br label [[JOIN2:%.*]] ; CHECK: else: ; CHECK-NEXT: call void @bar() -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 0, metadata [[META7]], metadata !DIExpression()), !dbg [[DBG13]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 1, metadata [[META7]], metadata !DIExpression()), !dbg [[DBG13]] +; CHECK-NEXT: #dbg_value(i32 0, [[META7]], !DIExpression(), [[META13]]) +; CHECK-NEXT: #dbg_value(i32 1, [[META7]], !DIExpression(), [[META13]]) ; CHECK-NEXT: call void @use.i1(i1 false) -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 2, metadata [[META7]], metadata !DIExpression()), !dbg [[DBG13]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 3, metadata [[META7]], metadata !DIExpression()), !dbg [[DBG13]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 4, metadata [[META7]], metadata !DIExpression()), !dbg [[DBG13]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 5, metadata [[META7]], metadata !DIExpression()), !dbg [[DBG13]] +; CHECK-NEXT: #dbg_value(i32 2, [[META7]], !DIExpression(), [[META13]]) +; CHECK-NEXT: #dbg_value(i32 3, [[META7]], !DIExpression(), [[META13]]) +; CHECK-NEXT: #dbg_value(i32 4, [[META7]], !DIExpression(), [[META13]]) +; CHECK-NEXT: #dbg_value(i32 5, [[META7]], !DIExpression(), [[META13]]) ; CHECK-NEXT: call void @bar() ; CHECK-NEXT: br label [[JOIN2]] ; CHECK: join2: -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 6, metadata [[META7]], 
metadata !DIExpression()), !dbg [[DBG13]] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 7, metadata [[META7]], metadata !DIExpression()), !dbg [[DBG13]] -; CHECK-NEXT: ret void, !dbg [[DBG13]] +; CHECK-NEXT: #dbg_value(i32 6, [[META7]], !DIExpression(), [[META13]]) +; CHECK-NEXT: #dbg_value(i32 7, [[META7]], !DIExpression(), [[META13]]) +; CHECK-NEXT: ret void, !dbg [[META13]] ; br i1 %c, label %if, label %else diff --git a/llvm/test/Transforms/SimplifyCFG/return-merge.ll b/llvm/test/Transforms/SimplifyCFG/return-merge.ll index 7e22612834b8f8..f803a4aa811bba 100644 --- a/llvm/test/Transforms/SimplifyCFG/return-merge.ll +++ b/llvm/test/Transforms/SimplifyCFG/return-merge.ll @@ -13,7 +13,7 @@ define i32 @test1(i1 %C) { ; ; DBGINFO-LABEL: @test1( ; DBGINFO-NEXT: entry: -; DBGINFO-NEXT: tail call void @llvm.dbg.value(metadata i32 0, metadata [[META9:![0-9]+]], metadata !DIExpression()), !dbg [[DBG11:![0-9]+]] +; DBGINFO-NEXT: #dbg_value(i32 0, [[META9:![0-9]+]], !DIExpression(), [[META11:![0-9]+]]) ; DBGINFO-NEXT: [[DOT:%.*]] = select i1 [[C:%.*]], i32 1, i32 0 ; DBGINFO-NEXT: ret i32 [[DOT]], !dbg [[DBG12:![0-9]+]] ; @@ -32,7 +32,7 @@ define void @test2(i1 %C) { ; ; DBGINFO-LABEL: @test2( ; DBGINFO-NEXT: common.ret: -; DBGINFO-NEXT: tail call void @llvm.dbg.value(metadata i32 0, metadata [[META15:![0-9]+]], metadata !DIExpression()), !dbg [[DBG16:![0-9]+]] +; DBGINFO-NEXT: #dbg_value(i32 0, [[META15:![0-9]+]], !DIExpression(), [[META16:![0-9]+]]) ; DBGINFO-NEXT: ret void, !dbg [[DBG17:![0-9]+]] ; br i1 %C, label %T, label %F @@ -68,7 +68,7 @@ define i32 @test3(i1 %C0, i1 %C1, i32 %v0, i32 %v1, i32 %v2) { ; DBGINFO-NEXT: br i1 [[C0:%.*]], label [[T:%.*]], label [[F:%.*]], !dbg [[DBG22:![0-9]+]] ; DBGINFO: end: ; DBGINFO-NEXT: [[R:%.*]] = phi i32 [ [[V2:%.*]], [[F]] ], [ [[SPEC_SELECT:%.*]], [[T]] ], !dbg [[DBG23:![0-9]+]] -; DBGINFO-NEXT: tail call void @llvm.dbg.value(metadata i32 [[R]], metadata [[META20:![0-9]+]], metadata !DIExpression()), !dbg 
[[DBG23]] +; DBGINFO-NEXT: #dbg_value(i32 [[R]], [[META20:![0-9]+]], !DIExpression(), [[DBG23]]) ; DBGINFO-NEXT: ret i32 [[R]], !dbg [[DBG24:![0-9]+]] ; DBGINFO: T: ; DBGINFO-NEXT: call void @sideeffect1(), !dbg [[DBG25:![0-9]+]] diff --git a/llvm/test/Transforms/SimplifyCFG/speculate-dbgvalue.ll b/llvm/test/Transforms/SimplifyCFG/speculate-dbgvalue.ll index ba26c70962d730..0dbe9a8b72cdf5 100644 --- a/llvm/test/Transforms/SimplifyCFG/speculate-dbgvalue.ll +++ b/llvm/test/Transforms/SimplifyCFG/speculate-dbgvalue.ll @@ -19,19 +19,15 @@ define i32 @test1(i32 %getdirt, i32 %dirt) #0 !dbg !7 { ; CHECK-LABEL: @test1( ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[GETDIRT:%.*]], metadata !12, metadata !DIExpression()), !dbg !15 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[DIRT:%.*]], metadata !13, metadata !DIExpression()), !dbg !16 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 100, metadata !14, metadata !DIExpression()), !dbg !17 -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[GETDIRT]], 0, !dbg !18 -; *** We used to get an incorrect "call void @llvm.dbg.value(metadata i32 [[DIRT]], metadata !14, metadata !DIExpression()), !dbg !17" here, before the select. 
*** -; CHECK-NOT: call void @llvm.dbg.value(metadata i32 [[DIRT]], metadata !14 -; CHECK-NEXT: [[RESULT:%.*]] = select i1 [[CMP]], i32 [[DIRT]], i32 100, !dbg !20 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 [[RESULT]], metadata !14, metadata !DIExpression()), !dbg !17 -; CHECK-NEXT: ret i32 [[RESULT]], !dbg !21 -; CHECK: !12 = !DILocalVariable(name: "getdirt" -; CHECK: !13 = !DILocalVariable(name: "dirt" -; CHECK: !14 = !DILocalVariable(name: "result" +; CHECK-NEXT: #dbg_value(i32 [[GETDIRT:%.*]], [[META12:![0-9]+]], !DIExpression(), [[META15:![0-9]+]]) +; CHECK-NEXT: #dbg_value(i32 [[DIRT:%.*]], [[META13:![0-9]+]], !DIExpression(), [[META16:![0-9]+]]) +; CHECK-NEXT: #dbg_value(i32 100, [[META14:![0-9]+]], !DIExpression(), [[META17:![0-9]+]]) +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[GETDIRT]], 0, !dbg [[DBG18:![0-9]+]] +; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP]], i32 [[DIRT]], i32 100, !dbg [[DBG20:![0-9]+]] +; CHECK-NEXT: #dbg_value(i32 [[SPEC_SELECT]], [[META14]], !DIExpression(), [[META17]]) +; CHECK-NEXT: ret i32 [[SPEC_SELECT]], !dbg [[DBG21:![0-9]+]] ; +; *** We used to get an incorrect "call void @llvm.dbg.value(metadata i32 [[DIRT]], metadata !14, metadata !DIExpression()), !dbg !17" here, before the select. 
*** entry: call void @llvm.dbg.value(metadata i32 %getdirt, metadata !12, metadata !DIExpression()), !dbg !15 call void @llvm.dbg.value(metadata i32 %dirt, metadata !13, metadata !DIExpression()), !dbg !16 diff --git a/llvm/test/Transforms/SimplifyCFG/tail-merge-noreturn.ll b/llvm/test/Transforms/SimplifyCFG/tail-merge-noreturn.ll index f3e401104b46fc..562f6718142e5d 100644 --- a/llvm/test/Transforms/SimplifyCFG/tail-merge-noreturn.ll +++ b/llvm/test/Transforms/SimplifyCFG/tail-merge-noreturn.ll @@ -445,17 +445,17 @@ cont3: define void @strip_dbg_value(i32 %c) { ; CHECK-LABEL: @strip_dbg_value( ; CHECK-NEXT: entry: -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 [[C:%.*]], metadata [[META5:![0-9]+]], metadata !DIExpression()), !dbg [[DBG7:![0-9]+]] +; CHECK-NEXT: #dbg_value(i32 [[C:%.*]], [[META5:![0-9]+]], !DIExpression(), [[META7:![0-9]+]]) ; CHECK-NEXT: switch i32 [[C]], label [[SW_EPILOG:%.*]] [ ; CHECK-NEXT: i32 13, label [[SW_BB:%.*]] ; CHECK-NEXT: i32 42, label [[SW_BB1:%.*]] ; CHECK-NEXT: ] ; CHECK: sw.bb: -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 55, metadata [[META5]], metadata !DIExpression()), !dbg [[DBG7]] +; CHECK-NEXT: #dbg_value(i32 55, [[META5]], !DIExpression(), [[META7]]) ; CHECK-NEXT: tail call void @abort() ; CHECK-NEXT: unreachable ; CHECK: sw.bb1: -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 67, metadata [[META5]], metadata !DIExpression()), !dbg [[DBG7]] +; CHECK-NEXT: #dbg_value(i32 67, [[META5]], !DIExpression(), [[META7]]) ; CHECK-NEXT: tail call void @abort() ; CHECK-NEXT: unreachable ; CHECK: sw.epilog: @@ -485,7 +485,7 @@ sw.epilog: ; preds = %entry define void @dead_phi_and_dbg(i32 %c) { ; CHECK-LABEL: @dead_phi_and_dbg( ; CHECK-NEXT: entry: -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 [[C:%.*]], metadata [[META5]], metadata !DIExpression()), !dbg [[DBG7]] +; CHECK-NEXT: #dbg_value(i32 [[C:%.*]], [[META5]], !DIExpression(), [[META7]]) ; CHECK-NEXT: switch i32 [[C]], label 
[[SW_EPILOG:%.*]] [ ; CHECK-NEXT: i32 13, label [[SW_BB:%.*]] ; CHECK-NEXT: i32 42, label [[SW_BB1:%.*]] @@ -493,13 +493,13 @@ define void @dead_phi_and_dbg(i32 %c) { ; CHECK-NEXT: ] ; CHECK: sw.bb: ; CHECK-NEXT: [[C_1:%.*]] = phi i32 [ 55, [[ENTRY:%.*]] ], [ 67, [[SW_BB1]] ] -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 [[C_1]], metadata [[META5]], metadata !DIExpression()), !dbg [[DBG7]] +; CHECK-NEXT: #dbg_value(i32 [[C_1]], [[META5]], !DIExpression(), [[META7]]) ; CHECK-NEXT: tail call void @abort() ; CHECK-NEXT: unreachable ; CHECK: sw.bb1: ; CHECK-NEXT: br label [[SW_BB]] ; CHECK: sw.bb2: -; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 84, metadata [[META5]], metadata !DIExpression()), !dbg [[DBG7]] +; CHECK-NEXT: #dbg_value(i32 84, [[META5]], !DIExpression(), [[META7]]) ; CHECK-NEXT: tail call void @abort() ; CHECK-NEXT: unreachable ; CHECK: sw.epilog: diff --git a/llvm/test/Transforms/SpeculativeExecution/PR46267.ll b/llvm/test/Transforms/SpeculativeExecution/PR46267.ll index 69dac2220d9a64..8f82a16639744d 100644 --- a/llvm/test/Transforms/SpeculativeExecution/PR46267.ll +++ b/llvm/test/Transforms/SpeculativeExecution/PR46267.ll @@ -35,13 +35,13 @@ entry: land.rhs: ; preds = %entry ; CHECK: land.rhs: -; CHECK-NEXT: call void @llvm.dbg.label +; CHECK-NEXT: #dbg_label ; CHECK-NEXT: %y = alloca i32, align 4 -; CHECK-NEXT: call void @llvm.dbg.declare(metadata ptr %y +; CHECK-NEXT: #dbg_declare(ptr %y ; CHECK-NEXT: %a0 = load i32, ptr undef, align 1 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %a0 -; CHECK-NEXT: call void @llvm.dbg.label -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %a2 +; CHECK-NEXT: #dbg_value(i32 %a0 +; CHECK-NEXT: #dbg_label +; CHECK-NEXT: #dbg_value(i32 %a2 call void @llvm.dbg.label(metadata !11), !dbg !10 %y = alloca i32, align 4 call void @llvm.dbg.declare(metadata ptr %y, metadata !14, metadata !DIExpression()), !dbg !10 diff --git a/llvm/test/Transforms/Util/Debugify/loc-only.ll 
b/llvm/test/Transforms/Util/Debugify/loc-only.ll index c8159f3e482b84..e90f206a95756f 100644 --- a/llvm/test/Transforms/Util/Debugify/loc-only.ll +++ b/llvm/test/Transforms/Util/Debugify/loc-only.ll @@ -7,10 +7,10 @@ define void @test() { %add = add i32 1, 2 ; ALL-NEXT: %add = add i32 1, 2, !dbg [[L1:![0-9]+]] -; VALUE-NEXT: call void @llvm.dbg.value(metadata i32 %add, metadata [[add:![0-9]+]], metadata !DIExpression()), !dbg [[L1]] +; VALUE-NEXT: #dbg_value(i32 %add, [[add:![0-9]+]], !DIExpression(), [[L1]] %sub = sub i32 %add, 1 ; ALL-NEXT: %sub = sub i32 %add, 1, !dbg [[L2:![0-9]+]] -; VALUE-NEXT: call void @llvm.dbg.value(metadata i32 %sub, metadata [[sub:![0-9]+]], metadata !DIExpression()), !dbg [[L2]] +; VALUE-NEXT: #dbg_value(i32 %sub, [[sub:![0-9]+]], !DIExpression(), [[L2]] ; ALL-NEXT: ret void, !dbg [[L3:![0-9]+]] ret void } diff --git a/llvm/test/Transforms/Util/dbg-call-bitcast.ll b/llvm/test/Transforms/Util/dbg-call-bitcast.ll index f3da1a6b1eaecb..d8d80ab0f457a6 100644 --- a/llvm/test/Transforms/Util/dbg-call-bitcast.ll +++ b/llvm/test/Transforms/Util/dbg-call-bitcast.ll @@ -5,10 +5,10 @@ define dso_local void @_Z1fv() { call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %1) call void @llvm.dbg.declare(metadata ptr %1, metadata !16, metadata !DIExpression()), !dbg !19 ; CHECK: %[[A:.*]] = alloca i32, align 4 -; CHECK: call void @llvm.dbg.value(metadata ptr %[[A]], {{.*}}, metadata !DIExpression(DW_OP_deref) +; CHECK: #dbg_value(ptr %[[A]], {{.*}}, !DIExpression(DW_OP_deref) ; CHECK: call void @_Z1gPv call void @_Z1gPv(ptr nonnull %1) -; CHECK-NOT: call void @llvm.dbg.value +; CHECK-NOT: #dbg_value ; CHECK: call void @_Z1gPv call void @_Z1gPv(ptr nonnull %1) call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %1) @@ -20,13 +20,13 @@ define dso_local void @_Z2fv() { call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %1) call void @llvm.dbg.declare(metadata ptr %1, metadata !16, metadata !DIExpression()), !dbg !19 ; CHECK: %[[A:.*]] = alloca i32, 
align 4 -; CHECK: call void @llvm.dbg.value(metadata ptr %[[A]], {{.*}}, metadata !DIExpression(DW_OP_deref) +; CHECK: #dbg_value(ptr %[[A]], {{.*}}, !DIExpression(DW_OP_deref) ; CHECK: call void @_Z1gPv call void @_Z1gPv(ptr nonnull %1) br label %block2 block2: -; CHECK: call void @llvm.dbg.value(metadata ptr %[[A]], {{.*}}, metadata !DIExpression(DW_OP_deref) +; CHECK: #dbg_value(ptr %[[A]], {{.*}}, !DIExpression(DW_OP_deref) ; CHECK: call void @_Z1gPv call void @_Z1gPv(ptr nonnull %1) call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %1) diff --git a/llvm/test/Transforms/Util/dbg-user-of-aext.ll b/llvm/test/Transforms/Util/dbg-user-of-aext.ll index 0511c41398981b..16816a6908a82d 100644 --- a/llvm/test/Transforms/Util/dbg-user-of-aext.ll +++ b/llvm/test/Transforms/Util/dbg-user-of-aext.ll @@ -20,10 +20,10 @@ ; parameter. It can reference the register it's in directly without masking off ; high bits or anything -; CHECK: call void @llvm.dbg.value(metadata i8 %g.coerce0, metadata ![[VAR_STRUCT:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 8)) -; CHECK: call void @llvm.dbg.value(metadata i64 %g.coerce1, metadata ![[VAR_STRUCT]], metadata !DIExpression(DW_OP_LLVM_fragment, 32, 64)) -; CHECK: call void @llvm.dbg.value(metadata i8 %frombool, metadata ![[VAR_BOOL:[0-9]+]], metadata !DIExpression()) -; CHECK: call void @llvm.dbg.value(metadata i8 %frombool1, metadata ![[VAR_FRAG:[0-9]+]], metadata !DIExpression(DW_OP_LLVM_fragment, 0, 4)) +; CHECK: #dbg_value(i8 %g.coerce0, ![[VAR_STRUCT:[0-9]+]], !DIExpression(DW_OP_LLVM_fragment, 0, 8), +; CHECK: #dbg_value(i64 %g.coerce1, ![[VAR_STRUCT]], !DIExpression(DW_OP_LLVM_fragment, 32, 64), +; CHECK: #dbg_value(i8 %frombool, ![[VAR_BOOL:[0-9]+]], !DIExpression(), +; CHECK: #dbg_value(i8 %frombool1, ![[VAR_FRAG:[0-9]+]], !DIExpression(DW_OP_LLVM_fragment, 0, 4), %struct.foo = type { i8, i64 } diff --git a/llvm/test/Transforms/Util/salvage-debuginfo.ll b/llvm/test/Transforms/Util/salvage-debuginfo.ll index 
5058095491bbbe..e8906dcbd94040 100644 --- a/llvm/test/Transforms/Util/salvage-debuginfo.ll +++ b/llvm/test/Transforms/Util/salvage-debuginfo.ll @@ -8,11 +8,11 @@ define void @f(i32) !dbg !8 { entry: %p_x = inttoptr i32 %0 to ptr %i_x = ptrtoint ptr %p_x to i32 - ; CHECK: call void @llvm.dbg.value(metadata i32 %0, + ; CHECK: #dbg_value(i32 %0, ; CHECK-SAME: !DIExpression(DW_OP_LLVM_convert, 32, DW_ATE_unsigned, ; CHECK-SAME: DW_OP_LLVM_convert, 64, DW_ATE_unsigned, ; CHECK-SAME: DW_OP_LLVM_convert, 64, DW_ATE_unsigned, - ; CHECK-SAME: DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_stack_value)) + ; CHECK-SAME: DW_OP_LLVM_convert, 32, DW_ATE_unsigned, DW_OP_stack_value) call void @llvm.dbg.value(metadata i32 %i_x, metadata !11, metadata !DIExpression()), !dbg !13 ret void, !dbg !13 } diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values.ll b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values.ll index 5ad61e3a007a7e..24f89b684156f9 100644 --- a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values.ll +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values.ll @@ -1,5 +1,6 @@ ; Just run it through opt, no passes needed. -; RUN: opt < %s -S | FileCheck %s +; This tests debug intrinsics, so we must explicitly disable records. 
+; RUN: opt < %s -S --write-experimental-debuginfo=false | FileCheck %s ; ModuleID = 'various_ir_values.c' source_filename = "various_ir_values.c" diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values.ll.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values.ll.expected index da58d0bf712e09..936377e1ceee58 100644 --- a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values.ll.expected +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values.ll.expected @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; Just run it through opt, no passes needed. -; RUN: opt < %s -S | FileCheck %s +; This tests debug intrinsics, so we must explicitly disable records. +; RUN: opt < %s -S --write-experimental-debuginfo=false | FileCheck %s ; ModuleID = 'various_ir_values.c' source_filename = "various_ir_values.c" diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values.ll.funcsig.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values.ll.funcsig.expected index 66f8ce7602f4d0..880442b7b06f1e 100644 --- a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values.ll.funcsig.expected +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values.ll.funcsig.expected @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature ; Just run it through opt, no passes needed. -; RUN: opt < %s -S | FileCheck %s +; This tests debug intrinsics, so we must explicitly disable records. 
+; RUN: opt < %s -S --write-experimental-debuginfo=false | FileCheck %s ; ModuleID = 'various_ir_values.c' source_filename = "various_ir_values.c" diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values.ll.funcsig.globals.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values.ll.funcsig.globals.expected index e52b93989c51b7..84f31cf7649577 100644 --- a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values.ll.funcsig.globals.expected +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values.ll.funcsig.globals.expected @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-globals ; Just run it through opt, no passes needed. -; RUN: opt < %s -S | FileCheck %s +; This tests debug intrinsics, so we must explicitly disable records. +; RUN: opt < %s -S --write-experimental-debuginfo=false | FileCheck %s ; ModuleID = 'various_ir_values.c' source_filename = "various_ir_values.c" diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values.ll.funcsig.noglobals.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values.ll.funcsig.noglobals.expected index da58d0bf712e09..936377e1ceee58 100644 --- a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values.ll.funcsig.noglobals.expected +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values.ll.funcsig.noglobals.expected @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; Just run it through opt, no passes needed. -; RUN: opt < %s -S | FileCheck %s +; This tests debug intrinsics, so we must explicitly disable records. 
+; RUN: opt < %s -S --write-experimental-debuginfo=false | FileCheck %s ; ModuleID = 'various_ir_values.c' source_filename = "various_ir_values.c" diff --git a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values.ll.funcsig.transitiveglobals.expected b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values.ll.funcsig.transitiveglobals.expected index f7508bd1bbf61f..9464377d4016d2 100644 --- a/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values.ll.funcsig.transitiveglobals.expected +++ b/llvm/test/tools/UpdateTestChecks/update_test_checks/Inputs/various_ir_values.ll.funcsig.transitiveglobals.expected @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals smart ; Just run it through opt, no passes needed. -; RUN: opt < %s -S | FileCheck %s +; This tests debug intrinsics, so we must explicitly disable records. +; RUN: opt < %s -S --write-experimental-debuginfo=false | FileCheck %s ; ModuleID = 'various_ir_values.c' source_filename = "various_ir_values.c" diff --git a/mlir/test/Dialect/LLVMIR/di-expression-legalization.mlir b/mlir/test/Dialect/LLVMIR/di-expression-legalization.mlir index 60fbc8135be62d..9280154ad557a7 100644 --- a/mlir/test/Dialect/LLVMIR/di-expression-legalization.mlir +++ b/mlir/test/Dialect/LLVMIR/di-expression-legalization.mlir @@ -30,13 +30,13 @@ llvm.func @merge_fragments(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: !llvm.ptr) { // CHECK-OPT: #llvm.di_expression<[DW_OP_deref, DW_OP_LLVM_fragment(32, 32)]> - // CHECK-TRANSLATE: !DIExpression(DW_OP_deref, DW_OP_LLVM_fragment, 32, 32)) + // CHECK-TRANSLATE: !DIExpression(DW_OP_deref, DW_OP_LLVM_fragment, 32, 32), llvm.intr.dbg.value #var0 #llvm.di_expression<[DW_OP_deref, DW_OP_LLVM_fragment(32, 32)]> = %arg0 : !llvm.ptr loc(fused<#di_subprogram>[#loc]) // CHECK-OPT: #llvm.di_expression<[DW_OP_deref, DW_OP_LLVM_fragment(64, 32)]> - // CHECK-TRANSLATE: 
!DIExpression(DW_OP_deref, DW_OP_LLVM_fragment, 64, 32)) + // CHECK-TRANSLATE: !DIExpression(DW_OP_deref, DW_OP_LLVM_fragment, 64, 32), llvm.intr.dbg.value #var1 #llvm.di_expression<[DW_OP_deref, DW_OP_LLVM_fragment(32, 32), DW_OP_LLVM_fragment(32, 64)]> = %arg1 : !llvm.ptr loc(fused<#di_subprogram>[#loc]) // CHECK-OPT: #llvm.di_expression<[DW_OP_deref, DW_OP_LLVM_fragment(96, 32)]> - // CHECK-TRANSLATE: !DIExpression(DW_OP_deref, DW_OP_LLVM_fragment, 96, 32)) + // CHECK-TRANSLATE: !DIExpression(DW_OP_deref, DW_OP_LLVM_fragment, 96, 32), llvm.intr.dbg.value #var2 #llvm.di_expression<[DW_OP_deref, DW_OP_LLVM_fragment(32, 32), DW_OP_LLVM_fragment(32, 64), DW_OP_LLVM_fragment(32, 96)]> = %arg2 : !llvm.ptr loc(fused<#di_subprogram>[#loc]) llvm.return } diff --git a/polly/test/CodeGen/debug-intrinsics.ll b/polly/test/CodeGen/debug-intrinsics.ll index 25c63da4891ce1..65fa6780d9720e 100644 --- a/polly/test/CodeGen/debug-intrinsics.ll +++ b/polly/test/CodeGen/debug-intrinsics.ll @@ -42,11 +42,11 @@ for.end: ; preds = %for.cond.for.end_cr ; CHECK: polly.split_new_and_old: -; CHECK: tail call void @llvm.dbg.value -; CHECK: tail call void @llvm.dbg.value -; CHECK: tail call void @llvm.dbg.value -; CHECK: tail call void @llvm.dbg.value -; CHECK-NOT: tail call void @llvm.dbg.value +; CHECK: #dbg_value +; CHECK: #dbg_value +; CHECK: #dbg_value +; CHECK: #dbg_value +; CHECK-NOT: #dbg_value ; Function Attrs: nounwind readnone declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 From 8e0ba08448d5935281e5afd007664d528dd672c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?kadir=20=C3=A7etinkaya?= Date: Fri, 14 Jun 2024 16:07:42 +0200 Subject: [PATCH 098/155] [clang][HeaderSearch] Fix handling of relative file-paths in suggestPathToFileForDiagnostics (#95121) Normalize header-to-be-spelled using WorkingDir, similar to search paths themselves. Addresses https://github.com/llvm/llvm-project/issues/81215. 
--- clang/lib/Lex/HeaderSearch.cpp | 2 ++ clang/unittests/Lex/HeaderSearchTest.cpp | 15 +++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp index 574723b33866af..d6da6c2fe6c0e9 100644 --- a/clang/lib/Lex/HeaderSearch.cpp +++ b/clang/lib/Lex/HeaderSearch.cpp @@ -2039,6 +2039,8 @@ std::string HeaderSearch::suggestPathToFileForDiagnostics( using namespace llvm::sys; llvm::SmallString<32> FilePath = File; + if (!WorkingDir.empty() && !path::is_absolute(FilePath)) + fs::make_absolute(WorkingDir, FilePath); // remove_dots switches to backslashes on windows as a side-effect! // We always want to suggest forward slashes for includes. // (not remove_dots(..., posix) as that misparses windows paths). diff --git a/clang/unittests/Lex/HeaderSearchTest.cpp b/clang/unittests/Lex/HeaderSearchTest.cpp index c578fa72c859e0..a5f193ef37ce8f 100644 --- a/clang/unittests/Lex/HeaderSearchTest.cpp +++ b/clang/unittests/Lex/HeaderSearchTest.cpp @@ -131,6 +131,21 @@ TEST_F(HeaderSearchTest, Dots) { "z"); } +TEST_F(HeaderSearchTest, RelativeDirs) { + ASSERT_FALSE(VFS->setCurrentWorkingDirectory("/root/some/dir")); + addSearchDir(".."); + EXPECT_EQ( + Search.suggestPathToFileForDiagnostics("/root/some/foo.h", + /*WorkingDir=*/"/root/some/dir", + /*MainFile=*/""), + "foo.h"); + EXPECT_EQ( + Search.suggestPathToFileForDiagnostics("../foo.h", + /*WorkingDir=*/"/root/some/dir", + /*MainFile=*/""), + "foo.h"); +} + #ifdef _WIN32 TEST_F(HeaderSearchTest, BackSlash) { addSearchDir("C:\\x\\y\\"); From b1b7643f5e9da2f64f78e024da2db208f78e0c42 Mon Sep 17 00:00:00 2001 From: Angel Zhang Date: Fri, 14 Jun 2024 10:12:00 -0400 Subject: [PATCH 099/155] [mlir][spirv] Add integration test for `vector.deinterleave` (#95469) This commit is dependent on #95313. 
--- .../vector-deinterleave.mlir | 72 +++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 mlir/test/mlir-vulkan-runner/vector-deinterleave.mlir diff --git a/mlir/test/mlir-vulkan-runner/vector-deinterleave.mlir b/mlir/test/mlir-vulkan-runner/vector-deinterleave.mlir new file mode 100644 index 00000000000000..1805452ce988f2 --- /dev/null +++ b/mlir/test/mlir-vulkan-runner/vector-deinterleave.mlir @@ -0,0 +1,72 @@ +// RUN: mlir-vulkan-runner %s \ +// RUN: --shared-libs=%vulkan-runtime-wrappers,%mlir_runner_utils \ +// RUN: --entry-point-result=void | FileCheck %s + +// CHECK: [0, 2] +// CHECK: [1, 3] +module attributes { + gpu.container_module, + spirv.target_env = #spirv.target_env< + #spirv.vce, #spirv.resource_limits<>> +} { + gpu.module @kernels { + gpu.func @kernel_vector_deinterleave(%arg0 : memref<4xi32>, %arg1 : memref<2xi32>, %arg2 : memref<2xi32>) + kernel attributes { spirv.entry_point_abi = #spirv.entry_point_abi} { + + %src = arith.constant dense<[0, 0, 0, 0]> : vector<4xi32> + + %val0 = memref.load %arg0[0] : memref<4xi32> + %val1 = memref.load %arg0[1] : memref<4xi32> + %val2 = memref.load %arg0[2] : memref<4xi32> + %val3 = memref.load %arg0[3] : memref<4xi32> + + %src0 = vector.insert %val0, %src[0] : i32 into vector<4xi32> + %src1 = vector.insert %val1, %src0[1] : i32 into vector<4xi32> + %src2 = vector.insert %val2, %src1[2] : i32 into vector<4xi32> + %src3 = vector.insert %val3, %src2[3] : i32 into vector<4xi32> + + %res0, %res1 = vector.deinterleave %src3 : vector<4xi32> -> vector<2xi32> + + %res0_0 = vector.extract %res0[0] : i32 from vector<2xi32> + %res0_1 = vector.extract %res0[1] : i32 from vector<2xi32> + %res1_0 = vector.extract %res1[0] : i32 from vector<2xi32> + %res1_1 = vector.extract %res1[1] : i32 from vector<2xi32> + + memref.store %res0_0, %arg1[0]: memref<2xi32> + memref.store %res0_1, %arg1[1]: memref<2xi32> + memref.store %res1_0, %arg2[0]: memref<2xi32> + memref.store %res1_1, %arg2[1]: memref<2xi32> + + 
gpu.return + } + } + + func.func @main() { + // Allocate 3 buffers. + %buf0 = memref.alloc() : memref<4xi32> + %buf1 = memref.alloc() : memref<2xi32> + %buf2 = memref.alloc() : memref<2xi32> + + // Initialize input buffer. + %buf0_vals = arith.constant dense<[0, 1, 2, 3]> : vector<4xi32> + vector.store %buf0_vals, %buf0[0] : memref<4xi32>, vector<4xi32> + + // Initialize output buffers. + %value0 = arith.constant 0 : i32 + %buf3 = memref.cast %buf1 : memref<2xi32> to memref + %buf4 = memref.cast %buf2 : memref<2xi32> to memref + call @fillResource1DInt(%buf3, %value0) : (memref, i32) -> () + call @fillResource1DInt(%buf4, %value0) : (memref, i32) -> () + + gpu.launch_func @kernels::@kernel_vector_deinterleave + blocks in (4, 1, 1) threads in (1, 1, 1) + args(%buf0 : memref<4xi32>, %buf1 : memref<2xi32>, %buf2 : memref<2xi32>) + %buf5 = memref.cast %buf3 : memref to memref<*xi32> + %buf6 = memref.cast %buf4 : memref to memref<*xi32> + call @printMemrefI32(%buf5) : (memref<*xi32>) -> () + call @printMemrefI32(%buf6) : (memref<*xi32>) -> () + return + } + func.func private @fillResource1DInt(%0 : memref, %1 : i32) + func.func private @printMemrefI32(%ptr : memref<*xi32>) +} From 43e6f46936e177e47de6627a74b047ba27561b44 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 14 Jun 2024 15:12:24 +0100 Subject: [PATCH 100/155] [VPlan] Pre-compute cost for all instrs only feeding exit conditions. 
This fixes the following buildbot failures after 90fd99c07957: https://lab.llvm.org/buildbot/#/builders/17/builds/47 https://lab.llvm.org/buildbot/#/builders/168/builds/37 --- .../Transforms/Vectorize/LoopVectorize.cpp | 24 +++++++++++++++---- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 5b652068a7ba9a..9fc068a0689268 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7357,16 +7357,30 @@ InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan, /// be a single condition to control the vector loop. SmallVector Exiting; CM.TheLoop->getExitingBlocks(Exiting); - // Add the cost of all exit conditions. + SetVector ExitInstrs; + // Collect all exit conditions. for (BasicBlock *EB : Exiting) { auto *Term = dyn_cast(EB->getTerminator()); if (!Term) continue; if (auto *CondI = dyn_cast(Term->getOperand(0))) { - assert(!CostCtx.SkipCostComputation.contains(CondI) && - "Condition already skipped?"); - CostCtx.SkipCostComputation.insert(CondI); - Cost += CostCtx.getLegacyCost(CondI, VF); + ExitInstrs.insert(CondI); + } + } + // Compute the cost of all instructions only feeding the exit conditions. 
+ for (unsigned I = 0; I != ExitInstrs.size(); ++I) { + Instruction *CondI = ExitInstrs[I]; + if (!OrigLoop->contains(CondI) || + !CostCtx.SkipCostComputation.insert(CondI).second) + continue; + Cost += CostCtx.getLegacyCost(CondI, VF); + for (Value *Op : CondI->operands()) { + auto *OpI = dyn_cast(Op); + if (!OpI || any_of(OpI->users(), [&ExitInstrs](User *U) { + return !ExitInstrs.contains(cast(U)); + })) + continue; + ExitInstrs.insert(OpI); } } From 597d2f7662c31cae4c8a54cc27e2ea12833380ea Mon Sep 17 00:00:00 2001 From: Tim Gymnich Date: Fri, 14 Jun 2024 16:35:23 +0200 Subject: [PATCH 101/155] [OpenMP] Add Environment Variable to disable Reuse of Blocks for High Loop Trip Counts (#89239) Sometimes it might be beneficial to spawn more thread blocks instead of reusing existing for multiple loop iterations. **Alternatives considered:** Make `DefaultNumBlocks` settable via an environment variable. --------- Co-authored-by: Joseph Huber --- .../common/include/PluginInterface.h | 9 +++++ .../common/src/PluginInterface.cpp | 5 ++- .../high_trip_count_block_limit.cpp | 35 +++++++++++++++++++ openmp/docs/design/Runtimes.rst | 7 ++++ 4 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 offload/test/offloading/high_trip_count_block_limit.cpp diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index 0d2a36a42d5fa9..973add0ba1000f 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -826,6 +826,12 @@ struct GenericDeviceTy : public DeviceAllocatorTy { return OMPX_MinThreadsForLowTripCount; } + /// Whether or not to reuse blocks for high trip count loops. + /// @see OMPX_ReuseBlocksForHighTripCount + bool getReuseBlocksForHighTripCount() { + return OMPX_ReuseBlocksForHighTripCount; + } + /// Get the total amount of hardware parallelism supported by the target /// device. 
This is the total amount of warps or wavefronts that can be /// resident on the device simultaneously. @@ -901,6 +907,9 @@ struct GenericDeviceTy : public DeviceAllocatorTy { UInt32Envar OMPX_MinThreadsForLowTripCount = UInt32Envar("LIBOMPTARGET_MIN_THREADS_FOR_LOW_TRIP_COUNT", 32); + BoolEnvar OMPX_ReuseBlocksForHighTripCount = + BoolEnvar("LIBOMPTARGET_REUSE_BLOCKS_FOR_HIGH_TRIP_COUNT", true); + protected: /// Environment variables defined by the LLVM OpenMP implementation /// regarding the initial number of streams and events. diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp index 94f9d4670b672f..118265973f3273 100644 --- a/offload/plugins-nextgen/common/src/PluginInterface.cpp +++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp @@ -701,8 +701,11 @@ uint64_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice, TripCountNumBlocks = LoopTripCount; } } + + uint32_t PreferredNumBlocks = TripCountNumBlocks; // If the loops are long running we rather reuse blocks than spawn too many. 
- uint32_t PreferredNumBlocks = std::min(TripCountNumBlocks, DefaultNumBlocks); + if (GenericDevice.getReuseBlocksForHighTripCount()) + PreferredNumBlocks = std::min(TripCountNumBlocks, DefaultNumBlocks); return std::min(PreferredNumBlocks, GenericDevice.getBlockLimit()); } diff --git a/offload/test/offloading/high_trip_count_block_limit.cpp b/offload/test/offloading/high_trip_count_block_limit.cpp new file mode 100644 index 00000000000000..d0e39274e27d6e --- /dev/null +++ b/offload/test/offloading/high_trip_count_block_limit.cpp @@ -0,0 +1,35 @@ +// clang-format off +// RUN: %libomptarget-compilexx-generic && env LIBOMPTARGET_REUSE_BLOCKS_FOR_HIGH_TRIP_COUNT=False %libomptarget-run-generic 2>&1 | %fcheck-generic +// RUN: %libomptarget-compilexx-generic && %libomptarget-run-generic 2>&1 | %fcheck-generic --check-prefix=DEFAULT + +// UNSUPPORTED: aarch64-unknown-linux-gnu +// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO +// UNSUPPORTED: x86_64-pc-linux-gnu +// UNSUPPORTED: x86_64-pc-linux-gnu-LTO +// UNSUPPORTED: s390x-ibm-linux-gnu +// UNSUPPORTED: s390x-ibm-linux-gnu-LTO +// clang-format on + +/* + Check if there is a thread for each loop iteration +*/ +#include +#include + +int main() { + int N = 819200; + int num_threads[N]; + +#pragma omp target teams distribute parallel for + for (int j = 0; j < N; j++) { + num_threads[j] = omp_get_num_threads() * omp_get_num_teams(); + } + + if (num_threads[0] == N) + // CHECK: PASS + printf("PASS\n"); + else + // DEFAULT: FAIL + printf("FAIL: num_threads: %d\n != N: %d", num_threads[0], N); + return 0; +} diff --git a/openmp/docs/design/Runtimes.rst b/openmp/docs/design/Runtimes.rst index f8a8cb87e83e66..98dd984fd4b0c5 100644 --- a/openmp/docs/design/Runtimes.rst +++ b/openmp/docs/design/Runtimes.rst @@ -742,6 +742,7 @@ variables is defined below. 
* ``LIBOMPTARGET_JIT_PRE_OPT_IR_MODULE= (LLVM-IR file)`` * ``LIBOMPTARGET_JIT_POST_OPT_IR_MODULE= (LLVM-IR file)`` * ``LIBOMPTARGET_MIN_THREADS_FOR_LOW_TRIP_COUNT= (default: 32)`` + * ``LIBOMPTARGET_REUSE_BLOCKS_FOR_HIGH_TRIP_COUNT=[TRUE/FALSE] (default TRUE)`` LIBOMPTARGET_DEBUG """""""""""""""""" @@ -1162,6 +1163,12 @@ of threads possible times the number of teams (aka. blocks) the device prefers count to increase outer (team/block) parallelism. The thread count will never be reduced below the value passed for this environment variable though. +LIBOMPTARGET_REUSE_BLOCKS_FOR_HIGH_TRIP_COUNT +""""""""""""""""""""""""""""""""""""""""""""" + +This environment variable can be used to control how the OpenMP runtime assigns +blocks to loops with high trip counts. By default we reuse existing blocks +rather than spawning new blocks. .. _libomptarget_plugin: From 7ad12a7c047a421400803eebae4cacc82b27be1d Mon Sep 17 00:00:00 2001 From: Farzon Lotfi <1802579+farzonl@users.noreply.github.com> Date: Fri, 14 Jun 2024 10:35:50 -0400 Subject: [PATCH 102/155] [ARM] Add tan intrinsic lowering (#95439) - `ARMISelLowering.cpp` - Add f16 type and neon and mve vector support for tan --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 5 + .../ARM/2011-11-29-128bitArithmetics.ll | 31 +++++ llvm/test/CodeGen/ARM/fp16-fullfp16.ll | 18 +++ llvm/test/CodeGen/ARM/fp16-promote.ll | 16 +++ llvm/test/CodeGen/ARM/vfloatintrinsics.ll | 21 ++++ .../CodeGen/Thumb2/float-intrinsics-double.ll | 9 ++ .../CodeGen/Thumb2/float-intrinsics-float.ll | 9 ++ llvm/test/CodeGen/Thumb2/mve-fmath.ll | 111 ++++++++++++++++++ 8 files changed, 220 insertions(+) diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 5a617968307ddf..ef3dc87777999d 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -365,6 +365,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) { setOperationAction(ISD::FSQRT, VT, Expand); 
setOperationAction(ISD::FSIN, VT, Expand); setOperationAction(ISD::FCOS, VT, Expand); + setOperationAction(ISD::FTAN, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); setOperationAction(ISD::FLOG, VT, Expand); setOperationAction(ISD::FLOG2, VT, Expand); @@ -875,6 +876,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSQRT, MVT::v2f64, Expand); setOperationAction(ISD::FSIN, MVT::v2f64, Expand); setOperationAction(ISD::FCOS, MVT::v2f64, Expand); + setOperationAction(ISD::FTAN, MVT::v2f64, Expand); setOperationAction(ISD::FPOW, MVT::v2f64, Expand); setOperationAction(ISD::FLOG, MVT::v2f64, Expand); setOperationAction(ISD::FLOG2, MVT::v2f64, Expand); @@ -897,6 +899,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); setOperationAction(ISD::FSIN, MVT::v4f32, Expand); setOperationAction(ISD::FCOS, MVT::v4f32, Expand); + setOperationAction(ISD::FTAN, MVT::v4f32, Expand); setOperationAction(ISD::FPOW, MVT::v4f32, Expand); setOperationAction(ISD::FLOG, MVT::v4f32, Expand); setOperationAction(ISD::FLOG2, MVT::v4f32, Expand); @@ -914,6 +917,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FSQRT, MVT::v2f32, Expand); setOperationAction(ISD::FSIN, MVT::v2f32, Expand); setOperationAction(ISD::FCOS, MVT::v2f32, Expand); + setOperationAction(ISD::FTAN, MVT::v2f32, Expand); setOperationAction(ISD::FPOW, MVT::v2f32, Expand); setOperationAction(ISD::FLOG, MVT::v2f32, Expand); setOperationAction(ISD::FLOG2, MVT::v2f32, Expand); @@ -1540,6 +1544,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FCOPYSIGN, MVT::f16, Expand); setOperationAction(ISD::FSIN, MVT::f16, Promote); setOperationAction(ISD::FCOS, MVT::f16, Promote); + setOperationAction(ISD::FTAN, MVT::f16, Promote); setOperationAction(ISD::FSINCOS, MVT::f16, Promote); setOperationAction(ISD::FPOWI, MVT::f16, Promote); 
setOperationAction(ISD::FPOW, MVT::f16, Promote); diff --git a/llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll b/llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll index e14e598086249e..b6ebeaae5eb6d2 100644 --- a/llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll +++ b/llvm/test/CodeGen/ARM/2011-11-29-128bitArithmetics.ll @@ -56,6 +56,37 @@ L.entry: declare <4 x float> @llvm.cos.v4f32(<4 x float>) nounwind readonly +define void @test_tan(ptr %X) nounwind { + +; CHECK-LABEL: test_tan: + +; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}} +; CHECK: movt [[reg0]], :upper16:{{.*}} +; CHECK: vld1.64 + +; CHECK: {{v?mov(.32)?}} r0, +; CHECK: bl {{.*}}tanf + +; CHECK: {{v?mov(.32)?}} r0, +; CHECK: bl {{.*}}tanf + +; CHECK: {{v?mov(.32)?}} r0, +; CHECK: bl {{.*}}tanf + +; CHECK: {{v?mov(.32)?}} r0, +; CHECK: bl {{.*}}tanf + +; CHECK: vst1.64 + +L.entry: + %0 = load <4 x float>, ptr @A, align 16 + %1 = call <4 x float> @llvm.tan.v4f32(<4 x float> %0) + store <4 x float> %1, ptr %X, align 16 + ret void +} + +declare <4 x float> @llvm.tan.v4f32(<4 x float>) nounwind readonly + define void @test_exp(ptr %X) nounwind { ; CHECK-LABEL: test_exp: diff --git a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll index 7381d517505e89..2656cdbb0347ed 100644 --- a/llvm/test/CodeGen/ARM/fp16-fullfp16.ll +++ b/llvm/test/CodeGen/ARM/fp16-fullfp16.ll @@ -281,6 +281,23 @@ define void @test_cos(ptr %p) { ret void } +define void @test_tan(ptr %p) { +; CHECK-LABEL: test_tan: +; CHECK: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: vldr.16 s0, [r0] +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 +; CHECK-NEXT: bl tanf +; CHECK-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-NEXT: vstr.16 s0, [r4] +; CHECK-NEXT: pop {r4, pc} + %a = load half, ptr %p, align 2 + %r = call half @llvm.tan.f16(half %a) + store half %r, ptr %p + ret void +} + define void @test_pow(ptr %p, ptr %q) { ; CHECK-LABEL: test_pow: ; CHECK: .save {r4, lr} @@ -588,6 +605,7 @@ 
declare half @llvm.sqrt.f16(half %a) declare half @llvm.powi.f16.i32(half %a, i32 %b) declare half @llvm.sin.f16(half %a) declare half @llvm.cos.f16(half %a) +declare half @llvm.tan.f16(half %a) declare half @llvm.pow.f16(half %a, half %b) declare half @llvm.exp.f16(half %a) declare half @llvm.exp2.f16(half %a) diff --git a/llvm/test/CodeGen/ARM/fp16-promote.ll b/llvm/test/CodeGen/ARM/fp16-promote.ll index 9c01129ff30d85..ae3b8f9920e3b9 100644 --- a/llvm/test/CodeGen/ARM/fp16-promote.ll +++ b/llvm/test/CodeGen/ARM/fp16-promote.ll @@ -393,6 +393,7 @@ declare half @llvm.sqrt.f16(half %a) #0 declare half @llvm.powi.f16.i32(half %a, i32 %b) #0 declare half @llvm.sin.f16(half %a) #0 declare half @llvm.cos.f16(half %a) #0 +declare half @llvm.tan.f16(half %a) #0 declare half @llvm.pow.f16(half %a, half %b) #0 declare half @llvm.exp.f16(half %a) #0 declare half @llvm.exp2.f16(half %a) #0 @@ -472,6 +473,21 @@ define void @test_cos(ptr %p) #0 { ret void } +; CHECK-FP16-LABEL: test_tan: +; CHECK-FP16: vcvtb.f32.f16 +; CHECK-FP16: bl tanf +; CHECK-FP16: vcvtb.f16.f32 +; CHECK-LIBCALL-LABEL: test_tan: +; CHECK-LIBCALL: bl __aeabi_h2f +; CHECK-LIBCALL: bl tanf +; CHECK-LIBCALL: bl __aeabi_f2h +define void @test_tan(ptr %p) #0 { + %a = load half, ptr %p, align 2 + %r = call half @llvm.tan.f16(half %a) + store half %r, ptr %p + ret void +} + ; CHECK-FP16-LABEL: test_pow: ; CHECK-FP16: vcvtb.f32.f16 ; CHECK-FP16: vcvtb.f32.f16 diff --git a/llvm/test/CodeGen/ARM/vfloatintrinsics.ll b/llvm/test/CodeGen/ARM/vfloatintrinsics.ll index 028bb76c3d4353..74782d44c74231 100644 --- a/llvm/test/CodeGen/ARM/vfloatintrinsics.ll +++ b/llvm/test/CodeGen/ARM/vfloatintrinsics.ll @@ -29,6 +29,12 @@ define %v2f32 @test_v2f32.cos(%v2f32 %a) { %1 = call %v2f32 @llvm.cos.v2f32(%v2f32 %a) ret %v2f32 %1 } +; CHECK-LABEL: test_v2f32.tan:{{.*}} +define %v2f32 @test_v2f32.tan(%v2f32 %a) { + ; CHECK: tan + %1 = call %v2f32 @llvm.tan.v2f32(%v2f32 %a) + ret %v2f32 %1 +} ; CHECK-LABEL: test_v2f32.pow:{{.*}} 
define %v2f32 @test_v2f32.pow(%v2f32 %a, %v2f32 %b) { ; CHECK: pow @@ -112,6 +118,7 @@ declare %v2f32 @llvm.sqrt.v2f32(%v2f32) #0 declare %v2f32 @llvm.powi.v2f32.i32(%v2f32, i32) #0 declare %v2f32 @llvm.sin.v2f32(%v2f32) #0 declare %v2f32 @llvm.cos.v2f32(%v2f32) #0 +declare %v2f32 @llvm.tan.v2f32(%v2f32) #0 declare %v2f32 @llvm.pow.v2f32(%v2f32, %v2f32) #0 declare %v2f32 @llvm.exp.v2f32(%v2f32) #0 declare %v2f32 @llvm.exp2.v2f32(%v2f32) #0 @@ -153,6 +160,12 @@ define %v4f32 @test_v4f32.cos(%v4f32 %a) { %1 = call %v4f32 @llvm.cos.v4f32(%v4f32 %a) ret %v4f32 %1 } +; CHECK-LABEL: test_v4f32.tan:{{.*}} +define %v4f32 @test_v4f32.tan(%v4f32 %a) { + ; CHECK: tan + %1 = call %v4f32 @llvm.tan.v4f32(%v4f32 %a) + ret %v4f32 %1 +} ; CHECK-LABEL: test_v4f32.pow:{{.*}} define %v4f32 @test_v4f32.pow(%v4f32 %a, %v4f32 %b) { ; CHECK: pow @@ -236,6 +249,7 @@ declare %v4f32 @llvm.sqrt.v4f32(%v4f32) #0 declare %v4f32 @llvm.powi.v4f32.i32(%v4f32, i32) #0 declare %v4f32 @llvm.sin.v4f32(%v4f32) #0 declare %v4f32 @llvm.cos.v4f32(%v4f32) #0 +declare %v4f32 @llvm.tan.v4f32(%v4f32) #0 declare %v4f32 @llvm.pow.v4f32(%v4f32, %v4f32) #0 declare %v4f32 @llvm.exp.v4f32(%v4f32) #0 declare %v4f32 @llvm.exp2.v4f32(%v4f32) #0 @@ -277,6 +291,12 @@ define %v2f64 @test_v2f64.cos(%v2f64 %a) { %1 = call %v2f64 @llvm.cos.v2f64(%v2f64 %a) ret %v2f64 %1 } +; CHECK-LABEL: test_v2f64.tan:{{.*}} +define %v2f64 @test_v2f64.tan(%v2f64 %a) { + ; CHECK: tan + %1 = call %v2f64 @llvm.tan.v2f64(%v2f64 %a) + ret %v2f64 %1 +} ; CHECK-LABEL: test_v2f64.pow:{{.*}} define %v2f64 @test_v2f64.pow(%v2f64 %a, %v2f64 %b) { ; CHECK: pow @@ -361,6 +381,7 @@ declare %v2f64 @llvm.sqrt.v2f64(%v2f64) #0 declare %v2f64 @llvm.powi.v2f64.i32(%v2f64, i32) #0 declare %v2f64 @llvm.sin.v2f64(%v2f64) #0 declare %v2f64 @llvm.cos.v2f64(%v2f64) #0 +declare %v2f64 @llvm.tan.v2f64(%v2f64) #0 declare %v2f64 @llvm.pow.v2f64(%v2f64, %v2f64) #0 declare %v2f64 @llvm.exp.v2f64(%v2f64) #0 declare %v2f64 @llvm.exp2.v2f64(%v2f64) #0 diff --git 
a/llvm/test/CodeGen/Thumb2/float-intrinsics-double.ll b/llvm/test/CodeGen/Thumb2/float-intrinsics-double.ll index 70a5939865b7b8..7f5da368869392 100644 --- a/llvm/test/CodeGen/Thumb2/float-intrinsics-double.ll +++ b/llvm/test/CodeGen/Thumb2/float-intrinsics-double.ll @@ -41,6 +41,15 @@ define double @cos_d(double %a) { ret double %1 } +declare double @llvm.tan.f64(double %Val) +define double @tan_d(double %a) { +; CHECK-LABEL: tan_d: +; SOFT: {{(bl|b)}} tan +; HARD: b tan + %1 = call double @llvm.tan.f64(double %a) + ret double %1 +} + declare double @llvm.pow.f64(double %Val, double %power) define double @pow_d(double %a, double %b) { ; CHECK-LABEL: pow_d: diff --git a/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll b/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll index b6b891edd0461a..94ba9b218a0721 100644 --- a/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll +++ b/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll @@ -42,6 +42,15 @@ define float @cos_f(float %a) { ret float %1 } +declare float @llvm.tan.f32(float %Val) +define float @tan_f(float %a) { +; CHECK-LABEL: tan_f: +; SOFT: bl tanf +; HARD: b tanf + %1 = call float @llvm.tan.f32(float %a) + ret float %1 +} + declare float @llvm.pow.f32(float %Val, float %power) define float @pow_f(float %a, float %b) { ; CHECK-LABEL: pow_f: diff --git a/llvm/test/CodeGen/Thumb2/mve-fmath.ll b/llvm/test/CodeGen/Thumb2/mve-fmath.ll index c299b62a4c9429..d747da76a45fae 100644 --- a/llvm/test/CodeGen/Thumb2/mve-fmath.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fmath.ll @@ -288,6 +288,117 @@ entry: ret <2 x double> %0 } +define arm_aapcs_vfpcc <4 x float> @tan_float32_t(<4 x float> %src) { +; CHECK-LABEL: tan_float32_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vmov r0, r4, d9 +; CHECK-NEXT: bl tanf +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: mov r0, r4 +; 
CHECK-NEXT: bl tanf +; CHECK-NEXT: vmov r4, r1, d8 +; CHECK-NEXT: vmov s19, r0 +; CHECK-NEXT: vmov s18, r5 +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: bl tanf +; CHECK-NEXT: vmov s17, r0 +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: bl tanf +; CHECK-NEXT: vmov s16, r0 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r4, r5, r7, pc} +entry: + %0 = call fast <4 x float> @llvm.tan.v4f32(<4 x float> %src) + ret <4 x float> %0 +} + +define arm_aapcs_vfpcc <8 x half> @tan_float16_t(<8 x half> %src) { +; CHECK-LABEL: tan_float16_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vcvtb.f32.f16 s0, s16 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bl tanf +; CHECK-NEXT: vcvtt.f32.f16 s0, s16 +; CHECK-NEXT: vmov s16, r0 +; CHECK-NEXT: vmov r1, s0 +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: bl tanf +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: vcvtb.f16.f32 s20, s16 +; CHECK-NEXT: vcvtt.f16.f32 s20, s0 +; CHECK-NEXT: vcvtb.f32.f16 s0, s17 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bl tanf +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: vcvtb.f16.f32 s21, s0 +; CHECK-NEXT: vcvtt.f32.f16 s0, s17 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bl tanf +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: vcvtt.f16.f32 s21, s0 +; CHECK-NEXT: vcvtb.f32.f16 s0, s18 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bl tanf +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: vcvtb.f16.f32 s22, s0 +; CHECK-NEXT: vcvtt.f32.f16 s0, s18 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bl tanf +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: vcvtt.f16.f32 s22, s0 +; CHECK-NEXT: vcvtb.f32.f16 s0, s19 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bl tanf +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: vcvtb.f16.f32 s23, s0 +; CHECK-NEXT: vcvtt.f32.f16 s0, s19 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: bl tanf +; CHECK-NEXT: vmov s0, r0 +; CHECK-NEXT: vcvtt.f16.f32 s23, s0 +; CHECK-NEXT: 
vmov q0, q5 +; CHECK-NEXT: vpop {d8, d9, d10, d11} +; CHECK-NEXT: pop {r7, pc} +entry: + %0 = call fast <8 x half> @llvm.tan.v8f16(<8 x half> %src) + ret <8 x half> %0 +} + +define arm_aapcs_vfpcc <2 x double> @tan_float64_t(<2 x double> %src) { +; CHECK-LABEL: tan_float64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: bl tan +; CHECK-NEXT: vmov r2, r3, d8 +; CHECK-NEXT: vmov d9, r0, r1 +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: bl tan +; CHECK-NEXT: vmov d8, r0, r1 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r7, pc} +entry: + %0 = call fast <2 x double> @llvm.tan.v2f64(<2 x double> %src) + ret <2 x double> %0 +} + define arm_aapcs_vfpcc <4 x float> @exp_float32_t(<4 x float> %src) { ; CHECK-LABEL: exp_float32_t: ; CHECK: @ %bb.0: @ %entry From 0774000e3294849206aac4e18adf27286b17e217 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 14 Jun 2024 16:38:57 +0200 Subject: [PATCH 103/155] [AMDGPULowerBufferFatPointers] Fix offset-only ptrtoint (#95543) For ptrtoint that truncates to the offset only, the expansion generated a shift by the bit width, which is poison. Instead, we should return the offset directly. (The same problem exists for the constant expression case, but I plan to address that separately, and more comprehensively.) 
--- .../AMDGPU/AMDGPULowerBufferFatPointers.cpp | 30 +- .../buffer-fat-pointer-atomicrmw-fadd.ll | 1203 +++++++++------- .../buffer-fat-pointer-atomicrmw-fmax.ll | 1235 ++++++++++------- .../buffer-fat-pointer-atomicrmw-fmin.ll | 1235 ++++++++++------- .../lower-buffer-fat-pointers-pointer-ops.ll | 3 +- 5 files changed, 2165 insertions(+), 1541 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp index 0b261d8e33907c..dfe05837673138 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp @@ -1435,20 +1435,22 @@ PtrParts SplitPtrStructs::visitPtrToIntInst(PtrToIntInst &PI) { const DataLayout &DL = PI.getModule()->getDataLayout(); unsigned FatPtrWidth = DL.getPointerSizeInBits(AMDGPUAS::BUFFER_FAT_POINTER); - Value *RsrcInt; - if (Width <= BufferOffsetWidth) - RsrcInt = ConstantExpr::getIntegerValue(ResTy, APInt::getZero(Width)); - else - RsrcInt = IRB.CreatePtrToInt(Rsrc, ResTy, PI.getName() + ".rsrc"); - copyMetadata(RsrcInt, &PI); - - Value *Shl = IRB.CreateShl( - RsrcInt, - ConstantExpr::getIntegerValue(ResTy, APInt(Width, BufferOffsetWidth)), "", - Width >= FatPtrWidth, Width > FatPtrWidth); - Value *OffCast = - IRB.CreateIntCast(Off, ResTy, /*isSigned=*/false, PI.getName() + ".off"); - Value *Res = IRB.CreateOr(Shl, OffCast); + Value *Res; + if (Width <= BufferOffsetWidth) { + Res = IRB.CreateIntCast(Off, ResTy, /*isSigned=*/false, + PI.getName() + ".off"); + } else { + Value *RsrcInt = IRB.CreatePtrToInt(Rsrc, ResTy, PI.getName() + ".rsrc"); + Value *Shl = IRB.CreateShl( + RsrcInt, + ConstantExpr::getIntegerValue(ResTy, APInt(Width, BufferOffsetWidth)), + "", Width >= FatPtrWidth, Width > FatPtrWidth); + Value *OffCast = IRB.CreateIntCast(Off, ResTy, /*isSigned=*/false, + PI.getName() + ".off"); + Res = IRB.CreateOr(Shl, OffCast); + } + + copyMetadata(Res, &PI); Res->takeName(&PI); 
SplitUsers.insert(&PI); PI.replaceAllUsesWith(Res); diff --git a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll index 2f4606035376d3..b81730803d4a97 100644 --- a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll @@ -1814,22 +1814,27 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset(ptr addrspace(7) i ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_addk_co_i32 s4, 0x200 -; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX12-NEXT: s_and_b32 s4, s4, -4 -; GFX12-NEXT: v_mov_b32_e32 v5, s4 -; GFX12-NEXT: s_mov_b32 s4, 0 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1) +; GFX12-NEXT: s_and_b32 s5, s4, -4 +; GFX12-NEXT: s_and_b32 s4, s4, 3 +; GFX12-NEXT: v_mov_b32_e32 v5, s5 +; GFX12-NEXT: s_lshl_b32 s4, s4, 3 +; GFX12-NEXT: s_lshl_b32 s5, 0xffff, s4 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-NEXT: s_not_b32 s6, s5 ; GFX12-NEXT: buffer_load_b32 v2, v5, s[0:3], null offen +; GFX12-NEXT: s_mov_b32 s5, 0 ; GFX12-NEXT: .LBB6_1: ; %atomicrmw.start ; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: v_lshrrev_b32_e32 v1, 24, v2 +; GFX12-NEXT: v_lshrrev_b32_e32 v1, s4, v2 ; GFX12-NEXT: s_wait_storecnt 0x0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-NEXT: v_add_f16_e32 v1, v1, v0 ; GFX12-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX12-NEXT: v_lshlrev_b32_e32 v1, 24, v1 -; GFX12-NEXT: v_and_or_b32 v1, 0xffffff, v2, v1 +; GFX12-NEXT: v_lshlrev_b32_e32 v1, s4, v1 +; GFX12-NEXT: v_and_or_b32 v1, v2, s6, v1 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_dual_mov_b32 v4, v2 :: v_dual_mov_b32 v3, 
v1 ; GFX12-NEXT: buffer_atomic_cmpswap_b32 v[3:4], v5, s[0:3], null offen th:TH_ATOMIC_RETURN @@ -1837,31 +1842,36 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset(ptr addrspace(7) i ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v2 ; GFX12-NEXT: v_mov_b32_e32 v2, v3 -; GFX12-NEXT: s_or_b32 s4, vcc_lo, s4 +; GFX12-NEXT: s_or_b32 s5, vcc_lo, s5 ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 +; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s5 ; GFX12-NEXT: s_cbranch_execnz .LBB6_1 ; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 -; GFX12-NEXT: v_lshrrev_b32_e32 v0, 24, v3 +; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; GFX12-NEXT: v_lshrrev_b32_e32 v0, s4, v3 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: s_addk_i32 s4, 0x200 -; GFX940-NEXT: s_and_b32 s4, s4, -4 -; GFX940-NEXT: v_mov_b32_e32 v1, s4 +; GFX940-NEXT: s_and_b32 s5, s4, -4 +; GFX940-NEXT: v_mov_b32_e32 v1, s5 ; GFX940-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen -; GFX940-NEXT: s_mov_b32 s6, 0xffffff +; GFX940-NEXT: s_and_b32 s4, s4, 3 +; GFX940-NEXT: s_lshl_b32 s6, s4, 3 +; GFX940-NEXT: s_lshl_b32 s4, 0xffff, s6 +; GFX940-NEXT: s_not_b32 s7, s4 ; GFX940-NEXT: s_mov_b64 s[4:5], 0 ; GFX940-NEXT: .LBB6_1: ; %atomicrmw.start ; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_add_f16_sdwa v2, v3, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: v_and_or_b32 v2, v3, s6, v2 +; GFX940-NEXT: v_lshrrev_b32_e32 v2, s6, v3 +; GFX940-NEXT: v_add_f16_e32 v2, v2, v0 +; GFX940-NEXT: v_lshlrev_b32_e32 v2, s6, v2 +; GFX940-NEXT: v_and_or_b32 v2, v3, s7, v2 ; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[2:3] +; GFX940-NEXT: 
buffer_wbl2 sc1 ; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v1, s[0:3], 0 offen sc0 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: buffer_inv sc1 @@ -1872,30 +1882,34 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset(ptr addrspace(7) i ; GFX940-NEXT: s_cbranch_execnz .LBB6_1 ; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX940-NEXT: v_lshrrev_b32_e32 v0, s6, v4 ; GFX940-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_addk_i32 s4, 0x200 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX11-NEXT: s_and_b32 s4, s4, -4 -; GFX11-NEXT: v_mov_b32_e32 v5, s4 -; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_and_b32 s5, s4, -4 +; GFX11-NEXT: s_and_b32 s4, s4, 3 +; GFX11-NEXT: v_mov_b32_e32 v5, s5 +; GFX11-NEXT: s_lshl_b32 s4, s4, 3 +; GFX11-NEXT: s_lshl_b32 s5, 0xffff, s4 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_not_b32 s6, s5 ; GFX11-NEXT: buffer_load_b32 v2, v5, s[0:3], 0 offen -; GFX11-NEXT: .p2align 6 +; GFX11-NEXT: s_mov_b32 s5, 0 ; GFX11-NEXT: .LBB6_1: ; %atomicrmw.start ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_lshrrev_b32_e32 v1, 24, v2 +; GFX11-NEXT: v_lshrrev_b32_e32 v1, s4, v2 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_add_f16_e32 v1, v1, v0 ; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_lshlrev_b32_e32 v1, 24, v1 -; GFX11-NEXT: v_and_or_b32 v1, 0xffffff, v2, v1 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, s4, v1 +; GFX11-NEXT: 
v_and_or_b32 v1, v2, s6, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_dual_mov_b32 v4, v2 :: v_dual_mov_b32 v3, v1 ; GFX11-NEXT: buffer_atomic_cmpswap_b32 v[3:4], v5, s[0:3], 0 offen glc @@ -1904,31 +1918,35 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset(ptr addrspace(7) i ; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v2 ; GFX11-NEXT: v_mov_b32_e32 v2, v3 -; GFX11-NEXT: s_or_b32 s4, vcc_lo, s4 +; GFX11-NEXT: s_or_b32 s5, vcc_lo, s5 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 +; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s5 ; GFX11-NEXT: s_cbranch_execnz .LBB6_1 ; GFX11-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s4 -; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v3 +; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; GFX11-NEXT: v_lshrrev_b32_e32 v0, s4, v3 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_addk_i32 s8, 0x200 -; GFX10-NEXT: s_and_b32 s8, s8, -4 -; GFX10-NEXT: v_mov_b32_e32 v5, s8 -; GFX10-NEXT: s_mov_b32 s8, 0 +; GFX10-NEXT: s_and_b32 s9, s8, -4 +; GFX10-NEXT: s_and_b32 s8, s8, 3 +; GFX10-NEXT: v_mov_b32_e32 v5, s9 +; GFX10-NEXT: s_lshl_b32 s8, s8, 3 +; GFX10-NEXT: s_lshl_b32 s9, 0xffff, s8 +; GFX10-NEXT: s_not_b32 s10, s9 ; GFX10-NEXT: buffer_load_dword v2, v5, s[4:7], 0 offen +; GFX10-NEXT: s_mov_b32 s9, 0 ; GFX10-NEXT: .LBB6_1: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_add_f16_sdwa v1, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX10-NEXT: v_lshrrev_b32_e32 v1, s8, v2 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v1 -; GFX10-NEXT: v_and_or_b32 v1, 0xffffff, v2, v1 +; GFX10-NEXT: 
v_add_f16_e32 v1, v1, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GFX10-NEXT: v_and_or_b32 v1, v2, s10, v1 ; GFX10-NEXT: v_mov_b32_e32 v4, v2 ; GFX10-NEXT: v_mov_b32_e32 v3, v1 ; GFX10-NEXT: buffer_atomic_cmpswap v[3:4], v5, s[4:7], 0 offen glc @@ -1937,28 +1955,33 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset(ptr addrspace(7) i ; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v3, v2 ; GFX10-NEXT: v_mov_b32_e32 v2, v3 -; GFX10-NEXT: s_or_b32 s8, vcc_lo, s8 -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s8 +; GFX10-NEXT: s_or_b32 s9, vcc_lo, s9 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s9 ; GFX10-NEXT: s_cbranch_execnz .LBB6_1 ; GFX10-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8 -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v3 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s9 +; GFX10-NEXT: v_lshrrev_b32_e32 v0, s8, v3 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: s_addk_i32 s8, 0x200 -; GFX90A-NEXT: s_and_b32 s8, s8, -4 -; GFX90A-NEXT: v_mov_b32_e32 v1, s8 +; GFX90A-NEXT: s_and_b32 s9, s8, -4 +; GFX90A-NEXT: v_mov_b32_e32 v1, s9 ; GFX90A-NEXT: buffer_load_dword v3, v1, s[4:7], 0 offen -; GFX90A-NEXT: s_mov_b32 s10, 0xffffff +; GFX90A-NEXT: s_and_b32 s8, s8, 3 +; GFX90A-NEXT: s_lshl_b32 s10, s8, 3 +; GFX90A-NEXT: s_lshl_b32 s8, 0xffff, s10 +; GFX90A-NEXT: s_not_b32 s11, s8 ; GFX90A-NEXT: s_mov_b64 s[8:9], 0 ; GFX90A-NEXT: .LBB6_1: ; %atomicrmw.start ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: v_add_f16_sdwa v2, v3, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD -; GFX90A-NEXT: v_and_or_b32 v2, v3, s10, v2 +; GFX90A-NEXT: v_lshrrev_b32_e32 v2, s10, v3 +; GFX90A-NEXT: v_add_f16_e32 v2, v2, v0 +; GFX90A-NEXT: 
v_lshlrev_b32_e32 v2, s10, v2 +; GFX90A-NEXT: v_and_or_b32 v2, v3, s11, v2 ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1] ; GFX90A-NEXT: buffer_atomic_cmpswap v[4:5], v1, s[4:7], 0 offen glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) @@ -1970,23 +1993,28 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset(ptr addrspace(7) i ; GFX90A-NEXT: s_cbranch_execnz .LBB6_1 ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX90A-NEXT: v_lshrrev_b32_e32 v0, s10, v4 ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: s_addk_i32 s8, 0x200 -; GFX908-NEXT: s_and_b32 s8, s8, -4 -; GFX908-NEXT: v_mov_b32_e32 v5, s8 +; GFX908-NEXT: s_and_b32 s9, s8, -4 +; GFX908-NEXT: v_mov_b32_e32 v5, s9 ; GFX908-NEXT: buffer_load_dword v2, v5, s[4:7], 0 offen -; GFX908-NEXT: s_mov_b32 s10, 0xffffff +; GFX908-NEXT: s_and_b32 s8, s8, 3 +; GFX908-NEXT: s_lshl_b32 s10, s8, 3 +; GFX908-NEXT: s_lshl_b32 s8, 0xffff, s10 +; GFX908-NEXT: s_not_b32 s11, s8 ; GFX908-NEXT: s_mov_b64 s[8:9], 0 ; GFX908-NEXT: .LBB6_1: ; %atomicrmw.start ; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_add_f16_sdwa v1, v2, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD -; GFX908-NEXT: v_and_or_b32 v1, v2, s10, v1 +; GFX908-NEXT: v_lshrrev_b32_e32 v1, s10, v2 +; GFX908-NEXT: v_add_f16_e32 v1, v1, v0 +; GFX908-NEXT: v_lshlrev_b32_e32 v1, s10, v1 +; GFX908-NEXT: v_and_or_b32 v1, v2, s11, v1 ; GFX908-NEXT: v_mov_b32_e32 v4, v2 ; GFX908-NEXT: v_mov_b32_e32 v3, v1 ; GFX908-NEXT: buffer_atomic_cmpswap v[3:4], v5, s[4:7], 0 offen glc @@ -1999,22 +2027,28 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset(ptr addrspace(7) i ; GFX908-NEXT: s_cbranch_execnz .LBB6_1 ; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end ; 
GFX908-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX908-NEXT: v_lshrrev_b32_e32 v0, 24, v3 +; GFX908-NEXT: v_lshrrev_b32_e32 v0, s10, v3 ; GFX908-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_addk_i32 s8, 0x200 -; GFX8-NEXT: s_and_b32 s8, s8, -4 -; GFX8-NEXT: v_mov_b32_e32 v5, s8 +; GFX8-NEXT: s_and_b32 s9, s8, -4 +; GFX8-NEXT: v_mov_b32_e32 v5, s9 ; GFX8-NEXT: buffer_load_dword v2, v5, s[4:7], 0 offen +; GFX8-NEXT: s_and_b32 s8, s8, 3 +; GFX8-NEXT: s_lshl_b32 s10, s8, 3 +; GFX8-NEXT: s_lshl_b32 s8, 0xffff, s10 +; GFX8-NEXT: s_not_b32 s11, s8 ; GFX8-NEXT: s_mov_b64 s[8:9], 0 ; GFX8-NEXT: .LBB6_1: ; %atomicrmw.start ; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_add_f16_sdwa v1, v2, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_e32 v3, 0xffffff, v2 +; GFX8-NEXT: v_lshrrev_b32_e32 v1, s10, v2 +; GFX8-NEXT: v_add_f16_e32 v1, v1, v0 +; GFX8-NEXT: v_and_b32_e32 v3, s11, v2 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, s10, v1 ; GFX8-NEXT: v_or_b32_e32 v1, v3, v1 ; GFX8-NEXT: v_mov_b32_e32 v4, v2 ; GFX8-NEXT: v_mov_b32_e32 v3, v1 @@ -2028,28 +2062,32 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset(ptr addrspace(7) i ; GFX8-NEXT: s_cbranch_execnz .LBB6_1 ; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX8-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v3 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, s10, v3 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: s_addk_i32 s8, 0x200 -; GFX7-NEXT: s_and_b32 s8, s8, -4 -; GFX7-NEXT: v_mov_b32_e32 v4, s8 +; GFX7-NEXT: s_and_b32 s9, s8, -4 +; GFX7-NEXT: v_mov_b32_e32 v4, s9 ; GFX7-NEXT: buffer_load_dword v1, v4, s[4:7], 0 offen ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, 
v0 -; GFX7-NEXT: s_mov_b64 s[8:9], 0 +; GFX7-NEXT: s_and_b32 s8, s8, 3 +; GFX7-NEXT: s_lshl_b32 s10, s8, 3 +; GFX7-NEXT: s_lshl_b32 s8, 0xffff, s10 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v0 +; GFX7-NEXT: s_not_b32 s11, s8 +; GFX7-NEXT: s_mov_b64 s[8:9], 0 ; GFX7-NEXT: .LBB6_1: ; %atomicrmw.start ; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, s10, v1 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_and_b32_e32 v2, 0xffffff, v1 +; GFX7-NEXT: v_and_b32_e32 v2, s11, v1 ; GFX7-NEXT: v_add_f32_e32 v0, v0, v5 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, s10, v0 ; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX7-NEXT: v_mov_b32_e32 v3, v1 ; GFX7-NEXT: v_mov_b32_e32 v2, v0 @@ -2063,7 +2101,7 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset(ptr addrspace(7) i ; GFX7-NEXT: s_cbranch_execnz .LBB6_1 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, s10, v2 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; @@ -2071,22 +2109,26 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset(ptr addrspace(7) i ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: s_addk_i32 s8, 0x200 -; GFX6-NEXT: s_and_b32 s8, s8, -4 -; GFX6-NEXT: v_mov_b32_e32 v4, s8 +; GFX6-NEXT: s_and_b32 s9, s8, -4 +; GFX6-NEXT: v_mov_b32_e32 v4, s9 ; GFX6-NEXT: buffer_load_dword v1, v4, s[4:7], 0 offen ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-NEXT: s_mov_b64 s[8:9], 0 +; GFX6-NEXT: s_and_b32 s8, s8, 3 +; GFX6-NEXT: s_lshl_b32 s10, s8, 3 +; GFX6-NEXT: s_lshl_b32 s8, 0xffff, s10 ; GFX6-NEXT: v_cvt_f32_f16_e32 v5, v0 +; GFX6-NEXT: s_not_b32 s11, s8 +; GFX6-NEXT: s_mov_b64 s[8:9], 0 ; GFX6-NEXT: .LBB6_1: ; %atomicrmw.start ; GFX6-NEXT: ; =>This Inner Loop 
Header: Depth=1 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v0, s10, v1 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v1 +; GFX6-NEXT: v_and_b32_e32 v2, s11, v1 ; GFX6-NEXT: v_add_f32_e32 v0, v0, v5 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, s10, v0 ; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX6-NEXT: v_mov_b32_e32 v3, v1 ; GFX6-NEXT: v_mov_b32_e32 v2, v0 @@ -2100,7 +2142,7 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset(ptr addrspace(7) i ; GFX6-NEXT: s_cbranch_execnz .LBB6_1 ; GFX6-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX6-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX6-NEXT: v_lshrrev_b32_e32 v0, s10, v2 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: s_waitcnt expcnt(0) ; GFX6-NEXT: s_setpc_b64 s[30:31] @@ -2118,22 +2160,27 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_f16__offset(ptr addrspace(7) ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_addk_co_i32 s4, 0x200 -; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX12-NEXT: s_and_b32 s4, s4, -4 -; GFX12-NEXT: v_mov_b32_e32 v3, s4 -; GFX12-NEXT: s_mov_b32 s4, 0 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1) +; GFX12-NEXT: s_and_b32 s5, s4, -4 +; GFX12-NEXT: s_and_b32 s4, s4, 3 +; GFX12-NEXT: v_mov_b32_e32 v3, s5 +; GFX12-NEXT: s_lshl_b32 s4, s4, 3 +; GFX12-NEXT: s_lshl_b32 s5, 0xffff, s4 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-NEXT: s_not_b32 s6, s5 ; GFX12-NEXT: buffer_load_b32 v2, v3, s[0:3], null offen +; GFX12-NEXT: s_mov_b32 s5, 0 ; GFX12-NEXT: .LBB7_1: ; %atomicrmw.start ; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: v_lshrrev_b32_e32 v1, 24, v2 +; GFX12-NEXT: 
v_lshrrev_b32_e32 v1, s4, v2 ; GFX12-NEXT: s_wait_storecnt 0x0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-NEXT: v_add_f16_e32 v1, v1, v0 ; GFX12-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX12-NEXT: v_lshlrev_b32_e32 v1, 24, v1 -; GFX12-NEXT: v_and_or_b32 v1, 0xffffff, v2, v1 +; GFX12-NEXT: v_lshlrev_b32_e32 v1, s4, v1 +; GFX12-NEXT: v_and_or_b32 v1, v2, s6, v1 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_dual_mov_b32 v5, v2 :: v_dual_mov_b32 v4, v1 ; GFX12-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v3, s[0:3], null offen th:TH_ATOMIC_RETURN @@ -2141,30 +2188,35 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_f16__offset(ptr addrspace(7) ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v2 ; GFX12-NEXT: v_mov_b32_e32 v2, v4 -; GFX12-NEXT: s_or_b32 s4, vcc_lo, s4 +; GFX12-NEXT: s_or_b32 s5, vcc_lo, s5 ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 +; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s5 ; GFX12-NEXT: s_cbranch_execnz .LBB7_1 ; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_f16__offset: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: s_addk_i32 s4, 0x200 -; GFX940-NEXT: s_and_b32 s4, s4, -4 -; GFX940-NEXT: v_mov_b32_e32 v1, s4 +; GFX940-NEXT: s_and_b32 s5, s4, -4 +; GFX940-NEXT: v_mov_b32_e32 v1, s5 ; GFX940-NEXT: buffer_load_dword v3, v1, s[0:3], 0 offen -; GFX940-NEXT: s_mov_b32 s6, 0xffffff +; GFX940-NEXT: s_and_b32 s4, s4, 3 +; GFX940-NEXT: s_lshl_b32 s6, s4, 3 +; GFX940-NEXT: s_lshl_b32 s4, 0xffff, s6 +; GFX940-NEXT: s_not_b32 s7, s4 ; GFX940-NEXT: s_mov_b64 s[4:5], 0 ; GFX940-NEXT: .LBB7_1: ; 
%atomicrmw.start ; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_add_f16_sdwa v2, v3, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: v_and_or_b32 v2, v3, s6, v2 +; GFX940-NEXT: v_lshrrev_b32_e32 v2, s6, v3 +; GFX940-NEXT: v_add_f16_e32 v2, v2, v0 +; GFX940-NEXT: v_lshlrev_b32_e32 v2, s6, v2 +; GFX940-NEXT: v_and_or_b32 v2, v3, s7, v2 ; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[2:3] +; GFX940-NEXT: buffer_wbl2 sc1 ; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v1, s[0:3], 0 offen sc0 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: buffer_inv sc1 @@ -2181,23 +2233,27 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_f16__offset(ptr addrspace(7) ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_addk_i32 s4, 0x200 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX11-NEXT: s_and_b32 s4, s4, -4 -; GFX11-NEXT: v_mov_b32_e32 v3, s4 -; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_and_b32 s5, s4, -4 +; GFX11-NEXT: s_and_b32 s4, s4, 3 +; GFX11-NEXT: v_mov_b32_e32 v3, s5 +; GFX11-NEXT: s_lshl_b32 s4, s4, 3 +; GFX11-NEXT: s_lshl_b32 s5, 0xffff, s4 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_not_b32 s6, s5 ; GFX11-NEXT: buffer_load_b32 v2, v3, s[0:3], 0 offen -; GFX11-NEXT: .p2align 6 +; GFX11-NEXT: s_mov_b32 s5, 0 ; GFX11-NEXT: .LBB7_1: ; %atomicrmw.start ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_lshrrev_b32_e32 v1, 24, v2 +; GFX11-NEXT: v_lshrrev_b32_e32 v1, s4, v2 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_add_f16_e32 v1, v1, v0 ; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX11-NEXT: s_delay_alu 
instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_lshlrev_b32_e32 v1, 24, v1 -; GFX11-NEXT: v_and_or_b32 v1, 0xffffff, v2, v1 +; GFX11-NEXT: v_lshlrev_b32_e32 v1, s4, v1 +; GFX11-NEXT: v_and_or_b32 v1, v2, s6, v1 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_dual_mov_b32 v5, v2 :: v_dual_mov_b32 v4, v1 ; GFX11-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v3, s[0:3], 0 offen glc @@ -2206,30 +2262,34 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_f16__offset(ptr addrspace(7) ; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v2 ; GFX11-NEXT: v_mov_b32_e32 v2, v4 -; GFX11-NEXT: s_or_b32 s4, vcc_lo, s4 +; GFX11-NEXT: s_or_b32 s5, vcc_lo, s5 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 +; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s5 ; GFX11-NEXT: s_cbranch_execnz .LBB7_1 ; GFX11-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s5 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_f16__offset: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_addk_i32 s8, 0x200 -; GFX10-NEXT: s_and_b32 s8, s8, -4 -; GFX10-NEXT: v_mov_b32_e32 v3, s8 -; GFX10-NEXT: s_mov_b32 s8, 0 +; GFX10-NEXT: s_and_b32 s9, s8, -4 +; GFX10-NEXT: s_and_b32 s8, s8, 3 +; GFX10-NEXT: v_mov_b32_e32 v3, s9 +; GFX10-NEXT: s_lshl_b32 s8, s8, 3 +; GFX10-NEXT: s_lshl_b32 s9, 0xffff, s8 +; GFX10-NEXT: s_not_b32 s10, s9 ; GFX10-NEXT: buffer_load_dword v2, v3, s[4:7], 0 offen +; GFX10-NEXT: s_mov_b32 s9, 0 ; GFX10-NEXT: .LBB7_1: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_add_f16_sdwa v1, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX10-NEXT: v_lshrrev_b32_e32 v1, s8, v2 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_and_b32_e32 v1, 
0xffff, v1 -; GFX10-NEXT: v_lshlrev_b32_e32 v1, 24, v1 -; GFX10-NEXT: v_and_or_b32 v1, 0xffffff, v2, v1 +; GFX10-NEXT: v_add_f16_e32 v1, v1, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v1, s8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GFX10-NEXT: v_and_or_b32 v1, v2, s10, v1 ; GFX10-NEXT: v_mov_b32_e32 v5, v2 ; GFX10-NEXT: v_mov_b32_e32 v4, v1 ; GFX10-NEXT: buffer_atomic_cmpswap v[4:5], v3, s[4:7], 0 offen glc @@ -2238,27 +2298,32 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_f16__offset(ptr addrspace(7) ; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v2 ; GFX10-NEXT: v_mov_b32_e32 v2, v4 -; GFX10-NEXT: s_or_b32 s8, vcc_lo, s8 -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s8 +; GFX10-NEXT: s_or_b32 s9, vcc_lo, s9 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s9 ; GFX10-NEXT: s_cbranch_execnz .LBB7_1 ; GFX10-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s9 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_f16__offset: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: s_addk_i32 s8, 0x200 -; GFX90A-NEXT: s_and_b32 s8, s8, -4 -; GFX90A-NEXT: v_mov_b32_e32 v1, s8 +; GFX90A-NEXT: s_and_b32 s9, s8, -4 +; GFX90A-NEXT: v_mov_b32_e32 v1, s9 ; GFX90A-NEXT: buffer_load_dword v3, v1, s[4:7], 0 offen -; GFX90A-NEXT: s_mov_b32 s10, 0xffffff +; GFX90A-NEXT: s_and_b32 s8, s8, 3 +; GFX90A-NEXT: s_lshl_b32 s10, s8, 3 +; GFX90A-NEXT: s_lshl_b32 s8, 0xffff, s10 +; GFX90A-NEXT: s_not_b32 s11, s8 ; GFX90A-NEXT: s_mov_b64 s[8:9], 0 ; GFX90A-NEXT: .LBB7_1: ; %atomicrmw.start ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: v_add_f16_sdwa v2, v3, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD -; GFX90A-NEXT: v_and_or_b32 v2, v3, s10, v2 +; GFX90A-NEXT: v_lshrrev_b32_e32 v2, s10, v3 +; GFX90A-NEXT: 
v_add_f16_e32 v2, v2, v0 +; GFX90A-NEXT: v_lshlrev_b32_e32 v2, s10, v2 +; GFX90A-NEXT: v_and_or_b32 v2, v3, s11, v2 ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1] ; GFX90A-NEXT: buffer_atomic_cmpswap v[4:5], v1, s[4:7], 0 offen glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) @@ -2276,16 +2341,21 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_f16__offset(ptr addrspace(7) ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: s_addk_i32 s8, 0x200 -; GFX908-NEXT: s_and_b32 s8, s8, -4 -; GFX908-NEXT: v_mov_b32_e32 v3, s8 +; GFX908-NEXT: s_and_b32 s9, s8, -4 +; GFX908-NEXT: v_mov_b32_e32 v3, s9 ; GFX908-NEXT: buffer_load_dword v2, v3, s[4:7], 0 offen -; GFX908-NEXT: s_mov_b32 s10, 0xffffff +; GFX908-NEXT: s_and_b32 s8, s8, 3 +; GFX908-NEXT: s_lshl_b32 s10, s8, 3 +; GFX908-NEXT: s_lshl_b32 s8, 0xffff, s10 +; GFX908-NEXT: s_not_b32 s11, s8 ; GFX908-NEXT: s_mov_b64 s[8:9], 0 ; GFX908-NEXT: .LBB7_1: ; %atomicrmw.start ; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_add_f16_sdwa v1, v2, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD -; GFX908-NEXT: v_and_or_b32 v1, v2, s10, v1 +; GFX908-NEXT: v_lshrrev_b32_e32 v1, s10, v2 +; GFX908-NEXT: v_add_f16_e32 v1, v1, v0 +; GFX908-NEXT: v_lshlrev_b32_e32 v1, s10, v1 +; GFX908-NEXT: v_and_or_b32 v1, v2, s11, v1 ; GFX908-NEXT: v_mov_b32_e32 v5, v2 ; GFX908-NEXT: v_mov_b32_e32 v4, v1 ; GFX908-NEXT: buffer_atomic_cmpswap v[4:5], v3, s[4:7], 0 offen glc @@ -2304,15 +2374,21 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_f16__offset(ptr addrspace(7) ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_addk_i32 s8, 0x200 -; GFX8-NEXT: s_and_b32 s8, s8, -4 -; GFX8-NEXT: v_mov_b32_e32 v3, s8 +; GFX8-NEXT: s_and_b32 s9, s8, -4 +; GFX8-NEXT: v_mov_b32_e32 v3, s9 ; GFX8-NEXT: buffer_load_dword v2, v3, s[4:7], 0 offen +; GFX8-NEXT: s_and_b32 s8, s8, 3 +; GFX8-NEXT: s_lshl_b32 s10, 
s8, 3 +; GFX8-NEXT: s_lshl_b32 s8, 0xffff, s10 +; GFX8-NEXT: s_not_b32 s11, s8 ; GFX8-NEXT: s_mov_b64 s[8:9], 0 ; GFX8-NEXT: .LBB7_1: ; %atomicrmw.start ; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_add_f16_sdwa v1, v2, v0 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v2 +; GFX8-NEXT: v_lshrrev_b32_e32 v1, s10, v2 +; GFX8-NEXT: v_add_f16_e32 v1, v1, v0 +; GFX8-NEXT: v_and_b32_e32 v4, s11, v2 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, s10, v1 ; GFX8-NEXT: v_or_b32_e32 v1, v4, v1 ; GFX8-NEXT: v_mov_b32_e32 v5, v2 ; GFX8-NEXT: v_mov_b32_e32 v4, v1 @@ -2332,21 +2408,25 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_f16__offset(ptr addrspace(7) ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: s_addk_i32 s8, 0x200 -; GFX7-NEXT: s_and_b32 s8, s8, -4 -; GFX7-NEXT: v_mov_b32_e32 v2, s8 +; GFX7-NEXT: s_and_b32 s9, s8, -4 +; GFX7-NEXT: v_mov_b32_e32 v2, s9 ; GFX7-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: s_mov_b64 s[8:9], 0 +; GFX7-NEXT: s_and_b32 s8, s8, 3 +; GFX7-NEXT: s_lshl_b32 s10, s8, 3 +; GFX7-NEXT: s_lshl_b32 s8, 0xffff, s10 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v0 +; GFX7-NEXT: s_not_b32 s11, s8 +; GFX7-NEXT: s_mov_b64 s[8:9], 0 ; GFX7-NEXT: .LBB7_1: ; %atomicrmw.start ; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, s10, v1 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_and_b32_e32 v4, 0xffffff, v1 +; GFX7-NEXT: v_and_b32_e32 v4, s11, v1 ; GFX7-NEXT: v_add_f32_e32 v0, v0, v3 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, s10, v0 ; GFX7-NEXT: v_or_b32_e32 v0, v4, v0 ; GFX7-NEXT: v_mov_b32_e32 v5, v1 ; GFX7-NEXT: v_mov_b32_e32 v4, v0 @@ -2366,22 +2446,26 @@ define void 
@buffer_fat_ptr_agent_atomic_fadd_noret_f16__offset(ptr addrspace(7) ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: s_addk_i32 s8, 0x200 -; GFX6-NEXT: s_and_b32 s8, s8, -4 -; GFX6-NEXT: v_mov_b32_e32 v2, s8 +; GFX6-NEXT: s_and_b32 s9, s8, -4 +; GFX6-NEXT: v_mov_b32_e32 v2, s9 ; GFX6-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-NEXT: s_mov_b64 s[8:9], 0 +; GFX6-NEXT: s_and_b32 s8, s8, 3 +; GFX6-NEXT: s_lshl_b32 s10, s8, 3 +; GFX6-NEXT: s_lshl_b32 s8, 0xffff, s10 ; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v0 +; GFX6-NEXT: s_not_b32 s11, s8 +; GFX6-NEXT: s_mov_b64 s[8:9], 0 ; GFX6-NEXT: .LBB7_1: ; %atomicrmw.start ; GFX6-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v0, s10, v1 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v1 +; GFX6-NEXT: v_and_b32_e32 v4, s11, v1 ; GFX6-NEXT: v_add_f32_e32 v0, v0, v3 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, s10, v0 ; GFX6-NEXT: v_or_b32_e32 v0, v4, v0 ; GFX6-NEXT: v_mov_b32_e32 v5, v1 ; GFX6-NEXT: v_mov_b32_e32 v4, v0 @@ -2410,10 +2494,15 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 +; GFX12-NEXT: v_add_nc_u32_e32 v6, 0x200, v4 ; GFX12-NEXT: s_mov_b32 s1, exec_lo -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_and_b32_e32 v4, -4, v4 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX12-NEXT: v_and_b32_e32 v4, 3, v6 +; GFX12-NEXT: v_and_b32_e32 v10, -4, v6 +; GFX12-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | 
instid1(VALU_DEP_1) +; GFX12-NEXT: v_lshlrev_b32_e64 v7, v4, 0xffff +; GFX12-NEXT: v_not_b32_e32 v11, v7 ; GFX12-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX12-NEXT: v_readfirstlane_b32 s4, v0 ; GFX12-NEXT: v_readfirstlane_b32 s5, v1 @@ -2425,7 +2514,7 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX12-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX12-NEXT: s_and_saveexec_b32 s0, s0 -; GFX12-NEXT: buffer_load_b32 v7, v4, s[4:7], null offen +; GFX12-NEXT: buffer_load_b32 v7, v10, s[4:7], null offen ; GFX12-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_cbranch_execnz .LBB8_1 ; GFX12-NEXT: ; %bb.2: @@ -2435,15 +2524,15 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add ; GFX12-NEXT: ; =>This Loop Header: Depth=1 ; GFX12-NEXT: ; Child Loop BB8_4 Depth 2 ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: v_lshrrev_b32_e32 v6, 24, v7 +; GFX12-NEXT: v_lshrrev_b32_e32 v6, v4, v7 ; GFX12-NEXT: s_mov_b32 s2, exec_lo ; GFX12-NEXT: s_wait_storecnt 0x0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-NEXT: v_add_f16_e32 v6, v6, v5 ; GFX12-NEXT: v_and_b32_e32 v6, 0xffff, v6 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX12-NEXT: v_lshlrev_b32_e32 v6, 24, v6 -; GFX12-NEXT: v_and_or_b32 v6, 0xffffff, v7, v6 +; GFX12-NEXT: v_lshlrev_b32_e32 v6, v4, v6 +; GFX12-NEXT: v_and_or_b32 v6, v7, v11, v6 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_dual_mov_b32 v9, v7 :: v_dual_mov_b32 v8, v6 ; GFX12-NEXT: .LBB8_4: ; Parent Loop BB8_3 Depth=1 @@ -2459,7 +2548,7 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add ; GFX12-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX12-NEXT: s_and_saveexec_b32 s0, s0 ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: buffer_atomic_cmpswap_b32 v[8:9], v4, s[4:7], null offen 
th:TH_ATOMIC_RETURN +; GFX12-NEXT: buffer_atomic_cmpswap_b32 v[8:9], v10, s[4:7], null offen th:TH_ATOMIC_RETURN ; GFX12-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_cbranch_execnz .LBB8_4 ; GFX12-NEXT: ; %bb.5: ; in Loop: Header=BB8_3 Depth=1 @@ -2474,15 +2563,19 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add ; GFX12-NEXT: s_cbranch_execnz .LBB8_3 ; GFX12-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 -; GFX12-NEXT: v_lshrrev_b32_e32 v0, 24, v8 +; GFX12-NEXT: v_lshrrev_b32_e32 v0, v4, v8 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: v_add_u32_e32 v4, 0x200, v4 -; GFX940-NEXT: v_and_b32_e32 v4, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v10, 0xffffff +; GFX940-NEXT: v_and_b32_e32 v10, -4, v4 +; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX940-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX940-NEXT: s_mov_b32 s0, 0xffff +; GFX940-NEXT: v_lshlrev_b32_e64 v6, v4, s0 +; GFX940-NEXT: v_not_b32_e32 v11, v6 ; GFX940-NEXT: s_mov_b64 s[2:3], exec ; GFX940-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX940-NEXT: v_readfirstlane_b32 s4, v0 @@ -2494,7 +2587,7 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add ; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] ; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] ; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: buffer_load_dword v7, v4, s[4:7], 0 offen +; GFX940-NEXT: buffer_load_dword v7, v10, s[4:7], 0 offen ; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] ; GFX940-NEXT: s_cbranch_execnz .LBB8_1 ; GFX940-NEXT: ; %bb.2: @@ -2504,9 +2597,11 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add ; GFX940-NEXT: ; =>This Loop Header: Depth=1 ; GFX940-NEXT: ; Child Loop BB8_4 Depth 2 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: 
v_add_f16_sdwa v6, v7, v5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX940-NEXT: v_lshrrev_b32_e32 v6, v4, v7 +; GFX940-NEXT: v_add_f16_e32 v6, v6, v5 +; GFX940-NEXT: v_lshlrev_b32_e32 v6, v4, v6 +; GFX940-NEXT: v_and_or_b32 v6, v7, v11, v6 ; GFX940-NEXT: s_mov_b64 s[8:9], exec -; GFX940-NEXT: v_and_or_b32 v6, v7, v10, v6 ; GFX940-NEXT: v_mov_b64_e32 v[8:9], v[6:7] ; GFX940-NEXT: buffer_wbl2 sc1 ; GFX940-NEXT: .LBB8_4: ; Parent Loop BB8_3 Depth=1 @@ -2521,7 +2616,7 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add ; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] ; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_atomic_cmpswap v[8:9], v4, s[4:7], 0 offen sc0 +; GFX940-NEXT: buffer_atomic_cmpswap v[8:9], v10, s[4:7], 0 offen sc0 ; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] ; GFX940-NEXT: s_cbranch_execnz .LBB8_4 ; GFX940-NEXT: ; %bb.5: ; in Loop: Header=BB8_3 Depth=1 @@ -2535,17 +2630,22 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add ; GFX940-NEXT: s_cbranch_execnz .LBB8_3 ; GFX940-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 24, v8 +; GFX940-NEXT: v_lshrrev_b32_e32 v0, v4, v8 ; GFX940-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 +; GFX11-NEXT: v_add_nc_u32_e32 v6, 0x200, v4 ; GFX11-NEXT: s_mov_b32 s1, 0 ; GFX11-NEXT: s_mov_b32 s2, exec_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_and_b32_e32 v4, -4, v4 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v4, 3, v6 +; GFX11-NEXT: v_and_b32_e32 v10, -4, v6 +; GFX11-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 
| instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e64 v7, v4, 0xffff +; GFX11-NEXT: v_not_b32_e32 v11, v7 ; GFX11-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: v_readfirstlane_b32 s4, v0 ; GFX11-NEXT: v_readfirstlane_b32 s5, v1 @@ -2557,7 +2657,7 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX11-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-NEXT: buffer_load_b32 v7, v4, s[4:7], 0 offen +; GFX11-NEXT: buffer_load_b32 v7, v10, s[4:7], 0 offen ; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX11-NEXT: s_cbranch_execnz .LBB8_1 ; GFX11-NEXT: ; %bb.2: @@ -2567,15 +2667,15 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add ; GFX11-NEXT: ; =>This Loop Header: Depth=1 ; GFX11-NEXT: ; Child Loop BB8_4 Depth 2 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_lshrrev_b32_e32 v6, 24, v7 +; GFX11-NEXT: v_lshrrev_b32_e32 v6, v4, v7 ; GFX11-NEXT: s_mov_b32 s2, exec_lo ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_add_f16_e32 v6, v6, v5 ; GFX11-NEXT: v_and_b32_e32 v6, 0xffff, v6 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_lshlrev_b32_e32 v6, 24, v6 -; GFX11-NEXT: v_and_or_b32 v6, 0xffffff, v7, v6 +; GFX11-NEXT: v_lshlrev_b32_e32 v6, v4, v6 +; GFX11-NEXT: v_and_or_b32 v6, v7, v11, v6 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_dual_mov_b32 v9, v7 :: v_dual_mov_b32 v8, v6 ; GFX11-NEXT: .LBB8_4: ; Parent Loop BB8_3 Depth=1 @@ -2591,7 +2691,7 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add ; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX11-NEXT: s_and_saveexec_b32 s0, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_atomic_cmpswap_b32 v[8:9], v4, 
s[4:7], 0 offen glc +; GFX11-NEXT: buffer_atomic_cmpswap_b32 v[8:9], v10, s[4:7], 0 offen glc ; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX11-NEXT: s_cbranch_execnz .LBB8_4 ; GFX11-NEXT: ; %bb.5: ; in Loop: Header=BB8_3 Depth=1 @@ -2607,16 +2707,20 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add ; GFX11-NEXT: s_cbranch_execnz .LBB8_3 ; GFX11-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s1 -; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v8 +; GFX11-NEXT: v_lshrrev_b32_e32 v0, v4, v8 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 +; GFX10-NEXT: v_add_nc_u32_e32 v6, 0x200, v4 ; GFX10-NEXT: s_mov_b32 s5, 0 ; GFX10-NEXT: s_mov_b32 s6, exec_lo -; GFX10-NEXT: v_and_b32_e32 v4, -4, v4 +; GFX10-NEXT: v_and_b32_e32 v4, 3, v6 +; GFX10-NEXT: v_and_b32_e32 v10, -4, v6 +; GFX10-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX10-NEXT: v_lshlrev_b32_e64 v7, v4, 0xffff +; GFX10-NEXT: v_not_b32_e32 v11, v7 ; GFX10-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_readfirstlane_b32 s8, v0 ; GFX10-NEXT: v_readfirstlane_b32 s9, v1 @@ -2626,7 +2730,7 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add ; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] ; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 ; GFX10-NEXT: s_and_saveexec_b32 s4, s4 -; GFX10-NEXT: buffer_load_dword v7, v4, s[8:11], 0 offen +; GFX10-NEXT: buffer_load_dword v7, v10, s[8:11], 0 offen ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: s_cbranch_execnz .LBB8_1 @@ -2636,12 +2740,12 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add ; GFX10-NEXT: ; =>This Loop Header: Depth=1 ; GFX10-NEXT: ; Child Loop BB8_4 Depth 2 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: 
v_add_f16_sdwa v6, v7, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX10-NEXT: v_lshrrev_b32_e32 v6, v4, v7 ; GFX10-NEXT: s_mov_b32 s6, exec_lo ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_and_b32_e32 v6, 0xffff, v6 -; GFX10-NEXT: v_lshlrev_b32_e32 v6, 24, v6 -; GFX10-NEXT: v_and_or_b32 v6, 0xffffff, v7, v6 +; GFX10-NEXT: v_add_f16_e32 v6, v6, v5 +; GFX10-NEXT: v_lshlrev_b32_sdwa v6, v4, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GFX10-NEXT: v_and_or_b32 v6, v7, v11, v6 ; GFX10-NEXT: v_mov_b32_e32 v9, v7 ; GFX10-NEXT: v_mov_b32_e32 v8, v6 ; GFX10-NEXT: .LBB8_4: ; Parent Loop BB8_3 Depth=1 @@ -2655,7 +2759,7 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add ; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 ; GFX10-NEXT: s_and_saveexec_b32 s4, s4 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_atomic_cmpswap v[8:9], v4, s[8:11], 0 offen glc +; GFX10-NEXT: buffer_atomic_cmpswap v[8:9], v10, s[8:11], 0 offen glc ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: s_cbranch_execnz .LBB8_4 @@ -2672,15 +2776,19 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add ; GFX10-NEXT: s_cbranch_execnz .LBB8_3 ; GFX10-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5 -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v8 +; GFX10-NEXT: v_lshrrev_b32_e32 v0, v4, v8 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_add_u32_e32 v4, 0x200, v4 -; GFX90A-NEXT: v_and_b32_e32 v4, -4, v4 -; GFX90A-NEXT: v_mov_b32_e32 v10, 0xffffff +; GFX90A-NEXT: v_and_b32_e32 v10, -4, v4 +; GFX90A-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX90A-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX90A-NEXT: s_mov_b32 s4, 0xffff +; GFX90A-NEXT: v_lshlrev_b32_e64 v6, v4, s4 +; 
GFX90A-NEXT: v_not_b32_e32 v11, v6 ; GFX90A-NEXT: s_mov_b64 s[6:7], exec ; GFX90A-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: v_readfirstlane_b32 s8, v0 @@ -2692,7 +2800,7 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add ; GFX90A-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX90A-NEXT: s_nop 0 -; GFX90A-NEXT: buffer_load_dword v7, v4, s[8:11], 0 offen +; GFX90A-NEXT: buffer_load_dword v7, v10, s[8:11], 0 offen ; GFX90A-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX90A-NEXT: s_cbranch_execnz .LBB8_1 ; GFX90A-NEXT: ; %bb.2: @@ -2702,8 +2810,10 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add ; GFX90A-NEXT: ; =>This Loop Header: Depth=1 ; GFX90A-NEXT: ; Child Loop BB8_4 Depth 2 ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: v_add_f16_sdwa v6, v7, v5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD -; GFX90A-NEXT: v_and_or_b32 v6, v7, v10, v6 +; GFX90A-NEXT: v_lshrrev_b32_e32 v6, v4, v7 +; GFX90A-NEXT: v_add_f16_e32 v6, v6, v5 +; GFX90A-NEXT: v_lshlrev_b32_e32 v6, v4, v6 +; GFX90A-NEXT: v_and_or_b32 v6, v7, v11, v6 ; GFX90A-NEXT: s_mov_b64 s[12:13], exec ; GFX90A-NEXT: v_pk_mov_b32 v[8:9], v[6:7], v[6:7] op_sel:[0,1] ; GFX90A-NEXT: .LBB8_4: ; Parent Loop BB8_3 Depth=1 @@ -2717,7 +2827,7 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add ; GFX90A-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: buffer_atomic_cmpswap v[8:9], v4, s[8:11], 0 offen glc +; GFX90A-NEXT: buffer_atomic_cmpswap v[8:9], v10, s[8:11], 0 offen glc ; GFX90A-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX90A-NEXT: s_cbranch_execnz .LBB8_4 ; GFX90A-NEXT: ; %bb.5: ; in Loop: Header=BB8_3 Depth=1 @@ -2731,15 +2841,19 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add ; GFX90A-NEXT: s_cbranch_execnz .LBB8_3 ; 
GFX90A-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX90A-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 24, v8 +; GFX90A-NEXT: v_lshrrev_b32_e32 v0, v4, v8 ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: v_add_u32_e32 v4, 0x200, v4 -; GFX908-NEXT: v_and_b32_e32 v4, -4, v4 -; GFX908-NEXT: v_mov_b32_e32 v10, 0xffffff +; GFX908-NEXT: v_and_b32_e32 v10, -4, v4 +; GFX908-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX908-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX908-NEXT: s_mov_b32 s4, 0xffff +; GFX908-NEXT: v_lshlrev_b32_e64 v6, v4, s4 +; GFX908-NEXT: v_not_b32_e32 v11, v6 ; GFX908-NEXT: s_mov_b64 s[6:7], exec ; GFX908-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX908-NEXT: v_readfirstlane_b32 s8, v0 @@ -2751,7 +2865,7 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add ; GFX908-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX908-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX908-NEXT: s_nop 0 -; GFX908-NEXT: buffer_load_dword v7, v4, s[8:11], 0 offen +; GFX908-NEXT: buffer_load_dword v7, v10, s[8:11], 0 offen ; GFX908-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX908-NEXT: s_cbranch_execnz .LBB8_1 ; GFX908-NEXT: ; %bb.2: @@ -2761,8 +2875,10 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add ; GFX908-NEXT: ; =>This Loop Header: Depth=1 ; GFX908-NEXT: ; Child Loop BB8_4 Depth 2 ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_add_f16_sdwa v6, v7, v5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD -; GFX908-NEXT: v_and_or_b32 v6, v7, v10, v6 +; GFX908-NEXT: v_lshrrev_b32_e32 v6, v4, v7 +; GFX908-NEXT: v_add_f16_e32 v6, v6, v5 +; GFX908-NEXT: v_lshlrev_b32_e32 v6, v4, v6 +; GFX908-NEXT: v_and_or_b32 v6, v7, v11, v6 ; GFX908-NEXT: v_mov_b32_e32 v9, v7 ; GFX908-NEXT: s_mov_b64 s[12:13], exec ; GFX908-NEXT: v_mov_b32_e32 v8, v6 @@ 
-2777,7 +2893,7 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add ; GFX908-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX908-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: buffer_atomic_cmpswap v[8:9], v4, s[8:11], 0 offen glc +; GFX908-NEXT: buffer_atomic_cmpswap v[8:9], v10, s[8:11], 0 offen glc ; GFX908-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX908-NEXT: s_cbranch_execnz .LBB8_4 ; GFX908-NEXT: ; %bb.5: ; in Loop: Header=BB8_3 Depth=1 @@ -2791,14 +2907,19 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add ; GFX908-NEXT: s_cbranch_execnz .LBB8_3 ; GFX908-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX908-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX908-NEXT: v_lshrrev_b32_e32 v0, 24, v8 +; GFX908-NEXT: v_lshrrev_b32_e32 v0, v4, v8 ; GFX908-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0x200, v4 -; GFX8-NEXT: v_and_b32_e32 v4, -4, v4 +; GFX8-NEXT: v_and_b32_e32 v10, -4, v4 +; GFX8-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v4, 3, v4 +; GFX8-NEXT: s_mov_b32 s4, 0xffff +; GFX8-NEXT: v_lshlrev_b32_e64 v6, v4, s4 +; GFX8-NEXT: v_not_b32_e32 v11, v6 ; GFX8-NEXT: s_mov_b64 s[6:7], exec ; GFX8-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX8-NEXT: v_readfirstlane_b32 s8, v0 @@ -2810,7 +2931,7 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add ; GFX8-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX8-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX8-NEXT: s_nop 0 -; GFX8-NEXT: buffer_load_dword v7, v4, s[8:11], 0 offen +; GFX8-NEXT: buffer_load_dword v7, v10, s[8:11], 0 offen ; GFX8-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX8-NEXT: s_cbranch_execnz .LBB8_1 ; GFX8-NEXT: ; %bb.2: @@ -2820,8 +2941,10 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr 
add ; GFX8-NEXT: ; =>This Loop Header: Depth=1 ; GFX8-NEXT: ; Child Loop BB8_4 Depth 2 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_add_f16_sdwa v6, v7, v5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_e32 v8, 0xffffff, v7 +; GFX8-NEXT: v_lshrrev_b32_e32 v6, v4, v7 +; GFX8-NEXT: v_add_f16_e32 v6, v6, v5 +; GFX8-NEXT: v_lshlrev_b32_e32 v6, v4, v6 +; GFX8-NEXT: v_and_b32_e32 v8, v7, v11 ; GFX8-NEXT: v_or_b32_e32 v6, v8, v6 ; GFX8-NEXT: v_mov_b32_e32 v9, v7 ; GFX8-NEXT: s_mov_b64 s[12:13], exec @@ -2837,7 +2960,7 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add ; GFX8-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX8-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: buffer_atomic_cmpswap v[8:9], v4, s[8:11], 0 offen glc +; GFX8-NEXT: buffer_atomic_cmpswap v[8:9], v10, s[8:11], 0 offen glc ; GFX8-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX8-NEXT: s_cbranch_execnz .LBB8_4 ; GFX8-NEXT: ; %bb.5: ; in Loop: Header=BB8_3 Depth=1 @@ -2851,14 +2974,18 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add ; GFX8-NEXT: s_cbranch_execnz .LBB8_3 ; GFX8-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX8-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v8 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, v4, v8 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_add_i32_e32 v4, vcc, 0x200, v4 -; GFX7-NEXT: v_and_b32_e32 v7, -4, v4 +; GFX7-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX7-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX7-NEXT: v_lshlrev_b32_e32 v7, 3, v4 +; GFX7-NEXT: v_lshl_b32_e32 v4, 0xffff, v7 +; GFX7-NEXT: v_not_b32_e32 v9, v4 ; GFX7-NEXT: s_mov_b64 s[6:7], exec ; GFX7-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX7-NEXT: v_readfirstlane_b32 s8, v0 @@ -2869,25 +2996,25 @@ define half 
@buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add ; GFX7-NEXT: v_cmp_eq_u64_e64 s[4:5], s[10:11], v[2:3] ; GFX7-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX7-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GFX7-NEXT: buffer_load_dword v6, v7, s[8:11], 0 offen +; GFX7-NEXT: buffer_load_dword v6, v8, s[8:11], 0 offen ; GFX7-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_cbranch_execnz .LBB8_1 ; GFX7-NEXT: ; %bb.2: ; GFX7-NEXT: s_mov_b64 exec, s[6:7] ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v5 ; GFX7-NEXT: s_mov_b64 s[6:7], 0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v8, v4 +; GFX7-NEXT: v_cvt_f32_f16_e32 v10, v4 ; GFX7-NEXT: .LBB8_3: ; %atomicrmw.start ; GFX7-NEXT: ; =>This Loop Header: Depth=1 ; GFX7-NEXT: ; Child Loop BB8_4 Depth 2 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v4, 24, v6 +; GFX7-NEXT: v_lshrrev_b32_e32 v4, v7, v6 ; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 -; GFX7-NEXT: v_and_b32_e32 v5, 0xffffff, v6 +; GFX7-NEXT: v_and_b32_e32 v5, v6, v9 ; GFX7-NEXT: s_mov_b64 s[12:13], exec -; GFX7-NEXT: v_add_f32_e32 v4, v4, v8 +; GFX7-NEXT: v_add_f32_e32 v4, v4, v10 ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, v7, v4 ; GFX7-NEXT: v_or_b32_e32 v5, v5, v4 ; GFX7-NEXT: v_mov_b32_e32 v4, v5 ; GFX7-NEXT: v_mov_b32_e32 v5, v6 @@ -2902,7 +3029,7 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add ; GFX7-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX7-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: buffer_atomic_cmpswap v[4:5], v7, s[8:11], 0 offen glc +; GFX7-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc ; GFX7-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_cbranch_execnz .LBB8_4 ; GFX7-NEXT: ; %bb.5: ; in Loop: Header=BB8_3 Depth=1 @@ -2916,7 +3043,7 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add ; GFX7-NEXT: s_cbranch_execnz .LBB8_3 ; GFX7-NEXT: ; %bb.6: ; 
%atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; @@ -2924,7 +3051,11 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0x200, v4 -; GFX6-NEXT: v_and_b32_e32 v7, -4, v4 +; GFX6-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX6-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX6-NEXT: v_lshlrev_b32_e32 v7, 3, v4 +; GFX6-NEXT: v_lshl_b32_e32 v4, 0xffff, v7 +; GFX6-NEXT: v_not_b32_e32 v9, v4 ; GFX6-NEXT: s_mov_b64 s[6:7], exec ; GFX6-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX6-NEXT: v_readfirstlane_b32 s8, v0 @@ -2935,25 +3066,25 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add ; GFX6-NEXT: v_cmp_eq_u64_e64 s[4:5], s[10:11], v[2:3] ; GFX6-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX6-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GFX6-NEXT: buffer_load_dword v6, v7, s[8:11], 0 offen +; GFX6-NEXT: buffer_load_dword v6, v8, s[8:11], 0 offen ; GFX6-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX6-NEXT: s_cbranch_execnz .LBB8_1 ; GFX6-NEXT: ; %bb.2: ; GFX6-NEXT: s_mov_b64 exec, s[6:7] ; GFX6-NEXT: v_cvt_f16_f32_e32 v4, v5 ; GFX6-NEXT: s_mov_b64 s[6:7], 0 -; GFX6-NEXT: v_cvt_f32_f16_e32 v8, v4 +; GFX6-NEXT: v_cvt_f32_f16_e32 v10, v4 ; GFX6-NEXT: .LBB8_3: ; %atomicrmw.start ; GFX6-NEXT: ; =>This Loop Header: Depth=1 ; GFX6-NEXT: ; Child Loop BB8_4 Depth 2 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) -; GFX6-NEXT: v_lshrrev_b32_e32 v4, 24, v6 +; GFX6-NEXT: v_lshrrev_b32_e32 v4, v7, v6 ; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v4 -; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v6 +; GFX6-NEXT: v_and_b32_e32 v5, v6, v9 ; GFX6-NEXT: s_mov_b64 s[12:13], exec -; GFX6-NEXT: v_add_f32_e32 v4, v4, v8 +; GFX6-NEXT: v_add_f32_e32 v4, v4, v10 ; GFX6-NEXT: v_cvt_f16_f32_e32 v4, v4 -; 
GFX6-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX6-NEXT: v_lshlrev_b32_e32 v4, v7, v4 ; GFX6-NEXT: v_or_b32_e32 v5, v5, v4 ; GFX6-NEXT: v_mov_b32_e32 v4, v5 ; GFX6-NEXT: v_mov_b32_e32 v5, v6 @@ -2968,7 +3099,7 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add ; GFX6-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX6-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[4:5], v7, s[8:11], 0 offen glc +; GFX6-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc ; GFX6-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX6-NEXT: s_cbranch_execnz .LBB8_4 ; GFX6-NEXT: ; %bb.5: ; in Loop: Header=BB8_3 Depth=1 @@ -2982,7 +3113,7 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall(ptr add ; GFX6-NEXT: s_cbranch_execnz .LBB8_3 ; GFX6-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX6-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX6-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: s_waitcnt expcnt(0) ; GFX6-NEXT: s_setpc_b64 s[30:31] @@ -3005,15 +3136,19 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset(ptr addrspace(7 ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_addk_co_i32 s4, 0x200 ; GFX12-NEXT: v_lshlrev_b32_e32 v5, 16, v0 -; GFX12-NEXT: s_and_b32 s4, s4, -4 -; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12-NEXT: v_mov_b32_e32 v4, s4 -; GFX12-NEXT: s_mov_b32 s4, 0 +; GFX12-NEXT: s_and_b32 s5, s4, -4 +; GFX12-NEXT: s_and_b32 s4, s4, 3 +; GFX12-NEXT: v_mov_b32_e32 v4, s5 +; GFX12-NEXT: s_lshl_b32 s4, s4, 3 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX12-NEXT: s_lshl_b32 s5, 0xffff, s4 +; GFX12-NEXT: s_not_b32 s6, s5 ; GFX12-NEXT: buffer_load_b32 v1, v4, s[0:3], null offen +; GFX12-NEXT: s_mov_b32 s5, 0 ; GFX12-NEXT: .LBB9_1: ; %atomicrmw.start ; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX12-NEXT: s_wait_loadcnt 0x0 -; 
GFX12-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v0, s4, v1 ; GFX12-NEXT: s_wait_storecnt 0x0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-NEXT: v_lshlrev_b32_e32 v0, 16, v0 @@ -3027,8 +3162,8 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset(ptr addrspace(7 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo ; GFX12-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX12-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX12-NEXT: v_and_or_b32 v0, 0xffffff, v1, v0 +; GFX12-NEXT: v_lshlrev_b32_e32 v0, s4, v0 +; GFX12-NEXT: v_and_or_b32 v0, v1, s6, v0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v2, v0 ; GFX12-NEXT: buffer_atomic_cmpswap_b32 v[2:3], v4, s[0:3], null offen th:TH_ATOMIC_RETURN @@ -3036,42 +3171,43 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset(ptr addrspace(7 ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v1 ; GFX12-NEXT: v_mov_b32_e32 v1, v2 -; GFX12-NEXT: s_or_b32 s4, vcc_lo, s4 +; GFX12-NEXT: s_or_b32 s5, vcc_lo, s5 ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 +; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s5 ; GFX12-NEXT: s_cbranch_execnz .LBB9_1 ; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 -; GFX12-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; GFX12-NEXT: v_lshrrev_b32_e32 v0, s4, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: s_addk_i32 s4, 0x200 -; GFX940-NEXT: s_and_b32 s4, s4, -4 -; GFX940-NEXT: v_mov_b32_e32 v4, s4 +; GFX940-NEXT: s_and_b32 s5, s4, -4 +; GFX940-NEXT: v_mov_b32_e32 v4, s5 ; GFX940-NEXT: 
buffer_load_dword v1, v4, s[0:3], 0 offen -; GFX940-NEXT: s_mov_b32 s6, 0xffffff +; GFX940-NEXT: s_and_b32 s4, s4, 3 +; GFX940-NEXT: s_lshl_b32 s6, s4, 3 +; GFX940-NEXT: s_lshl_b32 s4, 0xffff, s6 +; GFX940-NEXT: s_not_b32 s7, s4 ; GFX940-NEXT: s_mov_b64 s[4:5], 0 ; GFX940-NEXT: v_lshlrev_b32_e32 v5, 16, v0 -; GFX940-NEXT: s_movk_i32 s7, 0x7fff +; GFX940-NEXT: s_movk_i32 s8, 0x7fff ; GFX940-NEXT: .LBB9_1: ; %atomicrmw.start ; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 24, v1 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX940-NEXT: v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX940-NEXT: buffer_wbl2 sc1 ; GFX940-NEXT: v_add_f32_e32 v0, v0, v5 ; GFX940-NEXT: v_bfe_u32 v2, v0, 16, 1 ; GFX940-NEXT: v_or_b32_e32 v3, 0x400000, v0 -; GFX940-NEXT: v_add3_u32 v2, v2, v0, s7 +; GFX940-NEXT: v_add3_u32 v2, v2, v0, s8 ; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: s_nop 1 ; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX940-NEXT: v_and_or_b32 v0, v1, s6, v0 +; GFX940-NEXT: v_lshlrev_b32_sdwa v0, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX940-NEXT: v_and_or_b32 v0, v1, s7, v0 ; GFX940-NEXT: v_mov_b64_e32 v[2:3], v[0:1] ; GFX940-NEXT: buffer_atomic_cmpswap v[2:3], v4, s[0:3], 0 offen sc0 ; GFX940-NEXT: s_waitcnt vmcnt(0) @@ -3083,7 +3219,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset(ptr addrspace(7 ; GFX940-NEXT: s_cbranch_execnz .LBB9_1 ; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX940-NEXT: v_lshrrev_b32_e32 v0, s6, v2 ; GFX940-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset: @@ -3091,16 
+3227,20 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset(ptr addrspace(7 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_addk_i32 s4, 0x200 ; GFX11-NEXT: v_lshlrev_b32_e32 v5, 16, v0 -; GFX11-NEXT: s_and_b32 s4, s4, -4 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: v_mov_b32_e32 v4, s4 -; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_and_b32 s5, s4, -4 +; GFX11-NEXT: s_and_b32 s4, s4, 3 +; GFX11-NEXT: v_mov_b32_e32 v4, s5 +; GFX11-NEXT: s_lshl_b32 s4, s4, 3 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_lshl_b32 s5, 0xffff, s4 +; GFX11-NEXT: s_not_b32 s6, s5 ; GFX11-NEXT: buffer_load_b32 v1, v4, s[0:3], 0 offen +; GFX11-NEXT: s_mov_b32 s5, 0 ; GFX11-NEXT: .p2align 6 ; GFX11-NEXT: .LBB9_1: ; %atomicrmw.start ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX11-NEXT: v_lshrrev_b32_e32 v0, s4, v1 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0 @@ -3114,8 +3254,8 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset(ptr addrspace(7 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX11-NEXT: v_and_or_b32 v0, 0xffffff, v1, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v0, s4, v0 +; GFX11-NEXT: v_and_or_b32 v0, v1, s6, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v2, v0 ; GFX11-NEXT: buffer_atomic_cmpswap_b32 v[2:3], v4, s[0:3], 0 offen glc @@ -3124,13 +3264,13 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset(ptr addrspace(7 ; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v1 ; 
GFX11-NEXT: v_mov_b32_e32 v1, v2 -; GFX11-NEXT: s_or_b32 s4, vcc_lo, s4 +; GFX11-NEXT: s_or_b32 s5, vcc_lo, s5 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 +; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s5 ; GFX11-NEXT: s_cbranch_execnz .LBB9_1 ; GFX11-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s4 -; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; GFX11-NEXT: v_lshrrev_b32_e32 v0, s4, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset: @@ -3138,25 +3278,27 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset(ptr addrspace(7 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_addk_i32 s8, 0x200 ; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v0 -; GFX10-NEXT: s_and_b32 s8, s8, -4 -; GFX10-NEXT: v_mov_b32_e32 v4, s8 -; GFX10-NEXT: s_mov_b32 s8, 0 +; GFX10-NEXT: s_and_b32 s9, s8, -4 +; GFX10-NEXT: s_and_b32 s8, s8, 3 +; GFX10-NEXT: v_mov_b32_e32 v4, s9 +; GFX10-NEXT: s_lshl_b32 s8, s8, 3 +; GFX10-NEXT: s_lshl_b32 s9, 0xffff, s8 +; GFX10-NEXT: s_not_b32 s10, s9 ; GFX10-NEXT: buffer_load_dword v1, v4, s[4:7], 0 offen +; GFX10-NEXT: s_mov_b32 s9, 0 ; GFX10-NEXT: .LBB9_1: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX10-NEXT: v_lshrrev_b32_sdwa v0, s8, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX10-NEXT: v_add_f32_e32 v0, v0, v5 ; GFX10-NEXT: v_bfe_u32 v2, v0, 16, 1 ; GFX10-NEXT: v_or_b32_e32 v3, 0x400000, v0 ; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 ; GFX10-NEXT: v_add3_u32 v2, v2, v0, 0x7fff ; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX10-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX10-NEXT: 
v_and_or_b32 v0, 0xffffff, v1, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX10-NEXT: v_and_or_b32 v0, v1, s10, v0 ; GFX10-NEXT: v_mov_b32_e32 v3, v1 ; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; GFX10-NEXT: buffer_atomic_cmpswap v[2:3], v4, s[4:7], 0 offen glc @@ -3165,39 +3307,40 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset(ptr addrspace(7 ; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v1 ; GFX10-NEXT: v_mov_b32_e32 v1, v2 -; GFX10-NEXT: s_or_b32 s8, vcc_lo, s8 -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s8 +; GFX10-NEXT: s_or_b32 s9, vcc_lo, s9 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s9 ; GFX10-NEXT: s_cbranch_execnz .LBB9_1 ; GFX10-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8 -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s9 +; GFX10-NEXT: v_lshrrev_b32_e32 v0, s8, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: s_addk_i32 s8, 0x200 -; GFX90A-NEXT: s_and_b32 s8, s8, -4 -; GFX90A-NEXT: v_mov_b32_e32 v4, s8 +; GFX90A-NEXT: s_and_b32 s9, s8, -4 +; GFX90A-NEXT: v_mov_b32_e32 v4, s9 ; GFX90A-NEXT: buffer_load_dword v1, v4, s[4:7], 0 offen -; GFX90A-NEXT: s_mov_b32 s10, 0xffffff +; GFX90A-NEXT: s_and_b32 s8, s8, 3 +; GFX90A-NEXT: s_lshl_b32 s10, s8, 3 +; GFX90A-NEXT: s_lshl_b32 s8, 0xffff, s10 +; GFX90A-NEXT: s_not_b32 s11, s8 ; GFX90A-NEXT: s_mov_b64 s[8:9], 0 ; GFX90A-NEXT: v_lshlrev_b32_e32 v5, 16, v0 -; GFX90A-NEXT: s_movk_i32 s11, 0x7fff +; GFX90A-NEXT: s_movk_i32 s12, 0x7fff ; GFX90A-NEXT: .LBB9_1: ; %atomicrmw.start ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 24, v1 -; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX90A-NEXT: v_lshrrev_b32_sdwa v0, 
s10, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX90A-NEXT: v_add_f32_e32 v0, v0, v5 ; GFX90A-NEXT: v_bfe_u32 v2, v0, 16, 1 ; GFX90A-NEXT: v_or_b32_e32 v3, 0x400000, v0 -; GFX90A-NEXT: v_add3_u32 v2, v2, v0, s11 +; GFX90A-NEXT: v_add3_u32 v2, v2, v0, s12 ; GFX90A-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 ; GFX90A-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc -; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX90A-NEXT: v_and_or_b32 v0, v1, s10, v0 +; GFX90A-NEXT: v_lshlrev_b32_sdwa v0, s10, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX90A-NEXT: v_and_or_b32 v0, v1, s11, v0 ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1] ; GFX90A-NEXT: buffer_atomic_cmpswap v[2:3], v4, s[4:7], 0 offen glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) @@ -3209,34 +3352,35 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset(ptr addrspace(7 ; GFX90A-NEXT: s_cbranch_execnz .LBB9_1 ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX90A-NEXT: v_lshrrev_b32_e32 v0, s10, v2 ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: s_addk_i32 s8, 0x200 -; GFX908-NEXT: s_and_b32 s8, s8, -4 -; GFX908-NEXT: v_mov_b32_e32 v4, s8 +; GFX908-NEXT: s_and_b32 s9, s8, -4 +; GFX908-NEXT: v_mov_b32_e32 v4, s9 ; GFX908-NEXT: buffer_load_dword v1, v4, s[4:7], 0 offen -; GFX908-NEXT: s_mov_b32 s10, 0xffffff +; GFX908-NEXT: s_and_b32 s8, s8, 3 +; GFX908-NEXT: s_lshl_b32 s10, s8, 3 +; GFX908-NEXT: s_lshl_b32 s8, 0xffff, s10 +; GFX908-NEXT: s_not_b32 s11, s8 ; GFX908-NEXT: s_mov_b64 s[8:9], 0 ; GFX908-NEXT: v_lshlrev_b32_e32 v5, 16, v0 -; GFX908-NEXT: s_movk_i32 s11, 0x7fff +; GFX908-NEXT: s_movk_i32 s12, 0x7fff ; GFX908-NEXT: .LBB9_1: ; %atomicrmw.start ; GFX908-NEXT: ; =>This Inner 
Loop Header: Depth=1 ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_lshrrev_b32_e32 v0, 24, v1 -; GFX908-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX908-NEXT: v_lshrrev_b32_sdwa v0, s10, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX908-NEXT: v_add_f32_e32 v0, v0, v5 ; GFX908-NEXT: v_bfe_u32 v2, v0, 16, 1 ; GFX908-NEXT: v_or_b32_e32 v3, 0x400000, v0 -; GFX908-NEXT: v_add3_u32 v2, v2, v0, s11 +; GFX908-NEXT: v_add3_u32 v2, v2, v0, s12 ; GFX908-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 ; GFX908-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc -; GFX908-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX908-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX908-NEXT: v_and_or_b32 v0, v1, s10, v0 +; GFX908-NEXT: v_lshlrev_b32_sdwa v0, s10, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX908-NEXT: v_and_or_b32 v0, v1, s11, v0 ; GFX908-NEXT: v_mov_b32_e32 v3, v1 ; GFX908-NEXT: v_mov_b32_e32 v2, v0 ; GFX908-NEXT: buffer_atomic_cmpswap v[2:3], v4, s[4:7], 0 offen glc @@ -3249,33 +3393,36 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset(ptr addrspace(7 ; GFX908-NEXT: s_cbranch_execnz .LBB9_1 ; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX908-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX908-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX908-NEXT: v_lshrrev_b32_e32 v0, s10, v2 ; GFX908-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_addk_i32 s8, 0x200 -; GFX8-NEXT: s_and_b32 s8, s8, -4 -; GFX8-NEXT: v_mov_b32_e32 v4, s8 +; GFX8-NEXT: s_and_b32 s9, s8, -4 +; GFX8-NEXT: v_mov_b32_e32 v4, s9 ; GFX8-NEXT: buffer_load_dword v1, v4, s[4:7], 0 offen +; GFX8-NEXT: s_and_b32 s8, s8, 3 +; GFX8-NEXT: s_lshl_b32 s10, s8, 3 +; GFX8-NEXT: s_lshl_b32 s8, 0xffff, s10 +; GFX8-NEXT: s_not_b32 s11, s8 ; GFX8-NEXT: s_mov_b64 s[8:9], 0 ; GFX8-NEXT: v_lshlrev_b32_e32 v5, 16, v0 ; GFX8-NEXT: .LBB9_1: ; %atomicrmw.start ; GFX8-NEXT: ; =>This 
Inner Loop Header: Depth=1 +; GFX8-NEXT: v_mov_b32_e32 v0, s10 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v1 -; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX8-NEXT: v_add_f32_e32 v0, v0, v5 -; GFX8-NEXT: v_bfe_u32 v3, v0, 16, 1 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v0 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0x7fff, v3 -; GFX8-NEXT: v_or_b32_e32 v6, 0x400000, v0 -; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v6, vcc -; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v1 -; GFX8-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX8-NEXT: v_lshrrev_b32_sdwa v3, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-NEXT: v_add_f32_e32 v3, v3, v5 +; GFX8-NEXT: v_bfe_u32 v6, v3, 16, 1 +; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v3 +; GFX8-NEXT: v_add_u32_e32 v6, vcc, 0x7fff, v6 +; GFX8-NEXT: v_or_b32_e32 v7, 0x400000, v3 +; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 +; GFX8-NEXT: v_cndmask_b32_e32 v3, v6, v7, vcc +; GFX8-NEXT: v_and_b32_e32 v2, s11, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX8-NEXT: v_mov_b32_e32 v3, v1 ; GFX8-NEXT: v_mov_b32_e32 v2, v0 @@ -3289,28 +3436,32 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset(ptr addrspace(7 ; GFX8-NEXT: s_cbranch_execnz .LBB9_1 ; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX8-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, s10, v2 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: s_addk_i32 s8, 0x200 -; GFX7-NEXT: s_and_b32 s8, s8, -4 -; GFX7-NEXT: v_mov_b32_e32 v4, s8 +; GFX7-NEXT: s_and_b32 s9, s8, -4 +; GFX7-NEXT: v_mov_b32_e32 v4, s9 ; GFX7-NEXT: buffer_load_dword v1, v4, s[4:7], 0 offen +; 
GFX7-NEXT: s_and_b32 s8, s8, 3 +; GFX7-NEXT: s_lshl_b32 s10, s8, 3 +; GFX7-NEXT: s_lshl_b32 s8, 0xffff, s10 ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-NEXT: s_not_b32 s11, s8 ; GFX7-NEXT: s_mov_b64 s[8:9], 0 ; GFX7-NEXT: v_and_b32_e32 v5, 0xffff0000, v0 ; GFX7-NEXT: .LBB9_1: ; %atomicrmw.start ; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, s10, v1 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX7-NEXT: v_add_f32_e32 v0, v0, v5 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX7-NEXT: v_and_b32_e32 v2, 0xffffff, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, s11, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, s10, v0 ; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX7-NEXT: v_mov_b32_e32 v3, v1 ; GFX7-NEXT: v_mov_b32_e32 v2, v0 @@ -3324,7 +3475,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset(ptr addrspace(7 ; GFX7-NEXT: s_cbranch_execnz .LBB9_1 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, s10, v2 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; @@ -3332,22 +3483,26 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset(ptr addrspace(7 ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: s_addk_i32 s8, 0x200 -; GFX6-NEXT: s_and_b32 s8, s8, -4 -; GFX6-NEXT: v_mov_b32_e32 v4, s8 +; GFX6-NEXT: s_and_b32 s9, s8, -4 +; GFX6-NEXT: v_mov_b32_e32 v4, s9 ; GFX6-NEXT: buffer_load_dword v1, v4, s[4:7], 0 offen +; GFX6-NEXT: s_and_b32 s8, s8, 3 +; GFX6-NEXT: s_lshl_b32 s10, s8, 3 +; GFX6-NEXT: s_lshl_b32 s8, 0xffff, s10 ; GFX6-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX6-NEXT: s_not_b32 s11, s8 ; GFX6-NEXT: s_mov_b64 s[8:9], 0 ; GFX6-NEXT: v_and_b32_e32 v5, 0xffff0000, v0 ; GFX6-NEXT: .LBB9_1: ; %atomicrmw.start ; GFX6-NEXT: ; =>This Inner Loop Header: 
Depth=1 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v0, s10, v1 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX6-NEXT: v_add_f32_e32 v0, v0, v5 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v1 -; GFX6-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX6-NEXT: v_and_b32_e32 v2, s11, v1 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, s10, v0 ; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX6-NEXT: v_mov_b32_e32 v3, v1 ; GFX6-NEXT: v_mov_b32_e32 v2, v0 @@ -3361,7 +3516,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset(ptr addrspace(7 ; GFX6-NEXT: s_cbranch_execnz .LBB9_1 ; GFX6-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX6-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX6-NEXT: v_lshrrev_b32_e32 v0, s10, v2 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX6-NEXT: s_waitcnt expcnt(0) ; GFX6-NEXT: s_setpc_b64 s[30:31] @@ -3380,15 +3535,19 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_bf16__offset(ptr addrspace(7 ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_addk_co_i32 s4, 0x200 ; GFX12-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX12-NEXT: s_and_b32 s4, s4, -4 -; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12-NEXT: v_mov_b32_e32 v2, s4 -; GFX12-NEXT: s_mov_b32 s4, 0 +; GFX12-NEXT: s_and_b32 s5, s4, -4 +; GFX12-NEXT: s_and_b32 s4, s4, 3 +; GFX12-NEXT: v_mov_b32_e32 v2, s5 +; GFX12-NEXT: s_lshl_b32 s4, s4, 3 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX12-NEXT: s_lshl_b32 s5, 0xffff, s4 +; GFX12-NEXT: s_not_b32 s6, s5 ; GFX12-NEXT: buffer_load_b32 v1, v2, s[0:3], null offen +; GFX12-NEXT: s_mov_b32 s5, 0 ; GFX12-NEXT: .LBB10_1: ; %atomicrmw.start ; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v0, s4, v1 ; GFX12-NEXT: s_wait_storecnt 0x0 ; 
GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-NEXT: v_lshlrev_b32_e32 v0, 16, v0 @@ -3402,8 +3561,8 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_bf16__offset(ptr addrspace(7 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc_lo ; GFX12-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX12-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX12-NEXT: v_and_or_b32 v0, 0xffffff, v1, v0 +; GFX12-NEXT: v_lshlrev_b32_e32 v0, s4, v0 +; GFX12-NEXT: v_and_or_b32 v0, v1, s6, v0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_dual_mov_b32 v5, v1 :: v_dual_mov_b32 v4, v0 ; GFX12-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v2, s[0:3], null offen th:TH_ATOMIC_RETURN @@ -3411,41 +3570,42 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_bf16__offset(ptr addrspace(7 ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v1 ; GFX12-NEXT: v_mov_b32_e32 v1, v4 -; GFX12-NEXT: s_or_b32 s4, vcc_lo, s4 +; GFX12-NEXT: s_or_b32 s5, vcc_lo, s5 ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 +; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s5 ; GFX12-NEXT: s_cbranch_execnz .LBB10_1 ; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_bf16__offset: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: s_addk_i32 s4, 0x200 -; GFX940-NEXT: s_and_b32 s4, s4, -4 -; GFX940-NEXT: v_mov_b32_e32 v2, s4 +; GFX940-NEXT: s_and_b32 s5, s4, -4 +; GFX940-NEXT: v_mov_b32_e32 v2, s5 ; GFX940-NEXT: buffer_load_dword v1, v2, s[0:3], 0 offen -; GFX940-NEXT: s_mov_b32 s6, 0xffffff +; GFX940-NEXT: s_and_b32 s4, s4, 3 +; GFX940-NEXT: s_lshl_b32 s6, s4, 3 +; GFX940-NEXT: s_lshl_b32 s4, 0xffff, s6 +; 
GFX940-NEXT: s_not_b32 s7, s4 ; GFX940-NEXT: s_mov_b64 s[4:5], 0 ; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX940-NEXT: s_movk_i32 s7, 0x7fff +; GFX940-NEXT: s_movk_i32 s8, 0x7fff ; GFX940-NEXT: .LBB10_1: ; %atomicrmw.start ; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 24, v1 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX940-NEXT: v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX940-NEXT: buffer_wbl2 sc1 ; GFX940-NEXT: v_add_f32_e32 v0, v0, v3 ; GFX940-NEXT: v_bfe_u32 v4, v0, 16, 1 ; GFX940-NEXT: v_or_b32_e32 v5, 0x400000, v0 -; GFX940-NEXT: v_add3_u32 v4, v4, v0, s7 +; GFX940-NEXT: v_add3_u32 v4, v4, v0, s8 ; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: s_nop 1 ; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX940-NEXT: v_and_or_b32 v0, v1, s6, v0 +; GFX940-NEXT: v_lshlrev_b32_sdwa v0, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX940-NEXT: v_and_or_b32 v0, v1, s7, v0 ; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[0:1] ; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0 ; GFX940-NEXT: s_waitcnt vmcnt(0) @@ -3464,16 +3624,20 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_bf16__offset(ptr addrspace(7 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_addk_i32 s4, 0x200 ; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX11-NEXT: s_and_b32 s4, s4, -4 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: v_mov_b32_e32 v2, s4 -; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_and_b32 s5, s4, -4 +; GFX11-NEXT: s_and_b32 s4, s4, 3 +; GFX11-NEXT: v_mov_b32_e32 v2, s5 +; GFX11-NEXT: s_lshl_b32 s4, s4, 3 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; 
GFX11-NEXT: s_lshl_b32 s5, 0xffff, s4 +; GFX11-NEXT: s_not_b32 s6, s5 ; GFX11-NEXT: buffer_load_b32 v1, v2, s[0:3], 0 offen +; GFX11-NEXT: s_mov_b32 s5, 0 ; GFX11-NEXT: .p2align 6 ; GFX11-NEXT: .LBB10_1: ; %atomicrmw.start ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX11-NEXT: v_lshrrev_b32_e32 v0, s4, v1 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0 @@ -3487,8 +3651,8 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_bf16__offset(ptr addrspace(7 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc_lo ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX11-NEXT: v_and_or_b32 v0, 0xffffff, v1, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v0, s4, v0 +; GFX11-NEXT: v_and_or_b32 v0, v1, s6, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_dual_mov_b32 v5, v1 :: v_dual_mov_b32 v4, v0 ; GFX11-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v2, s[0:3], 0 offen glc @@ -3497,12 +3661,12 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_bf16__offset(ptr addrspace(7 ; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v1 ; GFX11-NEXT: v_mov_b32_e32 v1, v4 -; GFX11-NEXT: s_or_b32 s4, vcc_lo, s4 +; GFX11-NEXT: s_or_b32 s5, vcc_lo, s5 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 +; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s5 ; GFX11-NEXT: s_cbranch_execnz .LBB10_1 ; GFX11-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s5 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_bf16__offset: @@ -3510,25 +3674,27 @@ define void 
@buffer_fat_ptr_agent_atomic_fadd_noret_bf16__offset(ptr addrspace(7 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_addk_i32 s8, 0x200 ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX10-NEXT: s_and_b32 s8, s8, -4 -; GFX10-NEXT: v_mov_b32_e32 v2, s8 -; GFX10-NEXT: s_mov_b32 s8, 0 +; GFX10-NEXT: s_and_b32 s9, s8, -4 +; GFX10-NEXT: s_and_b32 s8, s8, 3 +; GFX10-NEXT: v_mov_b32_e32 v2, s9 +; GFX10-NEXT: s_lshl_b32 s8, s8, 3 +; GFX10-NEXT: s_lshl_b32 s9, 0xffff, s8 +; GFX10-NEXT: s_not_b32 s10, s9 ; GFX10-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen +; GFX10-NEXT: s_mov_b32 s9, 0 ; GFX10-NEXT: .LBB10_1: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX10-NEXT: v_lshrrev_b32_sdwa v0, s8, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX10-NEXT: v_add_f32_e32 v0, v0, v3 ; GFX10-NEXT: v_bfe_u32 v4, v0, 16, 1 ; GFX10-NEXT: v_or_b32_e32 v5, 0x400000, v0 ; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 ; GFX10-NEXT: v_add3_u32 v4, v4, v0, 0x7fff ; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc_lo -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX10-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX10-NEXT: v_and_or_b32 v0, 0xffffff, v1, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX10-NEXT: v_and_or_b32 v0, v1, s10, v0 ; GFX10-NEXT: v_mov_b32_e32 v5, v1 ; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; GFX10-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[4:7], 0 offen glc @@ -3537,38 +3703,39 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_bf16__offset(ptr addrspace(7 ; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v1 ; GFX10-NEXT: v_mov_b32_e32 v1, v4 -; GFX10-NEXT: s_or_b32 s8, vcc_lo, s8 -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s8 +; GFX10-NEXT: s_or_b32 s9, 
vcc_lo, s9 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s9 ; GFX10-NEXT: s_cbranch_execnz .LBB10_1 ; GFX10-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s9 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_bf16__offset: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: s_addk_i32 s8, 0x200 -; GFX90A-NEXT: s_and_b32 s8, s8, -4 -; GFX90A-NEXT: v_mov_b32_e32 v2, s8 +; GFX90A-NEXT: s_and_b32 s9, s8, -4 +; GFX90A-NEXT: v_mov_b32_e32 v2, s9 ; GFX90A-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen -; GFX90A-NEXT: s_mov_b32 s10, 0xffffff +; GFX90A-NEXT: s_and_b32 s8, s8, 3 +; GFX90A-NEXT: s_lshl_b32 s10, s8, 3 +; GFX90A-NEXT: s_lshl_b32 s8, 0xffff, s10 +; GFX90A-NEXT: s_not_b32 s11, s8 ; GFX90A-NEXT: s_mov_b64 s[8:9], 0 ; GFX90A-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX90A-NEXT: s_movk_i32 s11, 0x7fff +; GFX90A-NEXT: s_movk_i32 s12, 0x7fff ; GFX90A-NEXT: .LBB10_1: ; %atomicrmw.start ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 24, v1 -; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX90A-NEXT: v_lshrrev_b32_sdwa v0, s10, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX90A-NEXT: v_add_f32_e32 v0, v0, v3 ; GFX90A-NEXT: v_bfe_u32 v4, v0, 16, 1 ; GFX90A-NEXT: v_or_b32_e32 v5, 0x400000, v0 -; GFX90A-NEXT: v_add3_u32 v4, v4, v0, s11 +; GFX90A-NEXT: v_add3_u32 v4, v4, v0, s12 ; GFX90A-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 ; GFX90A-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc -; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX90A-NEXT: v_and_or_b32 v0, v1, s10, v0 +; GFX90A-NEXT: v_lshlrev_b32_sdwa v0, s10, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX90A-NEXT: v_and_or_b32 v0, v1, s11, v0 ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[0:1] op_sel:[0,1] ; 
GFX90A-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[4:7], 0 offen glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) @@ -3586,27 +3753,28 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_bf16__offset(ptr addrspace(7 ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: s_addk_i32 s8, 0x200 -; GFX908-NEXT: s_and_b32 s8, s8, -4 -; GFX908-NEXT: v_mov_b32_e32 v2, s8 +; GFX908-NEXT: s_and_b32 s9, s8, -4 +; GFX908-NEXT: v_mov_b32_e32 v2, s9 ; GFX908-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen -; GFX908-NEXT: s_mov_b32 s10, 0xffffff +; GFX908-NEXT: s_and_b32 s8, s8, 3 +; GFX908-NEXT: s_lshl_b32 s10, s8, 3 +; GFX908-NEXT: s_lshl_b32 s8, 0xffff, s10 +; GFX908-NEXT: s_not_b32 s11, s8 ; GFX908-NEXT: s_mov_b64 s[8:9], 0 ; GFX908-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX908-NEXT: s_movk_i32 s11, 0x7fff +; GFX908-NEXT: s_movk_i32 s12, 0x7fff ; GFX908-NEXT: .LBB10_1: ; %atomicrmw.start ; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_lshrrev_b32_e32 v0, 24, v1 -; GFX908-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX908-NEXT: v_lshrrev_b32_sdwa v0, s10, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX908-NEXT: v_add_f32_e32 v0, v0, v3 ; GFX908-NEXT: v_bfe_u32 v4, v0, 16, 1 ; GFX908-NEXT: v_or_b32_e32 v5, 0x400000, v0 -; GFX908-NEXT: v_add3_u32 v4, v4, v0, s11 +; GFX908-NEXT: v_add3_u32 v4, v4, v0, s12 ; GFX908-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 ; GFX908-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc -; GFX908-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX908-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX908-NEXT: v_and_or_b32 v0, v1, s10, v0 +; GFX908-NEXT: v_lshlrev_b32_sdwa v0, s10, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX908-NEXT: v_and_or_b32 v0, v1, s11, v0 ; GFX908-NEXT: v_mov_b32_e32 v5, v1 ; GFX908-NEXT: v_mov_b32_e32 v4, v0 ; GFX908-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[4:7], 0 offen glc @@ -3625,26 +3793,29 @@ define void 
@buffer_fat_ptr_agent_atomic_fadd_noret_bf16__offset(ptr addrspace(7 ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_addk_i32 s8, 0x200 -; GFX8-NEXT: s_and_b32 s8, s8, -4 -; GFX8-NEXT: v_mov_b32_e32 v2, s8 +; GFX8-NEXT: s_and_b32 s9, s8, -4 +; GFX8-NEXT: v_mov_b32_e32 v2, s9 ; GFX8-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen +; GFX8-NEXT: s_and_b32 s8, s8, 3 +; GFX8-NEXT: s_lshl_b32 s10, s8, 3 +; GFX8-NEXT: s_lshl_b32 s8, 0xffff, s10 +; GFX8-NEXT: s_not_b32 s11, s8 ; GFX8-NEXT: s_mov_b64 s[8:9], 0 ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v0 ; GFX8-NEXT: .LBB10_1: ; %atomicrmw.start ; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX8-NEXT: v_mov_b32_e32 v0, s10 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v1 -; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX8-NEXT: v_add_f32_e32 v0, v0, v3 -; GFX8-NEXT: v_bfe_u32 v5, v0, 16, 1 -; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v0 -; GFX8-NEXT: v_add_u32_e32 v5, vcc, 0x7fff, v5 -; GFX8-NEXT: v_or_b32_e32 v6, 0x400000, v0 -; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v6, vcc -; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v1 -; GFX8-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX8-NEXT: v_lshrrev_b32_sdwa v5, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-NEXT: v_add_f32_e32 v5, v5, v3 +; GFX8-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v5 +; GFX8-NEXT: v_add_u32_e32 v6, vcc, 0x7fff, v6 +; GFX8-NEXT: v_or_b32_e32 v7, 0x400000, v5 +; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX8-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc +; GFX8-NEXT: v_and_b32_e32 v4, s11, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX8-NEXT: v_or_b32_e32 v0, v4, v0 ; GFX8-NEXT: v_mov_b32_e32 v5, v1 ; GFX8-NEXT: v_mov_b32_e32 v4, v0 @@ -3664,21 +3835,25 @@ define void 
@buffer_fat_ptr_agent_atomic_fadd_noret_bf16__offset(ptr addrspace(7 ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: s_addk_i32 s8, 0x200 -; GFX7-NEXT: s_and_b32 s8, s8, -4 -; GFX7-NEXT: v_mov_b32_e32 v2, s8 +; GFX7-NEXT: s_and_b32 s9, s8, -4 +; GFX7-NEXT: v_mov_b32_e32 v2, s9 ; GFX7-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen +; GFX7-NEXT: s_and_b32 s8, s8, 3 +; GFX7-NEXT: s_lshl_b32 s10, s8, 3 +; GFX7-NEXT: s_lshl_b32 s8, 0xffff, s10 ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-NEXT: s_not_b32 s11, s8 ; GFX7-NEXT: s_mov_b64 s[8:9], 0 ; GFX7-NEXT: v_and_b32_e32 v3, 0xffff0000, v0 ; GFX7-NEXT: .LBB10_1: ; %atomicrmw.start ; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, s10, v1 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX7-NEXT: v_add_f32_e32 v0, v0, v3 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX7-NEXT: v_and_b32_e32 v4, 0xffffff, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v4, s11, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, s10, v0 ; GFX7-NEXT: v_or_b32_e32 v0, v4, v0 ; GFX7-NEXT: v_mov_b32_e32 v5, v1 ; GFX7-NEXT: v_mov_b32_e32 v4, v0 @@ -3698,22 +3873,26 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_bf16__offset(ptr addrspace(7 ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: s_addk_i32 s8, 0x200 -; GFX6-NEXT: s_and_b32 s8, s8, -4 -; GFX6-NEXT: v_mov_b32_e32 v2, s8 +; GFX6-NEXT: s_and_b32 s9, s8, -4 +; GFX6-NEXT: v_mov_b32_e32 v2, s9 ; GFX6-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen +; GFX6-NEXT: s_and_b32 s8, s8, 3 +; GFX6-NEXT: s_lshl_b32 s10, s8, 3 +; GFX6-NEXT: s_lshl_b32 s8, 0xffff, s10 ; GFX6-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX6-NEXT: s_not_b32 s11, s8 ; GFX6-NEXT: s_mov_b64 s[8:9], 0 ; GFX6-NEXT: v_and_b32_e32 v3, 0xffff0000, v0 ; GFX6-NEXT: .LBB10_1: ; %atomicrmw.start ; GFX6-NEXT: ; =>This Inner Loop Header: Depth=1 ; 
GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v0, s10, v1 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX6-NEXT: v_add_f32_e32 v0, v0, v3 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v1 -; GFX6-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX6-NEXT: v_and_b32_e32 v4, s11, v1 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, s10, v0 ; GFX6-NEXT: v_or_b32_e32 v0, v4, v0 ; GFX6-NEXT: v_mov_b32_e32 v5, v1 ; GFX6-NEXT: v_mov_b32_e32 v4, v0 @@ -3744,8 +3923,13 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall(ptr ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 ; GFX12-NEXT: s_mov_b32 s1, exec_lo -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_and_b32_e32 v7, -4, v4 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX12-NEXT: v_and_b32_e32 v6, 3, v4 +; GFX12-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX12-NEXT: v_lshlrev_b32_e32 v7, 3, v6 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-NEXT: v_lshlrev_b32_e64 v6, v7, 0xffff +; GFX12-NEXT: v_not_b32_e32 v9, v6 ; GFX12-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 ; GFX12-NEXT: v_readfirstlane_b32 s4, v0 ; GFX12-NEXT: v_readfirstlane_b32 s5, v1 @@ -3757,34 +3941,34 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall(ptr ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX12-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX12-NEXT: s_and_saveexec_b32 s0, s0 -; GFX12-NEXT: buffer_load_b32 v6, v7, s[4:7], null offen +; GFX12-NEXT: buffer_load_b32 v6, v8, s[4:7], null offen ; GFX12-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_cbranch_execnz .LBB11_1 ; GFX12-NEXT: ; %bb.2: ; GFX12-NEXT: s_mov_b32 exec_lo, s1 -; GFX12-NEXT: v_lshlrev_b32_e32 v8, 16, v5 +; GFX12-NEXT: v_lshlrev_b32_e32 v10, 16, v5 ; 
GFX12-NEXT: s_mov_b32 s1, 0 ; GFX12-NEXT: .LBB11_3: ; %atomicrmw.start ; GFX12-NEXT: ; =>This Loop Header: Depth=1 ; GFX12-NEXT: ; Child Loop BB11_4 Depth 2 ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: v_lshrrev_b32_e32 v4, 24, v6 +; GFX12-NEXT: v_lshrrev_b32_e32 v4, v7, v6 ; GFX12-NEXT: s_mov_b32 s2, exec_lo ; GFX12-NEXT: s_wait_storecnt 0x0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX12-NEXT: v_add_f32_e32 v4, v4, v8 +; GFX12-NEXT: v_add_f32_e32 v4, v4, v10 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) ; GFX12-NEXT: v_bfe_u32 v5, v4, 16, 1 -; GFX12-NEXT: v_or_b32_e32 v9, 0x400000, v4 +; GFX12-NEXT: v_or_b32_e32 v11, 0x400000, v4 ; GFX12-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4 ; GFX12-NEXT: v_add3_u32 v5, v5, v4, 0x7fff ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX12-NEXT: v_cndmask_b32_e32 v4, v5, v9, vcc_lo +; GFX12-NEXT: v_cndmask_b32_e32 v4, v5, v11, vcc_lo ; GFX12-NEXT: v_lshrrev_b32_e32 v4, 16, v4 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX12-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX12-NEXT: v_and_or_b32 v5, 0xffffff, v6, v4 +; GFX12-NEXT: v_lshlrev_b32_e32 v4, v7, v4 +; GFX12-NEXT: v_and_or_b32 v5, v6, v9, v4 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_mov_b32_e32 v4, v5 ; GFX12-NEXT: v_mov_b32_e32 v5, v6 @@ -3801,7 +3985,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall(ptr ; GFX12-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX12-NEXT: s_and_saveexec_b32 s0, s0 ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v7, s[4:7], null offen th:TH_ATOMIC_RETURN +; GFX12-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[4:7], null offen th:TH_ATOMIC_RETURN ; GFX12-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_cbranch_execnz .LBB11_4 ; GFX12-NEXT: ; %bb.5: ; in Loop: 
Header=BB11_3 Depth=1 @@ -3816,15 +4000,19 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall(ptr ; GFX12-NEXT: s_cbranch_execnz .LBB11_3 ; GFX12-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 -; GFX12-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX12-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: v_add_u32_e32 v4, 0x200, v4 -; GFX940-NEXT: v_and_b32_e32 v8, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v9, 0xffffff +; GFX940-NEXT: v_and_b32_e32 v9, -4, v4 +; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX940-NEXT: v_lshlrev_b32_e32 v8, 3, v4 +; GFX940-NEXT: s_mov_b32 s0, 0xffff +; GFX940-NEXT: v_lshlrev_b32_e64 v4, v8, s0 +; GFX940-NEXT: v_not_b32_e32 v10, v4 ; GFX940-NEXT: s_mov_b64 s[2:3], exec ; GFX940-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 ; GFX940-NEXT: v_readfirstlane_b32 s4, v0 @@ -3836,31 +4024,30 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall(ptr ; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] ; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] ; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: buffer_load_dword v7, v8, s[4:7], 0 offen +; GFX940-NEXT: buffer_load_dword v7, v9, s[4:7], 0 offen ; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] ; GFX940-NEXT: s_cbranch_execnz .LBB11_1 ; GFX940-NEXT: ; %bb.2: ; GFX940-NEXT: s_mov_b64 exec, s[2:3] ; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v10, 16, v5 +; GFX940-NEXT: v_lshlrev_b32_e32 v11, 16, v5 ; GFX940-NEXT: s_movk_i32 s10, 0x7fff ; GFX940-NEXT: .LBB11_3: ; %atomicrmw.start ; GFX940-NEXT: ; =>This Loop Header: Depth=1 ; GFX940-NEXT: ; Child Loop BB11_4 Depth 2 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v4, 24, v7 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX940-NEXT: 
v_add_f32_e32 v4, v4, v10 +; GFX940-NEXT: v_lshrrev_b32_sdwa v4, v8, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX940-NEXT: s_mov_b64 s[8:9], exec +; GFX940-NEXT: v_add_f32_e32 v4, v4, v11 ; GFX940-NEXT: v_bfe_u32 v5, v4, 16, 1 ; GFX940-NEXT: v_add3_u32 v5, v5, v4, s10 ; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v4 ; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 -; GFX940-NEXT: s_mov_b64 s[8:9], exec ; GFX940-NEXT: buffer_wbl2 sc1 +; GFX940-NEXT: s_nop 0 ; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v4, 16, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX940-NEXT: v_and_or_b32 v6, v7, v9, v4 +; GFX940-NEXT: v_lshlrev_b32_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX940-NEXT: v_and_or_b32 v6, v7, v10, v4 ; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[6:7] ; GFX940-NEXT: .LBB11_4: ; Parent Loop BB11_3 Depth=1 ; GFX940-NEXT: ; => This Inner Loop Header: Depth=2 @@ -3874,7 +4061,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall(ptr ; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] ; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen sc0 +; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v9, s[4:7], 0 offen sc0 ; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] ; GFX940-NEXT: s_cbranch_execnz .LBB11_4 ; GFX940-NEXT: ; %bb.5: ; in Loop: Header=BB11_3 Depth=1 @@ -3888,7 +4075,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall(ptr ; GFX940-NEXT: s_cbranch_execnz .LBB11_3 ; GFX940-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX940-NEXT: v_lshrrev_b32_e32 v0, v8, v4 ; GFX940-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall: @@ -3897,8 +4084,13 @@ define bfloat 
@buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall(ptr ; GFX11-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 ; GFX11-NEXT: s_mov_b32 s1, 0 ; GFX11-NEXT: s_mov_b32 s2, exec_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_and_b32_e32 v7, -4, v4 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v6, 3, v4 +; GFX11-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX11-NEXT: v_lshlrev_b32_e32 v7, 3, v6 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e64 v6, v7, 0xffff +; GFX11-NEXT: v_not_b32_e32 v9, v6 ; GFX11-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: v_readfirstlane_b32 s4, v0 ; GFX11-NEXT: v_readfirstlane_b32 s5, v1 @@ -3910,35 +4102,35 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall(ptr ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX11-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-NEXT: buffer_load_b32 v6, v7, s[4:7], 0 offen +; GFX11-NEXT: buffer_load_b32 v6, v8, s[4:7], 0 offen ; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX11-NEXT: s_cbranch_execnz .LBB11_1 ; GFX11-NEXT: ; %bb.2: ; GFX11-NEXT: s_mov_b32 exec_lo, s2 -; GFX11-NEXT: v_lshlrev_b32_e32 v8, 16, v5 +; GFX11-NEXT: v_lshlrev_b32_e32 v10, 16, v5 ; GFX11-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX11-NEXT: .p2align 6 ; GFX11-NEXT: .LBB11_3: ; %atomicrmw.start ; GFX11-NEXT: ; =>This Loop Header: Depth=1 ; GFX11-NEXT: ; Child Loop BB11_4 Depth 2 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_lshrrev_b32_e32 v4, 24, v6 +; GFX11-NEXT: v_lshrrev_b32_e32 v4, v7, v6 ; GFX11-NEXT: s_mov_b32 s2, exec_lo ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX11-NEXT: v_add_f32_e32 v4, v4, v8 +; GFX11-NEXT: v_add_f32_e32 v4, 
v4, v10 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_bfe_u32 v5, v4, 16, 1 -; GFX11-NEXT: v_or_b32_e32 v9, 0x400000, v4 +; GFX11-NEXT: v_or_b32_e32 v11, 0x400000, v4 ; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4 ; GFX11-NEXT: v_add3_u32 v5, v5, v4, 0x7fff ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_cndmask_b32_e32 v4, v5, v9, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v4, v5, v11, vcc_lo ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v4 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX11-NEXT: v_and_or_b32 v5, 0xffffff, v6, v4 +; GFX11-NEXT: v_lshlrev_b32_e32 v4, v7, v4 +; GFX11-NEXT: v_and_or_b32 v5, v6, v9, v4 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_mov_b32_e32 v4, v5 ; GFX11-NEXT: v_mov_b32_e32 v5, v6 @@ -3955,7 +4147,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall(ptr ; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX11-NEXT: s_and_saveexec_b32 s0, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v7, s[4:7], 0 offen glc +; GFX11-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[4:7], 0 offen glc ; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX11-NEXT: s_cbranch_execnz .LBB11_4 ; GFX11-NEXT: ; %bb.5: ; in Loop: Header=BB11_3 Depth=1 @@ -3972,7 +4164,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall(ptr ; GFX11-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX11-NEXT: s_set_inst_prefetch_distance 0x2 ; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s1 -; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX11-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall: @@ -3981,7 +4173,11 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall(ptr ; GFX10-NEXT: v_add_nc_u32_e32 
v4, 0x200, v4 ; GFX10-NEXT: s_mov_b32 s5, 0 ; GFX10-NEXT: s_mov_b32 s6, exec_lo -; GFX10-NEXT: v_and_b32_e32 v7, -4, v4 +; GFX10-NEXT: v_and_b32_e32 v6, 3, v4 +; GFX10-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX10-NEXT: v_lshlrev_b32_e32 v7, 3, v6 +; GFX10-NEXT: v_lshlrev_b32_e64 v6, v7, 0xffff +; GFX10-NEXT: v_not_b32_e32 v9, v6 ; GFX10-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_readfirstlane_b32 s8, v0 ; GFX10-NEXT: v_readfirstlane_b32 s9, v1 @@ -3991,30 +4187,28 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall(ptr ; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] ; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 ; GFX10-NEXT: s_and_saveexec_b32 s4, s4 -; GFX10-NEXT: buffer_load_dword v6, v7, s[8:11], 0 offen +; GFX10-NEXT: buffer_load_dword v6, v8, s[8:11], 0 offen ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: s_cbranch_execnz .LBB11_1 ; GFX10-NEXT: ; %bb.2: ; GFX10-NEXT: s_mov_b32 exec_lo, s6 -; GFX10-NEXT: v_lshlrev_b32_e32 v8, 16, v5 +; GFX10-NEXT: v_lshlrev_b32_e32 v10, 16, v5 ; GFX10-NEXT: .LBB11_3: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Loop Header: Depth=1 ; GFX10-NEXT: ; Child Loop BB11_4 Depth 2 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v6 +; GFX10-NEXT: v_lshrrev_b32_sdwa v4, v7, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX10-NEXT: s_mov_b32 s6, exec_lo ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX10-NEXT: v_add_f32_e32 v4, v4, v8 +; GFX10-NEXT: v_add_f32_e32 v4, v4, v10 ; GFX10-NEXT: v_bfe_u32 v5, v4, 16, 1 -; GFX10-NEXT: v_or_b32_e32 v9, 0x400000, v4 +; GFX10-NEXT: v_or_b32_e32 v11, 0x400000, v4 ; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4 ; GFX10-NEXT: v_add3_u32 v5, v5, v4, 0x7fff -; GFX10-NEXT: v_cndmask_b32_e32 v4, v5, v9, vcc_lo -; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v4 -; GFX10-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX10-NEXT: v_and_or_b32 v5, 
0xffffff, v6, v4 +; GFX10-NEXT: v_cndmask_b32_e32 v4, v5, v11, vcc_lo +; GFX10-NEXT: v_lshlrev_b32_sdwa v4, v7, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX10-NEXT: v_and_or_b32 v5, v6, v9, v4 ; GFX10-NEXT: v_mov_b32_e32 v4, v5 ; GFX10-NEXT: v_mov_b32_e32 v5, v6 ; GFX10-NEXT: .LBB11_4: ; Parent Loop BB11_3 Depth=1 @@ -4028,7 +4222,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall(ptr ; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 ; GFX10-NEXT: s_and_saveexec_b32 s4, s4 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_atomic_cmpswap v[4:5], v7, s[8:11], 0 offen glc +; GFX10-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: s_cbranch_execnz .LBB11_4 @@ -4045,15 +4239,19 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall(ptr ; GFX10-NEXT: s_cbranch_execnz .LBB11_3 ; GFX10-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5 -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX10-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_add_u32_e32 v4, 0x200, v4 -; GFX90A-NEXT: v_and_b32_e32 v8, -4, v4 -; GFX90A-NEXT: v_mov_b32_e32 v9, 0xffffff +; GFX90A-NEXT: v_and_b32_e32 v9, -4, v4 +; GFX90A-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX90A-NEXT: v_lshlrev_b32_e32 v8, 3, v4 +; GFX90A-NEXT: s_mov_b32 s4, 0xffff +; GFX90A-NEXT: v_lshlrev_b32_e64 v4, v8, s4 +; GFX90A-NEXT: v_not_b32_e32 v10, v4 ; GFX90A-NEXT: s_mov_b64 s[6:7], exec ; GFX90A-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: v_readfirstlane_b32 s8, v0 @@ -4065,29 +4263,27 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall(ptr ; GFX90A-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; 
GFX90A-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX90A-NEXT: s_nop 0 -; GFX90A-NEXT: buffer_load_dword v7, v8, s[8:11], 0 offen +; GFX90A-NEXT: buffer_load_dword v7, v9, s[8:11], 0 offen ; GFX90A-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX90A-NEXT: s_cbranch_execnz .LBB11_1 ; GFX90A-NEXT: ; %bb.2: ; GFX90A-NEXT: s_mov_b64 exec, s[6:7] ; GFX90A-NEXT: s_mov_b64 s[6:7], 0 -; GFX90A-NEXT: v_lshlrev_b32_e32 v10, 16, v5 +; GFX90A-NEXT: v_lshlrev_b32_e32 v11, 16, v5 ; GFX90A-NEXT: s_movk_i32 s14, 0x7fff ; GFX90A-NEXT: .LBB11_3: ; %atomicrmw.start ; GFX90A-NEXT: ; =>This Loop Header: Depth=1 ; GFX90A-NEXT: ; Child Loop BB11_4 Depth 2 ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: v_lshrrev_b32_e32 v4, 24, v7 -; GFX90A-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX90A-NEXT: v_add_f32_e32 v4, v4, v10 +; GFX90A-NEXT: v_lshrrev_b32_sdwa v4, v8, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX90A-NEXT: v_add_f32_e32 v4, v4, v11 ; GFX90A-NEXT: v_bfe_u32 v5, v4, 16, 1 ; GFX90A-NEXT: v_add3_u32 v5, v5, v4, s14 ; GFX90A-NEXT: v_or_b32_e32 v6, 0x400000, v4 ; GFX90A-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 ; GFX90A-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc -; GFX90A-NEXT: v_lshrrev_b32_e32 v4, 16, v4 -; GFX90A-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX90A-NEXT: v_and_or_b32 v6, v7, v9, v4 +; GFX90A-NEXT: v_lshlrev_b32_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX90A-NEXT: v_and_or_b32 v6, v7, v10, v4 ; GFX90A-NEXT: s_mov_b64 s[12:13], exec ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[6:7], v[6:7] op_sel:[0,1] ; GFX90A-NEXT: .LBB11_4: ; Parent Loop BB11_3 Depth=1 @@ -4101,7 +4297,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall(ptr ; GFX90A-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc +; GFX90A-NEXT: buffer_atomic_cmpswap v[4:5], v9, s[8:11], 0 offen glc ; 
GFX90A-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX90A-NEXT: s_cbranch_execnz .LBB11_4 ; GFX90A-NEXT: ; %bb.5: ; in Loop: Header=BB11_3 Depth=1 @@ -4115,15 +4311,19 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall(ptr ; GFX90A-NEXT: s_cbranch_execnz .LBB11_3 ; GFX90A-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX90A-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX90A-NEXT: v_lshrrev_b32_e32 v0, v8, v4 ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: v_add_u32_e32 v4, 0x200, v4 -; GFX908-NEXT: v_and_b32_e32 v7, -4, v4 -; GFX908-NEXT: v_mov_b32_e32 v8, 0xffffff +; GFX908-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX908-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX908-NEXT: v_lshlrev_b32_e32 v7, 3, v4 +; GFX908-NEXT: s_mov_b32 s4, 0xffff +; GFX908-NEXT: v_lshlrev_b32_e64 v4, v7, s4 +; GFX908-NEXT: v_not_b32_e32 v9, v4 ; GFX908-NEXT: s_mov_b64 s[6:7], exec ; GFX908-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 ; GFX908-NEXT: v_readfirstlane_b32 s8, v0 @@ -4135,29 +4335,27 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall(ptr ; GFX908-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX908-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX908-NEXT: s_nop 0 -; GFX908-NEXT: buffer_load_dword v6, v7, s[8:11], 0 offen +; GFX908-NEXT: buffer_load_dword v6, v8, s[8:11], 0 offen ; GFX908-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX908-NEXT: s_cbranch_execnz .LBB11_1 ; GFX908-NEXT: ; %bb.2: ; GFX908-NEXT: s_mov_b64 exec, s[6:7] ; GFX908-NEXT: s_mov_b64 s[6:7], 0 -; GFX908-NEXT: v_lshlrev_b32_e32 v9, 16, v5 +; GFX908-NEXT: v_lshlrev_b32_e32 v10, 16, v5 ; GFX908-NEXT: s_movk_i32 s14, 0x7fff ; GFX908-NEXT: .LBB11_3: ; %atomicrmw.start ; GFX908-NEXT: ; =>This Loop Header: Depth=1 ; GFX908-NEXT: ; Child Loop BB11_4 Depth 2 ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: 
v_lshrrev_b32_e32 v4, 24, v6 -; GFX908-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX908-NEXT: v_add_f32_e32 v4, v4, v9 +; GFX908-NEXT: v_lshrrev_b32_sdwa v4, v7, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX908-NEXT: v_add_f32_e32 v4, v4, v10 ; GFX908-NEXT: v_bfe_u32 v5, v4, 16, 1 ; GFX908-NEXT: v_add3_u32 v5, v5, v4, s14 -; GFX908-NEXT: v_or_b32_e32 v10, 0x400000, v4 +; GFX908-NEXT: v_or_b32_e32 v11, 0x400000, v4 ; GFX908-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 -; GFX908-NEXT: v_cndmask_b32_e32 v4, v5, v10, vcc -; GFX908-NEXT: v_lshrrev_b32_e32 v4, 16, v4 -; GFX908-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX908-NEXT: v_and_or_b32 v5, v6, v8, v4 +; GFX908-NEXT: v_cndmask_b32_e32 v4, v5, v11, vcc +; GFX908-NEXT: v_lshlrev_b32_sdwa v4, v7, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX908-NEXT: v_and_or_b32 v5, v6, v9, v4 ; GFX908-NEXT: v_mov_b32_e32 v4, v5 ; GFX908-NEXT: s_mov_b64 s[12:13], exec ; GFX908-NEXT: v_mov_b32_e32 v5, v6 @@ -4172,7 +4370,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall(ptr ; GFX908-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX908-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: buffer_atomic_cmpswap v[4:5], v7, s[8:11], 0 offen glc +; GFX908-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc ; GFX908-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX908-NEXT: s_cbranch_execnz .LBB11_4 ; GFX908-NEXT: ; %bb.5: ; in Loop: Header=BB11_3 Depth=1 @@ -4186,14 +4384,19 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall(ptr ; GFX908-NEXT: s_cbranch_execnz .LBB11_3 ; GFX908-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX908-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX908-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX908-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX908-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) ; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0x200, v4 -; GFX8-NEXT: v_and_b32_e32 v7, -4, v4 +; GFX8-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX8-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v7, 3, v4 +; GFX8-NEXT: s_mov_b32 s4, 0xffff +; GFX8-NEXT: v_lshlrev_b32_e64 v4, v7, s4 +; GFX8-NEXT: v_not_b32_e32 v9, v4 ; GFX8-NEXT: s_mov_b64 s[6:7], exec ; GFX8-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 ; GFX8-NEXT: v_readfirstlane_b32 s8, v0 @@ -4205,29 +4408,27 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall(ptr ; GFX8-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX8-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX8-NEXT: s_nop 0 -; GFX8-NEXT: buffer_load_dword v6, v7, s[8:11], 0 offen +; GFX8-NEXT: buffer_load_dword v6, v8, s[8:11], 0 offen ; GFX8-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX8-NEXT: s_cbranch_execnz .LBB11_1 ; GFX8-NEXT: ; %bb.2: ; GFX8-NEXT: s_mov_b64 exec, s[6:7] ; GFX8-NEXT: s_mov_b64 s[6:7], 0 -; GFX8-NEXT: v_lshlrev_b32_e32 v8, 16, v5 +; GFX8-NEXT: v_lshlrev_b32_e32 v10, 16, v5 ; GFX8-NEXT: .LBB11_3: ; %atomicrmw.start ; GFX8-NEXT: ; =>This Loop Header: Depth=1 ; GFX8-NEXT: ; Child Loop BB11_4 Depth 2 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v6 -; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX8-NEXT: v_add_f32_e32 v4, v4, v8 +; GFX8-NEXT: v_lshrrev_b32_sdwa v4, v7, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-NEXT: v_add_f32_e32 v4, v4, v10 ; GFX8-NEXT: v_bfe_u32 v5, v4, 16, 1 ; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v4 ; GFX8-NEXT: v_add_u32_e32 v5, vcc, 0x7fff, v5 -; GFX8-NEXT: v_or_b32_e32 v9, 0x400000, v4 +; GFX8-NEXT: v_or_b32_e32 v11, 0x400000, v4 ; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 -; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v9, vcc -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v4 -; GFX8-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX8-NEXT: v_and_b32_e32 v5, 0xffffff, v6 +; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v11, vcc +; GFX8-NEXT: 
v_lshlrev_b32_sdwa v4, v7, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_and_b32_e32 v5, v6, v9 ; GFX8-NEXT: v_or_b32_e32 v5, v5, v4 ; GFX8-NEXT: v_mov_b32_e32 v4, v5 ; GFX8-NEXT: s_mov_b64 s[12:13], exec @@ -4243,7 +4444,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall(ptr ; GFX8-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX8-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: buffer_atomic_cmpswap v[4:5], v7, s[8:11], 0 offen glc +; GFX8-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc ; GFX8-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX8-NEXT: s_cbranch_execnz .LBB11_4 ; GFX8-NEXT: ; %bb.5: ; in Loop: Header=BB11_3 Depth=1 @@ -4257,14 +4458,18 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall(ptr ; GFX8-NEXT: s_cbranch_execnz .LBB11_3 ; GFX8-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX8-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_add_i32_e32 v4, vcc, 0x200, v4 -; GFX7-NEXT: v_and_b32_e32 v7, -4, v4 +; GFX7-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX7-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX7-NEXT: v_lshlrev_b32_e32 v7, 3, v4 +; GFX7-NEXT: v_lshl_b32_e32 v4, 0xffff, v7 +; GFX7-NEXT: v_not_b32_e32 v9, v4 ; GFX7-NEXT: s_mov_b64 s[6:7], exec ; GFX7-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 ; GFX7-NEXT: v_readfirstlane_b32 s8, v0 @@ -4275,24 +4480,24 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall(ptr ; GFX7-NEXT: v_cmp_eq_u64_e64 s[4:5], s[10:11], v[2:3] ; GFX7-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX7-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GFX7-NEXT: buffer_load_dword v6, v7, s[8:11], 0 offen +; GFX7-NEXT: buffer_load_dword v6, v8, 
s[8:11], 0 offen ; GFX7-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_cbranch_execnz .LBB11_1 ; GFX7-NEXT: ; %bb.2: ; GFX7-NEXT: s_mov_b64 exec, s[6:7] ; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v5 ; GFX7-NEXT: s_mov_b64 s[6:7], 0 -; GFX7-NEXT: v_and_b32_e32 v8, 0xffff0000, v4 +; GFX7-NEXT: v_and_b32_e32 v10, 0xffff0000, v4 ; GFX7-NEXT: .LBB11_3: ; %atomicrmw.start ; GFX7-NEXT: ; =>This Loop Header: Depth=1 ; GFX7-NEXT: ; Child Loop BB11_4 Depth 2 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v4, 24, v6 +; GFX7-NEXT: v_lshrrev_b32_e32 v4, v7, v6 ; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX7-NEXT: v_add_f32_e32 v4, v4, v8 +; GFX7-NEXT: v_add_f32_e32 v4, v4, v10 ; GFX7-NEXT: v_lshrrev_b32_e32 v4, 16, v4 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX7-NEXT: v_and_b32_e32 v5, 0xffffff, v6 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, v7, v4 +; GFX7-NEXT: v_and_b32_e32 v5, v6, v9 ; GFX7-NEXT: v_or_b32_e32 v5, v5, v4 ; GFX7-NEXT: v_mov_b32_e32 v4, v5 ; GFX7-NEXT: s_mov_b64 s[12:13], exec @@ -4308,7 +4513,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall(ptr ; GFX7-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX7-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: buffer_atomic_cmpswap v[4:5], v7, s[8:11], 0 offen glc +; GFX7-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc ; GFX7-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_cbranch_execnz .LBB11_4 ; GFX7-NEXT: ; %bb.5: ; in Loop: Header=BB11_3 Depth=1 @@ -4322,7 +4527,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall(ptr ; GFX7-NEXT: s_cbranch_execnz .LBB11_3 ; GFX7-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; @@ -4330,7 +4535,11 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall(ptr ; 
GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0x200, v4 -; GFX6-NEXT: v_and_b32_e32 v7, -4, v4 +; GFX6-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX6-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX6-NEXT: v_lshlrev_b32_e32 v7, 3, v4 +; GFX6-NEXT: v_lshl_b32_e32 v4, 0xffff, v7 +; GFX6-NEXT: v_not_b32_e32 v9, v4 ; GFX6-NEXT: s_mov_b64 s[6:7], exec ; GFX6-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 ; GFX6-NEXT: v_readfirstlane_b32 s8, v0 @@ -4341,24 +4550,24 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall(ptr ; GFX6-NEXT: v_cmp_eq_u64_e64 s[4:5], s[10:11], v[2:3] ; GFX6-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX6-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GFX6-NEXT: buffer_load_dword v6, v7, s[8:11], 0 offen +; GFX6-NEXT: buffer_load_dword v6, v8, s[8:11], 0 offen ; GFX6-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX6-NEXT: s_cbranch_execnz .LBB11_1 ; GFX6-NEXT: ; %bb.2: ; GFX6-NEXT: s_mov_b64 exec, s[6:7] ; GFX6-NEXT: v_mul_f32_e32 v4, 1.0, v5 ; GFX6-NEXT: s_mov_b64 s[6:7], 0 -; GFX6-NEXT: v_and_b32_e32 v8, 0xffff0000, v4 +; GFX6-NEXT: v_and_b32_e32 v10, 0xffff0000, v4 ; GFX6-NEXT: .LBB11_3: ; %atomicrmw.start ; GFX6-NEXT: ; =>This Loop Header: Depth=1 ; GFX6-NEXT: ; Child Loop BB11_4 Depth 2 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) -; GFX6-NEXT: v_lshrrev_b32_e32 v4, 24, v6 +; GFX6-NEXT: v_lshrrev_b32_e32 v4, v7, v6 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX6-NEXT: v_add_f32_e32 v4, v4, v8 +; GFX6-NEXT: v_add_f32_e32 v4, v4, v10 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, 16, v4 -; GFX6-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v6 +; GFX6-NEXT: v_lshlrev_b32_e32 v4, v7, v4 +; GFX6-NEXT: v_and_b32_e32 v5, v6, v9 ; GFX6-NEXT: v_or_b32_e32 v5, v5, v4 ; GFX6-NEXT: v_mov_b32_e32 v4, v5 ; GFX6-NEXT: s_mov_b64 s[12:13], exec @@ -4374,7 +4583,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall(ptr ; GFX6-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; 
GFX6-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[4:5], v7, s[8:11], 0 offen glc +; GFX6-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc ; GFX6-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX6-NEXT: s_cbranch_execnz .LBB11_4 ; GFX6-NEXT: ; %bb.5: ; in Loop: Header=BB11_3 Depth=1 @@ -4388,7 +4597,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall(ptr ; GFX6-NEXT: s_cbranch_execnz .LBB11_3 ; GFX6-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX6-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX6-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX6-NEXT: s_waitcnt expcnt(0) ; GFX6-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmax.ll b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmax.ll index 684fa2d7df60a5..fb068e35fc5977 100644 --- a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmax.ll @@ -2354,57 +2354,66 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset(ptr addrspace(7) i ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_addk_co_i32 s4, 0x200 ; GFX12-NEXT: v_max_num_f16_e32 v5, v0, v0 -; GFX12-NEXT: s_and_b32 s4, s4, -4 -; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12-NEXT: v_mov_b32_e32 v4, s4 -; GFX12-NEXT: s_mov_b32 s4, 0 +; GFX12-NEXT: s_and_b32 s5, s4, -4 +; GFX12-NEXT: s_and_b32 s4, s4, 3 +; GFX12-NEXT: v_mov_b32_e32 v4, s5 +; GFX12-NEXT: s_lshl_b32 s4, s4, 3 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX12-NEXT: s_lshl_b32 s5, 0xffff, s4 +; GFX12-NEXT: s_not_b32 s6, s5 ; GFX12-NEXT: buffer_load_b32 v1, v4, s[0:3], null offen +; GFX12-NEXT: s_mov_b32 s5, 0 ; GFX12-NEXT: .LBB6_1: ; %atomicrmw.start ; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: 
v_lshrrev_b32_e32 v0, 24, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v0, s4, v1 ; GFX12-NEXT: s_wait_storecnt 0x0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-NEXT: v_max_num_f16_e32 v0, v0, v0 ; GFX12-NEXT: v_max_num_f16_e32 v0, v0, v5 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX12-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX12-NEXT: v_lshlrev_b32_e32 v0, s4, v0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX12-NEXT: v_and_or_b32 v0, 0xffffff, v1, v0 +; GFX12-NEXT: v_and_or_b32 v0, v1, s6, v0 ; GFX12-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v2, v0 ; GFX12-NEXT: buffer_atomic_cmpswap_b32 v[2:3], v4, s[0:3], null offen th:TH_ATOMIC_RETURN ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v1 ; GFX12-NEXT: v_mov_b32_e32 v1, v2 -; GFX12-NEXT: s_or_b32 s4, vcc_lo, s4 +; GFX12-NEXT: s_or_b32 s5, vcc_lo, s5 ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 +; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s5 ; GFX12-NEXT: s_cbranch_execnz .LBB6_1 ; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 -; GFX12-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; GFX12-NEXT: v_lshrrev_b32_e32 v0, s4, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: s_addk_i32 s4, 0x200 -; GFX940-NEXT: s_and_b32 s4, s4, -4 -; GFX940-NEXT: v_mov_b32_e32 v4, s4 +; GFX940-NEXT: s_and_b32 s5, s4, -4 +; GFX940-NEXT: v_mov_b32_e32 v4, s5 ; GFX940-NEXT: buffer_load_dword v1, v4, s[0:3], 0 offen -; GFX940-NEXT: s_mov_b32 s6, 0xffffff +; GFX940-NEXT: s_and_b32 s4, s4, 3 +; GFX940-NEXT: s_lshl_b32 s6, s4, 3 +; 
GFX940-NEXT: s_lshl_b32 s4, 0xffff, s6 +; GFX940-NEXT: s_not_b32 s7, s4 ; GFX940-NEXT: s_mov_b64 s[4:5], 0 ; GFX940-NEXT: v_max_f16_e32 v5, v0, v0 ; GFX940-NEXT: .LBB6_1: ; %atomicrmw.start ; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f16_sdwa v0, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3 -; GFX940-NEXT: v_max_f16_sdwa v0, v0, v5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: v_and_or_b32 v0, v1, s6, v0 +; GFX940-NEXT: v_lshrrev_b32_e32 v0, s6, v1 +; GFX940-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX940-NEXT: v_max_f16_e32 v0, v0, v5 +; GFX940-NEXT: v_lshlrev_b32_e32 v0, s6, v0 +; GFX940-NEXT: v_and_or_b32 v0, v1, s7, v0 ; GFX940-NEXT: v_mov_b64_e32 v[2:3], v[0:1] +; GFX940-NEXT: buffer_wbl2 sc1 ; GFX940-NEXT: buffer_atomic_cmpswap v[2:3], v4, s[0:3], 0 offen sc0 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: buffer_inv sc1 @@ -2415,7 +2424,7 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset(ptr addrspace(7) i ; GFX940-NEXT: s_cbranch_execnz .LBB6_1 ; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX940-NEXT: v_lshrrev_b32_e32 v0, s6, v2 ; GFX940-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset: @@ -2423,25 +2432,29 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset(ptr addrspace(7) i ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_addk_i32 s4, 0x200 ; GFX11-NEXT: v_max_f16_e32 v5, v0, v0 -; GFX11-NEXT: s_and_b32 s4, s4, -4 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: v_mov_b32_e32 v4, s4 -; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_and_b32 s5, s4, -4 +; GFX11-NEXT: s_and_b32 s4, s4, 3 +; GFX11-NEXT: v_mov_b32_e32 v4, s5 +; GFX11-NEXT: s_lshl_b32 s4, s4, 3 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | 
instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_lshl_b32 s5, 0xffff, s4 +; GFX11-NEXT: s_not_b32 s6, s5 ; GFX11-NEXT: buffer_load_b32 v1, v4, s[0:3], 0 offen +; GFX11-NEXT: s_mov_b32 s5, 0 ; GFX11-NEXT: .p2align 6 ; GFX11-NEXT: .LBB6_1: ; %atomicrmw.start ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX11-NEXT: v_lshrrev_b32_e32 v0, s4, v1 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_max_f16_e32 v0, v0, v0 ; GFX11-NEXT: v_max_f16_e32 v0, v0, v5 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v0, s4, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_and_or_b32 v0, 0xffffff, v1, v0 +; GFX11-NEXT: v_and_or_b32 v0, v1, s6, v0 ; GFX11-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v2, v0 ; GFX11-NEXT: buffer_atomic_cmpswap_b32 v[2:3], v4, s[0:3], 0 offen glc ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -2449,13 +2462,13 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset(ptr addrspace(7) i ; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v1 ; GFX11-NEXT: v_mov_b32_e32 v1, v2 -; GFX11-NEXT: s_or_b32 s4, vcc_lo, s4 +; GFX11-NEXT: s_or_b32 s5, vcc_lo, s5 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 +; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s5 ; GFX11-NEXT: s_cbranch_execnz .LBB6_1 ; GFX11-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s4 -; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; GFX11-NEXT: v_lshrrev_b32_e32 v0, s4, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset: 
@@ -2463,19 +2476,23 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset(ptr addrspace(7) i ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_addk_i32 s8, 0x200 ; GFX10-NEXT: v_max_f16_e32 v5, v0, v0 -; GFX10-NEXT: s_and_b32 s8, s8, -4 -; GFX10-NEXT: v_mov_b32_e32 v4, s8 -; GFX10-NEXT: s_mov_b32 s8, 0 +; GFX10-NEXT: s_and_b32 s9, s8, -4 +; GFX10-NEXT: s_and_b32 s8, s8, 3 +; GFX10-NEXT: v_mov_b32_e32 v4, s9 +; GFX10-NEXT: s_lshl_b32 s8, s8, 3 +; GFX10-NEXT: s_lshl_b32 s9, 0xffff, s8 +; GFX10-NEXT: s_not_b32 s10, s9 ; GFX10-NEXT: buffer_load_dword v1, v4, s[4:7], 0 offen +; GFX10-NEXT: s_mov_b32 s9, 0 ; GFX10-NEXT: .LBB6_1: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_max_f16_sdwa v0, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3 +; GFX10-NEXT: v_lshrrev_b32_e32 v0, s8, v1 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_max_f16_e32 v0, v0, v0 ; GFX10-NEXT: v_max_f16_e32 v0, v0, v5 -; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX10-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX10-NEXT: v_and_or_b32 v0, 0xffffff, v1, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GFX10-NEXT: v_and_or_b32 v0, v1, s10, v0 ; GFX10-NEXT: v_mov_b32_e32 v3, v1 ; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; GFX10-NEXT: buffer_atomic_cmpswap v[2:3], v4, s[4:7], 0 offen glc @@ -2484,30 +2501,35 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset(ptr addrspace(7) i ; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v1 ; GFX10-NEXT: v_mov_b32_e32 v1, v2 -; GFX10-NEXT: s_or_b32 s8, vcc_lo, s8 -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s8 +; GFX10-NEXT: s_or_b32 s9, vcc_lo, s9 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s9 ; GFX10-NEXT: s_cbranch_execnz .LBB6_1 ; GFX10-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8 -; GFX10-NEXT: 
v_lshrrev_b32_e32 v0, 24, v2 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s9 +; GFX10-NEXT: v_lshrrev_b32_e32 v0, s8, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: s_addk_i32 s8, 0x200 -; GFX90A-NEXT: s_and_b32 s8, s8, -4 -; GFX90A-NEXT: v_mov_b32_e32 v4, s8 +; GFX90A-NEXT: s_and_b32 s9, s8, -4 +; GFX90A-NEXT: v_mov_b32_e32 v4, s9 ; GFX90A-NEXT: buffer_load_dword v1, v4, s[4:7], 0 offen -; GFX90A-NEXT: s_mov_b32 s10, 0xffffff +; GFX90A-NEXT: s_and_b32 s8, s8, 3 +; GFX90A-NEXT: s_lshl_b32 s10, s8, 3 +; GFX90A-NEXT: s_lshl_b32 s8, 0xffff, s10 +; GFX90A-NEXT: s_not_b32 s11, s8 ; GFX90A-NEXT: s_mov_b64 s[8:9], 0 ; GFX90A-NEXT: v_max_f16_e32 v5, v0, v0 ; GFX90A-NEXT: .LBB6_1: ; %atomicrmw.start ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: v_max_f16_sdwa v0, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3 -; GFX90A-NEXT: v_max_f16_sdwa v0, v0, v5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX90A-NEXT: v_and_or_b32 v0, v1, s10, v0 +; GFX90A-NEXT: v_lshrrev_b32_e32 v0, s10, v1 +; GFX90A-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX90A-NEXT: v_max_f16_e32 v0, v0, v5 +; GFX90A-NEXT: v_lshlrev_b32_e32 v0, s10, v0 +; GFX90A-NEXT: v_and_or_b32 v0, v1, s11, v0 ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1] ; GFX90A-NEXT: buffer_atomic_cmpswap v[2:3], v4, s[4:7], 0 offen glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) @@ -2519,25 +2541,30 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset(ptr addrspace(7) i ; GFX90A-NEXT: s_cbranch_execnz .LBB6_1 ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX90A-NEXT: v_lshrrev_b32_e32 v0, s10, v2 ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: 
buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: s_addk_i32 s8, 0x200 -; GFX908-NEXT: s_and_b32 s8, s8, -4 -; GFX908-NEXT: v_mov_b32_e32 v4, s8 +; GFX908-NEXT: s_and_b32 s9, s8, -4 +; GFX908-NEXT: v_mov_b32_e32 v4, s9 ; GFX908-NEXT: buffer_load_dword v1, v4, s[4:7], 0 offen -; GFX908-NEXT: s_mov_b32 s10, 0xffffff +; GFX908-NEXT: s_and_b32 s8, s8, 3 +; GFX908-NEXT: s_lshl_b32 s10, s8, 3 +; GFX908-NEXT: s_lshl_b32 s8, 0xffff, s10 +; GFX908-NEXT: s_not_b32 s11, s8 ; GFX908-NEXT: s_mov_b64 s[8:9], 0 ; GFX908-NEXT: v_max_f16_e32 v5, v0, v0 ; GFX908-NEXT: .LBB6_1: ; %atomicrmw.start ; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_max_f16_sdwa v0, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3 -; GFX908-NEXT: v_max_f16_sdwa v0, v0, v5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX908-NEXT: v_and_or_b32 v0, v1, s10, v0 +; GFX908-NEXT: v_lshrrev_b32_e32 v0, s10, v1 +; GFX908-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX908-NEXT: v_max_f16_e32 v0, v0, v5 +; GFX908-NEXT: v_lshlrev_b32_e32 v0, s10, v0 +; GFX908-NEXT: v_and_or_b32 v0, v1, s11, v0 ; GFX908-NEXT: v_mov_b32_e32 v3, v1 ; GFX908-NEXT: v_mov_b32_e32 v2, v0 ; GFX908-NEXT: buffer_atomic_cmpswap v[2:3], v4, s[4:7], 0 offen glc @@ -2550,24 +2577,30 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset(ptr addrspace(7) i ; GFX908-NEXT: s_cbranch_execnz .LBB6_1 ; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX908-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX908-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX908-NEXT: v_lshrrev_b32_e32 v0, s10, v2 ; GFX908-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_addk_i32 s8, 0x200 -; GFX8-NEXT: s_and_b32 s8, s8, -4 -; GFX8-NEXT: v_mov_b32_e32 v4, s8 +; GFX8-NEXT: 
s_and_b32 s9, s8, -4 +; GFX8-NEXT: v_mov_b32_e32 v4, s9 ; GFX8-NEXT: buffer_load_dword v1, v4, s[4:7], 0 offen +; GFX8-NEXT: s_and_b32 s8, s8, 3 +; GFX8-NEXT: s_lshl_b32 s10, s8, 3 +; GFX8-NEXT: s_lshl_b32 s8, 0xffff, s10 +; GFX8-NEXT: s_not_b32 s11, s8 ; GFX8-NEXT: s_mov_b64 s[8:9], 0 ; GFX8-NEXT: v_max_f16_e32 v5, v0, v0 ; GFX8-NEXT: .LBB6_1: ; %atomicrmw.start ; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_max_f16_sdwa v0, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3 -; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v1 -; GFX8-NEXT: v_max_f16_sdwa v0, v0, v5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-NEXT: v_lshrrev_b32_e32 v0, s10, v1 +; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-NEXT: v_max_f16_e32 v0, v0, v5 +; GFX8-NEXT: v_and_b32_e32 v2, s11, v1 +; GFX8-NEXT: v_lshlrev_b32_e32 v0, s10, v0 ; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX8-NEXT: v_mov_b32_e32 v3, v1 ; GFX8-NEXT: v_mov_b32_e32 v2, v0 @@ -2581,28 +2614,32 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset(ptr addrspace(7) i ; GFX8-NEXT: s_cbranch_execnz .LBB6_1 ; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX8-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, s10, v2 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: s_addk_i32 s8, 0x200 -; GFX7-NEXT: s_and_b32 s8, s8, -4 -; GFX7-NEXT: v_mov_b32_e32 v4, s8 +; GFX7-NEXT: s_and_b32 s9, s8, -4 +; GFX7-NEXT: v_mov_b32_e32 v4, s9 ; GFX7-NEXT: buffer_load_dword v1, v4, s[4:7], 0 offen ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: s_mov_b64 s[8:9], 0 +; GFX7-NEXT: s_and_b32 s8, s8, 3 +; GFX7-NEXT: s_lshl_b32 s10, s8, 3 +; GFX7-NEXT: s_lshl_b32 s8, 0xffff, s10 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v0 +; GFX7-NEXT: s_not_b32 s11, s8 +; GFX7-NEXT: 
s_mov_b64 s[8:9], 0 ; GFX7-NEXT: .LBB6_1: ; %atomicrmw.start ; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, s10, v1 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_and_b32_e32 v2, 0xffffff, v1 +; GFX7-NEXT: v_and_b32_e32 v2, s11, v1 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v5 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, s10, v0 ; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX7-NEXT: v_mov_b32_e32 v3, v1 ; GFX7-NEXT: v_mov_b32_e32 v2, v0 @@ -2616,7 +2653,7 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset(ptr addrspace(7) i ; GFX7-NEXT: s_cbranch_execnz .LBB6_1 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, s10, v2 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; @@ -2624,22 +2661,26 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset(ptr addrspace(7) i ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: s_addk_i32 s8, 0x200 -; GFX6-NEXT: s_and_b32 s8, s8, -4 -; GFX6-NEXT: v_mov_b32_e32 v4, s8 +; GFX6-NEXT: s_and_b32 s9, s8, -4 +; GFX6-NEXT: v_mov_b32_e32 v4, s9 ; GFX6-NEXT: buffer_load_dword v1, v4, s[4:7], 0 offen ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-NEXT: s_mov_b64 s[8:9], 0 +; GFX6-NEXT: s_and_b32 s8, s8, 3 +; GFX6-NEXT: s_lshl_b32 s10, s8, 3 +; GFX6-NEXT: s_lshl_b32 s8, 0xffff, s10 ; GFX6-NEXT: v_cvt_f32_f16_e32 v5, v0 +; GFX6-NEXT: s_not_b32 s11, s8 +; GFX6-NEXT: s_mov_b64 s[8:9], 0 ; GFX6-NEXT: .LBB6_1: ; %atomicrmw.start ; GFX6-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v0, s10, v1 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: 
v_and_b32_e32 v2, 0xffffff, v1 +; GFX6-NEXT: v_and_b32_e32 v2, s11, v1 ; GFX6-NEXT: v_max_f32_e32 v0, v0, v5 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, s10, v0 ; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX6-NEXT: v_mov_b32_e32 v3, v1 ; GFX6-NEXT: v_mov_b32_e32 v2, v0 @@ -2653,7 +2694,7 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset(ptr addrspace(7) i ; GFX6-NEXT: s_cbranch_execnz .LBB6_1 ; GFX6-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX6-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX6-NEXT: v_lshrrev_b32_e32 v0, s10, v2 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: s_waitcnt expcnt(0) ; GFX6-NEXT: s_setpc_b64 s[30:31] @@ -2672,56 +2713,65 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f16__offset(ptr addrspace(7) ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_addk_co_i32 s4, 0x200 ; GFX12-NEXT: v_max_num_f16_e32 v3, v0, v0 -; GFX12-NEXT: s_and_b32 s4, s4, -4 -; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12-NEXT: v_mov_b32_e32 v2, s4 -; GFX12-NEXT: s_mov_b32 s4, 0 +; GFX12-NEXT: s_and_b32 s5, s4, -4 +; GFX12-NEXT: s_and_b32 s4, s4, 3 +; GFX12-NEXT: v_mov_b32_e32 v2, s5 +; GFX12-NEXT: s_lshl_b32 s4, s4, 3 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX12-NEXT: s_lshl_b32 s5, 0xffff, s4 +; GFX12-NEXT: s_not_b32 s6, s5 ; GFX12-NEXT: buffer_load_b32 v1, v2, s[0:3], null offen +; GFX12-NEXT: s_mov_b32 s5, 0 ; GFX12-NEXT: .LBB7_1: ; %atomicrmw.start ; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v0, s4, v1 ; GFX12-NEXT: s_wait_storecnt 0x0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-NEXT: v_max_num_f16_e32 v0, v0, v0 ; GFX12-NEXT: v_max_num_f16_e32 v0, v0, v3 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | 
instid1(VALU_DEP_1) ; GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX12-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX12-NEXT: v_lshlrev_b32_e32 v0, s4, v0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX12-NEXT: v_and_or_b32 v0, 0xffffff, v1, v0 +; GFX12-NEXT: v_and_or_b32 v0, v1, s6, v0 ; GFX12-NEXT: v_dual_mov_b32 v5, v1 :: v_dual_mov_b32 v4, v0 ; GFX12-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v2, s[0:3], null offen th:TH_ATOMIC_RETURN ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v1 ; GFX12-NEXT: v_mov_b32_e32 v1, v4 -; GFX12-NEXT: s_or_b32 s4, vcc_lo, s4 +; GFX12-NEXT: s_or_b32 s5, vcc_lo, s5 ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 +; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s5 ; GFX12-NEXT: s_cbranch_execnz .LBB7_1 ; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f16__offset: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: s_addk_i32 s4, 0x200 -; GFX940-NEXT: s_and_b32 s4, s4, -4 -; GFX940-NEXT: v_mov_b32_e32 v2, s4 +; GFX940-NEXT: s_and_b32 s5, s4, -4 +; GFX940-NEXT: v_mov_b32_e32 v2, s5 ; GFX940-NEXT: buffer_load_dword v1, v2, s[0:3], 0 offen -; GFX940-NEXT: s_mov_b32 s6, 0xffffff +; GFX940-NEXT: s_and_b32 s4, s4, 3 +; GFX940-NEXT: s_lshl_b32 s6, s4, 3 +; GFX940-NEXT: s_lshl_b32 s4, 0xffff, s6 +; GFX940-NEXT: s_not_b32 s7, s4 ; GFX940-NEXT: s_mov_b64 s[4:5], 0 ; GFX940-NEXT: v_max_f16_e32 v3, v0, v0 ; GFX940-NEXT: .LBB7_1: ; %atomicrmw.start ; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f16_sdwa v0, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3 -; GFX940-NEXT: v_max_f16_sdwa v0, v0, v3 
dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: v_and_or_b32 v0, v1, s6, v0 +; GFX940-NEXT: v_lshrrev_b32_e32 v0, s6, v1 +; GFX940-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX940-NEXT: v_max_f16_e32 v0, v0, v3 +; GFX940-NEXT: v_lshlrev_b32_e32 v0, s6, v0 +; GFX940-NEXT: v_and_or_b32 v0, v1, s7, v0 ; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[0:1] +; GFX940-NEXT: buffer_wbl2 sc1 ; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: buffer_inv sc1 @@ -2739,25 +2789,29 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f16__offset(ptr addrspace(7) ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_addk_i32 s4, 0x200 ; GFX11-NEXT: v_max_f16_e32 v3, v0, v0 -; GFX11-NEXT: s_and_b32 s4, s4, -4 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: v_mov_b32_e32 v2, s4 -; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_and_b32 s5, s4, -4 +; GFX11-NEXT: s_and_b32 s4, s4, 3 +; GFX11-NEXT: v_mov_b32_e32 v2, s5 +; GFX11-NEXT: s_lshl_b32 s4, s4, 3 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_lshl_b32 s5, 0xffff, s4 +; GFX11-NEXT: s_not_b32 s6, s5 ; GFX11-NEXT: buffer_load_b32 v1, v2, s[0:3], 0 offen +; GFX11-NEXT: s_mov_b32 s5, 0 ; GFX11-NEXT: .p2align 6 ; GFX11-NEXT: .LBB7_1: ; %atomicrmw.start ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX11-NEXT: v_lshrrev_b32_e32 v0, s4, v1 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_max_f16_e32 v0, v0, v0 ; GFX11-NEXT: v_max_f16_e32 v0, v0, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v0, s4, 
v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_and_or_b32 v0, 0xffffff, v1, v0 +; GFX11-NEXT: v_and_or_b32 v0, v1, s6, v0 ; GFX11-NEXT: v_dual_mov_b32 v5, v1 :: v_dual_mov_b32 v4, v0 ; GFX11-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v2, s[0:3], 0 offen glc ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -2765,12 +2819,12 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f16__offset(ptr addrspace(7) ; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v1 ; GFX11-NEXT: v_mov_b32_e32 v1, v4 -; GFX11-NEXT: s_or_b32 s4, vcc_lo, s4 +; GFX11-NEXT: s_or_b32 s5, vcc_lo, s5 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 +; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s5 ; GFX11-NEXT: s_cbranch_execnz .LBB7_1 ; GFX11-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s5 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f16__offset: @@ -2778,19 +2832,23 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f16__offset(ptr addrspace(7) ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_addk_i32 s8, 0x200 ; GFX10-NEXT: v_max_f16_e32 v3, v0, v0 -; GFX10-NEXT: s_and_b32 s8, s8, -4 -; GFX10-NEXT: v_mov_b32_e32 v2, s8 -; GFX10-NEXT: s_mov_b32 s8, 0 +; GFX10-NEXT: s_and_b32 s9, s8, -4 +; GFX10-NEXT: s_and_b32 s8, s8, 3 +; GFX10-NEXT: v_mov_b32_e32 v2, s9 +; GFX10-NEXT: s_lshl_b32 s8, s8, 3 +; GFX10-NEXT: s_lshl_b32 s9, 0xffff, s8 +; GFX10-NEXT: s_not_b32 s10, s9 ; GFX10-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen +; GFX10-NEXT: s_mov_b32 s9, 0 ; GFX10-NEXT: .LBB7_1: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_max_f16_sdwa v0, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3 +; GFX10-NEXT: v_lshrrev_b32_e32 v0, s8, v1 ; GFX10-NEXT: 
s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_max_f16_e32 v0, v0, v0 ; GFX10-NEXT: v_max_f16_e32 v0, v0, v3 -; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX10-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX10-NEXT: v_and_or_b32 v0, 0xffffff, v1, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GFX10-NEXT: v_and_or_b32 v0, v1, s10, v0 ; GFX10-NEXT: v_mov_b32_e32 v5, v1 ; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; GFX10-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[4:7], 0 offen glc @@ -2799,29 +2857,34 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f16__offset(ptr addrspace(7) ; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v1 ; GFX10-NEXT: v_mov_b32_e32 v1, v4 -; GFX10-NEXT: s_or_b32 s8, vcc_lo, s8 -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s8 +; GFX10-NEXT: s_or_b32 s9, vcc_lo, s9 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s9 ; GFX10-NEXT: s_cbranch_execnz .LBB7_1 ; GFX10-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s9 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_f16__offset: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: s_addk_i32 s8, 0x200 -; GFX90A-NEXT: s_and_b32 s8, s8, -4 -; GFX90A-NEXT: v_mov_b32_e32 v2, s8 +; GFX90A-NEXT: s_and_b32 s9, s8, -4 +; GFX90A-NEXT: v_mov_b32_e32 v2, s9 ; GFX90A-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen -; GFX90A-NEXT: s_mov_b32 s10, 0xffffff +; GFX90A-NEXT: s_and_b32 s8, s8, 3 +; GFX90A-NEXT: s_lshl_b32 s10, s8, 3 +; GFX90A-NEXT: s_lshl_b32 s8, 0xffff, s10 +; GFX90A-NEXT: s_not_b32 s11, s8 ; GFX90A-NEXT: s_mov_b64 s[8:9], 0 ; GFX90A-NEXT: v_max_f16_e32 v3, v0, v0 ; GFX90A-NEXT: .LBB7_1: ; %atomicrmw.start ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: v_max_f16_sdwa v0, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:BYTE_3 src1_sel:BYTE_3 -; GFX90A-NEXT: v_max_f16_sdwa v0, v0, v3 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX90A-NEXT: v_and_or_b32 v0, v1, s10, v0 +; GFX90A-NEXT: v_lshrrev_b32_e32 v0, s10, v1 +; GFX90A-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX90A-NEXT: v_max_f16_e32 v0, v0, v3 +; GFX90A-NEXT: v_lshlrev_b32_e32 v0, s10, v0 +; GFX90A-NEXT: v_and_or_b32 v0, v1, s11, v0 ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[0:1] op_sel:[0,1] ; GFX90A-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[4:7], 0 offen glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) @@ -2839,18 +2902,23 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f16__offset(ptr addrspace(7) ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: s_addk_i32 s8, 0x200 -; GFX908-NEXT: s_and_b32 s8, s8, -4 -; GFX908-NEXT: v_mov_b32_e32 v2, s8 +; GFX908-NEXT: s_and_b32 s9, s8, -4 +; GFX908-NEXT: v_mov_b32_e32 v2, s9 ; GFX908-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen -; GFX908-NEXT: s_mov_b32 s10, 0xffffff +; GFX908-NEXT: s_and_b32 s8, s8, 3 +; GFX908-NEXT: s_lshl_b32 s10, s8, 3 +; GFX908-NEXT: s_lshl_b32 s8, 0xffff, s10 +; GFX908-NEXT: s_not_b32 s11, s8 ; GFX908-NEXT: s_mov_b64 s[8:9], 0 ; GFX908-NEXT: v_max_f16_e32 v3, v0, v0 ; GFX908-NEXT: .LBB7_1: ; %atomicrmw.start ; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_max_f16_sdwa v0, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3 -; GFX908-NEXT: v_max_f16_sdwa v0, v0, v3 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX908-NEXT: v_and_or_b32 v0, v1, s10, v0 +; GFX908-NEXT: v_lshrrev_b32_e32 v0, s10, v1 +; GFX908-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX908-NEXT: v_max_f16_e32 v0, v0, v3 +; GFX908-NEXT: v_lshlrev_b32_e32 v0, s10, v0 +; GFX908-NEXT: v_and_or_b32 v0, v1, s11, v0 ; GFX908-NEXT: v_mov_b32_e32 v5, v1 ; GFX908-NEXT: v_mov_b32_e32 v4, v0 ; GFX908-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[4:7], 0 
offen glc @@ -2869,17 +2937,23 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f16__offset(ptr addrspace(7) ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_addk_i32 s8, 0x200 -; GFX8-NEXT: s_and_b32 s8, s8, -4 -; GFX8-NEXT: v_mov_b32_e32 v2, s8 +; GFX8-NEXT: s_and_b32 s9, s8, -4 +; GFX8-NEXT: v_mov_b32_e32 v2, s9 ; GFX8-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen +; GFX8-NEXT: s_and_b32 s8, s8, 3 +; GFX8-NEXT: s_lshl_b32 s10, s8, 3 +; GFX8-NEXT: s_lshl_b32 s8, 0xffff, s10 +; GFX8-NEXT: s_not_b32 s11, s8 ; GFX8-NEXT: s_mov_b64 s[8:9], 0 ; GFX8-NEXT: v_max_f16_e32 v3, v0, v0 ; GFX8-NEXT: .LBB7_1: ; %atomicrmw.start ; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_max_f16_sdwa v0, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3 -; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v1 -; GFX8-NEXT: v_max_f16_sdwa v0, v0, v3 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-NEXT: v_lshrrev_b32_e32 v0, s10, v1 +; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-NEXT: v_max_f16_e32 v0, v0, v3 +; GFX8-NEXT: v_and_b32_e32 v4, s11, v1 +; GFX8-NEXT: v_lshlrev_b32_e32 v0, s10, v0 ; GFX8-NEXT: v_or_b32_e32 v0, v4, v0 ; GFX8-NEXT: v_mov_b32_e32 v5, v1 ; GFX8-NEXT: v_mov_b32_e32 v4, v0 @@ -2899,21 +2973,25 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f16__offset(ptr addrspace(7) ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: s_addk_i32 s8, 0x200 -; GFX7-NEXT: s_and_b32 s8, s8, -4 -; GFX7-NEXT: v_mov_b32_e32 v2, s8 +; GFX7-NEXT: s_and_b32 s9, s8, -4 +; GFX7-NEXT: v_mov_b32_e32 v2, s9 ; GFX7-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: s_mov_b64 s[8:9], 0 +; GFX7-NEXT: s_and_b32 s8, s8, 3 +; GFX7-NEXT: s_lshl_b32 s10, s8, 3 +; GFX7-NEXT: s_lshl_b32 s8, 0xffff, s10 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v0 +; GFX7-NEXT: s_not_b32 s11, s8 +; GFX7-NEXT: 
s_mov_b64 s[8:9], 0 ; GFX7-NEXT: .LBB7_1: ; %atomicrmw.start ; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, s10, v1 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_and_b32_e32 v4, 0xffffff, v1 +; GFX7-NEXT: v_and_b32_e32 v4, s11, v1 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v3 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, s10, v0 ; GFX7-NEXT: v_or_b32_e32 v0, v4, v0 ; GFX7-NEXT: v_mov_b32_e32 v5, v1 ; GFX7-NEXT: v_mov_b32_e32 v4, v0 @@ -2933,22 +3011,26 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_f16__offset(ptr addrspace(7) ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: s_addk_i32 s8, 0x200 -; GFX6-NEXT: s_and_b32 s8, s8, -4 -; GFX6-NEXT: v_mov_b32_e32 v2, s8 +; GFX6-NEXT: s_and_b32 s9, s8, -4 +; GFX6-NEXT: v_mov_b32_e32 v2, s9 ; GFX6-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-NEXT: s_mov_b64 s[8:9], 0 +; GFX6-NEXT: s_and_b32 s8, s8, 3 +; GFX6-NEXT: s_lshl_b32 s10, s8, 3 +; GFX6-NEXT: s_lshl_b32 s8, 0xffff, s10 ; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v0 +; GFX6-NEXT: s_not_b32 s11, s8 +; GFX6-NEXT: s_mov_b64 s[8:9], 0 ; GFX6-NEXT: .LBB7_1: ; %atomicrmw.start ; GFX6-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v0, s10, v1 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v1 +; GFX6-NEXT: v_and_b32_e32 v4, s11, v1 ; GFX6-NEXT: v_max_f32_e32 v0, v0, v3 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, s10, v0 ; GFX6-NEXT: v_or_b32_e32 v0, v4, v0 ; GFX6-NEXT: v_mov_b32_e32 v5, v1 ; GFX6-NEXT: v_mov_b32_e32 v4, v0 @@ -2979,8 +3061,13 @@ define half 
@buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall(ptr add ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 ; GFX12-NEXT: s_mov_b32 s1, exec_lo -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_and_b32_e32 v7, -4, v4 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX12-NEXT: v_and_b32_e32 v6, 3, v4 +; GFX12-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX12-NEXT: v_lshlrev_b32_e32 v7, 3, v6 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-NEXT: v_lshlrev_b32_e64 v6, v7, 0xffff +; GFX12-NEXT: v_not_b32_e32 v9, v6 ; GFX12-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX12-NEXT: v_readfirstlane_b32 s4, v0 ; GFX12-NEXT: v_readfirstlane_b32 s5, v1 @@ -2992,28 +3079,28 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall(ptr add ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX12-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX12-NEXT: s_and_saveexec_b32 s0, s0 -; GFX12-NEXT: buffer_load_b32 v6, v7, s[4:7], null offen +; GFX12-NEXT: buffer_load_b32 v6, v8, s[4:7], null offen ; GFX12-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_cbranch_execnz .LBB8_1 ; GFX12-NEXT: ; %bb.2: ; GFX12-NEXT: s_mov_b32 exec_lo, s1 -; GFX12-NEXT: v_max_num_f16_e32 v8, v5, v5 +; GFX12-NEXT: v_max_num_f16_e32 v10, v5, v5 ; GFX12-NEXT: s_mov_b32 s1, 0 ; GFX12-NEXT: .LBB8_3: ; %atomicrmw.start ; GFX12-NEXT: ; =>This Loop Header: Depth=1 ; GFX12-NEXT: ; Child Loop BB8_4 Depth 2 ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: v_lshrrev_b32_e32 v4, 24, v6 +; GFX12-NEXT: v_lshrrev_b32_e32 v4, v7, v6 ; GFX12-NEXT: s_mov_b32 s2, exec_lo ; GFX12-NEXT: s_wait_storecnt 0x0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-NEXT: v_max_num_f16_e32 v4, v4, v4 -; GFX12-NEXT: v_max_num_f16_e32 v4, v4, v8 +; GFX12-NEXT: v_max_num_f16_e32 v4, v4, v10 ; GFX12-NEXT: s_delay_alu 
instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-NEXT: v_and_b32_e32 v4, 0xffff, v4 -; GFX12-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX12-NEXT: v_lshlrev_b32_e32 v4, v7, v4 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX12-NEXT: v_and_or_b32 v5, 0xffffff, v6, v4 +; GFX12-NEXT: v_and_or_b32 v5, v6, v9, v4 ; GFX12-NEXT: v_mov_b32_e32 v4, v5 ; GFX12-NEXT: v_mov_b32_e32 v5, v6 ; GFX12-NEXT: .LBB8_4: ; Parent Loop BB8_3 Depth=1 @@ -3029,7 +3116,7 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall(ptr add ; GFX12-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX12-NEXT: s_and_saveexec_b32 s0, s0 ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v7, s[4:7], null offen th:TH_ATOMIC_RETURN +; GFX12-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[4:7], null offen th:TH_ATOMIC_RETURN ; GFX12-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_cbranch_execnz .LBB8_4 ; GFX12-NEXT: ; %bb.5: ; in Loop: Header=BB8_3 Depth=1 @@ -3044,15 +3131,19 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall(ptr add ; GFX12-NEXT: s_cbranch_execnz .LBB8_3 ; GFX12-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 -; GFX12-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX12-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: v_add_u32_e32 v4, 0x200, v4 -; GFX940-NEXT: v_and_b32_e32 v8, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v9, 0xffffff +; GFX940-NEXT: v_and_b32_e32 v9, -4, v4 +; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX940-NEXT: v_lshlrev_b32_e32 v8, 3, v4 +; GFX940-NEXT: s_mov_b32 s0, 0xffff +; GFX940-NEXT: v_lshlrev_b32_e64 v4, v8, s0 +; GFX940-NEXT: v_not_b32_e32 v10, v4 ; GFX940-NEXT: s_mov_b64 s[2:3], exec ; GFX940-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 
; GFX940-NEXT: v_readfirstlane_b32 s4, v0 @@ -3064,21 +3155,23 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall(ptr add ; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] ; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] ; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: buffer_load_dword v7, v8, s[4:7], 0 offen +; GFX940-NEXT: buffer_load_dword v7, v9, s[4:7], 0 offen ; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] ; GFX940-NEXT: s_cbranch_execnz .LBB8_1 ; GFX940-NEXT: ; %bb.2: ; GFX940-NEXT: s_mov_b64 exec, s[2:3] ; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_max_f16_e32 v10, v5, v5 +; GFX940-NEXT: v_max_f16_e32 v11, v5, v5 ; GFX940-NEXT: .LBB8_3: ; %atomicrmw.start ; GFX940-NEXT: ; =>This Loop Header: Depth=1 ; GFX940-NEXT: ; Child Loop BB8_4 Depth 2 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f16_sdwa v4, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3 -; GFX940-NEXT: v_max_f16_sdwa v4, v4, v10 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX940-NEXT: v_lshrrev_b32_e32 v4, v8, v7 +; GFX940-NEXT: v_max_f16_e32 v4, v4, v4 +; GFX940-NEXT: v_max_f16_e32 v4, v4, v11 +; GFX940-NEXT: v_lshlrev_b32_e32 v4, v8, v4 +; GFX940-NEXT: v_and_or_b32 v6, v7, v10, v4 ; GFX940-NEXT: s_mov_b64 s[8:9], exec -; GFX940-NEXT: v_and_or_b32 v6, v7, v9, v4 ; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[6:7] ; GFX940-NEXT: buffer_wbl2 sc1 ; GFX940-NEXT: .LBB8_4: ; Parent Loop BB8_3 Depth=1 @@ -3093,7 +3186,7 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall(ptr add ; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] ; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen sc0 +; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v9, s[4:7], 0 offen sc0 ; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] ; GFX940-NEXT: s_cbranch_execnz .LBB8_4 ; GFX940-NEXT: ; %bb.5: ; in Loop: Header=BB8_3 Depth=1 
@@ -3107,7 +3200,7 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall(ptr add ; GFX940-NEXT: s_cbranch_execnz .LBB8_3 ; GFX940-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX940-NEXT: v_lshrrev_b32_e32 v0, v8, v4 ; GFX940-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall: @@ -3116,8 +3209,13 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall(ptr add ; GFX11-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 ; GFX11-NEXT: s_mov_b32 s1, 0 ; GFX11-NEXT: s_mov_b32 s2, exec_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_and_b32_e32 v7, -4, v4 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v6, 3, v4 +; GFX11-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX11-NEXT: v_lshlrev_b32_e32 v7, 3, v6 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e64 v6, v7, 0xffff +; GFX11-NEXT: v_not_b32_e32 v9, v6 ; GFX11-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: v_readfirstlane_b32 s4, v0 ; GFX11-NEXT: v_readfirstlane_b32 s5, v1 @@ -3129,28 +3227,28 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall(ptr add ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX11-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-NEXT: buffer_load_b32 v6, v7, s[4:7], 0 offen +; GFX11-NEXT: buffer_load_b32 v6, v8, s[4:7], 0 offen ; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX11-NEXT: s_cbranch_execnz .LBB8_1 ; GFX11-NEXT: ; %bb.2: ; GFX11-NEXT: s_mov_b32 exec_lo, s2 -; GFX11-NEXT: v_max_f16_e32 v8, v5, v5 +; GFX11-NEXT: v_max_f16_e32 v10, v5, v5 ; GFX11-NEXT: .p2align 6 ; GFX11-NEXT: .LBB8_3: ; %atomicrmw.start ; GFX11-NEXT: ; =>This Loop Header: Depth=1 ; GFX11-NEXT: ; Child Loop 
BB8_4 Depth 2 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_lshrrev_b32_e32 v4, 24, v6 +; GFX11-NEXT: v_lshrrev_b32_e32 v4, v7, v6 ; GFX11-NEXT: s_mov_b32 s2, exec_lo ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_max_f16_e32 v4, v4, v4 -; GFX11-NEXT: v_max_f16_e32 v4, v4, v8 +; GFX11-NEXT: v_max_f16_e32 v4, v4, v10 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_and_b32_e32 v4, 0xffff, v4 -; GFX11-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX11-NEXT: v_lshlrev_b32_e32 v4, v7, v4 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_and_or_b32 v5, 0xffffff, v6, v4 +; GFX11-NEXT: v_and_or_b32 v5, v6, v9, v4 ; GFX11-NEXT: v_mov_b32_e32 v4, v5 ; GFX11-NEXT: v_mov_b32_e32 v5, v6 ; GFX11-NEXT: .LBB8_4: ; Parent Loop BB8_3 Depth=1 @@ -3166,7 +3264,7 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall(ptr add ; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX11-NEXT: s_and_saveexec_b32 s0, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v7, s[4:7], 0 offen glc +; GFX11-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[4:7], 0 offen glc ; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX11-NEXT: s_cbranch_execnz .LBB8_4 ; GFX11-NEXT: ; %bb.5: ; in Loop: Header=BB8_3 Depth=1 @@ -3182,7 +3280,7 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall(ptr add ; GFX11-NEXT: s_cbranch_execnz .LBB8_3 ; GFX11-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s1 -; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX11-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall: @@ -3191,7 +3289,11 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall(ptr add ; GFX10-NEXT: v_add_nc_u32_e32 v4, 
0x200, v4 ; GFX10-NEXT: s_mov_b32 s5, 0 ; GFX10-NEXT: s_mov_b32 s6, exec_lo -; GFX10-NEXT: v_and_b32_e32 v7, -4, v4 +; GFX10-NEXT: v_and_b32_e32 v6, 3, v4 +; GFX10-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX10-NEXT: v_lshlrev_b32_e32 v7, 3, v6 +; GFX10-NEXT: v_lshlrev_b32_e64 v6, v7, 0xffff +; GFX10-NEXT: v_not_b32_e32 v9, v6 ; GFX10-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_readfirstlane_b32 s8, v0 ; GFX10-NEXT: v_readfirstlane_b32 s9, v1 @@ -3201,24 +3303,24 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall(ptr add ; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] ; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 ; GFX10-NEXT: s_and_saveexec_b32 s4, s4 -; GFX10-NEXT: buffer_load_dword v6, v7, s[8:11], 0 offen +; GFX10-NEXT: buffer_load_dword v6, v8, s[8:11], 0 offen ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: s_cbranch_execnz .LBB8_1 ; GFX10-NEXT: ; %bb.2: ; GFX10-NEXT: s_mov_b32 exec_lo, s6 -; GFX10-NEXT: v_max_f16_e32 v8, v5, v5 +; GFX10-NEXT: v_max_f16_e32 v10, v5, v5 ; GFX10-NEXT: .LBB8_3: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Loop Header: Depth=1 ; GFX10-NEXT: ; Child Loop BB8_4 Depth 2 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_max_f16_sdwa v4, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3 +; GFX10-NEXT: v_lshrrev_b32_e32 v4, v7, v6 ; GFX10-NEXT: s_mov_b32 s6, exec_lo ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_max_f16_e32 v4, v4, v8 -; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v4 -; GFX10-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX10-NEXT: v_and_or_b32 v5, 0xffffff, v6, v4 +; GFX10-NEXT: v_max_f16_e32 v4, v4, v4 +; GFX10-NEXT: v_max_f16_e32 v4, v4, v10 +; GFX10-NEXT: v_lshlrev_b32_sdwa v4, v7, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GFX10-NEXT: v_and_or_b32 v5, v6, v9, v4 ; GFX10-NEXT: v_mov_b32_e32 v4, v5 ; GFX10-NEXT: v_mov_b32_e32 v5, v6 ; GFX10-NEXT: .LBB8_4: ; Parent Loop BB8_3 Depth=1 
@@ -3232,7 +3334,7 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall(ptr add ; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 ; GFX10-NEXT: s_and_saveexec_b32 s4, s4 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_atomic_cmpswap v[4:5], v7, s[8:11], 0 offen glc +; GFX10-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: s_cbranch_execnz .LBB8_4 @@ -3249,15 +3351,19 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall(ptr add ; GFX10-NEXT: s_cbranch_execnz .LBB8_3 ; GFX10-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5 -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX10-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_add_u32_e32 v4, 0x200, v4 -; GFX90A-NEXT: v_and_b32_e32 v8, -4, v4 -; GFX90A-NEXT: v_mov_b32_e32 v9, 0xffffff +; GFX90A-NEXT: v_and_b32_e32 v9, -4, v4 +; GFX90A-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX90A-NEXT: v_lshlrev_b32_e32 v8, 3, v4 +; GFX90A-NEXT: s_mov_b32 s4, 0xffff +; GFX90A-NEXT: v_lshlrev_b32_e64 v4, v8, s4 +; GFX90A-NEXT: v_not_b32_e32 v10, v4 ; GFX90A-NEXT: s_mov_b64 s[6:7], exec ; GFX90A-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: v_readfirstlane_b32 s8, v0 @@ -3269,20 +3375,22 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall(ptr add ; GFX90A-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX90A-NEXT: s_nop 0 -; GFX90A-NEXT: buffer_load_dword v7, v8, s[8:11], 0 offen +; GFX90A-NEXT: buffer_load_dword v7, v9, s[8:11], 0 offen ; GFX90A-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX90A-NEXT: s_cbranch_execnz .LBB8_1 ; GFX90A-NEXT: ; %bb.2: ; GFX90A-NEXT: s_mov_b64 exec, s[6:7] ; GFX90A-NEXT: 
s_mov_b64 s[6:7], 0 -; GFX90A-NEXT: v_max_f16_e32 v10, v5, v5 +; GFX90A-NEXT: v_max_f16_e32 v11, v5, v5 ; GFX90A-NEXT: .LBB8_3: ; %atomicrmw.start ; GFX90A-NEXT: ; =>This Loop Header: Depth=1 ; GFX90A-NEXT: ; Child Loop BB8_4 Depth 2 ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: v_max_f16_sdwa v4, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3 -; GFX90A-NEXT: v_max_f16_sdwa v4, v4, v10 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX90A-NEXT: v_and_or_b32 v6, v7, v9, v4 +; GFX90A-NEXT: v_lshrrev_b32_e32 v4, v8, v7 +; GFX90A-NEXT: v_max_f16_e32 v4, v4, v4 +; GFX90A-NEXT: v_max_f16_e32 v4, v4, v11 +; GFX90A-NEXT: v_lshlrev_b32_e32 v4, v8, v4 +; GFX90A-NEXT: v_and_or_b32 v6, v7, v10, v4 ; GFX90A-NEXT: s_mov_b64 s[12:13], exec ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[6:7], v[6:7] op_sel:[0,1] ; GFX90A-NEXT: .LBB8_4: ; Parent Loop BB8_3 Depth=1 @@ -3296,7 +3404,7 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall(ptr add ; GFX90A-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc +; GFX90A-NEXT: buffer_atomic_cmpswap v[4:5], v9, s[8:11], 0 offen glc ; GFX90A-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX90A-NEXT: s_cbranch_execnz .LBB8_4 ; GFX90A-NEXT: ; %bb.5: ; in Loop: Header=BB8_3 Depth=1 @@ -3310,15 +3418,19 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall(ptr add ; GFX90A-NEXT: s_cbranch_execnz .LBB8_3 ; GFX90A-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX90A-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX90A-NEXT: v_lshrrev_b32_e32 v0, v8, v4 ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: v_add_u32_e32 v4, 0x200, v4 -; GFX908-NEXT: v_and_b32_e32 
v7, -4, v4 -; GFX908-NEXT: v_mov_b32_e32 v8, 0xffffff +; GFX908-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX908-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX908-NEXT: v_lshlrev_b32_e32 v7, 3, v4 +; GFX908-NEXT: s_mov_b32 s4, 0xffff +; GFX908-NEXT: v_lshlrev_b32_e64 v4, v7, s4 +; GFX908-NEXT: v_not_b32_e32 v9, v4 ; GFX908-NEXT: s_mov_b64 s[6:7], exec ; GFX908-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX908-NEXT: v_readfirstlane_b32 s8, v0 @@ -3330,20 +3442,22 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall(ptr add ; GFX908-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX908-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX908-NEXT: s_nop 0 -; GFX908-NEXT: buffer_load_dword v6, v7, s[8:11], 0 offen +; GFX908-NEXT: buffer_load_dword v6, v8, s[8:11], 0 offen ; GFX908-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX908-NEXT: s_cbranch_execnz .LBB8_1 ; GFX908-NEXT: ; %bb.2: ; GFX908-NEXT: s_mov_b64 exec, s[6:7] ; GFX908-NEXT: s_mov_b64 s[6:7], 0 -; GFX908-NEXT: v_max_f16_e32 v9, v5, v5 +; GFX908-NEXT: v_max_f16_e32 v10, v5, v5 ; GFX908-NEXT: .LBB8_3: ; %atomicrmw.start ; GFX908-NEXT: ; =>This Loop Header: Depth=1 ; GFX908-NEXT: ; Child Loop BB8_4 Depth 2 ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_max_f16_sdwa v4, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3 -; GFX908-NEXT: v_max_f16_sdwa v4, v4, v9 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX908-NEXT: v_and_or_b32 v5, v6, v8, v4 +; GFX908-NEXT: v_lshrrev_b32_e32 v4, v7, v6 +; GFX908-NEXT: v_max_f16_e32 v4, v4, v4 +; GFX908-NEXT: v_max_f16_e32 v4, v4, v10 +; GFX908-NEXT: v_lshlrev_b32_e32 v4, v7, v4 +; GFX908-NEXT: v_and_or_b32 v5, v6, v9, v4 ; GFX908-NEXT: v_mov_b32_e32 v4, v5 ; GFX908-NEXT: s_mov_b64 s[12:13], exec ; GFX908-NEXT: v_mov_b32_e32 v5, v6 @@ -3358,7 +3472,7 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall(ptr add ; GFX908-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX908-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; 
GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: buffer_atomic_cmpswap v[4:5], v7, s[8:11], 0 offen glc +; GFX908-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc ; GFX908-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX908-NEXT: s_cbranch_execnz .LBB8_4 ; GFX908-NEXT: ; %bb.5: ; in Loop: Header=BB8_3 Depth=1 @@ -3372,14 +3486,19 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall(ptr add ; GFX908-NEXT: s_cbranch_execnz .LBB8_3 ; GFX908-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX908-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX908-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX908-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX908-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0x200, v4 -; GFX8-NEXT: v_and_b32_e32 v7, -4, v4 +; GFX8-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX8-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v7, 3, v4 +; GFX8-NEXT: s_mov_b32 s4, 0xffff +; GFX8-NEXT: v_lshlrev_b32_e64 v4, v7, s4 +; GFX8-NEXT: v_not_b32_e32 v9, v4 ; GFX8-NEXT: s_mov_b64 s[6:7], exec ; GFX8-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX8-NEXT: v_readfirstlane_b32 s8, v0 @@ -3391,20 +3510,22 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall(ptr add ; GFX8-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX8-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX8-NEXT: s_nop 0 -; GFX8-NEXT: buffer_load_dword v6, v7, s[8:11], 0 offen +; GFX8-NEXT: buffer_load_dword v6, v8, s[8:11], 0 offen ; GFX8-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX8-NEXT: s_cbranch_execnz .LBB8_1 ; GFX8-NEXT: ; %bb.2: ; GFX8-NEXT: s_mov_b64 exec, s[6:7] ; GFX8-NEXT: s_mov_b64 s[6:7], 0 -; GFX8-NEXT: v_max_f16_e32 v8, v5, v5 +; GFX8-NEXT: v_max_f16_e32 v10, v5, v5 ; GFX8-NEXT: .LBB8_3: ; %atomicrmw.start ; GFX8-NEXT: ; =>This Loop Header: Depth=1 ; GFX8-NEXT: ; Child Loop BB8_4 Depth 2 ; GFX8-NEXT: s_waitcnt 
vmcnt(0) -; GFX8-NEXT: v_max_f16_sdwa v4, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3 -; GFX8-NEXT: v_max_f16_sdwa v4, v4, v8 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_and_b32_e32 v5, 0xffffff, v6 +; GFX8-NEXT: v_lshrrev_b32_e32 v4, v7, v6 +; GFX8-NEXT: v_max_f16_e32 v4, v4, v4 +; GFX8-NEXT: v_max_f16_e32 v4, v4, v10 +; GFX8-NEXT: v_lshlrev_b32_e32 v4, v7, v4 +; GFX8-NEXT: v_and_b32_e32 v5, v6, v9 ; GFX8-NEXT: v_or_b32_e32 v5, v5, v4 ; GFX8-NEXT: v_mov_b32_e32 v4, v5 ; GFX8-NEXT: s_mov_b64 s[12:13], exec @@ -3420,7 +3541,7 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall(ptr add ; GFX8-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX8-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: buffer_atomic_cmpswap v[4:5], v7, s[8:11], 0 offen glc +; GFX8-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc ; GFX8-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX8-NEXT: s_cbranch_execnz .LBB8_4 ; GFX8-NEXT: ; %bb.5: ; in Loop: Header=BB8_3 Depth=1 @@ -3434,14 +3555,18 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall(ptr add ; GFX8-NEXT: s_cbranch_execnz .LBB8_3 ; GFX8-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX8-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_add_i32_e32 v4, vcc, 0x200, v4 -; GFX7-NEXT: v_and_b32_e32 v7, -4, v4 +; GFX7-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX7-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX7-NEXT: v_lshlrev_b32_e32 v7, 3, v4 +; GFX7-NEXT: v_lshl_b32_e32 v4, 0xffff, v7 +; GFX7-NEXT: v_not_b32_e32 v9, v4 ; GFX7-NEXT: s_mov_b64 s[6:7], exec ; GFX7-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX7-NEXT: v_readfirstlane_b32 s8, v0 @@ -3452,25 
+3577,25 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall(ptr add ; GFX7-NEXT: v_cmp_eq_u64_e64 s[4:5], s[10:11], v[2:3] ; GFX7-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX7-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GFX7-NEXT: buffer_load_dword v6, v7, s[8:11], 0 offen +; GFX7-NEXT: buffer_load_dword v6, v8, s[8:11], 0 offen ; GFX7-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_cbranch_execnz .LBB8_1 ; GFX7-NEXT: ; %bb.2: ; GFX7-NEXT: s_mov_b64 exec, s[6:7] ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v5 ; GFX7-NEXT: s_mov_b64 s[6:7], 0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v8, v4 +; GFX7-NEXT: v_cvt_f32_f16_e32 v10, v4 ; GFX7-NEXT: .LBB8_3: ; %atomicrmw.start ; GFX7-NEXT: ; =>This Loop Header: Depth=1 ; GFX7-NEXT: ; Child Loop BB8_4 Depth 2 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v4, 24, v6 +; GFX7-NEXT: v_lshrrev_b32_e32 v4, v7, v6 ; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 -; GFX7-NEXT: v_and_b32_e32 v5, 0xffffff, v6 +; GFX7-NEXT: v_and_b32_e32 v5, v6, v9 ; GFX7-NEXT: s_mov_b64 s[12:13], exec -; GFX7-NEXT: v_max_f32_e32 v4, v4, v8 +; GFX7-NEXT: v_max_f32_e32 v4, v4, v10 ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, v7, v4 ; GFX7-NEXT: v_or_b32_e32 v5, v5, v4 ; GFX7-NEXT: v_mov_b32_e32 v4, v5 ; GFX7-NEXT: v_mov_b32_e32 v5, v6 @@ -3485,7 +3610,7 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall(ptr add ; GFX7-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX7-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: buffer_atomic_cmpswap v[4:5], v7, s[8:11], 0 offen glc +; GFX7-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc ; GFX7-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_cbranch_execnz .LBB8_4 ; GFX7-NEXT: ; %bb.5: ; in Loop: Header=BB8_3 Depth=1 @@ -3499,7 +3624,7 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall(ptr add ; GFX7-NEXT: s_cbranch_execnz .LBB8_3 ; 
GFX7-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; @@ -3507,7 +3632,11 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall(ptr add ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0x200, v4 -; GFX6-NEXT: v_and_b32_e32 v7, -4, v4 +; GFX6-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX6-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX6-NEXT: v_lshlrev_b32_e32 v7, 3, v4 +; GFX6-NEXT: v_lshl_b32_e32 v4, 0xffff, v7 +; GFX6-NEXT: v_not_b32_e32 v9, v4 ; GFX6-NEXT: s_mov_b64 s[6:7], exec ; GFX6-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX6-NEXT: v_readfirstlane_b32 s8, v0 @@ -3518,25 +3647,25 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall(ptr add ; GFX6-NEXT: v_cmp_eq_u64_e64 s[4:5], s[10:11], v[2:3] ; GFX6-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX6-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GFX6-NEXT: buffer_load_dword v6, v7, s[8:11], 0 offen +; GFX6-NEXT: buffer_load_dword v6, v8, s[8:11], 0 offen ; GFX6-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX6-NEXT: s_cbranch_execnz .LBB8_1 ; GFX6-NEXT: ; %bb.2: ; GFX6-NEXT: s_mov_b64 exec, s[6:7] ; GFX6-NEXT: v_cvt_f16_f32_e32 v4, v5 ; GFX6-NEXT: s_mov_b64 s[6:7], 0 -; GFX6-NEXT: v_cvt_f32_f16_e32 v8, v4 +; GFX6-NEXT: v_cvt_f32_f16_e32 v10, v4 ; GFX6-NEXT: .LBB8_3: ; %atomicrmw.start ; GFX6-NEXT: ; =>This Loop Header: Depth=1 ; GFX6-NEXT: ; Child Loop BB8_4 Depth 2 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) -; GFX6-NEXT: v_lshrrev_b32_e32 v4, 24, v6 +; GFX6-NEXT: v_lshrrev_b32_e32 v4, v7, v6 ; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v4 -; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v6 +; GFX6-NEXT: v_and_b32_e32 v5, v6, v9 ; GFX6-NEXT: s_mov_b64 s[12:13], exec -; GFX6-NEXT: v_max_f32_e32 v4, v4, v8 +; GFX6-NEXT: v_max_f32_e32 v4, v4, v10 ; GFX6-NEXT: 
v_cvt_f16_f32_e32 v4, v4 -; GFX6-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX6-NEXT: v_lshlrev_b32_e32 v4, v7, v4 ; GFX6-NEXT: v_or_b32_e32 v5, v5, v4 ; GFX6-NEXT: v_mov_b32_e32 v4, v5 ; GFX6-NEXT: v_mov_b32_e32 v5, v6 @@ -3551,7 +3680,7 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall(ptr add ; GFX6-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX6-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[4:5], v7, s[8:11], 0 offen glc +; GFX6-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc ; GFX6-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX6-NEXT: s_cbranch_execnz .LBB8_4 ; GFX6-NEXT: ; %bb.5: ; in Loop: Header=BB8_3 Depth=1 @@ -3565,7 +3694,7 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall(ptr add ; GFX6-NEXT: s_cbranch_execnz .LBB8_3 ; GFX6-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX6-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX6-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: s_waitcnt expcnt(0) ; GFX6-NEXT: s_setpc_b64 s[30:31] @@ -3588,15 +3717,19 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset(ptr addrspace(7 ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_addk_co_i32 s4, 0x200 ; GFX12-NEXT: v_lshlrev_b32_e32 v5, 16, v0 -; GFX12-NEXT: s_and_b32 s4, s4, -4 -; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12-NEXT: v_mov_b32_e32 v4, s4 -; GFX12-NEXT: s_mov_b32 s4, 0 +; GFX12-NEXT: s_and_b32 s5, s4, -4 +; GFX12-NEXT: s_and_b32 s4, s4, 3 +; GFX12-NEXT: v_mov_b32_e32 v4, s5 +; GFX12-NEXT: s_lshl_b32 s4, s4, 3 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX12-NEXT: s_lshl_b32 s5, 0xffff, s4 +; GFX12-NEXT: s_not_b32 s6, s5 ; GFX12-NEXT: buffer_load_b32 v1, v4, s[0:3], null offen +; GFX12-NEXT: s_mov_b32 s5, 0 ; GFX12-NEXT: .LBB9_1: ; %atomicrmw.start ; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1 ; 
GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v0, s4, v1 ; GFX12-NEXT: s_wait_storecnt 0x0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-NEXT: v_lshlrev_b32_e32 v0, 16, v0 @@ -3610,8 +3743,8 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset(ptr addrspace(7 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo ; GFX12-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX12-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX12-NEXT: v_and_or_b32 v0, 0xffffff, v1, v0 +; GFX12-NEXT: v_lshlrev_b32_e32 v0, s4, v0 +; GFX12-NEXT: v_and_or_b32 v0, v1, s6, v0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v2, v0 ; GFX12-NEXT: buffer_atomic_cmpswap_b32 v[2:3], v4, s[0:3], null offen th:TH_ATOMIC_RETURN @@ -3619,42 +3752,43 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset(ptr addrspace(7 ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v1 ; GFX12-NEXT: v_mov_b32_e32 v1, v2 -; GFX12-NEXT: s_or_b32 s4, vcc_lo, s4 +; GFX12-NEXT: s_or_b32 s5, vcc_lo, s5 ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 +; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s5 ; GFX12-NEXT: s_cbranch_execnz .LBB9_1 ; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 -; GFX12-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; GFX12-NEXT: v_lshrrev_b32_e32 v0, s4, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: s_addk_i32 s4, 0x200 -; GFX940-NEXT: s_and_b32 s4, s4, -4 -; GFX940-NEXT: v_mov_b32_e32 v4, s4 +; GFX940-NEXT: s_and_b32 s5, s4, -4 +; GFX940-NEXT: 
v_mov_b32_e32 v4, s5 ; GFX940-NEXT: buffer_load_dword v1, v4, s[0:3], 0 offen -; GFX940-NEXT: s_mov_b32 s6, 0xffffff +; GFX940-NEXT: s_and_b32 s4, s4, 3 +; GFX940-NEXT: s_lshl_b32 s6, s4, 3 +; GFX940-NEXT: s_lshl_b32 s4, 0xffff, s6 +; GFX940-NEXT: s_not_b32 s7, s4 ; GFX940-NEXT: s_mov_b64 s[4:5], 0 ; GFX940-NEXT: v_lshlrev_b32_e32 v5, 16, v0 -; GFX940-NEXT: s_movk_i32 s7, 0x7fff +; GFX940-NEXT: s_movk_i32 s8, 0x7fff ; GFX940-NEXT: .LBB9_1: ; %atomicrmw.start ; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 24, v1 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX940-NEXT: v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX940-NEXT: buffer_wbl2 sc1 ; GFX940-NEXT: v_max_f32_e32 v0, v0, v5 ; GFX940-NEXT: v_bfe_u32 v2, v0, 16, 1 ; GFX940-NEXT: v_or_b32_e32 v3, 0x400000, v0 -; GFX940-NEXT: v_add3_u32 v2, v2, v0, s7 +; GFX940-NEXT: v_add3_u32 v2, v2, v0, s8 ; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: s_nop 1 ; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX940-NEXT: v_and_or_b32 v0, v1, s6, v0 +; GFX940-NEXT: v_lshlrev_b32_sdwa v0, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX940-NEXT: v_and_or_b32 v0, v1, s7, v0 ; GFX940-NEXT: v_mov_b64_e32 v[2:3], v[0:1] ; GFX940-NEXT: buffer_atomic_cmpswap v[2:3], v4, s[0:3], 0 offen sc0 ; GFX940-NEXT: s_waitcnt vmcnt(0) @@ -3666,7 +3800,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset(ptr addrspace(7 ; GFX940-NEXT: s_cbranch_execnz .LBB9_1 ; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX940-NEXT: v_lshrrev_b32_e32 v0, s6, v2 ; GFX940-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: 
buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset: @@ -3674,16 +3808,20 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset(ptr addrspace(7 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_addk_i32 s4, 0x200 ; GFX11-NEXT: v_lshlrev_b32_e32 v5, 16, v0 -; GFX11-NEXT: s_and_b32 s4, s4, -4 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: v_mov_b32_e32 v4, s4 -; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_and_b32 s5, s4, -4 +; GFX11-NEXT: s_and_b32 s4, s4, 3 +; GFX11-NEXT: v_mov_b32_e32 v4, s5 +; GFX11-NEXT: s_lshl_b32 s4, s4, 3 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_lshl_b32 s5, 0xffff, s4 +; GFX11-NEXT: s_not_b32 s6, s5 ; GFX11-NEXT: buffer_load_b32 v1, v4, s[0:3], 0 offen +; GFX11-NEXT: s_mov_b32 s5, 0 ; GFX11-NEXT: .p2align 6 ; GFX11-NEXT: .LBB9_1: ; %atomicrmw.start ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX11-NEXT: v_lshrrev_b32_e32 v0, s4, v1 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0 @@ -3697,8 +3835,8 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset(ptr addrspace(7 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX11-NEXT: v_and_or_b32 v0, 0xffffff, v1, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v0, s4, v0 +; GFX11-NEXT: v_and_or_b32 v0, v1, s6, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v2, v0 ; GFX11-NEXT: buffer_atomic_cmpswap_b32 v[2:3], v4, s[0:3], 0 offen glc @@ -3707,13 +3845,13 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset(ptr addrspace(7 ; GFX11-NEXT: 
buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v1 ; GFX11-NEXT: v_mov_b32_e32 v1, v2 -; GFX11-NEXT: s_or_b32 s4, vcc_lo, s4 +; GFX11-NEXT: s_or_b32 s5, vcc_lo, s5 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 +; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s5 ; GFX11-NEXT: s_cbranch_execnz .LBB9_1 ; GFX11-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s4 -; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; GFX11-NEXT: v_lshrrev_b32_e32 v0, s4, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset: @@ -3721,25 +3859,27 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset(ptr addrspace(7 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_addk_i32 s8, 0x200 ; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v0 -; GFX10-NEXT: s_and_b32 s8, s8, -4 -; GFX10-NEXT: v_mov_b32_e32 v4, s8 -; GFX10-NEXT: s_mov_b32 s8, 0 +; GFX10-NEXT: s_and_b32 s9, s8, -4 +; GFX10-NEXT: s_and_b32 s8, s8, 3 +; GFX10-NEXT: v_mov_b32_e32 v4, s9 +; GFX10-NEXT: s_lshl_b32 s8, s8, 3 +; GFX10-NEXT: s_lshl_b32 s9, 0xffff, s8 +; GFX10-NEXT: s_not_b32 s10, s9 ; GFX10-NEXT: buffer_load_dword v1, v4, s[4:7], 0 offen +; GFX10-NEXT: s_mov_b32 s9, 0 ; GFX10-NEXT: .LBB9_1: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX10-NEXT: v_lshrrev_b32_sdwa v0, s8, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX10-NEXT: v_max_f32_e32 v0, v0, v5 ; GFX10-NEXT: v_bfe_u32 v2, v0, 16, 1 ; GFX10-NEXT: v_or_b32_e32 v3, 0x400000, v0 ; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 ; GFX10-NEXT: v_add3_u32 v2, v2, v0, 0x7fff ; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0 
-; GFX10-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX10-NEXT: v_and_or_b32 v0, 0xffffff, v1, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX10-NEXT: v_and_or_b32 v0, v1, s10, v0 ; GFX10-NEXT: v_mov_b32_e32 v3, v1 ; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; GFX10-NEXT: buffer_atomic_cmpswap v[2:3], v4, s[4:7], 0 offen glc @@ -3748,39 +3888,40 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset(ptr addrspace(7 ; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v1 ; GFX10-NEXT: v_mov_b32_e32 v1, v2 -; GFX10-NEXT: s_or_b32 s8, vcc_lo, s8 -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s8 +; GFX10-NEXT: s_or_b32 s9, vcc_lo, s9 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s9 ; GFX10-NEXT: s_cbranch_execnz .LBB9_1 ; GFX10-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8 -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s9 +; GFX10-NEXT: v_lshrrev_b32_e32 v0, s8, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: s_addk_i32 s8, 0x200 -; GFX90A-NEXT: s_and_b32 s8, s8, -4 -; GFX90A-NEXT: v_mov_b32_e32 v4, s8 +; GFX90A-NEXT: s_and_b32 s9, s8, -4 +; GFX90A-NEXT: v_mov_b32_e32 v4, s9 ; GFX90A-NEXT: buffer_load_dword v1, v4, s[4:7], 0 offen -; GFX90A-NEXT: s_mov_b32 s10, 0xffffff +; GFX90A-NEXT: s_and_b32 s8, s8, 3 +; GFX90A-NEXT: s_lshl_b32 s10, s8, 3 +; GFX90A-NEXT: s_lshl_b32 s8, 0xffff, s10 +; GFX90A-NEXT: s_not_b32 s11, s8 ; GFX90A-NEXT: s_mov_b64 s[8:9], 0 ; GFX90A-NEXT: v_lshlrev_b32_e32 v5, 16, v0 -; GFX90A-NEXT: s_movk_i32 s11, 0x7fff +; GFX90A-NEXT: s_movk_i32 s12, 0x7fff ; GFX90A-NEXT: .LBB9_1: ; %atomicrmw.start ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 24, v1 -; GFX90A-NEXT: 
v_lshlrev_b32_e32 v0, 16, v0 +; GFX90A-NEXT: v_lshrrev_b32_sdwa v0, s10, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX90A-NEXT: v_max_f32_e32 v0, v0, v5 ; GFX90A-NEXT: v_bfe_u32 v2, v0, 16, 1 ; GFX90A-NEXT: v_or_b32_e32 v3, 0x400000, v0 -; GFX90A-NEXT: v_add3_u32 v2, v2, v0, s11 +; GFX90A-NEXT: v_add3_u32 v2, v2, v0, s12 ; GFX90A-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 ; GFX90A-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc -; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX90A-NEXT: v_and_or_b32 v0, v1, s10, v0 +; GFX90A-NEXT: v_lshlrev_b32_sdwa v0, s10, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX90A-NEXT: v_and_or_b32 v0, v1, s11, v0 ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1] ; GFX90A-NEXT: buffer_atomic_cmpswap v[2:3], v4, s[4:7], 0 offen glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) @@ -3792,34 +3933,35 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset(ptr addrspace(7 ; GFX90A-NEXT: s_cbranch_execnz .LBB9_1 ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX90A-NEXT: v_lshrrev_b32_e32 v0, s10, v2 ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: s_addk_i32 s8, 0x200 -; GFX908-NEXT: s_and_b32 s8, s8, -4 -; GFX908-NEXT: v_mov_b32_e32 v4, s8 +; GFX908-NEXT: s_and_b32 s9, s8, -4 +; GFX908-NEXT: v_mov_b32_e32 v4, s9 ; GFX908-NEXT: buffer_load_dword v1, v4, s[4:7], 0 offen -; GFX908-NEXT: s_mov_b32 s10, 0xffffff +; GFX908-NEXT: s_and_b32 s8, s8, 3 +; GFX908-NEXT: s_lshl_b32 s10, s8, 3 +; GFX908-NEXT: s_lshl_b32 s8, 0xffff, s10 +; GFX908-NEXT: s_not_b32 s11, s8 ; GFX908-NEXT: s_mov_b64 s[8:9], 0 ; GFX908-NEXT: v_lshlrev_b32_e32 v5, 16, v0 -; GFX908-NEXT: s_movk_i32 s11, 0x7fff +; GFX908-NEXT: s_movk_i32 s12, 0x7fff ; 
GFX908-NEXT: .LBB9_1: ; %atomicrmw.start ; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_lshrrev_b32_e32 v0, 24, v1 -; GFX908-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX908-NEXT: v_lshrrev_b32_sdwa v0, s10, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX908-NEXT: v_max_f32_e32 v0, v0, v5 ; GFX908-NEXT: v_bfe_u32 v2, v0, 16, 1 ; GFX908-NEXT: v_or_b32_e32 v3, 0x400000, v0 -; GFX908-NEXT: v_add3_u32 v2, v2, v0, s11 +; GFX908-NEXT: v_add3_u32 v2, v2, v0, s12 ; GFX908-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 ; GFX908-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc -; GFX908-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX908-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX908-NEXT: v_and_or_b32 v0, v1, s10, v0 +; GFX908-NEXT: v_lshlrev_b32_sdwa v0, s10, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX908-NEXT: v_and_or_b32 v0, v1, s11, v0 ; GFX908-NEXT: v_mov_b32_e32 v3, v1 ; GFX908-NEXT: v_mov_b32_e32 v2, v0 ; GFX908-NEXT: buffer_atomic_cmpswap v[2:3], v4, s[4:7], 0 offen glc @@ -3832,33 +3974,36 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset(ptr addrspace(7 ; GFX908-NEXT: s_cbranch_execnz .LBB9_1 ; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX908-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX908-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX908-NEXT: v_lshrrev_b32_e32 v0, s10, v2 ; GFX908-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_addk_i32 s8, 0x200 -; GFX8-NEXT: s_and_b32 s8, s8, -4 -; GFX8-NEXT: v_mov_b32_e32 v4, s8 +; GFX8-NEXT: s_and_b32 s9, s8, -4 +; GFX8-NEXT: v_mov_b32_e32 v4, s9 ; GFX8-NEXT: buffer_load_dword v1, v4, s[4:7], 0 offen +; GFX8-NEXT: s_and_b32 s8, s8, 3 +; GFX8-NEXT: s_lshl_b32 s10, s8, 3 +; GFX8-NEXT: s_lshl_b32 s8, 0xffff, s10 +; GFX8-NEXT: s_not_b32 s11, s8 ; GFX8-NEXT: s_mov_b64 s[8:9], 0 ; GFX8-NEXT: v_lshlrev_b32_e32 v5, 
16, v0 ; GFX8-NEXT: .LBB9_1: ; %atomicrmw.start ; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX8-NEXT: v_mov_b32_e32 v0, s10 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v1 -; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX8-NEXT: v_max_f32_e32 v0, v0, v5 -; GFX8-NEXT: v_bfe_u32 v3, v0, 16, 1 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v0 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0x7fff, v3 -; GFX8-NEXT: v_or_b32_e32 v6, 0x400000, v0 -; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v6, vcc -; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v1 -; GFX8-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX8-NEXT: v_lshrrev_b32_sdwa v3, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-NEXT: v_max_f32_e32 v3, v3, v5 +; GFX8-NEXT: v_bfe_u32 v6, v3, 16, 1 +; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v3 +; GFX8-NEXT: v_add_u32_e32 v6, vcc, 0x7fff, v6 +; GFX8-NEXT: v_or_b32_e32 v7, 0x400000, v3 +; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 +; GFX8-NEXT: v_cndmask_b32_e32 v3, v6, v7, vcc +; GFX8-NEXT: v_and_b32_e32 v2, s11, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX8-NEXT: v_mov_b32_e32 v3, v1 ; GFX8-NEXT: v_mov_b32_e32 v2, v0 @@ -3872,29 +4017,33 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset(ptr addrspace(7 ; GFX8-NEXT: s_cbranch_execnz .LBB9_1 ; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX8-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, s10, v2 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: s_addk_i32 s8, 0x200 -; GFX7-NEXT: s_and_b32 s8, s8, -4 -; GFX7-NEXT: v_mov_b32_e32 v4, s8 +; GFX7-NEXT: s_and_b32 s9, s8, -4 +; GFX7-NEXT: 
v_mov_b32_e32 v4, s9 ; GFX7-NEXT: buffer_load_dword v1, v4, s[4:7], 0 offen +; GFX7-NEXT: s_and_b32 s8, s8, 3 +; GFX7-NEXT: s_lshl_b32 s10, s8, 3 +; GFX7-NEXT: s_lshl_b32 s8, 0xffff, s10 ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-NEXT: s_not_b32 s11, s8 ; GFX7-NEXT: s_mov_b64 s[8:9], 0 ; GFX7-NEXT: v_and_b32_e32 v5, 0xffff0000, v0 ; GFX7-NEXT: .LBB9_1: ; %atomicrmw.start ; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, s10, v1 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v5 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX7-NEXT: v_and_b32_e32 v2, 0xffffff, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, s11, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, s10, v0 ; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX7-NEXT: v_mov_b32_e32 v3, v1 ; GFX7-NEXT: v_mov_b32_e32 v2, v0 @@ -3908,7 +4057,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset(ptr addrspace(7 ; GFX7-NEXT: s_cbranch_execnz .LBB9_1 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, s10, v2 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; @@ -3916,23 +4065,27 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset(ptr addrspace(7 ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: s_addk_i32 s8, 0x200 -; GFX6-NEXT: s_and_b32 s8, s8, -4 -; GFX6-NEXT: v_mov_b32_e32 v4, s8 +; GFX6-NEXT: s_and_b32 s9, s8, -4 +; GFX6-NEXT: v_mov_b32_e32 v4, s9 ; GFX6-NEXT: buffer_load_dword v1, v4, s[4:7], 0 offen +; GFX6-NEXT: s_and_b32 s8, s8, 3 +; GFX6-NEXT: s_lshl_b32 s10, s8, 3 +; GFX6-NEXT: s_lshl_b32 s8, 0xffff, s10 ; GFX6-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX6-NEXT: s_not_b32 s11, s8 ; GFX6-NEXT: s_mov_b64 s[8:9], 0 ; 
GFX6-NEXT: v_and_b32_e32 v5, 0xffff0000, v0 ; GFX6-NEXT: .LBB9_1: ; %atomicrmw.start ; GFX6-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v0, s10, v1 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX6-NEXT: v_mul_f32_e32 v0, 1.0, v0 ; GFX6-NEXT: v_max_f32_e32 v0, v0, v5 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v1 -; GFX6-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX6-NEXT: v_and_b32_e32 v2, s11, v1 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, s10, v0 ; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX6-NEXT: v_mov_b32_e32 v3, v1 ; GFX6-NEXT: v_mov_b32_e32 v2, v0 @@ -3946,7 +4099,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset(ptr addrspace(7 ; GFX6-NEXT: s_cbranch_execnz .LBB9_1 ; GFX6-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX6-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX6-NEXT: v_lshrrev_b32_e32 v0, s10, v2 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX6-NEXT: s_waitcnt expcnt(0) ; GFX6-NEXT: s_setpc_b64 s[30:31] @@ -3965,15 +4118,19 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_bf16__offset(ptr addrspace(7 ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_addk_co_i32 s4, 0x200 ; GFX12-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX12-NEXT: s_and_b32 s4, s4, -4 -; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12-NEXT: v_mov_b32_e32 v2, s4 -; GFX12-NEXT: s_mov_b32 s4, 0 +; GFX12-NEXT: s_and_b32 s5, s4, -4 +; GFX12-NEXT: s_and_b32 s4, s4, 3 +; GFX12-NEXT: v_mov_b32_e32 v2, s5 +; GFX12-NEXT: s_lshl_b32 s4, s4, 3 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX12-NEXT: s_lshl_b32 s5, 0xffff, s4 +; GFX12-NEXT: s_not_b32 s6, s5 ; GFX12-NEXT: buffer_load_b32 v1, v2, s[0:3], null offen +; GFX12-NEXT: s_mov_b32 s5, 0 ; GFX12-NEXT: .LBB10_1: ; %atomicrmw.start ; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1 ; 
GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v0, s4, v1 ; GFX12-NEXT: s_wait_storecnt 0x0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-NEXT: v_lshlrev_b32_e32 v0, 16, v0 @@ -3987,8 +4144,8 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_bf16__offset(ptr addrspace(7 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc_lo ; GFX12-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX12-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX12-NEXT: v_and_or_b32 v0, 0xffffff, v1, v0 +; GFX12-NEXT: v_lshlrev_b32_e32 v0, s4, v0 +; GFX12-NEXT: v_and_or_b32 v0, v1, s6, v0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_dual_mov_b32 v5, v1 :: v_dual_mov_b32 v4, v0 ; GFX12-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v2, s[0:3], null offen th:TH_ATOMIC_RETURN @@ -3996,41 +4153,42 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_bf16__offset(ptr addrspace(7 ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v1 ; GFX12-NEXT: v_mov_b32_e32 v1, v4 -; GFX12-NEXT: s_or_b32 s4, vcc_lo, s4 +; GFX12-NEXT: s_or_b32 s5, vcc_lo, s5 ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 +; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s5 ; GFX12-NEXT: s_cbranch_execnz .LBB10_1 ; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_bf16__offset: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: s_addk_i32 s4, 0x200 -; GFX940-NEXT: s_and_b32 s4, s4, -4 -; GFX940-NEXT: v_mov_b32_e32 v2, s4 +; GFX940-NEXT: s_and_b32 s5, s4, -4 +; GFX940-NEXT: v_mov_b32_e32 v2, s5 ; GFX940-NEXT: buffer_load_dword v1, v2, s[0:3], 0 offen -; 
GFX940-NEXT: s_mov_b32 s6, 0xffffff +; GFX940-NEXT: s_and_b32 s4, s4, 3 +; GFX940-NEXT: s_lshl_b32 s6, s4, 3 +; GFX940-NEXT: s_lshl_b32 s4, 0xffff, s6 +; GFX940-NEXT: s_not_b32 s7, s4 ; GFX940-NEXT: s_mov_b64 s[4:5], 0 ; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX940-NEXT: s_movk_i32 s7, 0x7fff +; GFX940-NEXT: s_movk_i32 s8, 0x7fff ; GFX940-NEXT: .LBB10_1: ; %atomicrmw.start ; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 24, v1 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX940-NEXT: v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX940-NEXT: buffer_wbl2 sc1 ; GFX940-NEXT: v_max_f32_e32 v0, v0, v3 ; GFX940-NEXT: v_bfe_u32 v4, v0, 16, 1 ; GFX940-NEXT: v_or_b32_e32 v5, 0x400000, v0 -; GFX940-NEXT: v_add3_u32 v4, v4, v0, s7 +; GFX940-NEXT: v_add3_u32 v4, v4, v0, s8 ; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: s_nop 1 ; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX940-NEXT: v_and_or_b32 v0, v1, s6, v0 +; GFX940-NEXT: v_lshlrev_b32_sdwa v0, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX940-NEXT: v_and_or_b32 v0, v1, s7, v0 ; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[0:1] ; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0 ; GFX940-NEXT: s_waitcnt vmcnt(0) @@ -4049,16 +4207,20 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_bf16__offset(ptr addrspace(7 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_addk_i32 s4, 0x200 ; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX11-NEXT: s_and_b32 s4, s4, -4 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: v_mov_b32_e32 v2, s4 -; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_and_b32 s5, s4, -4 +; GFX11-NEXT: s_and_b32 s4, s4, 3 +; GFX11-NEXT: 
v_mov_b32_e32 v2, s5 +; GFX11-NEXT: s_lshl_b32 s4, s4, 3 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_lshl_b32 s5, 0xffff, s4 +; GFX11-NEXT: s_not_b32 s6, s5 ; GFX11-NEXT: buffer_load_b32 v1, v2, s[0:3], 0 offen +; GFX11-NEXT: s_mov_b32 s5, 0 ; GFX11-NEXT: .p2align 6 ; GFX11-NEXT: .LBB10_1: ; %atomicrmw.start ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX11-NEXT: v_lshrrev_b32_e32 v0, s4, v1 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0 @@ -4072,8 +4234,8 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_bf16__offset(ptr addrspace(7 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc_lo ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX11-NEXT: v_and_or_b32 v0, 0xffffff, v1, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v0, s4, v0 +; GFX11-NEXT: v_and_or_b32 v0, v1, s6, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_dual_mov_b32 v5, v1 :: v_dual_mov_b32 v4, v0 ; GFX11-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v2, s[0:3], 0 offen glc @@ -4082,12 +4244,12 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_bf16__offset(ptr addrspace(7 ; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v1 ; GFX11-NEXT: v_mov_b32_e32 v1, v4 -; GFX11-NEXT: s_or_b32 s4, vcc_lo, s4 +; GFX11-NEXT: s_or_b32 s5, vcc_lo, s5 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 +; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s5 ; GFX11-NEXT: s_cbranch_execnz .LBB10_1 ; GFX11-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s5 ; GFX11-NEXT: s_setpc_b64 
s[30:31] ; ; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_bf16__offset: @@ -4095,25 +4257,27 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_bf16__offset(ptr addrspace(7 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_addk_i32 s8, 0x200 ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX10-NEXT: s_and_b32 s8, s8, -4 -; GFX10-NEXT: v_mov_b32_e32 v2, s8 -; GFX10-NEXT: s_mov_b32 s8, 0 +; GFX10-NEXT: s_and_b32 s9, s8, -4 +; GFX10-NEXT: s_and_b32 s8, s8, 3 +; GFX10-NEXT: v_mov_b32_e32 v2, s9 +; GFX10-NEXT: s_lshl_b32 s8, s8, 3 +; GFX10-NEXT: s_lshl_b32 s9, 0xffff, s8 +; GFX10-NEXT: s_not_b32 s10, s9 ; GFX10-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen +; GFX10-NEXT: s_mov_b32 s9, 0 ; GFX10-NEXT: .LBB10_1: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX10-NEXT: v_lshrrev_b32_sdwa v0, s8, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX10-NEXT: v_max_f32_e32 v0, v0, v3 ; GFX10-NEXT: v_bfe_u32 v4, v0, 16, 1 ; GFX10-NEXT: v_or_b32_e32 v5, 0x400000, v0 ; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 ; GFX10-NEXT: v_add3_u32 v4, v4, v0, 0x7fff ; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc_lo -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX10-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX10-NEXT: v_and_or_b32 v0, 0xffffff, v1, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX10-NEXT: v_and_or_b32 v0, v1, s10, v0 ; GFX10-NEXT: v_mov_b32_e32 v5, v1 ; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; GFX10-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[4:7], 0 offen glc @@ -4122,38 +4286,39 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_bf16__offset(ptr addrspace(7 ; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v1 ; GFX10-NEXT: v_mov_b32_e32 v1, v4 -; 
GFX10-NEXT: s_or_b32 s8, vcc_lo, s8 -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s8 +; GFX10-NEXT: s_or_b32 s9, vcc_lo, s9 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s9 ; GFX10-NEXT: s_cbranch_execnz .LBB10_1 ; GFX10-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s9 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: buffer_fat_ptr_agent_atomic_fmax_noret_bf16__offset: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: s_addk_i32 s8, 0x200 -; GFX90A-NEXT: s_and_b32 s8, s8, -4 -; GFX90A-NEXT: v_mov_b32_e32 v2, s8 +; GFX90A-NEXT: s_and_b32 s9, s8, -4 +; GFX90A-NEXT: v_mov_b32_e32 v2, s9 ; GFX90A-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen -; GFX90A-NEXT: s_mov_b32 s10, 0xffffff +; GFX90A-NEXT: s_and_b32 s8, s8, 3 +; GFX90A-NEXT: s_lshl_b32 s10, s8, 3 +; GFX90A-NEXT: s_lshl_b32 s8, 0xffff, s10 +; GFX90A-NEXT: s_not_b32 s11, s8 ; GFX90A-NEXT: s_mov_b64 s[8:9], 0 ; GFX90A-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX90A-NEXT: s_movk_i32 s11, 0x7fff +; GFX90A-NEXT: s_movk_i32 s12, 0x7fff ; GFX90A-NEXT: .LBB10_1: ; %atomicrmw.start ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 24, v1 -; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX90A-NEXT: v_lshrrev_b32_sdwa v0, s10, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX90A-NEXT: v_max_f32_e32 v0, v0, v3 ; GFX90A-NEXT: v_bfe_u32 v4, v0, 16, 1 ; GFX90A-NEXT: v_or_b32_e32 v5, 0x400000, v0 -; GFX90A-NEXT: v_add3_u32 v4, v4, v0, s11 +; GFX90A-NEXT: v_add3_u32 v4, v4, v0, s12 ; GFX90A-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 ; GFX90A-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc -; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX90A-NEXT: v_and_or_b32 v0, v1, s10, v0 +; GFX90A-NEXT: v_lshlrev_b32_sdwa v0, s10, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; 
GFX90A-NEXT: v_and_or_b32 v0, v1, s11, v0 ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[0:1] op_sel:[0,1] ; GFX90A-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[4:7], 0 offen glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) @@ -4171,27 +4336,28 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_bf16__offset(ptr addrspace(7 ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: s_addk_i32 s8, 0x200 -; GFX908-NEXT: s_and_b32 s8, s8, -4 -; GFX908-NEXT: v_mov_b32_e32 v2, s8 +; GFX908-NEXT: s_and_b32 s9, s8, -4 +; GFX908-NEXT: v_mov_b32_e32 v2, s9 ; GFX908-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen -; GFX908-NEXT: s_mov_b32 s10, 0xffffff +; GFX908-NEXT: s_and_b32 s8, s8, 3 +; GFX908-NEXT: s_lshl_b32 s10, s8, 3 +; GFX908-NEXT: s_lshl_b32 s8, 0xffff, s10 +; GFX908-NEXT: s_not_b32 s11, s8 ; GFX908-NEXT: s_mov_b64 s[8:9], 0 ; GFX908-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX908-NEXT: s_movk_i32 s11, 0x7fff +; GFX908-NEXT: s_movk_i32 s12, 0x7fff ; GFX908-NEXT: .LBB10_1: ; %atomicrmw.start ; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_lshrrev_b32_e32 v0, 24, v1 -; GFX908-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX908-NEXT: v_lshrrev_b32_sdwa v0, s10, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX908-NEXT: v_max_f32_e32 v0, v0, v3 ; GFX908-NEXT: v_bfe_u32 v4, v0, 16, 1 ; GFX908-NEXT: v_or_b32_e32 v5, 0x400000, v0 -; GFX908-NEXT: v_add3_u32 v4, v4, v0, s11 +; GFX908-NEXT: v_add3_u32 v4, v4, v0, s12 ; GFX908-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 ; GFX908-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc -; GFX908-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX908-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX908-NEXT: v_and_or_b32 v0, v1, s10, v0 +; GFX908-NEXT: v_lshlrev_b32_sdwa v0, s10, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX908-NEXT: v_and_or_b32 v0, v1, s11, v0 ; GFX908-NEXT: v_mov_b32_e32 v5, v1 ; GFX908-NEXT: v_mov_b32_e32 v4, v0 ; GFX908-NEXT: 
buffer_atomic_cmpswap v[4:5], v2, s[4:7], 0 offen glc @@ -4210,26 +4376,29 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_bf16__offset(ptr addrspace(7 ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_addk_i32 s8, 0x200 -; GFX8-NEXT: s_and_b32 s8, s8, -4 -; GFX8-NEXT: v_mov_b32_e32 v2, s8 +; GFX8-NEXT: s_and_b32 s9, s8, -4 +; GFX8-NEXT: v_mov_b32_e32 v2, s9 ; GFX8-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen +; GFX8-NEXT: s_and_b32 s8, s8, 3 +; GFX8-NEXT: s_lshl_b32 s10, s8, 3 +; GFX8-NEXT: s_lshl_b32 s8, 0xffff, s10 +; GFX8-NEXT: s_not_b32 s11, s8 ; GFX8-NEXT: s_mov_b64 s[8:9], 0 ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v0 ; GFX8-NEXT: .LBB10_1: ; %atomicrmw.start ; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX8-NEXT: v_mov_b32_e32 v0, s10 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v1 -; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX8-NEXT: v_max_f32_e32 v0, v0, v3 -; GFX8-NEXT: v_bfe_u32 v5, v0, 16, 1 -; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v0 -; GFX8-NEXT: v_add_u32_e32 v5, vcc, 0x7fff, v5 -; GFX8-NEXT: v_or_b32_e32 v6, 0x400000, v0 -; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v6, vcc -; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v1 -; GFX8-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX8-NEXT: v_lshrrev_b32_sdwa v5, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-NEXT: v_max_f32_e32 v5, v5, v3 +; GFX8-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v5 +; GFX8-NEXT: v_add_u32_e32 v6, vcc, 0x7fff, v6 +; GFX8-NEXT: v_or_b32_e32 v7, 0x400000, v5 +; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX8-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc +; GFX8-NEXT: v_and_b32_e32 v4, s11, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX8-NEXT: v_or_b32_e32 v0, v4, v0 ; GFX8-NEXT: v_mov_b32_e32 v5, v1 ; 
GFX8-NEXT: v_mov_b32_e32 v4, v0 @@ -4249,22 +4418,26 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_bf16__offset(ptr addrspace(7 ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: s_addk_i32 s8, 0x200 -; GFX7-NEXT: s_and_b32 s8, s8, -4 -; GFX7-NEXT: v_mov_b32_e32 v2, s8 +; GFX7-NEXT: s_and_b32 s9, s8, -4 +; GFX7-NEXT: v_mov_b32_e32 v2, s9 ; GFX7-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen +; GFX7-NEXT: s_and_b32 s8, s8, 3 +; GFX7-NEXT: s_lshl_b32 s10, s8, 3 +; GFX7-NEXT: s_lshl_b32 s8, 0xffff, s10 ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-NEXT: s_not_b32 s11, s8 ; GFX7-NEXT: s_mov_b64 s[8:9], 0 ; GFX7-NEXT: v_and_b32_e32 v3, 0xffff0000, v0 ; GFX7-NEXT: .LBB10_1: ; %atomicrmw.start ; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, s10, v1 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 ; GFX7-NEXT: v_max_f32_e32 v0, v0, v3 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX7-NEXT: v_and_b32_e32 v4, 0xffffff, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v4, s11, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, s10, v0 ; GFX7-NEXT: v_or_b32_e32 v0, v4, v0 ; GFX7-NEXT: v_mov_b32_e32 v5, v1 ; GFX7-NEXT: v_mov_b32_e32 v4, v0 @@ -4284,23 +4457,27 @@ define void @buffer_fat_ptr_agent_atomic_fmax_noret_bf16__offset(ptr addrspace(7 ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: s_addk_i32 s8, 0x200 -; GFX6-NEXT: s_and_b32 s8, s8, -4 -; GFX6-NEXT: v_mov_b32_e32 v2, s8 +; GFX6-NEXT: s_and_b32 s9, s8, -4 +; GFX6-NEXT: v_mov_b32_e32 v2, s9 ; GFX6-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen +; GFX6-NEXT: s_and_b32 s8, s8, 3 +; GFX6-NEXT: s_lshl_b32 s10, s8, 3 +; GFX6-NEXT: s_lshl_b32 s8, 0xffff, s10 ; GFX6-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX6-NEXT: s_not_b32 s11, s8 ; GFX6-NEXT: s_mov_b64 s[8:9], 0 ; GFX6-NEXT: v_and_b32_e32 v3, 
0xffff0000, v0 ; GFX6-NEXT: .LBB10_1: ; %atomicrmw.start ; GFX6-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v0, s10, v1 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX6-NEXT: v_mul_f32_e32 v0, 1.0, v0 ; GFX6-NEXT: v_max_f32_e32 v0, v0, v3 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v1 -; GFX6-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX6-NEXT: v_and_b32_e32 v4, s11, v1 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, s10, v0 ; GFX6-NEXT: v_or_b32_e32 v0, v4, v0 ; GFX6-NEXT: v_mov_b32_e32 v5, v1 ; GFX6-NEXT: v_mov_b32_e32 v4, v0 @@ -4331,8 +4508,13 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall(ptr ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 ; GFX12-NEXT: s_mov_b32 s1, exec_lo -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_and_b32_e32 v7, -4, v4 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX12-NEXT: v_and_b32_e32 v6, 3, v4 +; GFX12-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX12-NEXT: v_lshlrev_b32_e32 v7, 3, v6 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-NEXT: v_lshlrev_b32_e64 v6, v7, 0xffff +; GFX12-NEXT: v_not_b32_e32 v9, v6 ; GFX12-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 ; GFX12-NEXT: v_readfirstlane_b32 s4, v0 ; GFX12-NEXT: v_readfirstlane_b32 s5, v1 @@ -4344,34 +4526,34 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall(ptr ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX12-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX12-NEXT: s_and_saveexec_b32 s0, s0 -; GFX12-NEXT: buffer_load_b32 v6, v7, s[4:7], null offen +; GFX12-NEXT: buffer_load_b32 v6, v8, s[4:7], null offen ; GFX12-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_cbranch_execnz .LBB11_1 ; 
GFX12-NEXT: ; %bb.2: ; GFX12-NEXT: s_mov_b32 exec_lo, s1 -; GFX12-NEXT: v_lshlrev_b32_e32 v8, 16, v5 +; GFX12-NEXT: v_lshlrev_b32_e32 v10, 16, v5 ; GFX12-NEXT: s_mov_b32 s1, 0 ; GFX12-NEXT: .LBB11_3: ; %atomicrmw.start ; GFX12-NEXT: ; =>This Loop Header: Depth=1 ; GFX12-NEXT: ; Child Loop BB11_4 Depth 2 ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: v_lshrrev_b32_e32 v4, 24, v6 +; GFX12-NEXT: v_lshrrev_b32_e32 v4, v7, v6 ; GFX12-NEXT: s_mov_b32 s2, exec_lo ; GFX12-NEXT: s_wait_storecnt 0x0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX12-NEXT: v_max_num_f32_e32 v4, v4, v8 +; GFX12-NEXT: v_max_num_f32_e32 v4, v4, v10 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) ; GFX12-NEXT: v_bfe_u32 v5, v4, 16, 1 -; GFX12-NEXT: v_or_b32_e32 v9, 0x400000, v4 +; GFX12-NEXT: v_or_b32_e32 v11, 0x400000, v4 ; GFX12-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4 ; GFX12-NEXT: v_add3_u32 v5, v5, v4, 0x7fff ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX12-NEXT: v_cndmask_b32_e32 v4, v5, v9, vcc_lo +; GFX12-NEXT: v_cndmask_b32_e32 v4, v5, v11, vcc_lo ; GFX12-NEXT: v_lshrrev_b32_e32 v4, 16, v4 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX12-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX12-NEXT: v_and_or_b32 v5, 0xffffff, v6, v4 +; GFX12-NEXT: v_lshlrev_b32_e32 v4, v7, v4 +; GFX12-NEXT: v_and_or_b32 v5, v6, v9, v4 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_mov_b32_e32 v4, v5 ; GFX12-NEXT: v_mov_b32_e32 v5, v6 @@ -4388,7 +4570,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall(ptr ; GFX12-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX12-NEXT: s_and_saveexec_b32 s0, s0 ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v7, s[4:7], null offen th:TH_ATOMIC_RETURN +; GFX12-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[4:7], 
null offen th:TH_ATOMIC_RETURN ; GFX12-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_cbranch_execnz .LBB11_4 ; GFX12-NEXT: ; %bb.5: ; in Loop: Header=BB11_3 Depth=1 @@ -4403,15 +4585,19 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall(ptr ; GFX12-NEXT: s_cbranch_execnz .LBB11_3 ; GFX12-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 -; GFX12-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX12-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: v_add_u32_e32 v4, 0x200, v4 -; GFX940-NEXT: v_and_b32_e32 v8, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v9, 0xffffff +; GFX940-NEXT: v_and_b32_e32 v9, -4, v4 +; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX940-NEXT: v_lshlrev_b32_e32 v8, 3, v4 +; GFX940-NEXT: s_mov_b32 s0, 0xffff +; GFX940-NEXT: v_lshlrev_b32_e64 v4, v8, s0 +; GFX940-NEXT: v_not_b32_e32 v10, v4 ; GFX940-NEXT: s_mov_b64 s[2:3], exec ; GFX940-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 ; GFX940-NEXT: v_readfirstlane_b32 s4, v0 @@ -4423,31 +4609,30 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall(ptr ; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] ; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] ; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: buffer_load_dword v7, v8, s[4:7], 0 offen +; GFX940-NEXT: buffer_load_dword v7, v9, s[4:7], 0 offen ; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] ; GFX940-NEXT: s_cbranch_execnz .LBB11_1 ; GFX940-NEXT: ; %bb.2: ; GFX940-NEXT: s_mov_b64 exec, s[2:3] ; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v10, 16, v5 +; GFX940-NEXT: v_lshlrev_b32_e32 v11, 16, v5 ; GFX940-NEXT: s_movk_i32 s10, 0x7fff ; GFX940-NEXT: .LBB11_3: ; %atomicrmw.start ; GFX940-NEXT: ; =>This Loop Header: Depth=1 ; GFX940-NEXT: ; Child Loop BB11_4 
Depth 2 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v4, 24, v7 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX940-NEXT: v_max_f32_e32 v4, v4, v10 +; GFX940-NEXT: v_lshrrev_b32_sdwa v4, v8, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX940-NEXT: s_mov_b64 s[8:9], exec +; GFX940-NEXT: v_max_f32_e32 v4, v4, v11 ; GFX940-NEXT: v_bfe_u32 v5, v4, 16, 1 ; GFX940-NEXT: v_add3_u32 v5, v5, v4, s10 ; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v4 ; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 -; GFX940-NEXT: s_mov_b64 s[8:9], exec ; GFX940-NEXT: buffer_wbl2 sc1 +; GFX940-NEXT: s_nop 0 ; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v4, 16, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX940-NEXT: v_and_or_b32 v6, v7, v9, v4 +; GFX940-NEXT: v_lshlrev_b32_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX940-NEXT: v_and_or_b32 v6, v7, v10, v4 ; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[6:7] ; GFX940-NEXT: .LBB11_4: ; Parent Loop BB11_3 Depth=1 ; GFX940-NEXT: ; => This Inner Loop Header: Depth=2 @@ -4461,7 +4646,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall(ptr ; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] ; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen sc0 +; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v9, s[4:7], 0 offen sc0 ; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] ; GFX940-NEXT: s_cbranch_execnz .LBB11_4 ; GFX940-NEXT: ; %bb.5: ; in Loop: Header=BB11_3 Depth=1 @@ -4475,7 +4660,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall(ptr ; GFX940-NEXT: s_cbranch_execnz .LBB11_3 ; GFX940-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX940-NEXT: v_lshrrev_b32_e32 v0, v8, v4 ; GFX940-NEXT: s_setpc_b64 s[30:31] ; ; 
GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall: @@ -4484,8 +4669,13 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall(ptr ; GFX11-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 ; GFX11-NEXT: s_mov_b32 s1, 0 ; GFX11-NEXT: s_mov_b32 s2, exec_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_and_b32_e32 v7, -4, v4 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v6, 3, v4 +; GFX11-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX11-NEXT: v_lshlrev_b32_e32 v7, 3, v6 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e64 v6, v7, 0xffff +; GFX11-NEXT: v_not_b32_e32 v9, v6 ; GFX11-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: v_readfirstlane_b32 s4, v0 ; GFX11-NEXT: v_readfirstlane_b32 s5, v1 @@ -4497,35 +4687,35 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall(ptr ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX11-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-NEXT: buffer_load_b32 v6, v7, s[4:7], 0 offen +; GFX11-NEXT: buffer_load_b32 v6, v8, s[4:7], 0 offen ; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX11-NEXT: s_cbranch_execnz .LBB11_1 ; GFX11-NEXT: ; %bb.2: ; GFX11-NEXT: s_mov_b32 exec_lo, s2 -; GFX11-NEXT: v_lshlrev_b32_e32 v8, 16, v5 +; GFX11-NEXT: v_lshlrev_b32_e32 v10, 16, v5 ; GFX11-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX11-NEXT: .p2align 6 ; GFX11-NEXT: .LBB11_3: ; %atomicrmw.start ; GFX11-NEXT: ; =>This Loop Header: Depth=1 ; GFX11-NEXT: ; Child Loop BB11_4 Depth 2 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_lshrrev_b32_e32 v4, 24, v6 +; GFX11-NEXT: v_lshrrev_b32_e32 v4, v7, v6 ; GFX11-NEXT: s_mov_b32 s2, exec_lo ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; 
GFX11-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX11-NEXT: v_max_f32_e32 v4, v4, v8 +; GFX11-NEXT: v_max_f32_e32 v4, v4, v10 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_bfe_u32 v5, v4, 16, 1 -; GFX11-NEXT: v_or_b32_e32 v9, 0x400000, v4 +; GFX11-NEXT: v_or_b32_e32 v11, 0x400000, v4 ; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4 ; GFX11-NEXT: v_add3_u32 v5, v5, v4, 0x7fff ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_cndmask_b32_e32 v4, v5, v9, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v4, v5, v11, vcc_lo ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v4 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX11-NEXT: v_and_or_b32 v5, 0xffffff, v6, v4 +; GFX11-NEXT: v_lshlrev_b32_e32 v4, v7, v4 +; GFX11-NEXT: v_and_or_b32 v5, v6, v9, v4 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_mov_b32_e32 v4, v5 ; GFX11-NEXT: v_mov_b32_e32 v5, v6 @@ -4542,7 +4732,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall(ptr ; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX11-NEXT: s_and_saveexec_b32 s0, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v7, s[4:7], 0 offen glc +; GFX11-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[4:7], 0 offen glc ; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX11-NEXT: s_cbranch_execnz .LBB11_4 ; GFX11-NEXT: ; %bb.5: ; in Loop: Header=BB11_3 Depth=1 @@ -4559,7 +4749,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall(ptr ; GFX11-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX11-NEXT: s_set_inst_prefetch_distance 0x2 ; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s1 -; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX11-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall: @@ -4568,7 +4758,11 @@ 
define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall(ptr ; GFX10-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 ; GFX10-NEXT: s_mov_b32 s5, 0 ; GFX10-NEXT: s_mov_b32 s6, exec_lo -; GFX10-NEXT: v_and_b32_e32 v7, -4, v4 +; GFX10-NEXT: v_and_b32_e32 v6, 3, v4 +; GFX10-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX10-NEXT: v_lshlrev_b32_e32 v7, 3, v6 +; GFX10-NEXT: v_lshlrev_b32_e64 v6, v7, 0xffff +; GFX10-NEXT: v_not_b32_e32 v9, v6 ; GFX10-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_readfirstlane_b32 s8, v0 ; GFX10-NEXT: v_readfirstlane_b32 s9, v1 @@ -4578,30 +4772,28 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall(ptr ; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] ; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 ; GFX10-NEXT: s_and_saveexec_b32 s4, s4 -; GFX10-NEXT: buffer_load_dword v6, v7, s[8:11], 0 offen +; GFX10-NEXT: buffer_load_dword v6, v8, s[8:11], 0 offen ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: s_cbranch_execnz .LBB11_1 ; GFX10-NEXT: ; %bb.2: ; GFX10-NEXT: s_mov_b32 exec_lo, s6 -; GFX10-NEXT: v_lshlrev_b32_e32 v8, 16, v5 +; GFX10-NEXT: v_lshlrev_b32_e32 v10, 16, v5 ; GFX10-NEXT: .LBB11_3: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Loop Header: Depth=1 ; GFX10-NEXT: ; Child Loop BB11_4 Depth 2 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v6 +; GFX10-NEXT: v_lshrrev_b32_sdwa v4, v7, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX10-NEXT: s_mov_b32 s6, exec_lo ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX10-NEXT: v_max_f32_e32 v4, v4, v8 +; GFX10-NEXT: v_max_f32_e32 v4, v4, v10 ; GFX10-NEXT: v_bfe_u32 v5, v4, 16, 1 -; GFX10-NEXT: v_or_b32_e32 v9, 0x400000, v4 +; GFX10-NEXT: v_or_b32_e32 v11, 0x400000, v4 ; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4 ; GFX10-NEXT: v_add3_u32 v5, v5, v4, 0x7fff -; GFX10-NEXT: v_cndmask_b32_e32 v4, v5, v9, vcc_lo -; 
GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v4 -; GFX10-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX10-NEXT: v_and_or_b32 v5, 0xffffff, v6, v4 +; GFX10-NEXT: v_cndmask_b32_e32 v4, v5, v11, vcc_lo +; GFX10-NEXT: v_lshlrev_b32_sdwa v4, v7, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX10-NEXT: v_and_or_b32 v5, v6, v9, v4 ; GFX10-NEXT: v_mov_b32_e32 v4, v5 ; GFX10-NEXT: v_mov_b32_e32 v5, v6 ; GFX10-NEXT: .LBB11_4: ; Parent Loop BB11_3 Depth=1 @@ -4615,7 +4807,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall(ptr ; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 ; GFX10-NEXT: s_and_saveexec_b32 s4, s4 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_atomic_cmpswap v[4:5], v7, s[8:11], 0 offen glc +; GFX10-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: s_cbranch_execnz .LBB11_4 @@ -4632,15 +4824,19 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall(ptr ; GFX10-NEXT: s_cbranch_execnz .LBB11_3 ; GFX10-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5 -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX10-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_add_u32_e32 v4, 0x200, v4 -; GFX90A-NEXT: v_and_b32_e32 v8, -4, v4 -; GFX90A-NEXT: v_mov_b32_e32 v9, 0xffffff +; GFX90A-NEXT: v_and_b32_e32 v9, -4, v4 +; GFX90A-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX90A-NEXT: v_lshlrev_b32_e32 v8, 3, v4 +; GFX90A-NEXT: s_mov_b32 s4, 0xffff +; GFX90A-NEXT: v_lshlrev_b32_e64 v4, v8, s4 +; GFX90A-NEXT: v_not_b32_e32 v10, v4 ; GFX90A-NEXT: s_mov_b64 s[6:7], exec ; GFX90A-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: v_readfirstlane_b32 s8, v0 @@ -4652,29 +4848,27 @@ define bfloat 
@buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall(ptr ; GFX90A-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX90A-NEXT: s_nop 0 -; GFX90A-NEXT: buffer_load_dword v7, v8, s[8:11], 0 offen +; GFX90A-NEXT: buffer_load_dword v7, v9, s[8:11], 0 offen ; GFX90A-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX90A-NEXT: s_cbranch_execnz .LBB11_1 ; GFX90A-NEXT: ; %bb.2: ; GFX90A-NEXT: s_mov_b64 exec, s[6:7] ; GFX90A-NEXT: s_mov_b64 s[6:7], 0 -; GFX90A-NEXT: v_lshlrev_b32_e32 v10, 16, v5 +; GFX90A-NEXT: v_lshlrev_b32_e32 v11, 16, v5 ; GFX90A-NEXT: s_movk_i32 s14, 0x7fff ; GFX90A-NEXT: .LBB11_3: ; %atomicrmw.start ; GFX90A-NEXT: ; =>This Loop Header: Depth=1 ; GFX90A-NEXT: ; Child Loop BB11_4 Depth 2 ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: v_lshrrev_b32_e32 v4, 24, v7 -; GFX90A-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX90A-NEXT: v_max_f32_e32 v4, v4, v10 +; GFX90A-NEXT: v_lshrrev_b32_sdwa v4, v8, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX90A-NEXT: v_max_f32_e32 v4, v4, v11 ; GFX90A-NEXT: v_bfe_u32 v5, v4, 16, 1 ; GFX90A-NEXT: v_add3_u32 v5, v5, v4, s14 ; GFX90A-NEXT: v_or_b32_e32 v6, 0x400000, v4 ; GFX90A-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 ; GFX90A-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc -; GFX90A-NEXT: v_lshrrev_b32_e32 v4, 16, v4 -; GFX90A-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX90A-NEXT: v_and_or_b32 v6, v7, v9, v4 +; GFX90A-NEXT: v_lshlrev_b32_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX90A-NEXT: v_and_or_b32 v6, v7, v10, v4 ; GFX90A-NEXT: s_mov_b64 s[12:13], exec ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[6:7], v[6:7] op_sel:[0,1] ; GFX90A-NEXT: .LBB11_4: ; Parent Loop BB11_3 Depth=1 @@ -4688,7 +4882,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall(ptr ; GFX90A-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: 
buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc +; GFX90A-NEXT: buffer_atomic_cmpswap v[4:5], v9, s[8:11], 0 offen glc ; GFX90A-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX90A-NEXT: s_cbranch_execnz .LBB11_4 ; GFX90A-NEXT: ; %bb.5: ; in Loop: Header=BB11_3 Depth=1 @@ -4702,15 +4896,19 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall(ptr ; GFX90A-NEXT: s_cbranch_execnz .LBB11_3 ; GFX90A-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX90A-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX90A-NEXT: v_lshrrev_b32_e32 v0, v8, v4 ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: v_add_u32_e32 v4, 0x200, v4 -; GFX908-NEXT: v_and_b32_e32 v7, -4, v4 -; GFX908-NEXT: v_mov_b32_e32 v8, 0xffffff +; GFX908-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX908-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX908-NEXT: v_lshlrev_b32_e32 v7, 3, v4 +; GFX908-NEXT: s_mov_b32 s4, 0xffff +; GFX908-NEXT: v_lshlrev_b32_e64 v4, v7, s4 +; GFX908-NEXT: v_not_b32_e32 v9, v4 ; GFX908-NEXT: s_mov_b64 s[6:7], exec ; GFX908-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 ; GFX908-NEXT: v_readfirstlane_b32 s8, v0 @@ -4722,29 +4920,27 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall(ptr ; GFX908-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX908-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX908-NEXT: s_nop 0 -; GFX908-NEXT: buffer_load_dword v6, v7, s[8:11], 0 offen +; GFX908-NEXT: buffer_load_dword v6, v8, s[8:11], 0 offen ; GFX908-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX908-NEXT: s_cbranch_execnz .LBB11_1 ; GFX908-NEXT: ; %bb.2: ; GFX908-NEXT: s_mov_b64 exec, s[6:7] ; GFX908-NEXT: s_mov_b64 s[6:7], 0 -; GFX908-NEXT: v_lshlrev_b32_e32 v9, 16, v5 +; GFX908-NEXT: v_lshlrev_b32_e32 v10, 16, v5 ; GFX908-NEXT: s_movk_i32 s14, 0x7fff ; GFX908-NEXT: .LBB11_3: ; %atomicrmw.start ; GFX908-NEXT: ; 
=>This Loop Header: Depth=1 ; GFX908-NEXT: ; Child Loop BB11_4 Depth 2 ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_lshrrev_b32_e32 v4, 24, v6 -; GFX908-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX908-NEXT: v_max_f32_e32 v4, v4, v9 +; GFX908-NEXT: v_lshrrev_b32_sdwa v4, v7, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX908-NEXT: v_max_f32_e32 v4, v4, v10 ; GFX908-NEXT: v_bfe_u32 v5, v4, 16, 1 ; GFX908-NEXT: v_add3_u32 v5, v5, v4, s14 -; GFX908-NEXT: v_or_b32_e32 v10, 0x400000, v4 +; GFX908-NEXT: v_or_b32_e32 v11, 0x400000, v4 ; GFX908-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 -; GFX908-NEXT: v_cndmask_b32_e32 v4, v5, v10, vcc -; GFX908-NEXT: v_lshrrev_b32_e32 v4, 16, v4 -; GFX908-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX908-NEXT: v_and_or_b32 v5, v6, v8, v4 +; GFX908-NEXT: v_cndmask_b32_e32 v4, v5, v11, vcc +; GFX908-NEXT: v_lshlrev_b32_sdwa v4, v7, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX908-NEXT: v_and_or_b32 v5, v6, v9, v4 ; GFX908-NEXT: v_mov_b32_e32 v4, v5 ; GFX908-NEXT: s_mov_b64 s[12:13], exec ; GFX908-NEXT: v_mov_b32_e32 v5, v6 @@ -4759,7 +4955,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall(ptr ; GFX908-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX908-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: buffer_atomic_cmpswap v[4:5], v7, s[8:11], 0 offen glc +; GFX908-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc ; GFX908-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX908-NEXT: s_cbranch_execnz .LBB11_4 ; GFX908-NEXT: ; %bb.5: ; in Loop: Header=BB11_3 Depth=1 @@ -4773,14 +4969,19 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall(ptr ; GFX908-NEXT: s_cbranch_execnz .LBB11_3 ; GFX908-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX908-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX908-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX908-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX908-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: 
buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0x200, v4 -; GFX8-NEXT: v_and_b32_e32 v7, -4, v4 +; GFX8-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX8-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v7, 3, v4 +; GFX8-NEXT: s_mov_b32 s4, 0xffff +; GFX8-NEXT: v_lshlrev_b32_e64 v4, v7, s4 +; GFX8-NEXT: v_not_b32_e32 v9, v4 ; GFX8-NEXT: s_mov_b64 s[6:7], exec ; GFX8-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 ; GFX8-NEXT: v_readfirstlane_b32 s8, v0 @@ -4792,29 +4993,27 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall(ptr ; GFX8-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX8-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX8-NEXT: s_nop 0 -; GFX8-NEXT: buffer_load_dword v6, v7, s[8:11], 0 offen +; GFX8-NEXT: buffer_load_dword v6, v8, s[8:11], 0 offen ; GFX8-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX8-NEXT: s_cbranch_execnz .LBB11_1 ; GFX8-NEXT: ; %bb.2: ; GFX8-NEXT: s_mov_b64 exec, s[6:7] ; GFX8-NEXT: s_mov_b64 s[6:7], 0 -; GFX8-NEXT: v_lshlrev_b32_e32 v8, 16, v5 +; GFX8-NEXT: v_lshlrev_b32_e32 v10, 16, v5 ; GFX8-NEXT: .LBB11_3: ; %atomicrmw.start ; GFX8-NEXT: ; =>This Loop Header: Depth=1 ; GFX8-NEXT: ; Child Loop BB11_4 Depth 2 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v6 -; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX8-NEXT: v_max_f32_e32 v4, v4, v8 +; GFX8-NEXT: v_lshrrev_b32_sdwa v4, v7, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-NEXT: v_max_f32_e32 v4, v4, v10 ; GFX8-NEXT: v_bfe_u32 v5, v4, 16, 1 ; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v4 ; GFX8-NEXT: v_add_u32_e32 v5, vcc, 0x7fff, v5 -; GFX8-NEXT: v_or_b32_e32 v9, 0x400000, v4 +; GFX8-NEXT: v_or_b32_e32 v11, 0x400000, v4 ; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 -; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v9, vcc -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v4 -; GFX8-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; 
GFX8-NEXT: v_and_b32_e32 v5, 0xffffff, v6 +; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v11, vcc +; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v7, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_and_b32_e32 v5, v6, v9 ; GFX8-NEXT: v_or_b32_e32 v5, v5, v4 ; GFX8-NEXT: v_mov_b32_e32 v4, v5 ; GFX8-NEXT: s_mov_b64 s[12:13], exec @@ -4830,7 +5029,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall(ptr ; GFX8-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX8-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: buffer_atomic_cmpswap v[4:5], v7, s[8:11], 0 offen glc +; GFX8-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc ; GFX8-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX8-NEXT: s_cbranch_execnz .LBB11_4 ; GFX8-NEXT: ; %bb.5: ; in Loop: Header=BB11_3 Depth=1 @@ -4844,14 +5043,18 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall(ptr ; GFX8-NEXT: s_cbranch_execnz .LBB11_3 ; GFX8-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX8-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_add_i32_e32 v4, vcc, 0x200, v4 -; GFX7-NEXT: v_and_b32_e32 v7, -4, v4 +; GFX7-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX7-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX7-NEXT: v_lshlrev_b32_e32 v7, 3, v4 +; GFX7-NEXT: v_lshl_b32_e32 v4, 0xffff, v7 +; GFX7-NEXT: v_not_b32_e32 v9, v4 ; GFX7-NEXT: s_mov_b64 s[6:7], exec ; GFX7-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 ; GFX7-NEXT: v_readfirstlane_b32 s8, v0 @@ -4862,25 +5065,25 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall(ptr ; GFX7-NEXT: v_cmp_eq_u64_e64 s[4:5], s[10:11], v[2:3] ; GFX7-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX7-NEXT: s_and_saveexec_b64 s[4:5], 
s[4:5] -; GFX7-NEXT: buffer_load_dword v6, v7, s[8:11], 0 offen +; GFX7-NEXT: buffer_load_dword v6, v8, s[8:11], 0 offen ; GFX7-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_cbranch_execnz .LBB11_1 ; GFX7-NEXT: ; %bb.2: ; GFX7-NEXT: s_mov_b64 exec, s[6:7] ; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v5 ; GFX7-NEXT: s_mov_b64 s[6:7], 0 -; GFX7-NEXT: v_and_b32_e32 v8, 0xffff0000, v4 +; GFX7-NEXT: v_and_b32_e32 v10, 0xffff0000, v4 ; GFX7-NEXT: .LBB11_3: ; %atomicrmw.start ; GFX7-NEXT: ; =>This Loop Header: Depth=1 ; GFX7-NEXT: ; Child Loop BB11_4 Depth 2 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v4, 24, v6 +; GFX7-NEXT: v_lshrrev_b32_e32 v4, v7, v6 ; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v4 -; GFX7-NEXT: v_max_f32_e32 v4, v4, v8 +; GFX7-NEXT: v_max_f32_e32 v4, v4, v10 ; GFX7-NEXT: v_lshrrev_b32_e32 v4, 16, v4 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX7-NEXT: v_and_b32_e32 v5, 0xffffff, v6 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, v7, v4 +; GFX7-NEXT: v_and_b32_e32 v5, v6, v9 ; GFX7-NEXT: v_or_b32_e32 v5, v5, v4 ; GFX7-NEXT: v_mov_b32_e32 v4, v5 ; GFX7-NEXT: s_mov_b64 s[12:13], exec @@ -4896,7 +5099,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall(ptr ; GFX7-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX7-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: buffer_atomic_cmpswap v[4:5], v7, s[8:11], 0 offen glc +; GFX7-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc ; GFX7-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_cbranch_execnz .LBB11_4 ; GFX7-NEXT: ; %bb.5: ; in Loop: Header=BB11_3 Depth=1 @@ -4910,7 +5113,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall(ptr ; GFX7-NEXT: s_cbranch_execnz .LBB11_3 ; GFX7-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 
; GFX7-NEXT: s_setpc_b64 s[30:31] ; @@ -4918,7 +5121,11 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall(ptr ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0x200, v4 -; GFX6-NEXT: v_and_b32_e32 v7, -4, v4 +; GFX6-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX6-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX6-NEXT: v_lshlrev_b32_e32 v7, 3, v4 +; GFX6-NEXT: v_lshl_b32_e32 v4, 0xffff, v7 +; GFX6-NEXT: v_not_b32_e32 v9, v4 ; GFX6-NEXT: s_mov_b64 s[6:7], exec ; GFX6-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 ; GFX6-NEXT: v_readfirstlane_b32 s8, v0 @@ -4929,25 +5136,25 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall(ptr ; GFX6-NEXT: v_cmp_eq_u64_e64 s[4:5], s[10:11], v[2:3] ; GFX6-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX6-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GFX6-NEXT: buffer_load_dword v6, v7, s[8:11], 0 offen +; GFX6-NEXT: buffer_load_dword v6, v8, s[8:11], 0 offen ; GFX6-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX6-NEXT: s_cbranch_execnz .LBB11_1 ; GFX6-NEXT: ; %bb.2: ; GFX6-NEXT: s_mov_b64 exec, s[6:7] ; GFX6-NEXT: v_mul_f32_e32 v4, 1.0, v5 ; GFX6-NEXT: s_mov_b64 s[6:7], 0 -; GFX6-NEXT: v_and_b32_e32 v8, 0xffff0000, v4 +; GFX6-NEXT: v_and_b32_e32 v10, 0xffff0000, v4 ; GFX6-NEXT: .LBB11_3: ; %atomicrmw.start ; GFX6-NEXT: ; =>This Loop Header: Depth=1 ; GFX6-NEXT: ; Child Loop BB11_4 Depth 2 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) -; GFX6-NEXT: v_lshrrev_b32_e32 v4, 24, v6 +; GFX6-NEXT: v_lshrrev_b32_e32 v4, v7, v6 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX6-NEXT: v_mul_f32_e32 v4, 1.0, v4 -; GFX6-NEXT: v_max_f32_e32 v4, v4, v8 +; GFX6-NEXT: v_max_f32_e32 v4, v4, v10 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, 16, v4 -; GFX6-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v6 +; GFX6-NEXT: v_lshlrev_b32_e32 v4, v7, v4 +; GFX6-NEXT: v_and_b32_e32 v5, v6, v9 ; GFX6-NEXT: v_or_b32_e32 v5, v5, v4 ; GFX6-NEXT: v_mov_b32_e32 v4, v5 ; 
GFX6-NEXT: s_mov_b64 s[12:13], exec @@ -4963,7 +5170,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall(ptr ; GFX6-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX6-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[4:5], v7, s[8:11], 0 offen glc +; GFX6-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc ; GFX6-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX6-NEXT: s_cbranch_execnz .LBB11_4 ; GFX6-NEXT: ; %bb.5: ; in Loop: Header=BB11_3 Depth=1 @@ -4977,7 +5184,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall(ptr ; GFX6-NEXT: s_cbranch_execnz .LBB11_3 ; GFX6-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX6-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX6-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX6-NEXT: s_waitcnt expcnt(0) ; GFX6-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmin.ll b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmin.ll index 51f63c93af57b2..89289c15dcae58 100644 --- a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmin.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmin.ll @@ -2354,57 +2354,66 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset(ptr addrspace(7) i ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_addk_co_i32 s4, 0x200 ; GFX12-NEXT: v_max_num_f16_e32 v5, v0, v0 -; GFX12-NEXT: s_and_b32 s4, s4, -4 -; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12-NEXT: v_mov_b32_e32 v4, s4 -; GFX12-NEXT: s_mov_b32 s4, 0 +; GFX12-NEXT: s_and_b32 s5, s4, -4 +; GFX12-NEXT: s_and_b32 s4, s4, 3 +; GFX12-NEXT: v_mov_b32_e32 v4, s5 +; GFX12-NEXT: s_lshl_b32 s4, s4, 3 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX12-NEXT: s_lshl_b32 s5, 0xffff, s4 +; GFX12-NEXT: s_not_b32 s6, s5 ; GFX12-NEXT: buffer_load_b32 v1, v4, s[0:3], 
null offen +; GFX12-NEXT: s_mov_b32 s5, 0 ; GFX12-NEXT: .LBB6_1: ; %atomicrmw.start ; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v0, s4, v1 ; GFX12-NEXT: s_wait_storecnt 0x0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-NEXT: v_max_num_f16_e32 v0, v0, v0 ; GFX12-NEXT: v_min_num_f16_e32 v0, v0, v5 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX12-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX12-NEXT: v_lshlrev_b32_e32 v0, s4, v0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX12-NEXT: v_and_or_b32 v0, 0xffffff, v1, v0 +; GFX12-NEXT: v_and_or_b32 v0, v1, s6, v0 ; GFX12-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v2, v0 ; GFX12-NEXT: buffer_atomic_cmpswap_b32 v[2:3], v4, s[0:3], null offen th:TH_ATOMIC_RETURN ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v1 ; GFX12-NEXT: v_mov_b32_e32 v1, v2 -; GFX12-NEXT: s_or_b32 s4, vcc_lo, s4 +; GFX12-NEXT: s_or_b32 s5, vcc_lo, s5 ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 +; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s5 ; GFX12-NEXT: s_cbranch_execnz .LBB6_1 ; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 -; GFX12-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; GFX12-NEXT: v_lshrrev_b32_e32 v0, s4, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: s_addk_i32 s4, 0x200 -; GFX940-NEXT: s_and_b32 s4, s4, -4 -; GFX940-NEXT: v_mov_b32_e32 v4, s4 +; GFX940-NEXT: s_and_b32 s5, s4, -4 +; GFX940-NEXT: v_mov_b32_e32 
v4, s5 ; GFX940-NEXT: buffer_load_dword v1, v4, s[0:3], 0 offen -; GFX940-NEXT: s_mov_b32 s6, 0xffffff +; GFX940-NEXT: s_and_b32 s4, s4, 3 +; GFX940-NEXT: s_lshl_b32 s6, s4, 3 +; GFX940-NEXT: s_lshl_b32 s4, 0xffff, s6 +; GFX940-NEXT: s_not_b32 s7, s4 ; GFX940-NEXT: s_mov_b64 s[4:5], 0 ; GFX940-NEXT: v_max_f16_e32 v5, v0, v0 ; GFX940-NEXT: .LBB6_1: ; %atomicrmw.start ; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f16_sdwa v0, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3 -; GFX940-NEXT: v_min_f16_sdwa v0, v0, v5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: v_and_or_b32 v0, v1, s6, v0 +; GFX940-NEXT: v_lshrrev_b32_e32 v0, s6, v1 +; GFX940-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX940-NEXT: v_min_f16_e32 v0, v0, v5 +; GFX940-NEXT: v_lshlrev_b32_e32 v0, s6, v0 +; GFX940-NEXT: v_and_or_b32 v0, v1, s7, v0 ; GFX940-NEXT: v_mov_b64_e32 v[2:3], v[0:1] +; GFX940-NEXT: buffer_wbl2 sc1 ; GFX940-NEXT: buffer_atomic_cmpswap v[2:3], v4, s[0:3], 0 offen sc0 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: buffer_inv sc1 @@ -2415,7 +2424,7 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset(ptr addrspace(7) i ; GFX940-NEXT: s_cbranch_execnz .LBB6_1 ; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX940-NEXT: v_lshrrev_b32_e32 v0, s6, v2 ; GFX940-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset: @@ -2423,25 +2432,29 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset(ptr addrspace(7) i ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_addk_i32 s4, 0x200 ; GFX11-NEXT: v_max_f16_e32 v5, v0, v0 -; GFX11-NEXT: s_and_b32 s4, s4, -4 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: v_mov_b32_e32 v4, s4 -; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: 
s_and_b32 s5, s4, -4 +; GFX11-NEXT: s_and_b32 s4, s4, 3 +; GFX11-NEXT: v_mov_b32_e32 v4, s5 +; GFX11-NEXT: s_lshl_b32 s4, s4, 3 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_lshl_b32 s5, 0xffff, s4 +; GFX11-NEXT: s_not_b32 s6, s5 ; GFX11-NEXT: buffer_load_b32 v1, v4, s[0:3], 0 offen +; GFX11-NEXT: s_mov_b32 s5, 0 ; GFX11-NEXT: .p2align 6 ; GFX11-NEXT: .LBB6_1: ; %atomicrmw.start ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX11-NEXT: v_lshrrev_b32_e32 v0, s4, v1 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_max_f16_e32 v0, v0, v0 ; GFX11-NEXT: v_min_f16_e32 v0, v0, v5 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v0, s4, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_and_or_b32 v0, 0xffffff, v1, v0 +; GFX11-NEXT: v_and_or_b32 v0, v1, s6, v0 ; GFX11-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v2, v0 ; GFX11-NEXT: buffer_atomic_cmpswap_b32 v[2:3], v4, s[0:3], 0 offen glc ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -2449,13 +2462,13 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset(ptr addrspace(7) i ; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v1 ; GFX11-NEXT: v_mov_b32_e32 v1, v2 -; GFX11-NEXT: s_or_b32 s4, vcc_lo, s4 +; GFX11-NEXT: s_or_b32 s5, vcc_lo, s5 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 +; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s5 ; GFX11-NEXT: s_cbranch_execnz .LBB6_1 ; GFX11-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s4 -; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; 
GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; GFX11-NEXT: v_lshrrev_b32_e32 v0, s4, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset: @@ -2463,19 +2476,23 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset(ptr addrspace(7) i ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_addk_i32 s8, 0x200 ; GFX10-NEXT: v_max_f16_e32 v5, v0, v0 -; GFX10-NEXT: s_and_b32 s8, s8, -4 -; GFX10-NEXT: v_mov_b32_e32 v4, s8 -; GFX10-NEXT: s_mov_b32 s8, 0 +; GFX10-NEXT: s_and_b32 s9, s8, -4 +; GFX10-NEXT: s_and_b32 s8, s8, 3 +; GFX10-NEXT: v_mov_b32_e32 v4, s9 +; GFX10-NEXT: s_lshl_b32 s8, s8, 3 +; GFX10-NEXT: s_lshl_b32 s9, 0xffff, s8 +; GFX10-NEXT: s_not_b32 s10, s9 ; GFX10-NEXT: buffer_load_dword v1, v4, s[4:7], 0 offen +; GFX10-NEXT: s_mov_b32 s9, 0 ; GFX10-NEXT: .LBB6_1: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_max_f16_sdwa v0, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3 +; GFX10-NEXT: v_lshrrev_b32_e32 v0, s8, v1 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_max_f16_e32 v0, v0, v0 ; GFX10-NEXT: v_min_f16_e32 v0, v0, v5 -; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX10-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX10-NEXT: v_and_or_b32 v0, 0xffffff, v1, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GFX10-NEXT: v_and_or_b32 v0, v1, s10, v0 ; GFX10-NEXT: v_mov_b32_e32 v3, v1 ; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; GFX10-NEXT: buffer_atomic_cmpswap v[2:3], v4, s[4:7], 0 offen glc @@ -2484,30 +2501,35 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset(ptr addrspace(7) i ; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v1 ; GFX10-NEXT: v_mov_b32_e32 v1, v2 -; GFX10-NEXT: s_or_b32 s8, vcc_lo, s8 -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s8 +; GFX10-NEXT: s_or_b32 s9, vcc_lo, s9 +; 
GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s9 ; GFX10-NEXT: s_cbranch_execnz .LBB6_1 ; GFX10-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8 -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s9 +; GFX10-NEXT: v_lshrrev_b32_e32 v0, s8, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: s_addk_i32 s8, 0x200 -; GFX90A-NEXT: s_and_b32 s8, s8, -4 -; GFX90A-NEXT: v_mov_b32_e32 v4, s8 +; GFX90A-NEXT: s_and_b32 s9, s8, -4 +; GFX90A-NEXT: v_mov_b32_e32 v4, s9 ; GFX90A-NEXT: buffer_load_dword v1, v4, s[4:7], 0 offen -; GFX90A-NEXT: s_mov_b32 s10, 0xffffff +; GFX90A-NEXT: s_and_b32 s8, s8, 3 +; GFX90A-NEXT: s_lshl_b32 s10, s8, 3 +; GFX90A-NEXT: s_lshl_b32 s8, 0xffff, s10 +; GFX90A-NEXT: s_not_b32 s11, s8 ; GFX90A-NEXT: s_mov_b64 s[8:9], 0 ; GFX90A-NEXT: v_max_f16_e32 v5, v0, v0 ; GFX90A-NEXT: .LBB6_1: ; %atomicrmw.start ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: v_max_f16_sdwa v0, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3 -; GFX90A-NEXT: v_min_f16_sdwa v0, v0, v5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX90A-NEXT: v_and_or_b32 v0, v1, s10, v0 +; GFX90A-NEXT: v_lshrrev_b32_e32 v0, s10, v1 +; GFX90A-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX90A-NEXT: v_min_f16_e32 v0, v0, v5 +; GFX90A-NEXT: v_lshlrev_b32_e32 v0, s10, v0 +; GFX90A-NEXT: v_and_or_b32 v0, v1, s11, v0 ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1] ; GFX90A-NEXT: buffer_atomic_cmpswap v[2:3], v4, s[4:7], 0 offen glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) @@ -2519,25 +2541,30 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset(ptr addrspace(7) i ; GFX90A-NEXT: s_cbranch_execnz .LBB6_1 ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_or_b64 exec, exec, s[8:9] -; 
GFX90A-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX90A-NEXT: v_lshrrev_b32_e32 v0, s10, v2 ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: s_addk_i32 s8, 0x200 -; GFX908-NEXT: s_and_b32 s8, s8, -4 -; GFX908-NEXT: v_mov_b32_e32 v4, s8 +; GFX908-NEXT: s_and_b32 s9, s8, -4 +; GFX908-NEXT: v_mov_b32_e32 v4, s9 ; GFX908-NEXT: buffer_load_dword v1, v4, s[4:7], 0 offen -; GFX908-NEXT: s_mov_b32 s10, 0xffffff +; GFX908-NEXT: s_and_b32 s8, s8, 3 +; GFX908-NEXT: s_lshl_b32 s10, s8, 3 +; GFX908-NEXT: s_lshl_b32 s8, 0xffff, s10 +; GFX908-NEXT: s_not_b32 s11, s8 ; GFX908-NEXT: s_mov_b64 s[8:9], 0 ; GFX908-NEXT: v_max_f16_e32 v5, v0, v0 ; GFX908-NEXT: .LBB6_1: ; %atomicrmw.start ; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_max_f16_sdwa v0, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3 -; GFX908-NEXT: v_min_f16_sdwa v0, v0, v5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX908-NEXT: v_and_or_b32 v0, v1, s10, v0 +; GFX908-NEXT: v_lshrrev_b32_e32 v0, s10, v1 +; GFX908-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX908-NEXT: v_min_f16_e32 v0, v0, v5 +; GFX908-NEXT: v_lshlrev_b32_e32 v0, s10, v0 +; GFX908-NEXT: v_and_or_b32 v0, v1, s11, v0 ; GFX908-NEXT: v_mov_b32_e32 v3, v1 ; GFX908-NEXT: v_mov_b32_e32 v2, v0 ; GFX908-NEXT: buffer_atomic_cmpswap v[2:3], v4, s[4:7], 0 offen glc @@ -2550,24 +2577,30 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset(ptr addrspace(7) i ; GFX908-NEXT: s_cbranch_execnz .LBB6_1 ; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX908-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX908-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX908-NEXT: v_lshrrev_b32_e32 v0, s10, v2 ; GFX908-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_addk_i32 s8, 0x200 -; GFX8-NEXT: s_and_b32 s8, s8, -4 -; GFX8-NEXT: v_mov_b32_e32 v4, s8 +; GFX8-NEXT: s_and_b32 s9, s8, -4 +; GFX8-NEXT: v_mov_b32_e32 v4, s9 ; GFX8-NEXT: buffer_load_dword v1, v4, s[4:7], 0 offen +; GFX8-NEXT: s_and_b32 s8, s8, 3 +; GFX8-NEXT: s_lshl_b32 s10, s8, 3 +; GFX8-NEXT: s_lshl_b32 s8, 0xffff, s10 +; GFX8-NEXT: s_not_b32 s11, s8 ; GFX8-NEXT: s_mov_b64 s[8:9], 0 ; GFX8-NEXT: v_max_f16_e32 v5, v0, v0 ; GFX8-NEXT: .LBB6_1: ; %atomicrmw.start ; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_max_f16_sdwa v0, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3 -; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v1 -; GFX8-NEXT: v_min_f16_sdwa v0, v0, v5 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-NEXT: v_lshrrev_b32_e32 v0, s10, v1 +; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-NEXT: v_min_f16_e32 v0, v0, v5 +; GFX8-NEXT: v_and_b32_e32 v2, s11, v1 +; GFX8-NEXT: v_lshlrev_b32_e32 v0, s10, v0 ; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX8-NEXT: v_mov_b32_e32 v3, v1 ; GFX8-NEXT: v_mov_b32_e32 v2, v0 @@ -2581,28 +2614,32 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset(ptr addrspace(7) i ; GFX8-NEXT: s_cbranch_execnz .LBB6_1 ; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX8-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, s10, v2 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: s_addk_i32 s8, 0x200 -; GFX7-NEXT: s_and_b32 s8, s8, -4 -; GFX7-NEXT: v_mov_b32_e32 v4, s8 +; GFX7-NEXT: s_and_b32 s9, s8, -4 +; GFX7-NEXT: v_mov_b32_e32 v4, s9 ; GFX7-NEXT: buffer_load_dword v1, v4, s[4:7], 0 offen ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: s_mov_b64 s[8:9], 0 +; GFX7-NEXT: s_and_b32 s8, s8, 3 +; GFX7-NEXT: s_lshl_b32 
s10, s8, 3 +; GFX7-NEXT: s_lshl_b32 s8, 0xffff, s10 ; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v0 +; GFX7-NEXT: s_not_b32 s11, s8 +; GFX7-NEXT: s_mov_b64 s[8:9], 0 ; GFX7-NEXT: .LBB6_1: ; %atomicrmw.start ; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, s10, v1 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_and_b32_e32 v2, 0xffffff, v1 +; GFX7-NEXT: v_and_b32_e32 v2, s11, v1 ; GFX7-NEXT: v_min_f32_e32 v0, v0, v5 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, s10, v0 ; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX7-NEXT: v_mov_b32_e32 v3, v1 ; GFX7-NEXT: v_mov_b32_e32 v2, v0 @@ -2616,7 +2653,7 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset(ptr addrspace(7) i ; GFX7-NEXT: s_cbranch_execnz .LBB6_1 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, s10, v2 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; @@ -2624,22 +2661,26 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset(ptr addrspace(7) i ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: s_addk_i32 s8, 0x200 -; GFX6-NEXT: s_and_b32 s8, s8, -4 -; GFX6-NEXT: v_mov_b32_e32 v4, s8 +; GFX6-NEXT: s_and_b32 s9, s8, -4 +; GFX6-NEXT: v_mov_b32_e32 v4, s9 ; GFX6-NEXT: buffer_load_dword v1, v4, s[4:7], 0 offen ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-NEXT: s_mov_b64 s[8:9], 0 +; GFX6-NEXT: s_and_b32 s8, s8, 3 +; GFX6-NEXT: s_lshl_b32 s10, s8, 3 +; GFX6-NEXT: s_lshl_b32 s8, 0xffff, s10 ; GFX6-NEXT: v_cvt_f32_f16_e32 v5, v0 +; GFX6-NEXT: s_not_b32 s11, s8 +; GFX6-NEXT: s_mov_b64 s[8:9], 0 ; GFX6-NEXT: .LBB6_1: ; %atomicrmw.start ; GFX6-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; 
GFX6-NEXT: v_lshrrev_b32_e32 v0, s10, v1 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v1 +; GFX6-NEXT: v_and_b32_e32 v2, s11, v1 ; GFX6-NEXT: v_min_f32_e32 v0, v0, v5 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, s10, v0 ; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX6-NEXT: v_mov_b32_e32 v3, v1 ; GFX6-NEXT: v_mov_b32_e32 v2, v0 @@ -2653,7 +2694,7 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset(ptr addrspace(7) i ; GFX6-NEXT: s_cbranch_execnz .LBB6_1 ; GFX6-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX6-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX6-NEXT: v_lshrrev_b32_e32 v0, s10, v2 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: s_waitcnt expcnt(0) ; GFX6-NEXT: s_setpc_b64 s[30:31] @@ -2672,56 +2713,65 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f16__offset(ptr addrspace(7) ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_addk_co_i32 s4, 0x200 ; GFX12-NEXT: v_max_num_f16_e32 v3, v0, v0 -; GFX12-NEXT: s_and_b32 s4, s4, -4 -; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12-NEXT: v_mov_b32_e32 v2, s4 -; GFX12-NEXT: s_mov_b32 s4, 0 +; GFX12-NEXT: s_and_b32 s5, s4, -4 +; GFX12-NEXT: s_and_b32 s4, s4, 3 +; GFX12-NEXT: v_mov_b32_e32 v2, s5 +; GFX12-NEXT: s_lshl_b32 s4, s4, 3 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX12-NEXT: s_lshl_b32 s5, 0xffff, s4 +; GFX12-NEXT: s_not_b32 s6, s5 ; GFX12-NEXT: buffer_load_b32 v1, v2, s[0:3], null offen +; GFX12-NEXT: s_mov_b32 s5, 0 ; GFX12-NEXT: .LBB7_1: ; %atomicrmw.start ; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v0, s4, v1 ; GFX12-NEXT: s_wait_storecnt 0x0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-NEXT: 
v_max_num_f16_e32 v0, v0, v0 ; GFX12-NEXT: v_min_num_f16_e32 v0, v0, v3 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX12-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX12-NEXT: v_lshlrev_b32_e32 v0, s4, v0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX12-NEXT: v_and_or_b32 v0, 0xffffff, v1, v0 +; GFX12-NEXT: v_and_or_b32 v0, v1, s6, v0 ; GFX12-NEXT: v_dual_mov_b32 v5, v1 :: v_dual_mov_b32 v4, v0 ; GFX12-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v2, s[0:3], null offen th:TH_ATOMIC_RETURN ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v1 ; GFX12-NEXT: v_mov_b32_e32 v1, v4 -; GFX12-NEXT: s_or_b32 s4, vcc_lo, s4 +; GFX12-NEXT: s_or_b32 s5, vcc_lo, s5 ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 +; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s5 ; GFX12-NEXT: s_cbranch_execnz .LBB7_1 ; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f16__offset: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: s_addk_i32 s4, 0x200 -; GFX940-NEXT: s_and_b32 s4, s4, -4 -; GFX940-NEXT: v_mov_b32_e32 v2, s4 +; GFX940-NEXT: s_and_b32 s5, s4, -4 +; GFX940-NEXT: v_mov_b32_e32 v2, s5 ; GFX940-NEXT: buffer_load_dword v1, v2, s[0:3], 0 offen -; GFX940-NEXT: s_mov_b32 s6, 0xffffff +; GFX940-NEXT: s_and_b32 s4, s4, 3 +; GFX940-NEXT: s_lshl_b32 s6, s4, 3 +; GFX940-NEXT: s_lshl_b32 s4, 0xffff, s6 +; GFX940-NEXT: s_not_b32 s7, s4 ; GFX940-NEXT: s_mov_b64 s[4:5], 0 ; GFX940-NEXT: v_max_f16_e32 v3, v0, v0 ; GFX940-NEXT: .LBB7_1: ; %atomicrmw.start ; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: 
v_max_f16_sdwa v0, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3 -; GFX940-NEXT: v_min_f16_sdwa v0, v0, v3 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: v_and_or_b32 v0, v1, s6, v0 +; GFX940-NEXT: v_lshrrev_b32_e32 v0, s6, v1 +; GFX940-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX940-NEXT: v_min_f16_e32 v0, v0, v3 +; GFX940-NEXT: v_lshlrev_b32_e32 v0, s6, v0 +; GFX940-NEXT: v_and_or_b32 v0, v1, s7, v0 ; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[0:1] +; GFX940-NEXT: buffer_wbl2 sc1 ; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0 ; GFX940-NEXT: s_waitcnt vmcnt(0) ; GFX940-NEXT: buffer_inv sc1 @@ -2739,25 +2789,29 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f16__offset(ptr addrspace(7) ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_addk_i32 s4, 0x200 ; GFX11-NEXT: v_max_f16_e32 v3, v0, v0 -; GFX11-NEXT: s_and_b32 s4, s4, -4 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: v_mov_b32_e32 v2, s4 -; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_and_b32 s5, s4, -4 +; GFX11-NEXT: s_and_b32 s4, s4, 3 +; GFX11-NEXT: v_mov_b32_e32 v2, s5 +; GFX11-NEXT: s_lshl_b32 s4, s4, 3 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_lshl_b32 s5, 0xffff, s4 +; GFX11-NEXT: s_not_b32 s6, s5 ; GFX11-NEXT: buffer_load_b32 v1, v2, s[0:3], 0 offen +; GFX11-NEXT: s_mov_b32 s5, 0 ; GFX11-NEXT: .p2align 6 ; GFX11-NEXT: .LBB7_1: ; %atomicrmw.start ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX11-NEXT: v_lshrrev_b32_e32 v0, s4, v1 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_max_f16_e32 v0, v0, v0 ; GFX11-NEXT: v_min_f16_e32 v0, v0, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | 
instid1(VALU_DEP_1) ; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX11-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v0, s4, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_and_or_b32 v0, 0xffffff, v1, v0 +; GFX11-NEXT: v_and_or_b32 v0, v1, s6, v0 ; GFX11-NEXT: v_dual_mov_b32 v5, v1 :: v_dual_mov_b32 v4, v0 ; GFX11-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v2, s[0:3], 0 offen glc ; GFX11-NEXT: s_waitcnt vmcnt(0) @@ -2765,12 +2819,12 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f16__offset(ptr addrspace(7) ; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v1 ; GFX11-NEXT: v_mov_b32_e32 v1, v4 -; GFX11-NEXT: s_or_b32 s4, vcc_lo, s4 +; GFX11-NEXT: s_or_b32 s5, vcc_lo, s5 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 +; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s5 ; GFX11-NEXT: s_cbranch_execnz .LBB7_1 ; GFX11-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s5 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f16__offset: @@ -2778,19 +2832,23 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f16__offset(ptr addrspace(7) ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_addk_i32 s8, 0x200 ; GFX10-NEXT: v_max_f16_e32 v3, v0, v0 -; GFX10-NEXT: s_and_b32 s8, s8, -4 -; GFX10-NEXT: v_mov_b32_e32 v2, s8 -; GFX10-NEXT: s_mov_b32 s8, 0 +; GFX10-NEXT: s_and_b32 s9, s8, -4 +; GFX10-NEXT: s_and_b32 s8, s8, 3 +; GFX10-NEXT: v_mov_b32_e32 v2, s9 +; GFX10-NEXT: s_lshl_b32 s8, s8, 3 +; GFX10-NEXT: s_lshl_b32 s9, 0xffff, s8 +; GFX10-NEXT: s_not_b32 s10, s9 ; GFX10-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen +; GFX10-NEXT: s_mov_b32 s9, 0 ; GFX10-NEXT: .LBB7_1: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: 
v_max_f16_sdwa v0, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3 +; GFX10-NEXT: v_lshrrev_b32_e32 v0, s8, v1 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: v_max_f16_e32 v0, v0, v0 ; GFX10-NEXT: v_min_f16_e32 v0, v0, v3 -; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX10-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX10-NEXT: v_and_or_b32 v0, 0xffffff, v1, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GFX10-NEXT: v_and_or_b32 v0, v1, s10, v0 ; GFX10-NEXT: v_mov_b32_e32 v5, v1 ; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; GFX10-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[4:7], 0 offen glc @@ -2799,29 +2857,34 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f16__offset(ptr addrspace(7) ; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v1 ; GFX10-NEXT: v_mov_b32_e32 v1, v4 -; GFX10-NEXT: s_or_b32 s8, vcc_lo, s8 -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s8 +; GFX10-NEXT: s_or_b32 s9, vcc_lo, s9 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s9 ; GFX10-NEXT: s_cbranch_execnz .LBB7_1 ; GFX10-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s9 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_f16__offset: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: s_addk_i32 s8, 0x200 -; GFX90A-NEXT: s_and_b32 s8, s8, -4 -; GFX90A-NEXT: v_mov_b32_e32 v2, s8 +; GFX90A-NEXT: s_and_b32 s9, s8, -4 +; GFX90A-NEXT: v_mov_b32_e32 v2, s9 ; GFX90A-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen -; GFX90A-NEXT: s_mov_b32 s10, 0xffffff +; GFX90A-NEXT: s_and_b32 s8, s8, 3 +; GFX90A-NEXT: s_lshl_b32 s10, s8, 3 +; GFX90A-NEXT: s_lshl_b32 s8, 0xffff, s10 +; GFX90A-NEXT: s_not_b32 s11, s8 ; GFX90A-NEXT: s_mov_b64 s[8:9], 0 ; GFX90A-NEXT: v_max_f16_e32 v3, v0, v0 ; GFX90A-NEXT: .LBB7_1: ; %atomicrmw.start ; GFX90A-NEXT: ; =>This 
Inner Loop Header: Depth=1 ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: v_max_f16_sdwa v0, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3 -; GFX90A-NEXT: v_min_f16_sdwa v0, v0, v3 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX90A-NEXT: v_and_or_b32 v0, v1, s10, v0 +; GFX90A-NEXT: v_lshrrev_b32_e32 v0, s10, v1 +; GFX90A-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX90A-NEXT: v_min_f16_e32 v0, v0, v3 +; GFX90A-NEXT: v_lshlrev_b32_e32 v0, s10, v0 +; GFX90A-NEXT: v_and_or_b32 v0, v1, s11, v0 ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[0:1] op_sel:[0,1] ; GFX90A-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[4:7], 0 offen glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) @@ -2839,18 +2902,23 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f16__offset(ptr addrspace(7) ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: s_addk_i32 s8, 0x200 -; GFX908-NEXT: s_and_b32 s8, s8, -4 -; GFX908-NEXT: v_mov_b32_e32 v2, s8 +; GFX908-NEXT: s_and_b32 s9, s8, -4 +; GFX908-NEXT: v_mov_b32_e32 v2, s9 ; GFX908-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen -; GFX908-NEXT: s_mov_b32 s10, 0xffffff +; GFX908-NEXT: s_and_b32 s8, s8, 3 +; GFX908-NEXT: s_lshl_b32 s10, s8, 3 +; GFX908-NEXT: s_lshl_b32 s8, 0xffff, s10 +; GFX908-NEXT: s_not_b32 s11, s8 ; GFX908-NEXT: s_mov_b64 s[8:9], 0 ; GFX908-NEXT: v_max_f16_e32 v3, v0, v0 ; GFX908-NEXT: .LBB7_1: ; %atomicrmw.start ; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_max_f16_sdwa v0, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3 -; GFX908-NEXT: v_min_f16_sdwa v0, v0, v3 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX908-NEXT: v_and_or_b32 v0, v1, s10, v0 +; GFX908-NEXT: v_lshrrev_b32_e32 v0, s10, v1 +; GFX908-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX908-NEXT: v_min_f16_e32 v0, v0, v3 +; GFX908-NEXT: v_lshlrev_b32_e32 v0, s10, v0 +; GFX908-NEXT: v_and_or_b32 v0, v1, 
s11, v0 ; GFX908-NEXT: v_mov_b32_e32 v5, v1 ; GFX908-NEXT: v_mov_b32_e32 v4, v0 ; GFX908-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[4:7], 0 offen glc @@ -2869,17 +2937,23 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f16__offset(ptr addrspace(7) ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_addk_i32 s8, 0x200 -; GFX8-NEXT: s_and_b32 s8, s8, -4 -; GFX8-NEXT: v_mov_b32_e32 v2, s8 +; GFX8-NEXT: s_and_b32 s9, s8, -4 +; GFX8-NEXT: v_mov_b32_e32 v2, s9 ; GFX8-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen +; GFX8-NEXT: s_and_b32 s8, s8, 3 +; GFX8-NEXT: s_lshl_b32 s10, s8, 3 +; GFX8-NEXT: s_lshl_b32 s8, 0xffff, s10 +; GFX8-NEXT: s_not_b32 s11, s8 ; GFX8-NEXT: s_mov_b64 s[8:9], 0 ; GFX8-NEXT: v_max_f16_e32 v3, v0, v0 ; GFX8-NEXT: .LBB7_1: ; %atomicrmw.start ; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_max_f16_sdwa v0, v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3 -; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v1 -; GFX8-NEXT: v_min_f16_sdwa v0, v0, v3 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-NEXT: v_lshrrev_b32_e32 v0, s10, v1 +; GFX8-NEXT: v_max_f16_e32 v0, v0, v0 +; GFX8-NEXT: v_min_f16_e32 v0, v0, v3 +; GFX8-NEXT: v_and_b32_e32 v4, s11, v1 +; GFX8-NEXT: v_lshlrev_b32_e32 v0, s10, v0 ; GFX8-NEXT: v_or_b32_e32 v0, v4, v0 ; GFX8-NEXT: v_mov_b32_e32 v5, v1 ; GFX8-NEXT: v_mov_b32_e32 v4, v0 @@ -2899,21 +2973,25 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f16__offset(ptr addrspace(7) ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: s_addk_i32 s8, 0x200 -; GFX7-NEXT: s_and_b32 s8, s8, -4 -; GFX7-NEXT: v_mov_b32_e32 v2, s8 +; GFX7-NEXT: s_and_b32 s9, s8, -4 +; GFX7-NEXT: v_mov_b32_e32 v2, s9 ; GFX7-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: s_mov_b64 s[8:9], 0 +; GFX7-NEXT: s_and_b32 s8, s8, 3 +; GFX7-NEXT: s_lshl_b32 s10, 
s8, 3 +; GFX7-NEXT: s_lshl_b32 s8, 0xffff, s10 ; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v0 +; GFX7-NEXT: s_not_b32 s11, s8 +; GFX7-NEXT: s_mov_b64 s[8:9], 0 ; GFX7-NEXT: .LBB7_1: ; %atomicrmw.start ; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, s10, v1 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 -; GFX7-NEXT: v_and_b32_e32 v4, 0xffffff, v1 +; GFX7-NEXT: v_and_b32_e32 v4, s11, v1 ; GFX7-NEXT: v_min_f32_e32 v0, v0, v3 ; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, s10, v0 ; GFX7-NEXT: v_or_b32_e32 v0, v4, v0 ; GFX7-NEXT: v_mov_b32_e32 v5, v1 ; GFX7-NEXT: v_mov_b32_e32 v4, v0 @@ -2933,22 +3011,26 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_f16__offset(ptr addrspace(7) ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: s_addk_i32 s8, 0x200 -; GFX6-NEXT: s_and_b32 s8, s8, -4 -; GFX6-NEXT: v_mov_b32_e32 v2, s8 +; GFX6-NEXT: s_and_b32 s9, s8, -4 +; GFX6-NEXT: v_mov_b32_e32 v2, s9 ; GFX6-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-NEXT: s_mov_b64 s[8:9], 0 +; GFX6-NEXT: s_and_b32 s8, s8, 3 +; GFX6-NEXT: s_lshl_b32 s10, s8, 3 +; GFX6-NEXT: s_lshl_b32 s8, 0xffff, s10 ; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v0 +; GFX6-NEXT: s_not_b32 s11, s8 +; GFX6-NEXT: s_mov_b64 s[8:9], 0 ; GFX6-NEXT: .LBB7_1: ; %atomicrmw.start ; GFX6-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v0, s10, v1 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v1 +; GFX6-NEXT: v_and_b32_e32 v4, s11, v1 ; GFX6-NEXT: v_min_f32_e32 v0, v0, v3 ; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX6-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, s10, v0 ; GFX6-NEXT: v_or_b32_e32 v0, 
v4, v0 ; GFX6-NEXT: v_mov_b32_e32 v5, v1 ; GFX6-NEXT: v_mov_b32_e32 v4, v0 @@ -2979,8 +3061,13 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall(ptr add ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 ; GFX12-NEXT: s_mov_b32 s1, exec_lo -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_and_b32_e32 v7, -4, v4 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX12-NEXT: v_and_b32_e32 v6, 3, v4 +; GFX12-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX12-NEXT: v_lshlrev_b32_e32 v7, 3, v6 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-NEXT: v_lshlrev_b32_e64 v6, v7, 0xffff +; GFX12-NEXT: v_not_b32_e32 v9, v6 ; GFX12-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX12-NEXT: v_readfirstlane_b32 s4, v0 ; GFX12-NEXT: v_readfirstlane_b32 s5, v1 @@ -2992,28 +3079,28 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall(ptr add ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX12-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX12-NEXT: s_and_saveexec_b32 s0, s0 -; GFX12-NEXT: buffer_load_b32 v6, v7, s[4:7], null offen +; GFX12-NEXT: buffer_load_b32 v6, v8, s[4:7], null offen ; GFX12-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_cbranch_execnz .LBB8_1 ; GFX12-NEXT: ; %bb.2: ; GFX12-NEXT: s_mov_b32 exec_lo, s1 -; GFX12-NEXT: v_max_num_f16_e32 v8, v5, v5 +; GFX12-NEXT: v_max_num_f16_e32 v10, v5, v5 ; GFX12-NEXT: s_mov_b32 s1, 0 ; GFX12-NEXT: .LBB8_3: ; %atomicrmw.start ; GFX12-NEXT: ; =>This Loop Header: Depth=1 ; GFX12-NEXT: ; Child Loop BB8_4 Depth 2 ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: v_lshrrev_b32_e32 v4, 24, v6 +; GFX12-NEXT: v_lshrrev_b32_e32 v4, v7, v6 ; GFX12-NEXT: s_mov_b32 s2, exec_lo ; GFX12-NEXT: s_wait_storecnt 0x0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-NEXT: v_max_num_f16_e32 v4, v4, v4 -; 
GFX12-NEXT: v_min_num_f16_e32 v4, v4, v8 +; GFX12-NEXT: v_min_num_f16_e32 v4, v4, v10 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-NEXT: v_and_b32_e32 v4, 0xffff, v4 -; GFX12-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX12-NEXT: v_lshlrev_b32_e32 v4, v7, v4 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX12-NEXT: v_and_or_b32 v5, 0xffffff, v6, v4 +; GFX12-NEXT: v_and_or_b32 v5, v6, v9, v4 ; GFX12-NEXT: v_mov_b32_e32 v4, v5 ; GFX12-NEXT: v_mov_b32_e32 v5, v6 ; GFX12-NEXT: .LBB8_4: ; Parent Loop BB8_3 Depth=1 @@ -3029,7 +3116,7 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall(ptr add ; GFX12-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX12-NEXT: s_and_saveexec_b32 s0, s0 ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v7, s[4:7], null offen th:TH_ATOMIC_RETURN +; GFX12-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[4:7], null offen th:TH_ATOMIC_RETURN ; GFX12-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_cbranch_execnz .LBB8_4 ; GFX12-NEXT: ; %bb.5: ; in Loop: Header=BB8_3 Depth=1 @@ -3044,15 +3131,19 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall(ptr add ; GFX12-NEXT: s_cbranch_execnz .LBB8_3 ; GFX12-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 -; GFX12-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX12-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: v_add_u32_e32 v4, 0x200, v4 -; GFX940-NEXT: v_and_b32_e32 v8, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v9, 0xffffff +; GFX940-NEXT: v_and_b32_e32 v9, -4, v4 +; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX940-NEXT: v_lshlrev_b32_e32 v8, 3, v4 +; GFX940-NEXT: s_mov_b32 s0, 0xffff +; GFX940-NEXT: v_lshlrev_b32_e64 v4, v8, s0 +; GFX940-NEXT: 
v_not_b32_e32 v10, v4 ; GFX940-NEXT: s_mov_b64 s[2:3], exec ; GFX940-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX940-NEXT: v_readfirstlane_b32 s4, v0 @@ -3064,21 +3155,23 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall(ptr add ; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] ; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] ; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: buffer_load_dword v7, v8, s[4:7], 0 offen +; GFX940-NEXT: buffer_load_dword v7, v9, s[4:7], 0 offen ; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] ; GFX940-NEXT: s_cbranch_execnz .LBB8_1 ; GFX940-NEXT: ; %bb.2: ; GFX940-NEXT: s_mov_b64 exec, s[2:3] ; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_max_f16_e32 v10, v5, v5 +; GFX940-NEXT: v_max_f16_e32 v11, v5, v5 ; GFX940-NEXT: .LBB8_3: ; %atomicrmw.start ; GFX940-NEXT: ; =>This Loop Header: Depth=1 ; GFX940-NEXT: ; Child Loop BB8_4 Depth 2 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_max_f16_sdwa v4, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3 -; GFX940-NEXT: v_min_f16_sdwa v4, v4, v10 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX940-NEXT: v_lshrrev_b32_e32 v4, v8, v7 +; GFX940-NEXT: v_max_f16_e32 v4, v4, v4 +; GFX940-NEXT: v_min_f16_e32 v4, v4, v11 +; GFX940-NEXT: v_lshlrev_b32_e32 v4, v8, v4 +; GFX940-NEXT: v_and_or_b32 v6, v7, v10, v4 ; GFX940-NEXT: s_mov_b64 s[8:9], exec -; GFX940-NEXT: v_and_or_b32 v6, v7, v9, v4 ; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[6:7] ; GFX940-NEXT: buffer_wbl2 sc1 ; GFX940-NEXT: .LBB8_4: ; Parent Loop BB8_3 Depth=1 @@ -3093,7 +3186,7 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall(ptr add ; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] ; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen sc0 +; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v9, s[4:7], 0 offen sc0 ; GFX940-NEXT: 
s_xor_b64 exec, exec, s[0:1] ; GFX940-NEXT: s_cbranch_execnz .LBB8_4 ; GFX940-NEXT: ; %bb.5: ; in Loop: Header=BB8_3 Depth=1 @@ -3107,7 +3200,7 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall(ptr add ; GFX940-NEXT: s_cbranch_execnz .LBB8_3 ; GFX940-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX940-NEXT: v_lshrrev_b32_e32 v0, v8, v4 ; GFX940-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall: @@ -3116,8 +3209,13 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall(ptr add ; GFX11-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 ; GFX11-NEXT: s_mov_b32 s1, 0 ; GFX11-NEXT: s_mov_b32 s2, exec_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_and_b32_e32 v7, -4, v4 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v6, 3, v4 +; GFX11-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX11-NEXT: v_lshlrev_b32_e32 v7, 3, v6 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e64 v6, v7, 0xffff +; GFX11-NEXT: v_not_b32_e32 v9, v6 ; GFX11-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: v_readfirstlane_b32 s4, v0 ; GFX11-NEXT: v_readfirstlane_b32 s5, v1 @@ -3129,28 +3227,28 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall(ptr add ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX11-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-NEXT: buffer_load_b32 v6, v7, s[4:7], 0 offen +; GFX11-NEXT: buffer_load_b32 v6, v8, s[4:7], 0 offen ; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX11-NEXT: s_cbranch_execnz .LBB8_1 ; GFX11-NEXT: ; %bb.2: ; GFX11-NEXT: s_mov_b32 exec_lo, s2 -; GFX11-NEXT: v_max_f16_e32 v8, v5, v5 +; GFX11-NEXT: v_max_f16_e32 v10, v5, v5 ; GFX11-NEXT: 
.p2align 6 ; GFX11-NEXT: .LBB8_3: ; %atomicrmw.start ; GFX11-NEXT: ; =>This Loop Header: Depth=1 ; GFX11-NEXT: ; Child Loop BB8_4 Depth 2 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_lshrrev_b32_e32 v4, 24, v6 +; GFX11-NEXT: v_lshrrev_b32_e32 v4, v7, v6 ; GFX11-NEXT: s_mov_b32 s2, exec_lo ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_max_f16_e32 v4, v4, v4 -; GFX11-NEXT: v_min_f16_e32 v4, v4, v8 +; GFX11-NEXT: v_min_f16_e32 v4, v4, v10 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_and_b32_e32 v4, 0xffff, v4 -; GFX11-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX11-NEXT: v_lshlrev_b32_e32 v4, v7, v4 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_and_or_b32 v5, 0xffffff, v6, v4 +; GFX11-NEXT: v_and_or_b32 v5, v6, v9, v4 ; GFX11-NEXT: v_mov_b32_e32 v4, v5 ; GFX11-NEXT: v_mov_b32_e32 v5, v6 ; GFX11-NEXT: .LBB8_4: ; Parent Loop BB8_3 Depth=1 @@ -3166,7 +3264,7 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall(ptr add ; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX11-NEXT: s_and_saveexec_b32 s0, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v7, s[4:7], 0 offen glc +; GFX11-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[4:7], 0 offen glc ; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX11-NEXT: s_cbranch_execnz .LBB8_4 ; GFX11-NEXT: ; %bb.5: ; in Loop: Header=BB8_3 Depth=1 @@ -3182,7 +3280,7 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall(ptr add ; GFX11-NEXT: s_cbranch_execnz .LBB8_3 ; GFX11-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s1 -; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX11-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall: @@ -3191,7 
+3289,11 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall(ptr add ; GFX10-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 ; GFX10-NEXT: s_mov_b32 s5, 0 ; GFX10-NEXT: s_mov_b32 s6, exec_lo -; GFX10-NEXT: v_and_b32_e32 v7, -4, v4 +; GFX10-NEXT: v_and_b32_e32 v6, 3, v4 +; GFX10-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX10-NEXT: v_lshlrev_b32_e32 v7, 3, v6 +; GFX10-NEXT: v_lshlrev_b32_e64 v6, v7, 0xffff +; GFX10-NEXT: v_not_b32_e32 v9, v6 ; GFX10-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_readfirstlane_b32 s8, v0 ; GFX10-NEXT: v_readfirstlane_b32 s9, v1 @@ -3201,24 +3303,24 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall(ptr add ; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] ; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 ; GFX10-NEXT: s_and_saveexec_b32 s4, s4 -; GFX10-NEXT: buffer_load_dword v6, v7, s[8:11], 0 offen +; GFX10-NEXT: buffer_load_dword v6, v8, s[8:11], 0 offen ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: s_cbranch_execnz .LBB8_1 ; GFX10-NEXT: ; %bb.2: ; GFX10-NEXT: s_mov_b32 exec_lo, s6 -; GFX10-NEXT: v_max_f16_e32 v8, v5, v5 +; GFX10-NEXT: v_max_f16_e32 v10, v5, v5 ; GFX10-NEXT: .LBB8_3: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Loop Header: Depth=1 ; GFX10-NEXT: ; Child Loop BB8_4 Depth 2 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_max_f16_sdwa v4, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3 +; GFX10-NEXT: v_lshrrev_b32_e32 v4, v7, v6 ; GFX10-NEXT: s_mov_b32 s6, exec_lo ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_min_f16_e32 v4, v4, v8 -; GFX10-NEXT: v_and_b32_e32 v4, 0xffff, v4 -; GFX10-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX10-NEXT: v_and_or_b32 v5, 0xffffff, v6, v4 +; GFX10-NEXT: v_max_f16_e32 v4, v4, v4 +; GFX10-NEXT: v_min_f16_e32 v4, v4, v10 +; GFX10-NEXT: v_lshlrev_b32_sdwa v4, v7, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GFX10-NEXT: v_and_or_b32 v5, v6, 
v9, v4 ; GFX10-NEXT: v_mov_b32_e32 v4, v5 ; GFX10-NEXT: v_mov_b32_e32 v5, v6 ; GFX10-NEXT: .LBB8_4: ; Parent Loop BB8_3 Depth=1 @@ -3232,7 +3334,7 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall(ptr add ; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 ; GFX10-NEXT: s_and_saveexec_b32 s4, s4 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_atomic_cmpswap v[4:5], v7, s[8:11], 0 offen glc +; GFX10-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: s_cbranch_execnz .LBB8_4 @@ -3249,15 +3351,19 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall(ptr add ; GFX10-NEXT: s_cbranch_execnz .LBB8_3 ; GFX10-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5 -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX10-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_add_u32_e32 v4, 0x200, v4 -; GFX90A-NEXT: v_and_b32_e32 v8, -4, v4 -; GFX90A-NEXT: v_mov_b32_e32 v9, 0xffffff +; GFX90A-NEXT: v_and_b32_e32 v9, -4, v4 +; GFX90A-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX90A-NEXT: v_lshlrev_b32_e32 v8, 3, v4 +; GFX90A-NEXT: s_mov_b32 s4, 0xffff +; GFX90A-NEXT: v_lshlrev_b32_e64 v4, v8, s4 +; GFX90A-NEXT: v_not_b32_e32 v10, v4 ; GFX90A-NEXT: s_mov_b64 s[6:7], exec ; GFX90A-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: v_readfirstlane_b32 s8, v0 @@ -3269,20 +3375,22 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall(ptr add ; GFX90A-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX90A-NEXT: s_nop 0 -; GFX90A-NEXT: buffer_load_dword v7, v8, s[8:11], 0 offen +; GFX90A-NEXT: buffer_load_dword v7, v9, s[8:11], 0 offen ; GFX90A-NEXT: s_xor_b64 exec, exec, 
s[4:5] ; GFX90A-NEXT: s_cbranch_execnz .LBB8_1 ; GFX90A-NEXT: ; %bb.2: ; GFX90A-NEXT: s_mov_b64 exec, s[6:7] ; GFX90A-NEXT: s_mov_b64 s[6:7], 0 -; GFX90A-NEXT: v_max_f16_e32 v10, v5, v5 +; GFX90A-NEXT: v_max_f16_e32 v11, v5, v5 ; GFX90A-NEXT: .LBB8_3: ; %atomicrmw.start ; GFX90A-NEXT: ; =>This Loop Header: Depth=1 ; GFX90A-NEXT: ; Child Loop BB8_4 Depth 2 ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: v_max_f16_sdwa v4, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3 -; GFX90A-NEXT: v_min_f16_sdwa v4, v4, v10 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX90A-NEXT: v_and_or_b32 v6, v7, v9, v4 +; GFX90A-NEXT: v_lshrrev_b32_e32 v4, v8, v7 +; GFX90A-NEXT: v_max_f16_e32 v4, v4, v4 +; GFX90A-NEXT: v_min_f16_e32 v4, v4, v11 +; GFX90A-NEXT: v_lshlrev_b32_e32 v4, v8, v4 +; GFX90A-NEXT: v_and_or_b32 v6, v7, v10, v4 ; GFX90A-NEXT: s_mov_b64 s[12:13], exec ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[6:7], v[6:7] op_sel:[0,1] ; GFX90A-NEXT: .LBB8_4: ; Parent Loop BB8_3 Depth=1 @@ -3296,7 +3404,7 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall(ptr add ; GFX90A-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc +; GFX90A-NEXT: buffer_atomic_cmpswap v[4:5], v9, s[8:11], 0 offen glc ; GFX90A-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX90A-NEXT: s_cbranch_execnz .LBB8_4 ; GFX90A-NEXT: ; %bb.5: ; in Loop: Header=BB8_3 Depth=1 @@ -3310,15 +3418,19 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall(ptr add ; GFX90A-NEXT: s_cbranch_execnz .LBB8_3 ; GFX90A-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX90A-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX90A-NEXT: v_lshrrev_b32_e32 v0, v8, v4 ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall: ; GFX908: ; %bb.0: ; 
GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: v_add_u32_e32 v4, 0x200, v4 -; GFX908-NEXT: v_and_b32_e32 v7, -4, v4 -; GFX908-NEXT: v_mov_b32_e32 v8, 0xffffff +; GFX908-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX908-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX908-NEXT: v_lshlrev_b32_e32 v7, 3, v4 +; GFX908-NEXT: s_mov_b32 s4, 0xffff +; GFX908-NEXT: v_lshlrev_b32_e64 v4, v7, s4 +; GFX908-NEXT: v_not_b32_e32 v9, v4 ; GFX908-NEXT: s_mov_b64 s[6:7], exec ; GFX908-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX908-NEXT: v_readfirstlane_b32 s8, v0 @@ -3330,20 +3442,22 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall(ptr add ; GFX908-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX908-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX908-NEXT: s_nop 0 -; GFX908-NEXT: buffer_load_dword v6, v7, s[8:11], 0 offen +; GFX908-NEXT: buffer_load_dword v6, v8, s[8:11], 0 offen ; GFX908-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX908-NEXT: s_cbranch_execnz .LBB8_1 ; GFX908-NEXT: ; %bb.2: ; GFX908-NEXT: s_mov_b64 exec, s[6:7] ; GFX908-NEXT: s_mov_b64 s[6:7], 0 -; GFX908-NEXT: v_max_f16_e32 v9, v5, v5 +; GFX908-NEXT: v_max_f16_e32 v10, v5, v5 ; GFX908-NEXT: .LBB8_3: ; %atomicrmw.start ; GFX908-NEXT: ; =>This Loop Header: Depth=1 ; GFX908-NEXT: ; Child Loop BB8_4 Depth 2 ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_max_f16_sdwa v4, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3 -; GFX908-NEXT: v_min_f16_sdwa v4, v4, v9 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX908-NEXT: v_and_or_b32 v5, v6, v8, v4 +; GFX908-NEXT: v_lshrrev_b32_e32 v4, v7, v6 +; GFX908-NEXT: v_max_f16_e32 v4, v4, v4 +; GFX908-NEXT: v_min_f16_e32 v4, v4, v10 +; GFX908-NEXT: v_lshlrev_b32_e32 v4, v7, v4 +; GFX908-NEXT: v_and_or_b32 v5, v6, v9, v4 ; GFX908-NEXT: v_mov_b32_e32 v4, v5 ; GFX908-NEXT: s_mov_b64 s[12:13], exec ; GFX908-NEXT: v_mov_b32_e32 v5, v6 @@ -3358,7 +3472,7 @@ define half 
@buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall(ptr add ; GFX908-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX908-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: buffer_atomic_cmpswap v[4:5], v7, s[8:11], 0 offen glc +; GFX908-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc ; GFX908-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX908-NEXT: s_cbranch_execnz .LBB8_4 ; GFX908-NEXT: ; %bb.5: ; in Loop: Header=BB8_3 Depth=1 @@ -3372,14 +3486,19 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall(ptr add ; GFX908-NEXT: s_cbranch_execnz .LBB8_3 ; GFX908-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX908-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX908-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX908-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX908-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0x200, v4 -; GFX8-NEXT: v_and_b32_e32 v7, -4, v4 +; GFX8-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX8-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v7, 3, v4 +; GFX8-NEXT: s_mov_b32 s4, 0xffff +; GFX8-NEXT: v_lshlrev_b32_e64 v4, v7, s4 +; GFX8-NEXT: v_not_b32_e32 v9, v4 ; GFX8-NEXT: s_mov_b64 s[6:7], exec ; GFX8-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX8-NEXT: v_readfirstlane_b32 s8, v0 @@ -3391,20 +3510,22 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall(ptr add ; GFX8-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX8-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX8-NEXT: s_nop 0 -; GFX8-NEXT: buffer_load_dword v6, v7, s[8:11], 0 offen +; GFX8-NEXT: buffer_load_dword v6, v8, s[8:11], 0 offen ; GFX8-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX8-NEXT: s_cbranch_execnz .LBB8_1 ; GFX8-NEXT: ; %bb.2: ; GFX8-NEXT: s_mov_b64 exec, s[6:7] ; GFX8-NEXT: s_mov_b64 s[6:7], 0 -; GFX8-NEXT: v_max_f16_e32 v8, v5, v5 +; GFX8-NEXT: v_max_f16_e32 
v10, v5, v5 ; GFX8-NEXT: .LBB8_3: ; %atomicrmw.start ; GFX8-NEXT: ; =>This Loop Header: Depth=1 ; GFX8-NEXT: ; Child Loop BB8_4 Depth 2 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_max_f16_sdwa v4, v6, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3 -; GFX8-NEXT: v_min_f16_sdwa v4, v4, v8 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_and_b32_e32 v5, 0xffffff, v6 +; GFX8-NEXT: v_lshrrev_b32_e32 v4, v7, v6 +; GFX8-NEXT: v_max_f16_e32 v4, v4, v4 +; GFX8-NEXT: v_min_f16_e32 v4, v4, v10 +; GFX8-NEXT: v_lshlrev_b32_e32 v4, v7, v4 +; GFX8-NEXT: v_and_b32_e32 v5, v6, v9 ; GFX8-NEXT: v_or_b32_e32 v5, v5, v4 ; GFX8-NEXT: v_mov_b32_e32 v4, v5 ; GFX8-NEXT: s_mov_b64 s[12:13], exec @@ -3420,7 +3541,7 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall(ptr add ; GFX8-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX8-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: buffer_atomic_cmpswap v[4:5], v7, s[8:11], 0 offen glc +; GFX8-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc ; GFX8-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX8-NEXT: s_cbranch_execnz .LBB8_4 ; GFX8-NEXT: ; %bb.5: ; in Loop: Header=BB8_3 Depth=1 @@ -3434,14 +3555,18 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall(ptr add ; GFX8-NEXT: s_cbranch_execnz .LBB8_3 ; GFX8-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX8-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_add_i32_e32 v4, vcc, 0x200, v4 -; GFX7-NEXT: v_and_b32_e32 v7, -4, v4 +; GFX7-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX7-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX7-NEXT: v_lshlrev_b32_e32 v7, 3, v4 +; GFX7-NEXT: v_lshl_b32_e32 v4, 0xffff, v7 +; GFX7-NEXT: 
v_not_b32_e32 v9, v4 ; GFX7-NEXT: s_mov_b64 s[6:7], exec ; GFX7-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX7-NEXT: v_readfirstlane_b32 s8, v0 @@ -3452,25 +3577,25 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall(ptr add ; GFX7-NEXT: v_cmp_eq_u64_e64 s[4:5], s[10:11], v[2:3] ; GFX7-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX7-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GFX7-NEXT: buffer_load_dword v6, v7, s[8:11], 0 offen +; GFX7-NEXT: buffer_load_dword v6, v8, s[8:11], 0 offen ; GFX7-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_cbranch_execnz .LBB8_1 ; GFX7-NEXT: ; %bb.2: ; GFX7-NEXT: s_mov_b64 exec, s[6:7] ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v5 ; GFX7-NEXT: s_mov_b64 s[6:7], 0 -; GFX7-NEXT: v_cvt_f32_f16_e32 v8, v4 +; GFX7-NEXT: v_cvt_f32_f16_e32 v10, v4 ; GFX7-NEXT: .LBB8_3: ; %atomicrmw.start ; GFX7-NEXT: ; =>This Loop Header: Depth=1 ; GFX7-NEXT: ; Child Loop BB8_4 Depth 2 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v4, 24, v6 +; GFX7-NEXT: v_lshrrev_b32_e32 v4, v7, v6 ; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 -; GFX7-NEXT: v_and_b32_e32 v5, 0xffffff, v6 +; GFX7-NEXT: v_and_b32_e32 v5, v6, v9 ; GFX7-NEXT: s_mov_b64 s[12:13], exec -; GFX7-NEXT: v_min_f32_e32 v4, v4, v8 +; GFX7-NEXT: v_min_f32_e32 v4, v4, v10 ; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, v7, v4 ; GFX7-NEXT: v_or_b32_e32 v5, v5, v4 ; GFX7-NEXT: v_mov_b32_e32 v4, v5 ; GFX7-NEXT: v_mov_b32_e32 v5, v6 @@ -3485,7 +3610,7 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall(ptr add ; GFX7-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX7-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: buffer_atomic_cmpswap v[4:5], v7, s[8:11], 0 offen glc +; GFX7-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc ; GFX7-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_cbranch_execnz .LBB8_4 ; GFX7-NEXT: ; %bb.5: ; in Loop: 
Header=BB8_3 Depth=1 @@ -3499,7 +3624,7 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall(ptr add ; GFX7-NEXT: s_cbranch_execnz .LBB8_3 ; GFX7-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; @@ -3507,7 +3632,11 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall(ptr add ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0x200, v4 -; GFX6-NEXT: v_and_b32_e32 v7, -4, v4 +; GFX6-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX6-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX6-NEXT: v_lshlrev_b32_e32 v7, 3, v4 +; GFX6-NEXT: v_lshl_b32_e32 v4, 0xffff, v7 +; GFX6-NEXT: v_not_b32_e32 v9, v4 ; GFX6-NEXT: s_mov_b64 s[6:7], exec ; GFX6-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX6-NEXT: v_readfirstlane_b32 s8, v0 @@ -3518,25 +3647,25 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall(ptr add ; GFX6-NEXT: v_cmp_eq_u64_e64 s[4:5], s[10:11], v[2:3] ; GFX6-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX6-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GFX6-NEXT: buffer_load_dword v6, v7, s[8:11], 0 offen +; GFX6-NEXT: buffer_load_dword v6, v8, s[8:11], 0 offen ; GFX6-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX6-NEXT: s_cbranch_execnz .LBB8_1 ; GFX6-NEXT: ; %bb.2: ; GFX6-NEXT: s_mov_b64 exec, s[6:7] ; GFX6-NEXT: v_cvt_f16_f32_e32 v4, v5 ; GFX6-NEXT: s_mov_b64 s[6:7], 0 -; GFX6-NEXT: v_cvt_f32_f16_e32 v8, v4 +; GFX6-NEXT: v_cvt_f32_f16_e32 v10, v4 ; GFX6-NEXT: .LBB8_3: ; %atomicrmw.start ; GFX6-NEXT: ; =>This Loop Header: Depth=1 ; GFX6-NEXT: ; Child Loop BB8_4 Depth 2 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) -; GFX6-NEXT: v_lshrrev_b32_e32 v4, 24, v6 +; GFX6-NEXT: v_lshrrev_b32_e32 v4, v7, v6 ; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v4 -; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v6 +; GFX6-NEXT: 
v_and_b32_e32 v5, v6, v9 ; GFX6-NEXT: s_mov_b64 s[12:13], exec -; GFX6-NEXT: v_min_f32_e32 v4, v4, v8 +; GFX6-NEXT: v_min_f32_e32 v4, v4, v10 ; GFX6-NEXT: v_cvt_f16_f32_e32 v4, v4 -; GFX6-NEXT: v_lshlrev_b32_e32 v4, 24, v4 +; GFX6-NEXT: v_lshlrev_b32_e32 v4, v7, v4 ; GFX6-NEXT: v_or_b32_e32 v5, v5, v4 ; GFX6-NEXT: v_mov_b32_e32 v4, v5 ; GFX6-NEXT: v_mov_b32_e32 v5, v6 @@ -3551,7 +3680,7 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall(ptr add ; GFX6-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX6-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[4:5], v7, s[8:11], 0 offen glc +; GFX6-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc ; GFX6-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX6-NEXT: s_cbranch_execnz .LBB8_4 ; GFX6-NEXT: ; %bb.5: ; in Loop: Header=BB8_3 Depth=1 @@ -3565,7 +3694,7 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall(ptr add ; GFX6-NEXT: s_cbranch_execnz .LBB8_3 ; GFX6-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX6-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX6-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 ; GFX6-NEXT: s_waitcnt expcnt(0) ; GFX6-NEXT: s_setpc_b64 s[30:31] @@ -3588,15 +3717,19 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset(ptr addrspace(7 ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_addk_co_i32 s4, 0x200 ; GFX12-NEXT: v_lshlrev_b32_e32 v5, 16, v0 -; GFX12-NEXT: s_and_b32 s4, s4, -4 -; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12-NEXT: v_mov_b32_e32 v4, s4 -; GFX12-NEXT: s_mov_b32 s4, 0 +; GFX12-NEXT: s_and_b32 s5, s4, -4 +; GFX12-NEXT: s_and_b32 s4, s4, 3 +; GFX12-NEXT: v_mov_b32_e32 v4, s5 +; GFX12-NEXT: s_lshl_b32 s4, s4, 3 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX12-NEXT: s_lshl_b32 s5, 0xffff, s4 +; GFX12-NEXT: s_not_b32 s6, s5 ; GFX12-NEXT: buffer_load_b32 v1, 
v4, s[0:3], null offen +; GFX12-NEXT: s_mov_b32 s5, 0 ; GFX12-NEXT: .LBB9_1: ; %atomicrmw.start ; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v0, s4, v1 ; GFX12-NEXT: s_wait_storecnt 0x0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-NEXT: v_lshlrev_b32_e32 v0, 16, v0 @@ -3610,8 +3743,8 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset(ptr addrspace(7 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo ; GFX12-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX12-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX12-NEXT: v_and_or_b32 v0, 0xffffff, v1, v0 +; GFX12-NEXT: v_lshlrev_b32_e32 v0, s4, v0 +; GFX12-NEXT: v_and_or_b32 v0, v1, s6, v0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v2, v0 ; GFX12-NEXT: buffer_atomic_cmpswap_b32 v[2:3], v4, s[0:3], null offen th:TH_ATOMIC_RETURN @@ -3619,42 +3752,43 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset(ptr addrspace(7 ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v1 ; GFX12-NEXT: v_mov_b32_e32 v1, v2 -; GFX12-NEXT: s_or_b32 s4, vcc_lo, s4 +; GFX12-NEXT: s_or_b32 s5, vcc_lo, s5 ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 +; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s5 ; GFX12-NEXT: s_cbranch_execnz .LBB9_1 ; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 -; GFX12-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; GFX12-NEXT: v_lshrrev_b32_e32 v0, s4, v2 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: 
s_addk_i32 s4, 0x200 -; GFX940-NEXT: s_and_b32 s4, s4, -4 -; GFX940-NEXT: v_mov_b32_e32 v4, s4 +; GFX940-NEXT: s_and_b32 s5, s4, -4 +; GFX940-NEXT: v_mov_b32_e32 v4, s5 ; GFX940-NEXT: buffer_load_dword v1, v4, s[0:3], 0 offen -; GFX940-NEXT: s_mov_b32 s6, 0xffffff +; GFX940-NEXT: s_and_b32 s4, s4, 3 +; GFX940-NEXT: s_lshl_b32 s6, s4, 3 +; GFX940-NEXT: s_lshl_b32 s4, 0xffff, s6 +; GFX940-NEXT: s_not_b32 s7, s4 ; GFX940-NEXT: s_mov_b64 s[4:5], 0 ; GFX940-NEXT: v_lshlrev_b32_e32 v5, 16, v0 -; GFX940-NEXT: s_movk_i32 s7, 0x7fff +; GFX940-NEXT: s_movk_i32 s8, 0x7fff ; GFX940-NEXT: .LBB9_1: ; %atomicrmw.start ; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 24, v1 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX940-NEXT: v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX940-NEXT: buffer_wbl2 sc1 ; GFX940-NEXT: v_min_f32_e32 v0, v0, v5 ; GFX940-NEXT: v_bfe_u32 v2, v0, 16, 1 ; GFX940-NEXT: v_or_b32_e32 v3, 0x400000, v0 -; GFX940-NEXT: v_add3_u32 v2, v2, v0, s7 +; GFX940-NEXT: v_add3_u32 v2, v2, v0, s8 ; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: s_nop 1 ; GFX940-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX940-NEXT: v_and_or_b32 v0, v1, s6, v0 +; GFX940-NEXT: v_lshlrev_b32_sdwa v0, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX940-NEXT: v_and_or_b32 v0, v1, s7, v0 ; GFX940-NEXT: v_mov_b64_e32 v[2:3], v[0:1] ; GFX940-NEXT: buffer_atomic_cmpswap v[2:3], v4, s[0:3], 0 offen sc0 ; GFX940-NEXT: s_waitcnt vmcnt(0) @@ -3666,7 +3800,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset(ptr addrspace(7 ; GFX940-NEXT: s_cbranch_execnz .LBB9_1 ; GFX940-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX940-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX940-NEXT: 
v_lshrrev_b32_e32 v0, 24, v2 +; GFX940-NEXT: v_lshrrev_b32_e32 v0, s6, v2 ; GFX940-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset: @@ -3674,16 +3808,20 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset(ptr addrspace(7 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_addk_i32 s4, 0x200 ; GFX11-NEXT: v_lshlrev_b32_e32 v5, 16, v0 -; GFX11-NEXT: s_and_b32 s4, s4, -4 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: v_mov_b32_e32 v4, s4 -; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_and_b32 s5, s4, -4 +; GFX11-NEXT: s_and_b32 s4, s4, 3 +; GFX11-NEXT: v_mov_b32_e32 v4, s5 +; GFX11-NEXT: s_lshl_b32 s4, s4, 3 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_lshl_b32 s5, 0xffff, s4 +; GFX11-NEXT: s_not_b32 s6, s5 ; GFX11-NEXT: buffer_load_b32 v1, v4, s[0:3], 0 offen +; GFX11-NEXT: s_mov_b32 s5, 0 ; GFX11-NEXT: .p2align 6 ; GFX11-NEXT: .LBB9_1: ; %atomicrmw.start ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX11-NEXT: v_lshrrev_b32_e32 v0, s4, v1 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0 @@ -3697,8 +3835,8 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset(ptr addrspace(7 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX11-NEXT: v_and_or_b32 v0, 0xffffff, v1, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v0, s4, v0 +; GFX11-NEXT: v_and_or_b32 v0, v1, s6, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v2, v0 ; GFX11-NEXT: buffer_atomic_cmpswap_b32 v[2:3], v4, s[0:3], 0 
offen glc @@ -3707,13 +3845,13 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset(ptr addrspace(7 ; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v1 ; GFX11-NEXT: v_mov_b32_e32 v1, v2 -; GFX11-NEXT: s_or_b32 s4, vcc_lo, s4 +; GFX11-NEXT: s_or_b32 s5, vcc_lo, s5 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 +; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s5 ; GFX11-NEXT: s_cbranch_execnz .LBB9_1 ; GFX11-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s4 -; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; GFX11-NEXT: v_lshrrev_b32_e32 v0, s4, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset: @@ -3721,25 +3859,27 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset(ptr addrspace(7 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_addk_i32 s8, 0x200 ; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v0 -; GFX10-NEXT: s_and_b32 s8, s8, -4 -; GFX10-NEXT: v_mov_b32_e32 v4, s8 -; GFX10-NEXT: s_mov_b32 s8, 0 +; GFX10-NEXT: s_and_b32 s9, s8, -4 +; GFX10-NEXT: s_and_b32 s8, s8, 3 +; GFX10-NEXT: v_mov_b32_e32 v4, s9 +; GFX10-NEXT: s_lshl_b32 s8, s8, 3 +; GFX10-NEXT: s_lshl_b32 s9, 0xffff, s8 +; GFX10-NEXT: s_not_b32 s10, s9 ; GFX10-NEXT: buffer_load_dword v1, v4, s[4:7], 0 offen +; GFX10-NEXT: s_mov_b32 s9, 0 ; GFX10-NEXT: .LBB9_1: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX10-NEXT: v_lshrrev_b32_sdwa v0, s8, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX10-NEXT: v_min_f32_e32 v0, v0, v5 ; GFX10-NEXT: v_bfe_u32 v2, v0, 16, 1 ; GFX10-NEXT: v_or_b32_e32 v3, 0x400000, v0 ; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 ; 
GFX10-NEXT: v_add3_u32 v2, v2, v0, 0x7fff ; GFX10-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX10-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX10-NEXT: v_and_or_b32 v0, 0xffffff, v1, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX10-NEXT: v_and_or_b32 v0, v1, s10, v0 ; GFX10-NEXT: v_mov_b32_e32 v3, v1 ; GFX10-NEXT: v_mov_b32_e32 v2, v0 ; GFX10-NEXT: buffer_atomic_cmpswap v[2:3], v4, s[4:7], 0 offen glc @@ -3748,39 +3888,40 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset(ptr addrspace(7 ; GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v2, v1 ; GFX10-NEXT: v_mov_b32_e32 v1, v2 -; GFX10-NEXT: s_or_b32 s8, vcc_lo, s8 -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s8 +; GFX10-NEXT: s_or_b32 s9, vcc_lo, s9 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s9 ; GFX10-NEXT: s_cbranch_execnz .LBB9_1 ; GFX10-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8 -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s9 +; GFX10-NEXT: v_lshrrev_b32_e32 v0, s8, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: s_addk_i32 s8, 0x200 -; GFX90A-NEXT: s_and_b32 s8, s8, -4 -; GFX90A-NEXT: v_mov_b32_e32 v4, s8 +; GFX90A-NEXT: s_and_b32 s9, s8, -4 +; GFX90A-NEXT: v_mov_b32_e32 v4, s9 ; GFX90A-NEXT: buffer_load_dword v1, v4, s[4:7], 0 offen -; GFX90A-NEXT: s_mov_b32 s10, 0xffffff +; GFX90A-NEXT: s_and_b32 s8, s8, 3 +; GFX90A-NEXT: s_lshl_b32 s10, s8, 3 +; GFX90A-NEXT: s_lshl_b32 s8, 0xffff, s10 +; GFX90A-NEXT: s_not_b32 s11, s8 ; GFX90A-NEXT: s_mov_b64 s[8:9], 0 ; GFX90A-NEXT: v_lshlrev_b32_e32 v5, 16, v0 -; GFX90A-NEXT: s_movk_i32 s11, 0x7fff +; GFX90A-NEXT: s_movk_i32 s12, 0x7fff ; GFX90A-NEXT: .LBB9_1: ; %atomicrmw.start ; GFX90A-NEXT: ; 
=>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 24, v1 -; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX90A-NEXT: v_lshrrev_b32_sdwa v0, s10, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX90A-NEXT: v_min_f32_e32 v0, v0, v5 ; GFX90A-NEXT: v_bfe_u32 v2, v0, 16, 1 ; GFX90A-NEXT: v_or_b32_e32 v3, 0x400000, v0 -; GFX90A-NEXT: v_add3_u32 v2, v2, v0, s11 +; GFX90A-NEXT: v_add3_u32 v2, v2, v0, s12 ; GFX90A-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 ; GFX90A-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc -; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX90A-NEXT: v_and_or_b32 v0, v1, s10, v0 +; GFX90A-NEXT: v_lshlrev_b32_sdwa v0, s10, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX90A-NEXT: v_and_or_b32 v0, v1, s11, v0 ; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1] ; GFX90A-NEXT: buffer_atomic_cmpswap v[2:3], v4, s[4:7], 0 offen glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) @@ -3792,34 +3933,35 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset(ptr addrspace(7 ; GFX90A-NEXT: s_cbranch_execnz .LBB9_1 ; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX90A-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX90A-NEXT: v_lshrrev_b32_e32 v0, s10, v2 ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: s_addk_i32 s8, 0x200 -; GFX908-NEXT: s_and_b32 s8, s8, -4 -; GFX908-NEXT: v_mov_b32_e32 v4, s8 +; GFX908-NEXT: s_and_b32 s9, s8, -4 +; GFX908-NEXT: v_mov_b32_e32 v4, s9 ; GFX908-NEXT: buffer_load_dword v1, v4, s[4:7], 0 offen -; GFX908-NEXT: s_mov_b32 s10, 0xffffff +; GFX908-NEXT: s_and_b32 s8, s8, 3 +; GFX908-NEXT: s_lshl_b32 s10, s8, 3 +; GFX908-NEXT: s_lshl_b32 s8, 0xffff, s10 +; GFX908-NEXT: s_not_b32 s11, s8 ; GFX908-NEXT: s_mov_b64 s[8:9], 
0 ; GFX908-NEXT: v_lshlrev_b32_e32 v5, 16, v0 -; GFX908-NEXT: s_movk_i32 s11, 0x7fff +; GFX908-NEXT: s_movk_i32 s12, 0x7fff ; GFX908-NEXT: .LBB9_1: ; %atomicrmw.start ; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_lshrrev_b32_e32 v0, 24, v1 -; GFX908-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX908-NEXT: v_lshrrev_b32_sdwa v0, s10, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX908-NEXT: v_min_f32_e32 v0, v0, v5 ; GFX908-NEXT: v_bfe_u32 v2, v0, 16, 1 ; GFX908-NEXT: v_or_b32_e32 v3, 0x400000, v0 -; GFX908-NEXT: v_add3_u32 v2, v2, v0, s11 +; GFX908-NEXT: v_add3_u32 v2, v2, v0, s12 ; GFX908-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 ; GFX908-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc -; GFX908-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX908-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX908-NEXT: v_and_or_b32 v0, v1, s10, v0 +; GFX908-NEXT: v_lshlrev_b32_sdwa v0, s10, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX908-NEXT: v_and_or_b32 v0, v1, s11, v0 ; GFX908-NEXT: v_mov_b32_e32 v3, v1 ; GFX908-NEXT: v_mov_b32_e32 v2, v0 ; GFX908-NEXT: buffer_atomic_cmpswap v[2:3], v4, s[4:7], 0 offen glc @@ -3832,33 +3974,36 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset(ptr addrspace(7 ; GFX908-NEXT: s_cbranch_execnz .LBB9_1 ; GFX908-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX908-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX908-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX908-NEXT: v_lshrrev_b32_e32 v0, s10, v2 ; GFX908-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_addk_i32 s8, 0x200 -; GFX8-NEXT: s_and_b32 s8, s8, -4 -; GFX8-NEXT: v_mov_b32_e32 v4, s8 +; GFX8-NEXT: s_and_b32 s9, s8, -4 +; GFX8-NEXT: v_mov_b32_e32 v4, s9 ; GFX8-NEXT: buffer_load_dword v1, v4, s[4:7], 0 offen +; GFX8-NEXT: s_and_b32 s8, s8, 3 +; GFX8-NEXT: s_lshl_b32 s10, s8, 3 +; GFX8-NEXT: 
s_lshl_b32 s8, 0xffff, s10 +; GFX8-NEXT: s_not_b32 s11, s8 ; GFX8-NEXT: s_mov_b64 s[8:9], 0 ; GFX8-NEXT: v_lshlrev_b32_e32 v5, 16, v0 ; GFX8-NEXT: .LBB9_1: ; %atomicrmw.start ; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX8-NEXT: v_mov_b32_e32 v0, s10 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v1 -; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX8-NEXT: v_min_f32_e32 v0, v0, v5 -; GFX8-NEXT: v_bfe_u32 v3, v0, 16, 1 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v0 -; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0x7fff, v3 -; GFX8-NEXT: v_or_b32_e32 v6, 0x400000, v0 -; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX8-NEXT: v_cndmask_b32_e32 v0, v3, v6, vcc -; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v1 -; GFX8-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX8-NEXT: v_lshrrev_b32_sdwa v3, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-NEXT: v_min_f32_e32 v3, v3, v5 +; GFX8-NEXT: v_bfe_u32 v6, v3, 16, 1 +; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v3 +; GFX8-NEXT: v_add_u32_e32 v6, vcc, 0x7fff, v6 +; GFX8-NEXT: v_or_b32_e32 v7, 0x400000, v3 +; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 +; GFX8-NEXT: v_cndmask_b32_e32 v3, v6, v7, vcc +; GFX8-NEXT: v_and_b32_e32 v2, s11, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX8-NEXT: v_mov_b32_e32 v3, v1 ; GFX8-NEXT: v_mov_b32_e32 v2, v0 @@ -3872,29 +4017,33 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset(ptr addrspace(7 ; GFX8-NEXT: s_cbranch_execnz .LBB9_1 ; GFX8-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX8-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, s10, v2 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: s_addk_i32 s8, 0x200 -; 
GFX7-NEXT: s_and_b32 s8, s8, -4 -; GFX7-NEXT: v_mov_b32_e32 v4, s8 +; GFX7-NEXT: s_and_b32 s9, s8, -4 +; GFX7-NEXT: v_mov_b32_e32 v4, s9 ; GFX7-NEXT: buffer_load_dword v1, v4, s[4:7], 0 offen +; GFX7-NEXT: s_and_b32 s8, s8, 3 +; GFX7-NEXT: s_lshl_b32 s10, s8, 3 +; GFX7-NEXT: s_lshl_b32 s8, 0xffff, s10 ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-NEXT: s_not_b32 s11, s8 ; GFX7-NEXT: s_mov_b64 s[8:9], 0 ; GFX7-NEXT: v_and_b32_e32 v5, 0xffff0000, v0 ; GFX7-NEXT: .LBB9_1: ; %atomicrmw.start ; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, s10, v1 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 ; GFX7-NEXT: v_min_f32_e32 v0, v0, v5 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX7-NEXT: v_and_b32_e32 v2, 0xffffff, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v2, s11, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, s10, v0 ; GFX7-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX7-NEXT: v_mov_b32_e32 v3, v1 ; GFX7-NEXT: v_mov_b32_e32 v2, v0 @@ -3908,7 +4057,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset(ptr addrspace(7 ; GFX7-NEXT: s_cbranch_execnz .LBB9_1 ; GFX7-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, s10, v2 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; @@ -3916,23 +4065,27 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset(ptr addrspace(7 ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: s_addk_i32 s8, 0x200 -; GFX6-NEXT: s_and_b32 s8, s8, -4 -; GFX6-NEXT: v_mov_b32_e32 v4, s8 +; GFX6-NEXT: s_and_b32 s9, s8, -4 +; GFX6-NEXT: v_mov_b32_e32 v4, s9 ; GFX6-NEXT: buffer_load_dword v1, v4, s[4:7], 0 offen +; GFX6-NEXT: s_and_b32 s8, s8, 3 +; GFX6-NEXT: s_lshl_b32 s10, s8, 3 +; GFX6-NEXT: s_lshl_b32 s8, 0xffff, 
s10 ; GFX6-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX6-NEXT: s_not_b32 s11, s8 ; GFX6-NEXT: s_mov_b64 s[8:9], 0 ; GFX6-NEXT: v_and_b32_e32 v5, 0xffff0000, v0 ; GFX6-NEXT: .LBB9_1: ; %atomicrmw.start ; GFX6-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v0, s10, v1 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX6-NEXT: v_mul_f32_e32 v0, 1.0, v0 ; GFX6-NEXT: v_min_f32_e32 v0, v0, v5 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v1 -; GFX6-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX6-NEXT: v_and_b32_e32 v2, s11, v1 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, s10, v0 ; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX6-NEXT: v_mov_b32_e32 v3, v1 ; GFX6-NEXT: v_mov_b32_e32 v2, v0 @@ -3946,7 +4099,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset(ptr addrspace(7 ; GFX6-NEXT: s_cbranch_execnz .LBB9_1 ; GFX6-NEXT: ; %bb.2: ; %atomicrmw.end ; GFX6-NEXT: s_or_b64 exec, exec, s[8:9] -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v2 +; GFX6-NEXT: v_lshrrev_b32_e32 v0, s10, v2 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX6-NEXT: s_waitcnt expcnt(0) ; GFX6-NEXT: s_setpc_b64 s[30:31] @@ -3965,15 +4118,19 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_bf16__offset(ptr addrspace(7 ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: s_addk_co_i32 s4, 0x200 ; GFX12-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX12-NEXT: s_and_b32 s4, s4, -4 -; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12-NEXT: v_mov_b32_e32 v2, s4 -; GFX12-NEXT: s_mov_b32 s4, 0 +; GFX12-NEXT: s_and_b32 s5, s4, -4 +; GFX12-NEXT: s_and_b32 s4, s4, 3 +; GFX12-NEXT: v_mov_b32_e32 v2, s5 +; GFX12-NEXT: s_lshl_b32 s4, s4, 3 +; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX12-NEXT: s_lshl_b32 s5, 0xffff, s4 +; GFX12-NEXT: s_not_b32 s6, s5 ; GFX12-NEXT: buffer_load_b32 v1, v2, s[0:3], null offen +; GFX12-NEXT: 
s_mov_b32 s5, 0 ; GFX12-NEXT: .LBB10_1: ; %atomicrmw.start ; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX12-NEXT: v_lshrrev_b32_e32 v0, s4, v1 ; GFX12-NEXT: s_wait_storecnt 0x0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-NEXT: v_lshlrev_b32_e32 v0, 16, v0 @@ -3987,8 +4144,8 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_bf16__offset(ptr addrspace(7 ; GFX12-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc_lo ; GFX12-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX12-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX12-NEXT: v_and_or_b32 v0, 0xffffff, v1, v0 +; GFX12-NEXT: v_lshlrev_b32_e32 v0, s4, v0 +; GFX12-NEXT: v_and_or_b32 v0, v1, s6, v0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_dual_mov_b32 v5, v1 :: v_dual_mov_b32 v4, v0 ; GFX12-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v2, s[0:3], null offen th:TH_ATOMIC_RETURN @@ -3996,41 +4153,42 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_bf16__offset(ptr addrspace(7 ; GFX12-NEXT: global_inv scope:SCOPE_DEV ; GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v1 ; GFX12-NEXT: v_mov_b32_e32 v1, v4 -; GFX12-NEXT: s_or_b32 s4, vcc_lo, s4 +; GFX12-NEXT: s_or_b32 s5, vcc_lo, s5 ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 +; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s5 ; GFX12-NEXT: s_cbranch_execnz .LBB10_1 ; GFX12-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 +; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_bf16__offset: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: s_addk_i32 s4, 0x200 -; GFX940-NEXT: s_and_b32 s4, s4, -4 -; GFX940-NEXT: v_mov_b32_e32 v2, s4 +; GFX940-NEXT: s_and_b32 s5, 
s4, -4 +; GFX940-NEXT: v_mov_b32_e32 v2, s5 ; GFX940-NEXT: buffer_load_dword v1, v2, s[0:3], 0 offen -; GFX940-NEXT: s_mov_b32 s6, 0xffffff +; GFX940-NEXT: s_and_b32 s4, s4, 3 +; GFX940-NEXT: s_lshl_b32 s6, s4, 3 +; GFX940-NEXT: s_lshl_b32 s4, 0xffff, s6 +; GFX940-NEXT: s_not_b32 s7, s4 ; GFX940-NEXT: s_mov_b64 s[4:5], 0 ; GFX940-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX940-NEXT: s_movk_i32 s7, 0x7fff +; GFX940-NEXT: s_movk_i32 s8, 0x7fff ; GFX940-NEXT: .LBB10_1: ; %atomicrmw.start ; GFX940-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 24, v1 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX940-NEXT: v_lshrrev_b32_sdwa v0, s6, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX940-NEXT: buffer_wbl2 sc1 ; GFX940-NEXT: v_min_f32_e32 v0, v0, v3 ; GFX940-NEXT: v_bfe_u32 v4, v0, 16, 1 ; GFX940-NEXT: v_or_b32_e32 v5, 0x400000, v0 -; GFX940-NEXT: v_add3_u32 v4, v4, v0, s7 +; GFX940-NEXT: v_add3_u32 v4, v4, v0, s8 ; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX940-NEXT: buffer_wbl2 sc1 -; GFX940-NEXT: s_nop 0 +; GFX940-NEXT: s_nop 1 ; GFX940-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX940-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX940-NEXT: v_and_or_b32 v0, v1, s6, v0 +; GFX940-NEXT: v_lshlrev_b32_sdwa v0, s6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX940-NEXT: v_and_or_b32 v0, v1, s7, v0 ; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[0:1] ; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[0:3], 0 offen sc0 ; GFX940-NEXT: s_waitcnt vmcnt(0) @@ -4049,16 +4207,20 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_bf16__offset(ptr addrspace(7 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: s_addk_i32 s4, 0x200 ; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX11-NEXT: s_and_b32 s4, s4, -4 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: v_mov_b32_e32 v2, s4 -; GFX11-NEXT: 
s_mov_b32 s4, 0 +; GFX11-NEXT: s_and_b32 s5, s4, -4 +; GFX11-NEXT: s_and_b32 s4, s4, 3 +; GFX11-NEXT: v_mov_b32_e32 v2, s5 +; GFX11-NEXT: s_lshl_b32 s4, s4, 3 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_lshl_b32 s5, 0xffff, s4 +; GFX11-NEXT: s_not_b32 s6, s5 ; GFX11-NEXT: buffer_load_b32 v1, v2, s[0:3], 0 offen +; GFX11-NEXT: s_mov_b32 s5, 0 ; GFX11-NEXT: .p2align 6 ; GFX11-NEXT: .LBB10_1: ; %atomicrmw.start ; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX11-NEXT: v_lshrrev_b32_e32 v0, s4, v1 ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_lshlrev_b32_e32 v0, 16, v0 @@ -4072,8 +4234,8 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_bf16__offset(ptr addrspace(7 ; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc_lo ; GFX11-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX11-NEXT: v_and_or_b32 v0, 0xffffff, v1, v0 +; GFX11-NEXT: v_lshlrev_b32_e32 v0, s4, v0 +; GFX11-NEXT: v_and_or_b32 v0, v1, s6, v0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_dual_mov_b32 v5, v1 :: v_dual_mov_b32 v4, v0 ; GFX11-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v2, s[0:3], 0 offen glc @@ -4082,12 +4244,12 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_bf16__offset(ptr addrspace(7 ; GFX11-NEXT: buffer_gl0_inv ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v1 ; GFX11-NEXT: v_mov_b32_e32 v1, v4 -; GFX11-NEXT: s_or_b32 s4, vcc_lo, s4 +; GFX11-NEXT: s_or_b32 s5, vcc_lo, s5 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 +; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s5 ; GFX11-NEXT: s_cbranch_execnz .LBB10_1 ; GFX11-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX11-NEXT: 
s_or_b32 exec_lo, exec_lo, s4 +; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s5 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_bf16__offset: @@ -4095,25 +4257,27 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_bf16__offset(ptr addrspace(7 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_addk_i32 s8, 0x200 ; GFX10-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX10-NEXT: s_and_b32 s8, s8, -4 -; GFX10-NEXT: v_mov_b32_e32 v2, s8 -; GFX10-NEXT: s_mov_b32 s8, 0 +; GFX10-NEXT: s_and_b32 s9, s8, -4 +; GFX10-NEXT: s_and_b32 s8, s8, 3 +; GFX10-NEXT: v_mov_b32_e32 v2, s9 +; GFX10-NEXT: s_lshl_b32 s8, s8, 3 +; GFX10-NEXT: s_lshl_b32 s9, 0xffff, s8 +; GFX10-NEXT: s_not_b32 s10, s9 ; GFX10-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen +; GFX10-NEXT: s_mov_b32 s9, 0 ; GFX10-NEXT: .LBB10_1: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX10-NEXT: v_lshrrev_b32_sdwa v0, s8, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX10-NEXT: v_min_f32_e32 v0, v0, v3 ; GFX10-NEXT: v_bfe_u32 v4, v0, 16, 1 ; GFX10-NEXT: v_or_b32_e32 v5, 0x400000, v0 ; GFX10-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 ; GFX10-NEXT: v_add3_u32 v4, v4, v0, 0x7fff ; GFX10-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc_lo -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX10-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX10-NEXT: v_and_or_b32 v0, 0xffffff, v1, v0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v0, s8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX10-NEXT: v_and_or_b32 v0, v1, s10, v0 ; GFX10-NEXT: v_mov_b32_e32 v5, v1 ; GFX10-NEXT: v_mov_b32_e32 v4, v0 ; GFX10-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[4:7], 0 offen glc @@ -4122,38 +4286,39 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_bf16__offset(ptr addrspace(7 ; 
GFX10-NEXT: buffer_gl0_inv ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v1 ; GFX10-NEXT: v_mov_b32_e32 v1, v4 -; GFX10-NEXT: s_or_b32 s8, vcc_lo, s8 -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s8 +; GFX10-NEXT: s_or_b32 s9, vcc_lo, s9 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s9 ; GFX10-NEXT: s_cbranch_execnz .LBB10_1 ; GFX10-NEXT: ; %bb.2: ; %atomicrmw.end -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s9 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: buffer_fat_ptr_agent_atomic_fmin_noret_bf16__offset: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: s_addk_i32 s8, 0x200 -; GFX90A-NEXT: s_and_b32 s8, s8, -4 -; GFX90A-NEXT: v_mov_b32_e32 v2, s8 +; GFX90A-NEXT: s_and_b32 s9, s8, -4 +; GFX90A-NEXT: v_mov_b32_e32 v2, s9 ; GFX90A-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen -; GFX90A-NEXT: s_mov_b32 s10, 0xffffff +; GFX90A-NEXT: s_and_b32 s8, s8, 3 +; GFX90A-NEXT: s_lshl_b32 s10, s8, 3 +; GFX90A-NEXT: s_lshl_b32 s8, 0xffff, s10 +; GFX90A-NEXT: s_not_b32 s11, s8 ; GFX90A-NEXT: s_mov_b64 s[8:9], 0 ; GFX90A-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX90A-NEXT: s_movk_i32 s11, 0x7fff +; GFX90A-NEXT: s_movk_i32 s12, 0x7fff ; GFX90A-NEXT: .LBB10_1: ; %atomicrmw.start ; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 24, v1 -; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX90A-NEXT: v_lshrrev_b32_sdwa v0, s10, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX90A-NEXT: v_min_f32_e32 v0, v0, v3 ; GFX90A-NEXT: v_bfe_u32 v4, v0, 16, 1 ; GFX90A-NEXT: v_or_b32_e32 v5, 0x400000, v0 -; GFX90A-NEXT: v_add3_u32 v4, v4, v0, s11 +; GFX90A-NEXT: v_add3_u32 v4, v4, v0, s12 ; GFX90A-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 ; GFX90A-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc -; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX90A-NEXT: v_and_or_b32 v0, v1, s10, v0 +; 
GFX90A-NEXT: v_lshlrev_b32_sdwa v0, s10, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX90A-NEXT: v_and_or_b32 v0, v1, s11, v0 ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[0:1], v[0:1] op_sel:[0,1] ; GFX90A-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[4:7], 0 offen glc ; GFX90A-NEXT: s_waitcnt vmcnt(0) @@ -4171,27 +4336,28 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_bf16__offset(ptr addrspace(7 ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: s_addk_i32 s8, 0x200 -; GFX908-NEXT: s_and_b32 s8, s8, -4 -; GFX908-NEXT: v_mov_b32_e32 v2, s8 +; GFX908-NEXT: s_and_b32 s9, s8, -4 +; GFX908-NEXT: v_mov_b32_e32 v2, s9 ; GFX908-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen -; GFX908-NEXT: s_mov_b32 s10, 0xffffff +; GFX908-NEXT: s_and_b32 s8, s8, 3 +; GFX908-NEXT: s_lshl_b32 s10, s8, 3 +; GFX908-NEXT: s_lshl_b32 s8, 0xffff, s10 +; GFX908-NEXT: s_not_b32 s11, s8 ; GFX908-NEXT: s_mov_b64 s[8:9], 0 ; GFX908-NEXT: v_lshlrev_b32_e32 v3, 16, v0 -; GFX908-NEXT: s_movk_i32 s11, 0x7fff +; GFX908-NEXT: s_movk_i32 s12, 0x7fff ; GFX908-NEXT: .LBB10_1: ; %atomicrmw.start ; GFX908-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_lshrrev_b32_e32 v0, 24, v1 -; GFX908-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX908-NEXT: v_lshrrev_b32_sdwa v0, s10, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX908-NEXT: v_min_f32_e32 v0, v0, v3 ; GFX908-NEXT: v_bfe_u32 v4, v0, 16, 1 ; GFX908-NEXT: v_or_b32_e32 v5, 0x400000, v0 -; GFX908-NEXT: v_add3_u32 v4, v4, v0, s11 +; GFX908-NEXT: v_add3_u32 v4, v4, v0, s12 ; GFX908-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 ; GFX908-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc -; GFX908-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX908-NEXT: v_lshlrev_b32_e32 v0, 24, v0 -; GFX908-NEXT: v_and_or_b32 v0, v1, s10, v0 +; GFX908-NEXT: v_lshlrev_b32_sdwa v0, s10, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX908-NEXT: 
v_and_or_b32 v0, v1, s11, v0 ; GFX908-NEXT: v_mov_b32_e32 v5, v1 ; GFX908-NEXT: v_mov_b32_e32 v4, v0 ; GFX908-NEXT: buffer_atomic_cmpswap v[4:5], v2, s[4:7], 0 offen glc @@ -4210,26 +4376,29 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_bf16__offset(ptr addrspace(7 ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_addk_i32 s8, 0x200 -; GFX8-NEXT: s_and_b32 s8, s8, -4 -; GFX8-NEXT: v_mov_b32_e32 v2, s8 +; GFX8-NEXT: s_and_b32 s9, s8, -4 +; GFX8-NEXT: v_mov_b32_e32 v2, s9 ; GFX8-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen +; GFX8-NEXT: s_and_b32 s8, s8, 3 +; GFX8-NEXT: s_lshl_b32 s10, s8, 3 +; GFX8-NEXT: s_lshl_b32 s8, 0xffff, s10 +; GFX8-NEXT: s_not_b32 s11, s8 ; GFX8-NEXT: s_mov_b64 s[8:9], 0 ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 16, v0 ; GFX8-NEXT: .LBB10_1: ; %atomicrmw.start ; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1 +; GFX8-NEXT: v_mov_b32_e32 v0, s10 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v1 -; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX8-NEXT: v_min_f32_e32 v0, v0, v3 -; GFX8-NEXT: v_bfe_u32 v5, v0, 16, 1 -; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v0 -; GFX8-NEXT: v_add_u32_e32 v5, vcc, 0x7fff, v5 -; GFX8-NEXT: v_or_b32_e32 v6, 0x400000, v0 -; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 -; GFX8-NEXT: v_cndmask_b32_e32 v0, v5, v6, vcc -; GFX8-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v1 -; GFX8-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX8-NEXT: v_lshrrev_b32_sdwa v5, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-NEXT: v_min_f32_e32 v5, v5, v3 +; GFX8-NEXT: v_bfe_u32 v6, v5, 16, 1 +; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v5 +; GFX8-NEXT: v_add_u32_e32 v6, vcc, 0x7fff, v6 +; GFX8-NEXT: v_or_b32_e32 v7, 0x400000, v5 +; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 +; GFX8-NEXT: v_cndmask_b32_e32 v5, v6, v7, vcc +; GFX8-NEXT: v_and_b32_e32 v4, s11, v1 +; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v5 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX8-NEXT: v_or_b32_e32 v0, v4, v0 ; GFX8-NEXT: v_mov_b32_e32 v5, v1 ; GFX8-NEXT: v_mov_b32_e32 v4, v0 @@ -4249,22 +4418,26 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_bf16__offset(ptr addrspace(7 ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: s_addk_i32 s8, 0x200 -; GFX7-NEXT: s_and_b32 s8, s8, -4 -; GFX7-NEXT: v_mov_b32_e32 v2, s8 +; GFX7-NEXT: s_and_b32 s9, s8, -4 +; GFX7-NEXT: v_mov_b32_e32 v2, s9 ; GFX7-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen +; GFX7-NEXT: s_and_b32 s8, s8, 3 +; GFX7-NEXT: s_lshl_b32 s10, s8, 3 +; GFX7-NEXT: s_lshl_b32 s8, 0xffff, s10 ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 +; GFX7-NEXT: s_not_b32 s11, s8 ; GFX7-NEXT: s_mov_b64 s[8:9], 0 ; GFX7-NEXT: v_and_b32_e32 v3, 0xffff0000, v0 ; GFX7-NEXT: .LBB10_1: ; %atomicrmw.start ; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, s10, v1 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0 ; GFX7-NEXT: v_min_f32_e32 v0, v0, v3 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 16, v0 -; GFX7-NEXT: v_and_b32_e32 v4, 0xffffff, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX7-NEXT: v_and_b32_e32 v4, s11, v1 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, s10, v0 ; GFX7-NEXT: v_or_b32_e32 v0, v4, v0 ; GFX7-NEXT: v_mov_b32_e32 v5, v1 ; GFX7-NEXT: v_mov_b32_e32 v4, v0 @@ -4284,23 +4457,27 @@ define void @buffer_fat_ptr_agent_atomic_fmin_noret_bf16__offset(ptr addrspace(7 ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: s_addk_i32 s8, 0x200 -; GFX6-NEXT: s_and_b32 s8, s8, -4 -; GFX6-NEXT: v_mov_b32_e32 v2, s8 +; GFX6-NEXT: s_and_b32 s9, s8, -4 +; GFX6-NEXT: v_mov_b32_e32 v2, s9 ; GFX6-NEXT: buffer_load_dword v1, v2, s[4:7], 0 offen +; GFX6-NEXT: s_and_b32 s8, s8, 3 +; GFX6-NEXT: s_lshl_b32 s10, s8, 3 +; GFX6-NEXT: s_lshl_b32 s8, 0xffff, s10 ; GFX6-NEXT: 
v_mul_f32_e32 v0, 1.0, v0 +; GFX6-NEXT: s_not_b32 s11, s8 ; GFX6-NEXT: s_mov_b64 s[8:9], 0 ; GFX6-NEXT: v_and_b32_e32 v3, 0xffff0000, v0 ; GFX6-NEXT: .LBB10_1: ; %atomicrmw.start ; GFX6-NEXT: ; =>This Inner Loop Header: Depth=1 ; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v1 +; GFX6-NEXT: v_lshrrev_b32_e32 v0, s10, v1 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX6-NEXT: v_mul_f32_e32 v0, 1.0, v0 ; GFX6-NEXT: v_min_f32_e32 v0, v0, v3 ; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0 ; GFX6-NEXT: s_waitcnt expcnt(0) -; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v1 -; GFX6-NEXT: v_lshlrev_b32_e32 v0, 24, v0 +; GFX6-NEXT: v_and_b32_e32 v4, s11, v1 +; GFX6-NEXT: v_lshlrev_b32_e32 v0, s10, v0 ; GFX6-NEXT: v_or_b32_e32 v0, v4, v0 ; GFX6-NEXT: v_mov_b32_e32 v5, v1 ; GFX6-NEXT: v_mov_b32_e32 v4, v0 @@ -4331,8 +4508,13 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall(ptr ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 ; GFX12-NEXT: s_mov_b32 s1, exec_lo -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX12-NEXT: v_and_b32_e32 v7, -4, v4 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX12-NEXT: v_and_b32_e32 v6, 3, v4 +; GFX12-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX12-NEXT: v_lshlrev_b32_e32 v7, 3, v6 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-NEXT: v_lshlrev_b32_e64 v6, v7, 0xffff +; GFX12-NEXT: v_not_b32_e32 v9, v6 ; GFX12-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 ; GFX12-NEXT: v_readfirstlane_b32 s4, v0 ; GFX12-NEXT: v_readfirstlane_b32 s5, v1 @@ -4344,34 +4526,34 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall(ptr ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX12-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX12-NEXT: s_and_saveexec_b32 s0, s0 -; GFX12-NEXT: buffer_load_b32 v6, v7, s[4:7], null offen +; GFX12-NEXT: 
buffer_load_b32 v6, v8, s[4:7], null offen ; GFX12-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_cbranch_execnz .LBB11_1 ; GFX12-NEXT: ; %bb.2: ; GFX12-NEXT: s_mov_b32 exec_lo, s1 -; GFX12-NEXT: v_lshlrev_b32_e32 v8, 16, v5 +; GFX12-NEXT: v_lshlrev_b32_e32 v10, 16, v5 ; GFX12-NEXT: s_mov_b32 s1, 0 ; GFX12-NEXT: .LBB11_3: ; %atomicrmw.start ; GFX12-NEXT: ; =>This Loop Header: Depth=1 ; GFX12-NEXT: ; Child Loop BB11_4 Depth 2 ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: v_lshrrev_b32_e32 v4, 24, v6 +; GFX12-NEXT: v_lshrrev_b32_e32 v4, v7, v6 ; GFX12-NEXT: s_mov_b32 s2, exec_lo ; GFX12-NEXT: s_wait_storecnt 0x0 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX12-NEXT: v_min_num_f32_e32 v4, v4, v8 +; GFX12-NEXT: v_min_num_f32_e32 v4, v4, v10 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) ; GFX12-NEXT: v_bfe_u32 v5, v4, 16, 1 -; GFX12-NEXT: v_or_b32_e32 v9, 0x400000, v4 +; GFX12-NEXT: v_or_b32_e32 v11, 0x400000, v4 ; GFX12-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4 ; GFX12-NEXT: v_add3_u32 v5, v5, v4, 0x7fff ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX12-NEXT: v_cndmask_b32_e32 v4, v5, v9, vcc_lo +; GFX12-NEXT: v_cndmask_b32_e32 v4, v5, v11, vcc_lo ; GFX12-NEXT: v_lshrrev_b32_e32 v4, 16, v4 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX12-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX12-NEXT: v_and_or_b32 v5, 0xffffff, v6, v4 +; GFX12-NEXT: v_lshlrev_b32_e32 v4, v7, v4 +; GFX12-NEXT: v_and_or_b32 v5, v6, v9, v4 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_mov_b32_e32 v4, v5 ; GFX12-NEXT: v_mov_b32_e32 v5, v6 @@ -4388,7 +4570,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall(ptr ; GFX12-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX12-NEXT: s_and_saveexec_b32 s0, s0 ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: 
buffer_atomic_cmpswap_b32 v[4:5], v7, s[4:7], null offen th:TH_ATOMIC_RETURN +; GFX12-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[4:7], null offen th:TH_ATOMIC_RETURN ; GFX12-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_cbranch_execnz .LBB11_4 ; GFX12-NEXT: ; %bb.5: ; in Loop: Header=BB11_3 Depth=1 @@ -4403,15 +4585,19 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall(ptr ; GFX12-NEXT: s_cbranch_execnz .LBB11_3 ; GFX12-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 -; GFX12-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX12-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall: ; GFX940: ; %bb.0: ; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX940-NEXT: v_add_u32_e32 v4, 0x200, v4 -; GFX940-NEXT: v_and_b32_e32 v8, -4, v4 -; GFX940-NEXT: v_mov_b32_e32 v9, 0xffffff +; GFX940-NEXT: v_and_b32_e32 v9, -4, v4 +; GFX940-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX940-NEXT: v_lshlrev_b32_e32 v8, 3, v4 +; GFX940-NEXT: s_mov_b32 s0, 0xffff +; GFX940-NEXT: v_lshlrev_b32_e64 v4, v8, s0 +; GFX940-NEXT: v_not_b32_e32 v10, v4 ; GFX940-NEXT: s_mov_b64 s[2:3], exec ; GFX940-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 ; GFX940-NEXT: v_readfirstlane_b32 s4, v0 @@ -4423,31 +4609,30 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall(ptr ; GFX940-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] ; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] ; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX940-NEXT: buffer_load_dword v7, v8, s[4:7], 0 offen +; GFX940-NEXT: buffer_load_dword v7, v9, s[4:7], 0 offen ; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] ; GFX940-NEXT: s_cbranch_execnz .LBB11_1 ; GFX940-NEXT: ; %bb.2: ; GFX940-NEXT: s_mov_b64 exec, s[2:3] ; GFX940-NEXT: s_mov_b64 s[2:3], 0 -; GFX940-NEXT: v_lshlrev_b32_e32 v10, 16, v5 +; GFX940-NEXT: v_lshlrev_b32_e32 v11, 16, v5 ; GFX940-NEXT: s_movk_i32 
s10, 0x7fff ; GFX940-NEXT: .LBB11_3: ; %atomicrmw.start ; GFX940-NEXT: ; =>This Loop Header: Depth=1 ; GFX940-NEXT: ; Child Loop BB11_4 Depth 2 ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: v_lshrrev_b32_e32 v4, 24, v7 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX940-NEXT: v_min_f32_e32 v4, v4, v10 +; GFX940-NEXT: v_lshrrev_b32_sdwa v4, v8, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX940-NEXT: s_mov_b64 s[8:9], exec +; GFX940-NEXT: v_min_f32_e32 v4, v4, v11 ; GFX940-NEXT: v_bfe_u32 v5, v4, 16, 1 ; GFX940-NEXT: v_add3_u32 v5, v5, v4, s10 ; GFX940-NEXT: v_or_b32_e32 v6, 0x400000, v4 ; GFX940-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 -; GFX940-NEXT: s_mov_b64 s[8:9], exec ; GFX940-NEXT: buffer_wbl2 sc1 +; GFX940-NEXT: s_nop 0 ; GFX940-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc -; GFX940-NEXT: v_lshrrev_b32_e32 v4, 16, v4 -; GFX940-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX940-NEXT: v_and_or_b32 v6, v7, v9, v4 +; GFX940-NEXT: v_lshlrev_b32_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX940-NEXT: v_and_or_b32 v6, v7, v10, v4 ; GFX940-NEXT: v_mov_b64_e32 v[4:5], v[6:7] ; GFX940-NEXT: .LBB11_4: ; Parent Loop BB11_3 Depth=1 ; GFX940-NEXT: ; => This Inner Loop Header: Depth=2 @@ -4461,7 +4646,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall(ptr ; GFX940-NEXT: s_and_b64 s[0:1], vcc, s[0:1] ; GFX940-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] ; GFX940-NEXT: s_waitcnt vmcnt(0) -; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen sc0 +; GFX940-NEXT: buffer_atomic_cmpswap v[4:5], v9, s[4:7], 0 offen sc0 ; GFX940-NEXT: s_xor_b64 exec, exec, s[0:1] ; GFX940-NEXT: s_cbranch_execnz .LBB11_4 ; GFX940-NEXT: ; %bb.5: ; in Loop: Header=BB11_3 Depth=1 @@ -4475,7 +4660,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall(ptr ; GFX940-NEXT: s_cbranch_execnz .LBB11_3 ; GFX940-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX940-NEXT: s_or_b64 exec, exec, s[2:3] -; 
GFX940-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX940-NEXT: v_lshrrev_b32_e32 v0, v8, v4 ; GFX940-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall: @@ -4484,8 +4669,13 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall(ptr ; GFX11-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 ; GFX11-NEXT: s_mov_b32 s1, 0 ; GFX11-NEXT: s_mov_b32 s2, exec_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: v_and_b32_e32 v7, -4, v4 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_and_b32_e32 v6, 3, v4 +; GFX11-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX11-NEXT: v_lshlrev_b32_e32 v7, 3, v6 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_lshlrev_b32_e64 v6, v7, 0xffff +; GFX11-NEXT: v_not_b32_e32 v9, v6 ; GFX11-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: v_readfirstlane_b32 s4, v0 ; GFX11-NEXT: v_readfirstlane_b32 s5, v1 @@ -4497,35 +4687,35 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall(ptr ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX11-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-NEXT: buffer_load_b32 v6, v7, s[4:7], 0 offen +; GFX11-NEXT: buffer_load_b32 v6, v8, s[4:7], 0 offen ; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX11-NEXT: s_cbranch_execnz .LBB11_1 ; GFX11-NEXT: ; %bb.2: ; GFX11-NEXT: s_mov_b32 exec_lo, s2 -; GFX11-NEXT: v_lshlrev_b32_e32 v8, 16, v5 +; GFX11-NEXT: v_lshlrev_b32_e32 v10, 16, v5 ; GFX11-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX11-NEXT: .p2align 6 ; GFX11-NEXT: .LBB11_3: ; %atomicrmw.start ; GFX11-NEXT: ; =>This Loop Header: Depth=1 ; GFX11-NEXT: ; Child Loop BB11_4 Depth 2 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: v_lshrrev_b32_e32 v4, 24, v6 +; GFX11-NEXT: v_lshrrev_b32_e32 v4, v7, v6 ; GFX11-NEXT: s_mov_b32 s2, exec_lo ; 
GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX11-NEXT: v_min_f32_e32 v4, v4, v8 +; GFX11-NEXT: v_min_f32_e32 v4, v4, v10 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) ; GFX11-NEXT: v_bfe_u32 v5, v4, 16, 1 -; GFX11-NEXT: v_or_b32_e32 v9, 0x400000, v4 +; GFX11-NEXT: v_or_b32_e32 v11, 0x400000, v4 ; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4 ; GFX11-NEXT: v_add3_u32 v5, v5, v4, 0x7fff ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_cndmask_b32_e32 v4, v5, v9, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e32 v4, v5, v11, vcc_lo ; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v4 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX11-NEXT: v_and_or_b32 v5, 0xffffff, v6, v4 +; GFX11-NEXT: v_lshlrev_b32_e32 v4, v7, v4 +; GFX11-NEXT: v_and_or_b32 v5, v6, v9, v4 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_mov_b32_e32 v4, v5 ; GFX11-NEXT: v_mov_b32_e32 v5, v6 @@ -4542,7 +4732,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall(ptr ; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 ; GFX11-NEXT: s_and_saveexec_b32 s0, s0 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v7, s[4:7], 0 offen glc +; GFX11-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[4:7], 0 offen glc ; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX11-NEXT: s_cbranch_execnz .LBB11_4 ; GFX11-NEXT: ; %bb.5: ; in Loop: Header=BB11_3 Depth=1 @@ -4559,7 +4749,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall(ptr ; GFX11-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX11-NEXT: s_set_inst_prefetch_distance 0x2 ; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s1 -; GFX11-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX11-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; 
GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall: @@ -4568,7 +4758,11 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall(ptr ; GFX10-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 ; GFX10-NEXT: s_mov_b32 s5, 0 ; GFX10-NEXT: s_mov_b32 s6, exec_lo -; GFX10-NEXT: v_and_b32_e32 v7, -4, v4 +; GFX10-NEXT: v_and_b32_e32 v6, 3, v4 +; GFX10-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX10-NEXT: v_lshlrev_b32_e32 v7, 3, v6 +; GFX10-NEXT: v_lshlrev_b32_e64 v6, v7, 0xffff +; GFX10-NEXT: v_not_b32_e32 v9, v6 ; GFX10-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 ; GFX10-NEXT: v_readfirstlane_b32 s8, v0 ; GFX10-NEXT: v_readfirstlane_b32 s9, v1 @@ -4578,30 +4772,28 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall(ptr ; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] ; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 ; GFX10-NEXT: s_and_saveexec_b32 s4, s4 -; GFX10-NEXT: buffer_load_dword v6, v7, s[8:11], 0 offen +; GFX10-NEXT: buffer_load_dword v6, v8, s[8:11], 0 offen ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: s_cbranch_execnz .LBB11_1 ; GFX10-NEXT: ; %bb.2: ; GFX10-NEXT: s_mov_b32 exec_lo, s6 -; GFX10-NEXT: v_lshlrev_b32_e32 v8, 16, v5 +; GFX10-NEXT: v_lshlrev_b32_e32 v10, 16, v5 ; GFX10-NEXT: .LBB11_3: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Loop Header: Depth=1 ; GFX10-NEXT: ; Child Loop BB11_4 Depth 2 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v4, 24, v6 +; GFX10-NEXT: v_lshrrev_b32_sdwa v4, v7, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX10-NEXT: s_mov_b32 s6, exec_lo ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX10-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX10-NEXT: v_min_f32_e32 v4, v4, v8 +; GFX10-NEXT: v_min_f32_e32 v4, v4, v10 ; GFX10-NEXT: v_bfe_u32 v5, v4, 16, 1 -; GFX10-NEXT: v_or_b32_e32 v9, 0x400000, v4 +; GFX10-NEXT: v_or_b32_e32 v11, 0x400000, v4 ; GFX10-NEXT: 
v_cmp_u_f32_e32 vcc_lo, v4, v4 ; GFX10-NEXT: v_add3_u32 v5, v5, v4, 0x7fff -; GFX10-NEXT: v_cndmask_b32_e32 v4, v5, v9, vcc_lo -; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v4 -; GFX10-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX10-NEXT: v_and_or_b32 v5, 0xffffff, v6, v4 +; GFX10-NEXT: v_cndmask_b32_e32 v4, v5, v11, vcc_lo +; GFX10-NEXT: v_lshlrev_b32_sdwa v4, v7, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX10-NEXT: v_and_or_b32 v5, v6, v9, v4 ; GFX10-NEXT: v_mov_b32_e32 v4, v5 ; GFX10-NEXT: v_mov_b32_e32 v5, v6 ; GFX10-NEXT: .LBB11_4: ; Parent Loop BB11_3 Depth=1 @@ -4615,7 +4807,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall(ptr ; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 ; GFX10-NEXT: s_and_saveexec_b32 s4, s4 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_atomic_cmpswap v[4:5], v7, s[8:11], 0 offen glc +; GFX10-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc ; GFX10-NEXT: s_waitcnt_depctr 0xffe3 ; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: s_cbranch_execnz .LBB11_4 @@ -4632,15 +4824,19 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall(ptr ; GFX10-NEXT: s_cbranch_execnz .LBB11_3 ; GFX10-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5 -; GFX10-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX10-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall: ; GFX90A: ; %bb.0: ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX90A-NEXT: v_add_u32_e32 v4, 0x200, v4 -; GFX90A-NEXT: v_and_b32_e32 v8, -4, v4 -; GFX90A-NEXT: v_mov_b32_e32 v9, 0xffffff +; GFX90A-NEXT: v_and_b32_e32 v9, -4, v4 +; GFX90A-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX90A-NEXT: v_lshlrev_b32_e32 v8, 3, v4 +; GFX90A-NEXT: s_mov_b32 s4, 0xffff +; GFX90A-NEXT: v_lshlrev_b32_e64 v4, v8, s4 +; GFX90A-NEXT: v_not_b32_e32 v10, v4 ; GFX90A-NEXT: s_mov_b64 s[6:7], exec ; 
GFX90A-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 ; GFX90A-NEXT: v_readfirstlane_b32 s8, v0 @@ -4652,29 +4848,27 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall(ptr ; GFX90A-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX90A-NEXT: s_nop 0 -; GFX90A-NEXT: buffer_load_dword v7, v8, s[8:11], 0 offen +; GFX90A-NEXT: buffer_load_dword v7, v9, s[8:11], 0 offen ; GFX90A-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX90A-NEXT: s_cbranch_execnz .LBB11_1 ; GFX90A-NEXT: ; %bb.2: ; GFX90A-NEXT: s_mov_b64 exec, s[6:7] ; GFX90A-NEXT: s_mov_b64 s[6:7], 0 -; GFX90A-NEXT: v_lshlrev_b32_e32 v10, 16, v5 +; GFX90A-NEXT: v_lshlrev_b32_e32 v11, 16, v5 ; GFX90A-NEXT: s_movk_i32 s14, 0x7fff ; GFX90A-NEXT: .LBB11_3: ; %atomicrmw.start ; GFX90A-NEXT: ; =>This Loop Header: Depth=1 ; GFX90A-NEXT: ; Child Loop BB11_4 Depth 2 ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: v_lshrrev_b32_e32 v4, 24, v7 -; GFX90A-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX90A-NEXT: v_min_f32_e32 v4, v4, v10 +; GFX90A-NEXT: v_lshrrev_b32_sdwa v4, v8, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX90A-NEXT: v_min_f32_e32 v4, v4, v11 ; GFX90A-NEXT: v_bfe_u32 v5, v4, 16, 1 ; GFX90A-NEXT: v_add3_u32 v5, v5, v4, s14 ; GFX90A-NEXT: v_or_b32_e32 v6, 0x400000, v4 ; GFX90A-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 ; GFX90A-NEXT: v_cndmask_b32_e32 v4, v5, v6, vcc -; GFX90A-NEXT: v_lshrrev_b32_e32 v4, 16, v4 -; GFX90A-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX90A-NEXT: v_and_or_b32 v6, v7, v9, v4 +; GFX90A-NEXT: v_lshlrev_b32_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX90A-NEXT: v_and_or_b32 v6, v7, v10, v4 ; GFX90A-NEXT: s_mov_b64 s[12:13], exec ; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[6:7], v[6:7] op_sel:[0,1] ; GFX90A-NEXT: .LBB11_4: ; Parent Loop BB11_3 Depth=1 @@ -4688,7 +4882,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall(ptr ; GFX90A-NEXT: s_and_b64 
s[4:5], vcc, s[4:5] ; GFX90A-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX90A-NEXT: s_waitcnt vmcnt(0) -; GFX90A-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc +; GFX90A-NEXT: buffer_atomic_cmpswap v[4:5], v9, s[8:11], 0 offen glc ; GFX90A-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX90A-NEXT: s_cbranch_execnz .LBB11_4 ; GFX90A-NEXT: ; %bb.5: ; in Loop: Header=BB11_3 Depth=1 @@ -4702,15 +4896,19 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall(ptr ; GFX90A-NEXT: s_cbranch_execnz .LBB11_3 ; GFX90A-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX90A-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX90A-NEXT: v_lshrrev_b32_e32 v0, v8, v4 ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; ; GFX908-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall: ; GFX908: ; %bb.0: ; GFX908-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX908-NEXT: v_add_u32_e32 v4, 0x200, v4 -; GFX908-NEXT: v_and_b32_e32 v7, -4, v4 -; GFX908-NEXT: v_mov_b32_e32 v8, 0xffffff +; GFX908-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX908-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX908-NEXT: v_lshlrev_b32_e32 v7, 3, v4 +; GFX908-NEXT: s_mov_b32 s4, 0xffff +; GFX908-NEXT: v_lshlrev_b32_e64 v4, v7, s4 +; GFX908-NEXT: v_not_b32_e32 v9, v4 ; GFX908-NEXT: s_mov_b64 s[6:7], exec ; GFX908-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 ; GFX908-NEXT: v_readfirstlane_b32 s8, v0 @@ -4722,29 +4920,27 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall(ptr ; GFX908-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX908-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX908-NEXT: s_nop 0 -; GFX908-NEXT: buffer_load_dword v6, v7, s[8:11], 0 offen +; GFX908-NEXT: buffer_load_dword v6, v8, s[8:11], 0 offen ; GFX908-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX908-NEXT: s_cbranch_execnz .LBB11_1 ; GFX908-NEXT: ; %bb.2: ; GFX908-NEXT: s_mov_b64 exec, s[6:7] ; GFX908-NEXT: s_mov_b64 s[6:7], 0 -; GFX908-NEXT: v_lshlrev_b32_e32 v9, 16, v5 +; GFX908-NEXT: 
v_lshlrev_b32_e32 v10, 16, v5 ; GFX908-NEXT: s_movk_i32 s14, 0x7fff ; GFX908-NEXT: .LBB11_3: ; %atomicrmw.start ; GFX908-NEXT: ; =>This Loop Header: Depth=1 ; GFX908-NEXT: ; Child Loop BB11_4 Depth 2 ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: v_lshrrev_b32_e32 v4, 24, v6 -; GFX908-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX908-NEXT: v_min_f32_e32 v4, v4, v9 +; GFX908-NEXT: v_lshrrev_b32_sdwa v4, v7, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX908-NEXT: v_min_f32_e32 v4, v4, v10 ; GFX908-NEXT: v_bfe_u32 v5, v4, 16, 1 ; GFX908-NEXT: v_add3_u32 v5, v5, v4, s14 -; GFX908-NEXT: v_or_b32_e32 v10, 0x400000, v4 +; GFX908-NEXT: v_or_b32_e32 v11, 0x400000, v4 ; GFX908-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 -; GFX908-NEXT: v_cndmask_b32_e32 v4, v5, v10, vcc -; GFX908-NEXT: v_lshrrev_b32_e32 v4, 16, v4 -; GFX908-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX908-NEXT: v_and_or_b32 v5, v6, v8, v4 +; GFX908-NEXT: v_cndmask_b32_e32 v4, v5, v11, vcc +; GFX908-NEXT: v_lshlrev_b32_sdwa v4, v7, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX908-NEXT: v_and_or_b32 v5, v6, v9, v4 ; GFX908-NEXT: v_mov_b32_e32 v4, v5 ; GFX908-NEXT: s_mov_b64 s[12:13], exec ; GFX908-NEXT: v_mov_b32_e32 v5, v6 @@ -4759,7 +4955,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall(ptr ; GFX908-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX908-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX908-NEXT: s_waitcnt vmcnt(0) -; GFX908-NEXT: buffer_atomic_cmpswap v[4:5], v7, s[8:11], 0 offen glc +; GFX908-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc ; GFX908-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX908-NEXT: s_cbranch_execnz .LBB11_4 ; GFX908-NEXT: ; %bb.5: ; in Loop: Header=BB11_3 Depth=1 @@ -4773,14 +4969,19 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall(ptr ; GFX908-NEXT: s_cbranch_execnz .LBB11_3 ; GFX908-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX908-NEXT: s_or_b64 exec, exec, s[6:7] -; 
GFX908-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX908-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX908-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0x200, v4 -; GFX8-NEXT: v_and_b32_e32 v7, -4, v4 +; GFX8-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX8-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v7, 3, v4 +; GFX8-NEXT: s_mov_b32 s4, 0xffff +; GFX8-NEXT: v_lshlrev_b32_e64 v4, v7, s4 +; GFX8-NEXT: v_not_b32_e32 v9, v4 ; GFX8-NEXT: s_mov_b64 s[6:7], exec ; GFX8-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 ; GFX8-NEXT: v_readfirstlane_b32 s8, v0 @@ -4792,29 +4993,27 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall(ptr ; GFX8-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX8-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX8-NEXT: s_nop 0 -; GFX8-NEXT: buffer_load_dword v6, v7, s[8:11], 0 offen +; GFX8-NEXT: buffer_load_dword v6, v8, s[8:11], 0 offen ; GFX8-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX8-NEXT: s_cbranch_execnz .LBB11_1 ; GFX8-NEXT: ; %bb.2: ; GFX8-NEXT: s_mov_b64 exec, s[6:7] ; GFX8-NEXT: s_mov_b64 s[6:7], 0 -; GFX8-NEXT: v_lshlrev_b32_e32 v8, 16, v5 +; GFX8-NEXT: v_lshlrev_b32_e32 v10, 16, v5 ; GFX8-NEXT: .LBB11_3: ; %atomicrmw.start ; GFX8-NEXT: ; =>This Loop Header: Depth=1 ; GFX8-NEXT: ; Child Loop BB11_4 Depth 2 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 24, v6 -; GFX8-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GFX8-NEXT: v_min_f32_e32 v4, v4, v8 +; GFX8-NEXT: v_lshrrev_b32_sdwa v4, v7, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-NEXT: v_min_f32_e32 v4, v4, v10 ; GFX8-NEXT: v_bfe_u32 v5, v4, 16, 1 ; GFX8-NEXT: v_add_u32_e32 v5, vcc, v5, v4 ; GFX8-NEXT: v_add_u32_e32 v5, vcc, 0x7fff, v5 -; GFX8-NEXT: v_or_b32_e32 v9, 0x400000, v4 +; GFX8-NEXT: v_or_b32_e32 v11, 0x400000, v4 ; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v4, 
v4 -; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v9, vcc -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v4 -; GFX8-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX8-NEXT: v_and_b32_e32 v5, 0xffffff, v6 +; GFX8-NEXT: v_cndmask_b32_e32 v4, v5, v11, vcc +; GFX8-NEXT: v_lshlrev_b32_sdwa v4, v7, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_and_b32_e32 v5, v6, v9 ; GFX8-NEXT: v_or_b32_e32 v5, v5, v4 ; GFX8-NEXT: v_mov_b32_e32 v4, v5 ; GFX8-NEXT: s_mov_b64 s[12:13], exec @@ -4830,7 +5029,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall(ptr ; GFX8-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX8-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: buffer_atomic_cmpswap v[4:5], v7, s[8:11], 0 offen glc +; GFX8-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc ; GFX8-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX8-NEXT: s_cbranch_execnz .LBB11_4 ; GFX8-NEXT: ; %bb.5: ; in Loop: Header=BB11_3 Depth=1 @@ -4844,14 +5043,18 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall(ptr ; GFX8-NEXT: s_cbranch_execnz .LBB11_3 ; GFX8-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX8-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX8-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX8-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX7-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall: ; GFX7: ; %bb.0: ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX7-NEXT: v_add_i32_e32 v4, vcc, 0x200, v4 -; GFX7-NEXT: v_and_b32_e32 v7, -4, v4 +; GFX7-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX7-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX7-NEXT: v_lshlrev_b32_e32 v7, 3, v4 +; GFX7-NEXT: v_lshl_b32_e32 v4, 0xffff, v7 +; GFX7-NEXT: v_not_b32_e32 v9, v4 ; GFX7-NEXT: s_mov_b64 s[6:7], exec ; GFX7-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 ; GFX7-NEXT: v_readfirstlane_b32 s8, v0 @@ -4862,25 +5065,25 @@ define bfloat 
@buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall(ptr ; GFX7-NEXT: v_cmp_eq_u64_e64 s[4:5], s[10:11], v[2:3] ; GFX7-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX7-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GFX7-NEXT: buffer_load_dword v6, v7, s[8:11], 0 offen +; GFX7-NEXT: buffer_load_dword v6, v8, s[8:11], 0 offen ; GFX7-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_cbranch_execnz .LBB11_1 ; GFX7-NEXT: ; %bb.2: ; GFX7-NEXT: s_mov_b64 exec, s[6:7] ; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v5 ; GFX7-NEXT: s_mov_b64 s[6:7], 0 -; GFX7-NEXT: v_and_b32_e32 v8, 0xffff0000, v4 +; GFX7-NEXT: v_and_b32_e32 v10, 0xffff0000, v4 ; GFX7-NEXT: .LBB11_3: ; %atomicrmw.start ; GFX7-NEXT: ; =>This Loop Header: Depth=1 ; GFX7-NEXT: ; Child Loop BB11_4 Depth 2 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_lshrrev_b32_e32 v4, 24, v6 +; GFX7-NEXT: v_lshrrev_b32_e32 v4, v7, v6 ; GFX7-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX7-NEXT: v_mul_f32_e32 v4, 1.0, v4 -; GFX7-NEXT: v_min_f32_e32 v4, v4, v8 +; GFX7-NEXT: v_min_f32_e32 v4, v4, v10 ; GFX7-NEXT: v_lshrrev_b32_e32 v4, 16, v4 -; GFX7-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GFX7-NEXT: v_and_b32_e32 v5, 0xffffff, v6 +; GFX7-NEXT: v_lshlrev_b32_e32 v4, v7, v4 +; GFX7-NEXT: v_and_b32_e32 v5, v6, v9 ; GFX7-NEXT: v_or_b32_e32 v5, v5, v4 ; GFX7-NEXT: v_mov_b32_e32 v4, v5 ; GFX7-NEXT: s_mov_b64 s[12:13], exec @@ -4896,7 +5099,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall(ptr ; GFX7-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX7-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: buffer_atomic_cmpswap v[4:5], v7, s[8:11], 0 offen glc +; GFX7-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc ; GFX7-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX7-NEXT: s_cbranch_execnz .LBB11_4 ; GFX7-NEXT: ; %bb.5: ; in Loop: Header=BB11_3 Depth=1 @@ -4910,7 +5113,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall(ptr ; GFX7-NEXT: s_cbranch_execnz .LBB11_3 ; 
GFX7-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX7-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX7-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX7-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; @@ -4918,7 +5121,11 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall(ptr ; GFX6: ; %bb.0: ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0x200, v4 -; GFX6-NEXT: v_and_b32_e32 v7, -4, v4 +; GFX6-NEXT: v_and_b32_e32 v8, -4, v4 +; GFX6-NEXT: v_and_b32_e32 v4, 3, v4 +; GFX6-NEXT: v_lshlrev_b32_e32 v7, 3, v4 +; GFX6-NEXT: v_lshl_b32_e32 v4, 0xffff, v7 +; GFX6-NEXT: v_not_b32_e32 v9, v4 ; GFX6-NEXT: s_mov_b64 s[6:7], exec ; GFX6-NEXT: .LBB11_1: ; =>This Inner Loop Header: Depth=1 ; GFX6-NEXT: v_readfirstlane_b32 s8, v0 @@ -4929,25 +5136,25 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall(ptr ; GFX6-NEXT: v_cmp_eq_u64_e64 s[4:5], s[10:11], v[2:3] ; GFX6-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX6-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GFX6-NEXT: buffer_load_dword v6, v7, s[8:11], 0 offen +; GFX6-NEXT: buffer_load_dword v6, v8, s[8:11], 0 offen ; GFX6-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX6-NEXT: s_cbranch_execnz .LBB11_1 ; GFX6-NEXT: ; %bb.2: ; GFX6-NEXT: s_mov_b64 exec, s[6:7] ; GFX6-NEXT: v_mul_f32_e32 v4, 1.0, v5 ; GFX6-NEXT: s_mov_b64 s[6:7], 0 -; GFX6-NEXT: v_and_b32_e32 v8, 0xffff0000, v4 +; GFX6-NEXT: v_and_b32_e32 v10, 0xffff0000, v4 ; GFX6-NEXT: .LBB11_3: ; %atomicrmw.start ; GFX6-NEXT: ; =>This Loop Header: Depth=1 ; GFX6-NEXT: ; Child Loop BB11_4 Depth 2 ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) -; GFX6-NEXT: v_lshrrev_b32_e32 v4, 24, v6 +; GFX6-NEXT: v_lshrrev_b32_e32 v4, v7, v6 ; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX6-NEXT: v_mul_f32_e32 v4, 1.0, v4 -; GFX6-NEXT: v_min_f32_e32 v4, v4, v8 +; GFX6-NEXT: v_min_f32_e32 v4, v4, v10 ; GFX6-NEXT: v_lshrrev_b32_e32 v4, 16, v4 -; GFX6-NEXT: v_lshlrev_b32_e32 v4, 24, 
v4 -; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v6 +; GFX6-NEXT: v_lshlrev_b32_e32 v4, v7, v4 +; GFX6-NEXT: v_and_b32_e32 v5, v6, v9 ; GFX6-NEXT: v_or_b32_e32 v5, v5, v4 ; GFX6-NEXT: v_mov_b32_e32 v4, v5 ; GFX6-NEXT: s_mov_b64 s[12:13], exec @@ -4963,7 +5170,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall(ptr ; GFX6-NEXT: s_and_b64 s[4:5], vcc, s[4:5] ; GFX6-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] ; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) -; GFX6-NEXT: buffer_atomic_cmpswap v[4:5], v7, s[8:11], 0 offen glc +; GFX6-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc ; GFX6-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX6-NEXT: s_cbranch_execnz .LBB11_4 ; GFX6-NEXT: ; %bb.5: ; in Loop: Header=BB11_3 Depth=1 @@ -4977,7 +5184,7 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall(ptr ; GFX6-NEXT: s_cbranch_execnz .LBB11_3 ; GFX6-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX6-NEXT: s_or_b64 exec, exec, s[6:7] -; GFX6-NEXT: v_lshrrev_b32_e32 v0, 24, v4 +; GFX6-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX6-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX6-NEXT: s_waitcnt expcnt(0) ; GFX6-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll index 35999842d6d715..cc98b5333c5bb8 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll @@ -196,8 +196,7 @@ define i32 @ptrtoint_offset(ptr addrspace(7) %ptr) { ; CHECK-LABEL: define i32 @ptrtoint_offset ; CHECK-SAME: ({ ptr addrspace(8), i32 } [[PTR:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[PTR_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[PTR]], 0 -; CHECK-NEXT: [[PTR_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[PTR]], 1 -; CHECK-NEXT: [[RET:%.*]] = or i32 poison, [[PTR_OFF]] +; CHECK-NEXT: [[RET:%.*]] = extractvalue { ptr addrspace(8), i32 } [[PTR]], 1 ; CHECK-NEXT: ret 
i32 [[RET]] ; %ret = ptrtoint ptr addrspace(7) %ptr to i32 From 153fca5d6a96686917f2046b01758920fcc2b714 Mon Sep 17 00:00:00 2001 From: Benjamin Chetioui <3920784+bchetioui@users.noreply.github.com> Date: Fri, 14 Jun 2024 16:40:53 +0200 Subject: [PATCH 104/155] [mlir][bzl] Fix broken BUILD due to typo in `CAPITransformsObjects` in BUILD.bazel. --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 421063cc3e930a..9f9819b6eb858d 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -1051,7 +1051,7 @@ cc_library( ":CAPIDebugObjects", ":CAPIIRObjects", ":CAPIInterfacesObjects", - ":CAPITransformObjects", + ":CAPITransformsObjects", ], ) From 74fe1da01eb149a2234fc0f9570c84a08692e782 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 14 Jun 2024 15:11:53 +0100 Subject: [PATCH 105/155] [MC][X86] addConstantComments - add mul vXi16 comments Based on feedback from #95403 - we use multiply by constant for various lowerings (shifts, division etc.), so its very useful to printout the constants to help understand the transform involved. vXi16 multiplies are the easiest to add for this initial commit, but we can add other arithmetic instructions as follow ups when the need arises (I intend to add PMADDUBSW handling for #95403 next). I've done my best to update all test checks but there are bound to be ones that got missed that will only appear when the file is regenerated. 
--- llvm/lib/Target/X86/X86MCInstLower.cpp | 39 +++++ llvm/test/CodeGen/X86/combine-mul.ll | 6 +- llvm/test/CodeGen/X86/combine-sdiv.ll | 148 +++++++++--------- llvm/test/CodeGen/X86/combine-udiv.ll | 50 +++--- llvm/test/CodeGen/X86/dagcombine-shifts.ll | 2 +- llvm/test/CodeGen/X86/dpbusd_const.ll | 4 +- .../X86/fold-int-pow2-with-fmul-or-fdiv.ll | 2 +- llvm/test/CodeGen/X86/freeze-binary.ll | 12 +- llvm/test/CodeGen/X86/gfni-funnel-shifts.ll | 40 ++--- llvm/test/CodeGen/X86/gfni-rotates.ll | 40 ++--- llvm/test/CodeGen/X86/gfni-shifts.ll | 54 +++---- llvm/test/CodeGen/X86/known-never-zero.ll | 4 +- llvm/test/CodeGen/X86/lower-vec-shift.ll | 6 +- llvm/test/CodeGen/X86/madd.ll | 6 +- ...of-two-or-zero-when-comparing-with-zero.ll | 6 +- llvm/test/CodeGen/X86/pmul.ll | 14 +- .../CodeGen/X86/prefer-avx256-wide-mul.ll | 4 +- .../test/CodeGen/X86/rotate-extract-vector.ll | 4 +- llvm/test/CodeGen/X86/shrink_vmul.ll | 56 +++---- llvm/test/CodeGen/X86/slow-pmulld.ll | 80 +++++----- .../CodeGen/X86/srem-seteq-vec-nonsplat.ll | 80 +++++----- llvm/test/CodeGen/X86/srem-vector-lkk.ll | 10 +- .../X86/urem-seteq-vec-tautological.ll | 10 +- llvm/test/CodeGen/X86/urem-seteq.ll | 9 ++ llvm/test/CodeGen/X86/urem-vector-lkk.ll | 10 +- llvm/test/CodeGen/X86/var-permute-128.ll | 6 +- llvm/test/CodeGen/X86/var-permute-256.ll | 12 +- llvm/test/CodeGen/X86/vec_shift6.ll | 10 +- llvm/test/CodeGen/X86/vector-fshl-128.ll | 40 ++--- llvm/test/CodeGen/X86/vector-fshl-256.ll | 36 ++--- llvm/test/CodeGen/X86/vector-fshl-rot-128.ll | 20 +-- llvm/test/CodeGen/X86/vector-fshl-rot-256.ll | 12 +- llvm/test/CodeGen/X86/vector-fshr-128.ll | 54 +++---- llvm/test/CodeGen/X86/vector-fshr-256.ll | 48 +++--- llvm/test/CodeGen/X86/vector-fshr-rot-128.ll | 20 +-- llvm/test/CodeGen/X86/vector-fshr-rot-256.ll | 12 +- llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll | 82 +++++----- llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll | 74 ++++----- llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll | 56 +++---- 
llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll | 132 ++++++++-------- llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll | 102 ++++++------ llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll | 96 ++++++------ llvm/test/CodeGen/X86/vector-mul.ll | 50 +++--- llvm/test/CodeGen/X86/vector-rotate-128.ll | 20 +-- llvm/test/CodeGen/X86/vector-rotate-256.ll | 12 +- .../test/CodeGen/X86/vector-shift-ashr-128.ll | 16 +- .../test/CodeGen/X86/vector-shift-ashr-256.ll | 28 ++-- .../CodeGen/X86/vector-shift-ashr-sub128.ll | 26 +-- .../test/CodeGen/X86/vector-shift-lshr-128.ll | 24 +-- .../test/CodeGen/X86/vector-shift-lshr-256.ll | 34 ++-- .../CodeGen/X86/vector-shift-lshr-sub128.ll | 36 ++--- llvm/test/CodeGen/X86/vector-shift-shl-128.ll | 28 ++-- llvm/test/CodeGen/X86/vector-shift-shl-256.ll | 34 ++-- .../CodeGen/X86/vector-shift-shl-sub128.ll | 44 +++--- llvm/test/CodeGen/X86/vector-trunc-math.ll | 32 ++-- llvm/test/CodeGen/X86/x86-shifts.ll | 4 +- 56 files changed, 972 insertions(+), 924 deletions(-) diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp index 8f6fba8ac22c65..5d6b560bde632d 100644 --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -1898,6 +1898,45 @@ static void addConstantComments(const MachineInstr *MI, break; } +#define INSTR_CASE(Prefix, Instr, Suffix, Postfix) \ + case X86::Prefix##Instr##Suffix##rm##Postfix: + +#define CASE_ARITH_RM(Instr) \ + INSTR_CASE(, Instr, , ) /* SSE */ \ + INSTR_CASE(V, Instr, , ) /* AVX-128 */ \ + INSTR_CASE(V, Instr, Y, ) /* AVX-256 */ \ + INSTR_CASE(V, Instr, Z128, ) \ + INSTR_CASE(V, Instr, Z128, k) \ + INSTR_CASE(V, Instr, Z128, kz) \ + INSTR_CASE(V, Instr, Z256, ) \ + INSTR_CASE(V, Instr, Z256, k) \ + INSTR_CASE(V, Instr, Z256, kz) \ + INSTR_CASE(V, Instr, Z, ) \ + INSTR_CASE(V, Instr, Z, k) \ + INSTR_CASE(V, Instr, Z, kz) + + // TODO: Add additional instructions when useful. 
+ CASE_ARITH_RM(PMADDWD) + CASE_ARITH_RM(PMULLW) + CASE_ARITH_RM(PMULHW) + CASE_ARITH_RM(PMULHUW) + CASE_ARITH_RM(PMULHRSW) { + unsigned SrcIdx = getSrcIdx(MI, 1); + if (auto *C = X86::getConstantFromPool(*MI, SrcIdx + 1)) { + if (C->getType()->getScalarSizeInBits() == 16) { + std::string Comment; + raw_string_ostream CS(Comment); + unsigned VectorWidth = + X86::getVectorRegisterWidth(MI->getDesc().operands()[0]); + CS << "["; + printConstant(C, VectorWidth, CS); + CS << "]"; + OutStreamer.AddComment(CS.str()); + } + } + break; + } + #define MASK_AVX512_CASE(Instr) \ case Instr: \ case Instr##k: \ diff --git a/llvm/test/CodeGen/X86/combine-mul.ll b/llvm/test/CodeGen/X86/combine-mul.ll index 5d7bf4a2c9788f..85ee823f1a5477 100644 --- a/llvm/test/CodeGen/X86/combine-mul.ll +++ b/llvm/test/CodeGen/X86/combine-mul.ll @@ -543,10 +543,10 @@ define <16 x i8> @PR35579(<16 x i8> %x) { ; SSE: # %bb.0: ; SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [8,1,2,1,4,1,2,1] ; SSE-NEXT: pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] ; SSE-NEXT: pand %xmm2, %xmm0 -; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [0,1,2,1,4,1,2,1] ; SSE-NEXT: pand %xmm2, %xmm1 ; SSE-NEXT: packuswb %xmm0, %xmm1 ; SSE-NEXT: movdqa %xmm1, %xmm0 @@ -555,7 +555,7 @@ define <16 x i8> @PR35579(<16 x i8> %x) { ; AVX-LABEL: PR35579: ; AVX: # %bb.0: ; AVX-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX-NEXT: vpmullw 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [0,1,2,1,4,1,2,1,8,1,2,1,4,1,2,1] ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; AVX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/combine-sdiv.ll b/llvm/test/CodeGen/X86/combine-sdiv.ll index 5c5487815b3360..8e424664363bbf 100644 --- a/llvm/test/CodeGen/X86/combine-sdiv.ll +++ b/llvm/test/CodeGen/X86/combine-sdiv.ll @@ -411,13 +411,13 @@ define <16 x i8> @combine_vec_sdiv_by_pow2b_v16i8(<16 x i8> %x) { ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm1 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [256,4,2,16,8,32,64,2,256,4,2,16,8,32,64,2] ; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX2-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 ; AVX2-NEXT: vpaddb %xmm1, %xmm0, %xmm1 ; AVX2-NEXT: vpmovsxbw %xmm1, %ymm1 -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [256,64,128,16,32,8,4,128,256,64,128,16,32,8,4,128] ; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX2-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 @@ -479,7 +479,7 @@ define <8 x i16> @combine_vec_sdiv_by_pow2b_v8i16(<8 x i16> %x) { ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psraw $15, %xmm1 -; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [u,4,2,16,8,32,64,2] ; SSE2-NEXT: paddw %xmm0, %xmm1 ; SSE2-NEXT: movdqa {{.*#+}} 
xmm2 = [65535,65535,65535,0,65535,0,0,65535] ; SSE2-NEXT: movdqa %xmm1, %xmm3 @@ -510,11 +510,11 @@ define <8 x i16> @combine_vec_sdiv_by_pow2b_v8i16(<8 x i16> %x) { ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa %xmm0, %xmm1 ; SSE41-NEXT: psraw $15, %xmm1 -; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [u,4,2,16,8,32,64,2] ; SSE41-NEXT: paddw %xmm0, %xmm1 ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: psraw $1, %xmm2 -; SSE41-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE41-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [u,16384,u,4096,8192,2048,1024,u] ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2],xmm1[3,4,5,6],xmm2[7] ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; SSE41-NEXT: retq @@ -522,10 +522,10 @@ define <8 x i16> @combine_vec_sdiv_by_pow2b_v8i16(<8 x i16> %x) { ; AVX1-LABEL: combine_vec_sdiv_by_pow2b_v8i16: ; AVX1: # %bb.0: ; AVX1-NEXT: vpsraw $15, %xmm0, %xmm1 -; AVX1-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [u,4,2,16,8,32,64,2] ; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm1 ; AVX1-NEXT: vpsraw $1, %xmm1, %xmm2 -; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [u,16384,u,4096,8192,2048,1024,u] ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2],xmm1[3,4,5,6],xmm2[7] ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; AVX1-NEXT: retq @@ -533,10 +533,10 @@ define <8 x i16> @combine_vec_sdiv_by_pow2b_v8i16(<8 x i16> %x) { ; AVX2-LABEL: combine_vec_sdiv_by_pow2b_v8i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vpsraw $15, %xmm0, %xmm1 -; AVX2-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX2-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [u,4,2,16,8,32,64,2] ; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm1 ; AVX2-NEXT: vpsraw $1, %xmm1, %xmm2 -; 
AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [u,16384,u,4096,8192,2048,1024,u] ; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2],xmm1[3,4,5,6],xmm2[7] ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; AVX2-NEXT: retq @@ -544,7 +544,7 @@ define <8 x i16> @combine_vec_sdiv_by_pow2b_v8i16(<8 x i16> %x) { ; AVX512F-LABEL: combine_vec_sdiv_by_pow2b_v8i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpsraw $15, %xmm0, %xmm1 -; AVX512F-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512F-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [u,4,2,16,8,32,64,2] ; AVX512F-NEXT: vpaddw %xmm1, %xmm0, %xmm1 ; AVX512F-NEXT: vpmovsxwd %xmm1, %ymm1 ; AVX512F-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 @@ -684,10 +684,10 @@ define <16 x i16> @combine_vec_sdiv_by_pow2b_v16i16(<16 x i16> %x) { ; AVX2-LABEL: combine_vec_sdiv_by_pow2b_v16i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vpsraw $15, %ymm0, %ymm1 -; AVX2-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [u,4,2,16,8,32,64,2,u,4,2,16,8,32,64,2] ; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm1 ; AVX2-NEXT: vpsraw $1, %ymm1, %ymm2 -; AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [u,16384,u,4096,8192,2048,1024,u,u,16384,u,4096,8192,2048,1024,u] ; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0,1],ymm2[2],ymm1[3,4,5,6],ymm2[7],ymm1[8,9],ymm2[10],ymm1[11,12,13,14],ymm2[15] ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7],ymm0[8],ymm1[9,10,11,12,13,14,15] ; AVX2-NEXT: retq @@ -695,7 +695,7 @@ define <16 x i16> @combine_vec_sdiv_by_pow2b_v16i16(<16 x i16> %x) { ; AVX512F-LABEL: combine_vec_sdiv_by_pow2b_v16i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpsraw $15, %ymm0, %ymm1 -; AVX512F-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), 
%ymm1, %ymm1 +; AVX512F-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [u,4,2,16,8,32,64,2,u,4,2,16,8,32,64,2] ; AVX512F-NEXT: vpaddw %ymm1, %ymm0, %ymm1 ; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1 ; AVX512F-NEXT: vpsravd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 @@ -2154,17 +2154,17 @@ define <16 x i8> @non_splat_minus_one_divisor_1(<16 x i8> %A) { ; SSE2-NEXT: pcmpgtb %xmm0, %xmm2 ; SSE2-NEXT: movdqa %xmm2, %xmm3 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm1[8],xmm3[9],xmm1[9],xmm3[10],xmm1[10],xmm3[11],xmm1[11],xmm3[12],xmm1[12],xmm3[13],xmm1[13],xmm3[14],xmm1[14],xmm3[15],xmm1[15] -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [256,2,2,2,2,128,2,128] ; SSE2-NEXT: psrlw $8, %xmm3 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [256,256,2,256,256,256,2,256] ; SSE2-NEXT: psrlw $8, %xmm2 ; SSE2-NEXT: packuswb %xmm3, %xmm2 ; SSE2-NEXT: paddb %xmm0, %xmm2 ; SSE2-NEXT: movdqa %xmm2, %xmm1 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] ; SSE2-NEXT: psraw $8, %xmm1 -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [u,128,128,128,128,2,128,2] ; SSE2-NEXT: psrlw $8, %xmm1 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSE2-NEXT: psraw $8, %xmm2 @@ -2194,7 +2194,7 @@ define <16 x i8> @non_splat_minus_one_divisor_1(<16 x i8> %A) { ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm4[0,1],xmm2[2],xmm4[3,4,5],xmm2[6],xmm4[7] ; SSE41-NEXT: psrlw $8, %xmm2 ; SSE41-NEXT: punpckhbw {{.*#+}} xmm3 = 
xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15] -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [256,2,2,2,2,128,2,128] ; SSE41-NEXT: psrlw $8, %xmm3 ; SSE41-NEXT: packuswb %xmm3, %xmm2 ; SSE41-NEXT: paddb %xmm1, %xmm2 @@ -2229,7 +2229,7 @@ define <16 x i8> @non_splat_minus_one_divisor_1(<16 x i8> %A) { ; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1],xmm4[2],xmm3[3,4,5],xmm4[6],xmm3[7] ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [256,2,2,2,2,128,2,128] ; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX1-NEXT: vpackuswb %xmm1, %xmm3, %xmm1 ; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm1 @@ -2256,13 +2256,13 @@ define <16 x i8> @non_splat_minus_one_divisor_1(<16 x i8> %A) { ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm1 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [256,256,2,256,256,256,2,256,256,2,2,2,2,128,2,128] ; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX2-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 ; AVX2-NEXT: vpaddb %xmm1, %xmm0, %xmm1 ; AVX2-NEXT: vpmovsxbw %xmm1, %ymm1 -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # 
[256,256,128,256,256,256,128,256,256,128,128,128,128,2,128,2] ; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX2-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 @@ -2396,7 +2396,7 @@ define <4 x i32> @non_splat_minus_one_divisor_2(<4 x i32> %A) { define <8 x i16> @combine_vec_sdiv_nonuniform(<8 x i16> %x) { ; SSE-LABEL: combine_vec_sdiv_nonuniform: ; SSE: # %bb.0: -; SSE-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [21846,21846,21846,21846,2979,2979,2979,2979] ; SSE-NEXT: movdqa %xmm0, %xmm1 ; SSE-NEXT: psrlw $15, %xmm1 ; SSE-NEXT: paddw %xmm1, %xmm0 @@ -2404,7 +2404,7 @@ define <8 x i16> @combine_vec_sdiv_nonuniform(<8 x i16> %x) { ; ; AVX-LABEL: combine_vec_sdiv_nonuniform: ; AVX: # %bb.0: -; AVX-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [21846,21846,21846,21846,2979,2979,2979,2979] ; AVX-NEXT: vpsrlw $15, %xmm0, %xmm1 ; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq @@ -2415,7 +2415,7 @@ define <8 x i16> @combine_vec_sdiv_nonuniform(<8 x i16> %x) { define <8 x i16> @combine_vec_sdiv_nonuniform2(<8 x i16> %x) { ; SSE2-LABEL: combine_vec_sdiv_nonuniform2: ; SSE2: # %bb.0: -; SSE2-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [10923,10923,10923,10923,5243,5243,5243,5243] ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psraw $2, %xmm1 ; SSE2-NEXT: movdqa %xmm0, %xmm2 @@ -2427,7 +2427,7 @@ define <8 x i16> @combine_vec_sdiv_nonuniform2(<8 x i16> %x) { ; ; SSE41-LABEL: combine_vec_sdiv_nonuniform2: ; SSE41: # %bb.0: -; SSE41-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [10923,10923,10923,10923,5243,5243,5243,5243] ; SSE41-NEXT: movdqa %xmm0, %xmm1 ; SSE41-NEXT: psraw $1, %xmm1 ; SSE41-NEXT: movdqa %xmm0, %xmm2 @@ -2439,7 +2439,7 @@ define <8 x i16> 
@combine_vec_sdiv_nonuniform2(<8 x i16> %x) { ; ; AVX1-LABEL: combine_vec_sdiv_nonuniform2: ; AVX1: # %bb.0: -; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [10923,10923,10923,10923,5243,5243,5243,5243] ; AVX1-NEXT: vpsraw $1, %xmm0, %xmm1 ; AVX1-NEXT: vpsraw $2, %xmm0, %xmm2 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7] @@ -2449,7 +2449,7 @@ define <8 x i16> @combine_vec_sdiv_nonuniform2(<8 x i16> %x) { ; ; AVX2-LABEL: combine_vec_sdiv_nonuniform2: ; AVX2: # %bb.0: -; AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [10923,10923,10923,10923,5243,5243,5243,5243] ; AVX2-NEXT: vpsraw $1, %xmm0, %xmm1 ; AVX2-NEXT: vpsraw $2, %xmm0, %xmm2 ; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3] @@ -2459,7 +2459,7 @@ define <8 x i16> @combine_vec_sdiv_nonuniform2(<8 x i16> %x) { ; ; AVX512F-LABEL: combine_vec_sdiv_nonuniform2: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [10923,10923,10923,10923,5243,5243,5243,5243] ; AVX512F-NEXT: vpsraw $1, %xmm0, %xmm1 ; AVX512F-NEXT: vpsraw $2, %xmm0, %xmm2 ; AVX512F-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3] @@ -2469,7 +2469,7 @@ define <8 x i16> @combine_vec_sdiv_nonuniform2(<8 x i16> %x) { ; ; AVX512BW-LABEL: combine_vec_sdiv_nonuniform2: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [10923,10923,10923,10923,5243,5243,5243,5243] ; AVX512BW-NEXT: vpsrlw $15, %xmm0, %xmm1 ; AVX512BW-NEXT: vpsravw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX512BW-NEXT: vpaddw %xmm1, %xmm0, %xmm0 @@ -2477,7 +2477,7 @@ define <8 x i16> @combine_vec_sdiv_nonuniform2(<8 x i16> %x) { ; ; XOP-LABEL: 
combine_vec_sdiv_nonuniform2: ; XOP: # %bb.0: -; XOP-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; XOP-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [10923,10923,10923,10923,5243,5243,5243,5243] ; XOP-NEXT: vpsrlw $15, %xmm0, %xmm1 ; XOP-NEXT: vpshaw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; XOP-NEXT: vpaddw %xmm1, %xmm0, %xmm0 @@ -2517,7 +2517,7 @@ define <8 x i16> @combine_vec_sdiv_nonuniform3(<8 x i16> %x) { ; ; AVX1-LABEL: combine_vec_sdiv_nonuniform3: ; AVX1: # %bb.0: -; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [45591,45591,45591,45591,32833,32833,32833,32833] ; AVX1-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsraw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpsraw $4, %xmm0, %xmm2 @@ -2528,7 +2528,7 @@ define <8 x i16> @combine_vec_sdiv_nonuniform3(<8 x i16> %x) { ; ; AVX2-LABEL: combine_vec_sdiv_nonuniform3: ; AVX2: # %bb.0: -; AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [45591,45591,45591,45591,32833,32833,32833,32833] ; AVX2-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: vpsraw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpsraw $4, %xmm0, %xmm2 @@ -2539,7 +2539,7 @@ define <8 x i16> @combine_vec_sdiv_nonuniform3(<8 x i16> %x) { ; ; AVX512F-LABEL: combine_vec_sdiv_nonuniform3: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [45591,45591,45591,45591,32833,32833,32833,32833] ; AVX512F-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ; AVX512F-NEXT: vpsraw $8, %xmm0, %xmm1 ; AVX512F-NEXT: vpsraw $4, %xmm0, %xmm2 @@ -2550,7 +2550,7 @@ define <8 x i16> @combine_vec_sdiv_nonuniform3(<8 x i16> %x) { ; ; AVX512BW-LABEL: combine_vec_sdiv_nonuniform3: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX512BW-NEXT: vpmulhw 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [45591,45591,45591,45591,32833,32833,32833,32833] ; AVX512BW-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ; AVX512BW-NEXT: vpsrlw $15, %xmm0, %xmm1 ; AVX512BW-NEXT: vpsravw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 @@ -2559,7 +2559,7 @@ define <8 x i16> @combine_vec_sdiv_nonuniform3(<8 x i16> %x) { ; ; XOP-LABEL: combine_vec_sdiv_nonuniform3: ; XOP: # %bb.0: -; XOP-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; XOP-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [45591,45591,45591,45591,32833,32833,32833,32833] ; XOP-NEXT: vpaddw %xmm0, %xmm1, %xmm0 ; XOP-NEXT: vpsrlw $15, %xmm0, %xmm1 ; XOP-NEXT: vpshaw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 @@ -2602,7 +2602,7 @@ define <8 x i16> @combine_vec_sdiv_nonuniform4(<8 x i16> %x) { ; ; AVX1-LABEL: combine_vec_sdiv_nonuniform4: ; AVX1: # %bb.0: -; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [19945,19945,19945,19945,32639,32639,32639,32639] ; AVX1-NEXT: vpsubw %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpsraw $8, %xmm0, %xmm1 ; AVX1-NEXT: vpsraw $4, %xmm0, %xmm2 @@ -2613,7 +2613,7 @@ define <8 x i16> @combine_vec_sdiv_nonuniform4(<8 x i16> %x) { ; ; AVX2-LABEL: combine_vec_sdiv_nonuniform4: ; AVX2: # %bb.0: -; AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [19945,19945,19945,19945,32639,32639,32639,32639] ; AVX2-NEXT: vpsubw %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: vpsraw $8, %xmm0, %xmm1 ; AVX2-NEXT: vpsraw $4, %xmm0, %xmm2 @@ -2624,7 +2624,7 @@ define <8 x i16> @combine_vec_sdiv_nonuniform4(<8 x i16> %x) { ; ; AVX512F-LABEL: combine_vec_sdiv_nonuniform4: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [19945,19945,19945,19945,32639,32639,32639,32639] ; AVX512F-NEXT: vpsubw %xmm0, 
%xmm1, %xmm0 ; AVX512F-NEXT: vpsraw $8, %xmm0, %xmm1 ; AVX512F-NEXT: vpsraw $4, %xmm0, %xmm2 @@ -2635,7 +2635,7 @@ define <8 x i16> @combine_vec_sdiv_nonuniform4(<8 x i16> %x) { ; ; AVX512BW-LABEL: combine_vec_sdiv_nonuniform4: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [19945,19945,19945,19945,32639,32639,32639,32639] ; AVX512BW-NEXT: vpsubw %xmm0, %xmm1, %xmm0 ; AVX512BW-NEXT: vpsrlw $15, %xmm0, %xmm1 ; AVX512BW-NEXT: vpsravw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 @@ -2644,7 +2644,7 @@ define <8 x i16> @combine_vec_sdiv_nonuniform4(<8 x i16> %x) { ; ; XOP-LABEL: combine_vec_sdiv_nonuniform4: ; XOP: # %bb.0: -; XOP-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; XOP-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [19945,19945,19945,19945,32639,32639,32639,32639] ; XOP-NEXT: vpsubw %xmm0, %xmm1, %xmm0 ; XOP-NEXT: vpsrlw $15, %xmm0, %xmm1 ; XOP-NEXT: vpshaw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 @@ -2659,7 +2659,7 @@ define <8 x i16> @combine_vec_sdiv_nonuniform5(<8 x i16> %x) { ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,0,65535,0,0,0,1,1] ; SSE2-NEXT: pmullw %xmm0, %xmm1 -; SSE2-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [32639,54613,19945,21846,2979,5243,32897,32833] ; SSE2-NEXT: paddw %xmm1, %xmm0 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,65535,65535,65535,65535,65535,65535,0] ; SSE2-NEXT: movdqa %xmm0, %xmm2 @@ -2694,7 +2694,7 @@ define <8 x i16> @combine_vec_sdiv_nonuniform5(<8 x i16> %x) { ; SSE41: # %bb.0: ; SSE41-NEXT: pmovsxbw {{.*#+}} xmm1 = [65535,0,65535,0,0,0,1,1] ; SSE41-NEXT: pmullw %xmm0, %xmm1 -; SSE41-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [32639,54613,19945,21846,2979,5243,32897,32833] ; SSE41-NEXT: paddw %xmm1, %xmm0 ; SSE41-NEXT: 
movdqa {{.*#+}} xmm1 = [256,16384,4096,u,u,u,512,256] ; SSE41-NEXT: pmulhw %xmm0, %xmm1 @@ -2708,10 +2708,10 @@ define <8 x i16> @combine_vec_sdiv_nonuniform5(<8 x i16> %x) { ; ; AVX1-LABEL: combine_vec_sdiv_nonuniform5: ; AVX1: # %bb.0: -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 -; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [65535,0,65535,0,0,0,1,1] +; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [32639,54613,19945,21846,2979,5243,32897,32833] ; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [256,16384,4096,u,u,u,512,256] ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3,4],xmm1[5,6,7] ; AVX1-NEXT: vpsraw $1, %xmm0, %xmm2 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4],xmm2[5],xmm1[6,7] @@ -2721,10 +2721,10 @@ define <8 x i16> @combine_vec_sdiv_nonuniform5(<8 x i16> %x) { ; ; AVX2-LABEL: combine_vec_sdiv_nonuniform5: ; AVX2: # %bb.0: -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 -; AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [65535,0,65535,0,0,0,1,1] +; AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [32639,54613,19945,21846,2979,5243,32897,32833] ; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [256,16384,4096,u,u,u,512,256] ; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3,4],xmm1[5,6,7] ; AVX2-NEXT: vpsraw $1, %xmm0, %xmm2 ; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4],xmm2[5],xmm1[6,7] @@ -2734,8 +2734,8 @@ define <8 x i16> @combine_vec_sdiv_nonuniform5(<8 x i16> %x) { ; ; AVX512F-LABEL: combine_vec_sdiv_nonuniform5: ; 
AVX512F: # %bb.0: -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 -; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [65535,0,65535,0,0,0,1,1] +; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [32639,54613,19945,21846,2979,5243,32897,32833] ; AVX512F-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: vpsrlw $15, %xmm0, %xmm1 ; AVX512F-NEXT: vpmovsxwd %xmm0, %ymm0 @@ -2747,8 +2747,8 @@ define <8 x i16> @combine_vec_sdiv_nonuniform5(<8 x i16> %x) { ; ; AVX512BW-LABEL: combine_vec_sdiv_nonuniform5: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 -; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [65535,0,65535,0,0,0,1,1] +; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [32639,54613,19945,21846,2979,5243,32897,32833] ; AVX512BW-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vpsrlw $15, %xmm0, %xmm1 ; AVX512BW-NEXT: vpsravw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 @@ -2757,7 +2757,7 @@ define <8 x i16> @combine_vec_sdiv_nonuniform5(<8 x i16> %x) { ; ; XOP-LABEL: combine_vec_sdiv_nonuniform5: ; XOP: # %bb.0: -; XOP-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; XOP-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [32639,54613,19945,21846,2979,5243,32897,32833] ; XOP-NEXT: vpmacsww %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; XOP-NEXT: vpsrlw $15, %xmm0, %xmm1 ; XOP-NEXT: vpshaw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 @@ -2772,7 +2772,7 @@ define <8 x i16> @combine_vec_sdiv_nonuniform6(<8 x i16> %x) { ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535,1,1,1,0] ; SSE2-NEXT: pmullw %xmm0, %xmm1 -; SSE2-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # 
[32767,32767,32703,0,0,32897,32769,16385] ; SSE2-NEXT: paddw %xmm1, %xmm0 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,0,0,0,65535,65535] ; SSE2-NEXT: movdqa %xmm0, %xmm2 @@ -2807,7 +2807,7 @@ define <8 x i16> @combine_vec_sdiv_nonuniform6(<8 x i16> %x) { ; SSE41: # %bb.0: ; SSE41-NEXT: pmovsxbw {{.*#+}} xmm1 = [65535,65535,65535,65535,1,1,1,0] ; SSE41-NEXT: pmullw %xmm0, %xmm1 -; SSE41-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [32767,32767,32703,0,0,32897,32769,16385] ; SSE41-NEXT: paddw %xmm1, %xmm0 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [4,256,256,u,u,512,256,8] ; SSE41-NEXT: pmulhw %xmm0, %xmm1 @@ -2820,10 +2820,10 @@ define <8 x i16> @combine_vec_sdiv_nonuniform6(<8 x i16> %x) { ; ; AVX1-LABEL: combine_vec_sdiv_nonuniform6: ; AVX1: # %bb.0: -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 -; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [65535,65535,65535,65535,1,1,1,0] +; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [32767,32767,32703,0,0,32897,32769,16385] ; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [4,256,256,u,u,512,256,8] ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3,4],xmm1[5,6,7] ; AVX1-NEXT: vpsrlw $15, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 @@ -2833,10 +2833,10 @@ define <8 x i16> @combine_vec_sdiv_nonuniform6(<8 x i16> %x) { ; ; AVX2-LABEL: combine_vec_sdiv_nonuniform6: ; AVX2: # %bb.0: -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 -; AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [65535,65535,65535,65535,1,1,1,0] +; AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # 
[32767,32767,32703,0,0,32897,32769,16385] ; AVX2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [4,256,256,u,u,512,256,8] ; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2],xmm0[3,4],xmm1[5,6,7] ; AVX2-NEXT: vpsrlw $15, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 @@ -2846,8 +2846,8 @@ define <8 x i16> @combine_vec_sdiv_nonuniform6(<8 x i16> %x) { ; ; AVX512F-LABEL: combine_vec_sdiv_nonuniform6: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 -; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [65535,65535,65535,65535,1,1,1,0] +; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [32767,32767,32703,0,0,32897,32769,16385] ; AVX512F-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: vpsrlw $15, %xmm0, %xmm1 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 @@ -2861,8 +2861,8 @@ define <8 x i16> @combine_vec_sdiv_nonuniform6(<8 x i16> %x) { ; ; AVX512BW-LABEL: combine_vec_sdiv_nonuniform6: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 -; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [65535,65535,65535,65535,1,1,1,0] +; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [32767,32767,32703,0,0,32897,32769,16385] ; AVX512BW-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vpsrlw $15, %xmm0, %xmm1 ; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2 @@ -2873,7 +2873,7 @@ define <8 x i16> @combine_vec_sdiv_nonuniform6(<8 x i16> %x) { ; ; XOP-LABEL: combine_vec_sdiv_nonuniform6: ; XOP: # %bb.0: -; XOP-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; XOP-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # 
[32767,32767,32703,0,0,32897,32769,16385] ; XOP-NEXT: vpmacsww %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; XOP-NEXT: vpsrlw $15, %xmm0, %xmm1 ; XOP-NEXT: vpxor %xmm2, %xmm2, %xmm2 @@ -2930,7 +2930,7 @@ define <16 x i8> @pr38658(<16 x i8> %x) { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pxor %xmm2, %xmm2 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15] -; SSE2-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE2-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [0,0,0,0,0,0,0,37632] ; SSE2-NEXT: psrlw $8, %xmm2 ; SSE2-NEXT: pxor %xmm3, %xmm3 ; SSE2-NEXT: packuswb %xmm2, %xmm3 @@ -2940,7 +2940,7 @@ define <16 x i8> @pr38658(<16 x i8> %x) { ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] ; SSE2-NEXT: psraw $8, %xmm1 -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [256,256,256,256,256,256,256,64] ; SSE2-NEXT: psrlw $8, %xmm1 ; SSE2-NEXT: packuswb %xmm1, %xmm2 ; SSE2-NEXT: psrlw $7, %xmm0 @@ -2953,7 +2953,7 @@ define <16 x i8> @pr38658(<16 x i8> %x) { ; SSE41-NEXT: pxor %xmm1, %xmm1 ; SSE41-NEXT: pxor %xmm2, %xmm2 ; SSE41-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15] -; SSE41-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE41-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [0,0,0,0,0,0,0,37632] ; SSE41-NEXT: psrlw $8, %xmm2 ; SSE41-NEXT: packuswb %xmm2, %xmm1 ; SSE41-NEXT: paddb %xmm1, %xmm0 @@ -2976,7 +2976,7 @@ define <16 x i8> @pr38658(<16 x i8> %x) { ; AVX1: # %bb.0: ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = 
xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [0,0,0,0,0,0,0,37632] ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 ; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 @@ -2996,13 +2996,13 @@ define <16 x i8> @pr38658(<16 x i8> %x) { ; AVX2-LABEL: pr38658: ; AVX2: # %bb.0: ; AVX2-NEXT: vpmovsxbw %xmm0, %ymm1 -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,65427] ; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX2-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 ; AVX2-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX2-NEXT: vpmovsxbw %xmm0, %ymm1 -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [256,256,256,256,256,256,256,256,256,256,256,256,256,256,256,64] ; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX2-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 @@ -3015,7 +3015,7 @@ define <16 x i8> @pr38658(<16 x i8> %x) { ; AVX512F-LABEL: pr38658: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm1 -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,65427] ; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero ; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 @@ -3032,7 +3032,7 @@ define <16 x i8> @pr38658(<16 x i8> %x) { ; 
AVX512BW-LABEL: pr38658: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm1 -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,65427] ; AVX512BW-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX512BW-NEXT: vpmovwb %ymm1, %xmm1 ; AVX512BW-NEXT: vpaddb %xmm0, %xmm1, %xmm0 @@ -3049,7 +3049,7 @@ define <16 x i8> @pr38658(<16 x i8> %x) { ; XOP: # %bb.0: ; XOP-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; XOP-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; XOP-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; XOP-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [0,0,0,0,0,0,0,37632] ; XOP-NEXT: vpperm {{.*#+}} xmm1 = xmm1[1,3,5,7,9,11,13,15],xmm2[1,3,5,7,9,11,13,15] ; XOP-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; XOP-NEXT: vpshab {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 diff --git a/llvm/test/CodeGen/X86/combine-udiv.ll b/llvm/test/CodeGen/X86/combine-udiv.ll index 04ea514b3e8017..e429ac0c63c2d0 100644 --- a/llvm/test/CodeGen/X86/combine-udiv.ll +++ b/llvm/test/CodeGen/X86/combine-udiv.ll @@ -471,7 +471,7 @@ define <8 x i16> @combine_vec_udiv_uniform(<8 x i16> %x) { ; ; AVX-LABEL: combine_vec_udiv_uniform: ; AVX: # %bb.0: -; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [25645,25645,25645,25645,25645,25645,25645,25645] ; AVX-NEXT: vpsubw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0 ; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 @@ -480,7 +480,7 @@ define <8 x i16> @combine_vec_udiv_uniform(<8 x i16> %x) { ; ; XOP-LABEL: combine_vec_udiv_uniform: ; XOP: # %bb.0: -; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # 
[25645,25645,25645,25645,25645,25645,25645,25645] ; XOP-NEXT: vpsubw %xmm1, %xmm0, %xmm0 ; XOP-NEXT: vpsrlw $1, %xmm0, %xmm0 ; XOP-NEXT: vpaddw %xmm1, %xmm0, %xmm0 @@ -500,7 +500,7 @@ define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) { ; SSE2-NEXT: psrlw $3, %xmm3 ; SSE2-NEXT: pandn %xmm3, %xmm1 ; SSE2-NEXT: por %xmm2, %xmm1 -; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [25645,61681,8195,9363,512,32769,32897,2] ; SSE2-NEXT: psubw %xmm1, %xmm0 ; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE2-NEXT: paddw %xmm1, %xmm0 @@ -515,7 +515,7 @@ define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) { ; SSE41-NEXT: movdqa %xmm0, %xmm1 ; SSE41-NEXT: psrlw $3, %xmm1 ; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] -; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [25645,61681,8195,9363,512,32769,32897,2] ; SSE41-NEXT: psubw %xmm1, %xmm0 ; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE41-NEXT: paddw %xmm1, %xmm0 @@ -528,18 +528,18 @@ define <8 x i16> @combine_vec_udiv_nonuniform(<8 x i16> %x) { ; AVX: # %bb.0: ; AVX-NEXT: vpsrlw $3, %xmm0, %xmm1 ; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7] -; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [25645,61681,8195,9363,512,32769,32897,2] ; AVX-NEXT: vpsubw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [4096,2048,8,u,u,2,2,u] ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3,4],xmm1[5,6],xmm0[7] ; AVX-NEXT: retq ; ; XOP-LABEL: combine_vec_udiv_nonuniform: ; XOP: # %bb.0: ; XOP-NEXT: vpshlw 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 -; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [25645,61681,8195,9363,512,32769,32897,2] ; XOP-NEXT: vpsubw %xmm1, %xmm0, %xmm0 ; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; XOP-NEXT: vpaddw %xmm1, %xmm0, %xmm0 @@ -558,8 +558,8 @@ define <8 x i16> @combine_vec_udiv_nonuniform2(<8 x i16> %x) { ; SSE2-NEXT: psrlw $1, %xmm0 ; SSE2-NEXT: pandn %xmm0, %xmm1 ; SSE2-NEXT: por %xmm2, %xmm1 -; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [16393,59919,58255,32787,55189,8197,52429,32789] +; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [8,2048,2048,2,2048,8,2048,2] ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: retq ; @@ -568,22 +568,22 @@ define <8 x i16> @combine_vec_udiv_nonuniform2(<8 x i16> %x) { ; SSE41-NEXT: movdqa %xmm0, %xmm1 ; SSE41-NEXT: psrlw $1, %xmm1 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7] -; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [16393,59919,58255,32787,55189,8197,52429,32789] +; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [8,2048,2048,2,2048,8,2048,2] ; SSE41-NEXT: retq ; ; AVX-LABEL: combine_vec_udiv_nonuniform2: ; AVX: # %bb.0: ; AVX-NEXT: vpsrlw $1, %xmm0, %xmm1 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7] -; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [16393,59919,58255,32787,55189,8197,52429,32789] +; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [8,2048,2048,2,2048,8,2048,2] ; 
AVX-NEXT: retq ; ; XOP-LABEL: combine_vec_udiv_nonuniform2: ; XOP: # %bb.0: ; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [16393,59919,58255,32787,55189,8197,52429,32789] ; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; XOP-NEXT: retq %1 = udiv <8 x i16> %x, @@ -598,21 +598,21 @@ define <8 x i16> @combine_vec_udiv_nonuniform3(<8 x i16> %x) { ; SSE-NEXT: psubw %xmm1, %xmm0 ; SSE-NEXT: psrlw $1, %xmm0 ; SSE-NEXT: paddw %xmm1, %xmm0 -; SSE-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [16384,4096,4096,4096,4096,2048,2048,1024] ; SSE-NEXT: retq ; ; AVX-LABEL: combine_vec_udiv_nonuniform3: ; AVX: # %bb.0: -; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [9363,25645,18351,12137,2115,23705,1041,517] ; AVX-NEXT: vpsubw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0 ; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [16384,4096,4096,4096,4096,2048,2048,1024] ; AVX-NEXT: retq ; ; XOP-LABEL: combine_vec_udiv_nonuniform3: ; XOP: # %bb.0: -; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [9363,25645,18351,12137,2115,23705,1041,517] ; XOP-NEXT: vpsubw %xmm1, %xmm0, %xmm0 ; XOP-NEXT: vpsrlw $1, %xmm0, %xmm0 ; XOP-NEXT: vpaddw %xmm1, %xmm0, %xmm0 @@ -687,7 +687,7 @@ define <8 x i16> @pr38477(<8 x i16> %a0) { ; SSE2-NEXT: pmulhuw %xmm0, %xmm1 ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: psubw %xmm1, %xmm2 -; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # 
[u,32768,0,0,0,0,0,32768] ; SSE2-NEXT: paddw %xmm1, %xmm2 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535,65535,65535,0,65535] ; SSE2-NEXT: pandn %xmm2, %xmm1 @@ -706,7 +706,7 @@ define <8 x i16> @pr38477(<8 x i16> %a0) { ; SSE41-NEXT: pmulhuw %xmm0, %xmm1 ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: psubw %xmm1, %xmm2 -; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE41-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [u,32768,0,0,0,0,0,32768] ; SSE41-NEXT: paddw %xmm1, %xmm2 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [u,1024,1024,16,4,1024,u,4096] ; SSE41-NEXT: pmulhuw %xmm2, %xmm1 @@ -716,20 +716,20 @@ define <8 x i16> @pr38477(<8 x i16> %a0) { ; ; AVX-LABEL: pr38477: ; AVX: # %bb.0: -; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [u,4957,57457,4103,16385,35545,2048,2115] ; AVX-NEXT: vpsubw %xmm1, %xmm0, %xmm2 -; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [u,32768,0,0,0,0,0,32768] ; AVX-NEXT: vpaddw %xmm1, %xmm2, %xmm1 -; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 +; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 # [u,1024,1024,16,4,1024,u,4096] ; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3,4,5],xmm1[6],xmm2[7] ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; AVX-NEXT: retq ; ; XOP-LABEL: pr38477: ; XOP: # %bb.0: -; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [u,4957,57457,4103,16385,35545,2048,2115] ; XOP-NEXT: vpsubw %xmm1, %xmm0, %xmm2 -; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; XOP-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [u,32768,0,0,0,0,0,32768] ; XOP-NEXT: vpaddw %xmm1, %xmm2, %xmm1 ; XOP-NEXT: vpshlw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; XOP-NEXT: 
vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] diff --git a/llvm/test/CodeGen/X86/dagcombine-shifts.ll b/llvm/test/CodeGen/X86/dagcombine-shifts.ll index 734abfe55a4ec4..345b2b9309f9a7 100644 --- a/llvm/test/CodeGen/X86/dagcombine-shifts.ll +++ b/llvm/test/CodeGen/X86/dagcombine-shifts.ll @@ -394,7 +394,7 @@ define <4 x i32> @shift_zext_shl_vec(<4 x i8> %x) nounwind { ; X64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; X64-NEXT: pxor %xmm1, %xmm1 ; X64-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; X64-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [512,256,128,64,u,u,u,u] ; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] ; X64-NEXT: retq %a = and <4 x i8> %x, diff --git a/llvm/test/CodeGen/X86/dpbusd_const.ll b/llvm/test/CodeGen/X86/dpbusd_const.ll index 5862e614265b1f..b124bd5165e119 100644 --- a/llvm/test/CodeGen/X86/dpbusd_const.ll +++ b/llvm/test/CodeGen/X86/dpbusd_const.ll @@ -7,7 +7,7 @@ define i32 @mul_4xi8_zc_exceed(<4 x i8> %a, i32 %c) { ; ALL-LABEL: mul_4xi8_zc_exceed: ; ALL: # %bb.0: # %entry ; ALL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; ALL-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; ALL-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,0,1,0,2,0,128,0] ; ALL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; ALL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; ALL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] @@ -148,7 +148,7 @@ define i32 @mul_4xi8_cs_exceed(<4 x i8> %a, i32 %c) { ; ALL-LABEL: mul_4xi8_cs_exceed: ; ALL: # %bb.0: # %entry ; ALL-NEXT: vpmovsxbd %xmm0, %xmm0 -; ALL-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; ALL-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # 
[0,0,1,0,2,0,256,0] ; ALL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; ALL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; ALL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] diff --git a/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll b/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll index 96b2e1ef982765..5a051a9c499e47 100644 --- a/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll +++ b/llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll @@ -1008,7 +1008,7 @@ define <2 x half> @fmul_pow_shl_cnt_vec_fail_to_large(<2 x i16> %cnt) nounwind { ; CHECK-SSE-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE-NEXT: cvttps2dq %xmm0, %xmm0 ; CHECK-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7] -; CHECK-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2,2,u,u,u,u,u,u] ; CHECK-SSE-NEXT: pxor %xmm0, %xmm0 ; CHECK-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] ; CHECK-SSE-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill diff --git a/llvm/test/CodeGen/X86/freeze-binary.ll b/llvm/test/CodeGen/X86/freeze-binary.ll index 1209e2633c0603..506a08808ff91d 100644 --- a/llvm/test/CodeGen/X86/freeze-binary.ll +++ b/llvm/test/CodeGen/X86/freeze-binary.ll @@ -329,12 +329,12 @@ define i32 @freeze_mul_nsw(i32 %a0) nounwind { define <8 x i16> @freeze_mul_vec(<8 x i16> %a0) nounwind { ; X86-LABEL: freeze_mul_vec: ; X86: # %bb.0: -; X86-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [4,6,6,4,4,6,6,4] ; X86-NEXT: retl ; ; X64-LABEL: freeze_mul_vec: ; X64: # %bb.0: -; X64-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [4,6,6,4,4,6,6,4] ; X64-NEXT: retq %x = mul <8 x i16> %a0, %y = freeze <8 x i16> %x @@ -345,14 +345,14 @@ define <8 x i16> @freeze_mul_vec(<8 x i16> %a0) nounwind { define <8 x i16> 
@freeze_mul_vec_undef(<8 x i16> %a0) nounwind { ; X86-LABEL: freeze_mul_vec_undef: ; X86: # %bb.0: -; X86-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [1,2,3,4,4,3,0,1] +; X86-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [4,3,2,1,1,2,u,4] ; X86-NEXT: retl ; ; X64-LABEL: freeze_mul_vec_undef: ; X64: # %bb.0: -; X64-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,3,4,4,3,0,1] +; X64-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [4,3,2,1,1,2,u,4] ; X64-NEXT: retq %x = mul <8 x i16> %a0, %y = freeze <8 x i16> %x diff --git a/llvm/test/CodeGen/X86/gfni-funnel-shifts.ll b/llvm/test/CodeGen/X86/gfni-funnel-shifts.ll index b3ca9fb04aeb71..5857ff1162ceb6 100644 --- a/llvm/test/CodeGen/X86/gfni-funnel-shifts.ll +++ b/llvm/test/CodeGen/X86/gfni-funnel-shifts.ll @@ -376,10 +376,10 @@ define <16 x i8> @constant_fshl_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; GFNISSE: # %bb.0: ; GFNISSE-NEXT: movdqa %xmm1, %xmm2 ; GFNISSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15] -; GFNISSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; GFNISSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [1,128,64,32,16,8,4,2] ; GFNISSE-NEXT: psrlw $8, %xmm2 ; GFNISSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; GFNISSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; GFNISSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,2,4,8,16,32,64,128] ; GFNISSE-NEXT: psrlw $8, %xmm1 ; GFNISSE-NEXT: packuswb %xmm2, %xmm1 ; GFNISSE-NEXT: movdqa %xmm1, %xmm0 @@ -388,10 +388,10 @@ 
define <16 x i8> @constant_fshl_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; GFNIAVX1OR2-LABEL: constant_fshl_v16i8: ; GFNIAVX1OR2: # %bb.0: ; GFNIAVX1OR2-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; GFNIAVX1OR2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; GFNIAVX1OR2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [1,128,64,32,16,8,4,2] ; GFNIAVX1OR2-NEXT: vpsrlw $8, %xmm2, %xmm2 ; GFNIAVX1OR2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; GFNIAVX1OR2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX1OR2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,4,8,16,32,64,128] ; GFNIAVX1OR2-NEXT: vpsrlw $8, %xmm0, %xmm0 ; GFNIAVX1OR2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 ; GFNIAVX1OR2-NEXT: retq @@ -399,10 +399,10 @@ define <16 x i8> @constant_fshl_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; GFNIAVX512VL-LABEL: constant_fshl_v16i8: ; GFNIAVX512VL: # %bb.0: ; GFNIAVX512VL-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [1,128,64,32,16,8,4,2] ; GFNIAVX512VL-NEXT: vpsrlw $8, %xmm2, %xmm2 ; GFNIAVX512VL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,4,8,16,32,64,128] ; GFNIAVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm0 ; 
GFNIAVX512VL-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 ; GFNIAVX512VL-NEXT: retq @@ -427,10 +427,10 @@ define <16 x i8> @constant_fshr_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; GFNISSE: # %bb.0: ; GFNISSE-NEXT: movdqa %xmm1, %xmm2 ; GFNISSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15] -; GFNISSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; GFNISSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [1,128,64,32,16,8,4,2] ; GFNISSE-NEXT: psrlw $8, %xmm2 ; GFNISSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; GFNISSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; GFNISSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,2,4,8,16,32,64,128] ; GFNISSE-NEXT: psrlw $8, %xmm1 ; GFNISSE-NEXT: packuswb %xmm2, %xmm1 ; GFNISSE-NEXT: movdqa %xmm1, %xmm0 @@ -439,10 +439,10 @@ define <16 x i8> @constant_fshr_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; GFNIAVX1OR2-LABEL: constant_fshr_v16i8: ; GFNIAVX1OR2: # %bb.0: ; GFNIAVX1OR2-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; GFNIAVX1OR2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; GFNIAVX1OR2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [1,128,64,32,16,8,4,2] ; GFNIAVX1OR2-NEXT: vpsrlw $8, %xmm2, %xmm2 ; GFNIAVX1OR2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; GFNIAVX1OR2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX1OR2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,4,8,16,32,64,128] ; GFNIAVX1OR2-NEXT: vpsrlw $8, %xmm0, %xmm0 ; GFNIAVX1OR2-NEXT: 
vpackuswb %xmm2, %xmm0, %xmm0 ; GFNIAVX1OR2-NEXT: retq @@ -450,10 +450,10 @@ define <16 x i8> @constant_fshr_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; GFNIAVX512VL-LABEL: constant_fshr_v16i8: ; GFNIAVX512VL: # %bb.0: ; GFNIAVX512VL-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [1,128,64,32,16,8,4,2] ; GFNIAVX512VL-NEXT: vpsrlw $8, %xmm2, %xmm2 ; GFNIAVX512VL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,4,8,16,32,64,128] ; GFNIAVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm0 ; GFNIAVX512VL-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 ; GFNIAVX512VL-NEXT: retq @@ -1168,10 +1168,10 @@ define <32 x i8> @constant_fshl_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind { ; GFNIAVX2-LABEL: constant_fshl_v32i8: ; GFNIAVX2: # %bb.0: ; GFNIAVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31] -; GFNIAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; GFNIAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2] ; GFNIAVX2-NEXT: vpsrlw $8, %ymm2, %ymm2 ; GFNIAVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = 
ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23] -; GFNIAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; GFNIAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] ; GFNIAVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 ; GFNIAVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 ; GFNIAVX2-NEXT: retq @@ -1179,10 +1179,10 @@ define <32 x i8> @constant_fshl_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind { ; GFNIAVX512VL-LABEL: constant_fshl_v32i8: ; GFNIAVX512VL: # %bb.0: ; GFNIAVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31] -; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2] ; GFNIAVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2 ; GFNIAVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23] -; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] ; GFNIAVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0 ; GFNIAVX512VL-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 ; GFNIAVX512VL-NEXT: retq @@ -1252,10 +1252,10 @@ define <32 x i8> 
@constant_fshr_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind { ; GFNIAVX2-LABEL: constant_fshr_v32i8: ; GFNIAVX2: # %bb.0: ; GFNIAVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31] -; GFNIAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; GFNIAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2] ; GFNIAVX2-NEXT: vpsrlw $8, %ymm2, %ymm2 ; GFNIAVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23] -; GFNIAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; GFNIAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] ; GFNIAVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 ; GFNIAVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 ; GFNIAVX2-NEXT: retq @@ -1263,10 +1263,10 @@ define <32 x i8> @constant_fshr_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind { ; GFNIAVX512VL-LABEL: constant_fshr_v32i8: ; GFNIAVX512VL: # %bb.0: ; GFNIAVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31] -; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2] ; 
GFNIAVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2 ; GFNIAVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23] -; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] ; GFNIAVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0 ; GFNIAVX512VL-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 ; GFNIAVX512VL-NEXT: retq diff --git a/llvm/test/CodeGen/X86/gfni-rotates.ll b/llvm/test/CodeGen/X86/gfni-rotates.ll index 9ddadca380fed6..cc077410228cb7 100644 --- a/llvm/test/CodeGen/X86/gfni-rotates.ll +++ b/llvm/test/CodeGen/X86/gfni-rotates.ll @@ -273,10 +273,10 @@ define <16 x i8> @constant_rotl_v16i8(<16 x i8> %a) nounwind { ; GFNISSE: # %bb.0: ; GFNISSE-NEXT: movdqa %xmm0, %xmm1 ; GFNISSE-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; GFNISSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; GFNISSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,128,64,32,16,8,4,2] ; GFNISSE-NEXT: psrlw $8, %xmm1 ; GFNISSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; GFNISSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; GFNISSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2,4,8,16,32,64,128] ; GFNISSE-NEXT: psrlw $8, %xmm0 ; GFNISSE-NEXT: packuswb %xmm1, %xmm0 ; GFNISSE-NEXT: retq @@ -284,10 +284,10 @@ define <16 x i8> @constant_rotl_v16i8(<16 x i8> %a) nounwind { ; GFNIAVX1OR2-LABEL: constant_rotl_v16i8: ; GFNIAVX1OR2: # %bb.0: ; GFNIAVX1OR2-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; GFNIAVX1OR2-NEXT: 
vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; GFNIAVX1OR2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [1,128,64,32,16,8,4,2] ; GFNIAVX1OR2-NEXT: vpsrlw $8, %xmm1, %xmm1 ; GFNIAVX1OR2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; GFNIAVX1OR2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX1OR2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,4,8,16,32,64,128] ; GFNIAVX1OR2-NEXT: vpsrlw $8, %xmm0, %xmm0 ; GFNIAVX1OR2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; GFNIAVX1OR2-NEXT: retq @@ -295,10 +295,10 @@ define <16 x i8> @constant_rotl_v16i8(<16 x i8> %a) nounwind { ; GFNIAVX512VL-LABEL: constant_rotl_v16i8: ; GFNIAVX512VL: # %bb.0: ; GFNIAVX512VL-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [1,128,64,32,16,8,4,2] ; GFNIAVX512VL-NEXT: vpsrlw $8, %xmm1, %xmm1 ; GFNIAVX512VL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,4,8,16,32,64,128] ; GFNIAVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm0 ; GFNIAVX512VL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; GFNIAVX512VL-NEXT: retq @@ -322,10 +322,10 @@ define <16 x i8> @constant_rotr_v16i8(<16 x i8> %a) nounwind { ; GFNISSE: # %bb.0: ; GFNISSE-NEXT: movdqa %xmm0, %xmm1 ; GFNISSE-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; GFNISSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; GFNISSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,128,64,32,16,8,4,2] ; GFNISSE-NEXT: psrlw $8, %xmm1 ; GFNISSE-NEXT: punpcklbw {{.*#+}} xmm0 = 
xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; GFNISSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; GFNISSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2,4,8,16,32,64,128] ; GFNISSE-NEXT: psrlw $8, %xmm0 ; GFNISSE-NEXT: packuswb %xmm1, %xmm0 ; GFNISSE-NEXT: retq @@ -333,10 +333,10 @@ define <16 x i8> @constant_rotr_v16i8(<16 x i8> %a) nounwind { ; GFNIAVX1OR2-LABEL: constant_rotr_v16i8: ; GFNIAVX1OR2: # %bb.0: ; GFNIAVX1OR2-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; GFNIAVX1OR2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; GFNIAVX1OR2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [1,128,64,32,16,8,4,2] ; GFNIAVX1OR2-NEXT: vpsrlw $8, %xmm1, %xmm1 ; GFNIAVX1OR2-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; GFNIAVX1OR2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX1OR2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,4,8,16,32,64,128] ; GFNIAVX1OR2-NEXT: vpsrlw $8, %xmm0, %xmm0 ; GFNIAVX1OR2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; GFNIAVX1OR2-NEXT: retq @@ -344,10 +344,10 @@ define <16 x i8> @constant_rotr_v16i8(<16 x i8> %a) nounwind { ; GFNIAVX512VL-LABEL: constant_rotr_v16i8: ; GFNIAVX512VL: # %bb.0: ; GFNIAVX512VL-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [1,128,64,32,16,8,4,2] ; GFNIAVX512VL-NEXT: vpsrlw $8, %xmm1, %xmm1 ; GFNIAVX512VL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,4,8,16,32,64,128] ; GFNIAVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm0 ; GFNIAVX512VL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; GFNIAVX512VL-NEXT: retq @@ -958,10 +958,10 @@ 
define <32 x i8> @constant_rotl_v32i8(<32 x i8> %a) nounwind { ; GFNIAVX2-LABEL: constant_rotl_v32i8: ; GFNIAVX2: # %bb.0: ; GFNIAVX2-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] -; GFNIAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; GFNIAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2] ; GFNIAVX2-NEXT: vpsrlw $8, %ymm1, %ymm1 ; GFNIAVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] -; GFNIAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; GFNIAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] ; GFNIAVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 ; GFNIAVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 ; GFNIAVX2-NEXT: retq @@ -969,10 +969,10 @@ define <32 x i8> @constant_rotl_v32i8(<32 x i8> %a) nounwind { ; GFNIAVX512VL-LABEL: constant_rotl_v32i8: ; GFNIAVX512VL: # %bb.0: ; GFNIAVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] -; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2] ; GFNIAVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1 ; GFNIAVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] -; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] ; GFNIAVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0 ; GFNIAVX512VL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 ; GFNIAVX512VL-NEXT: retq @@ -1039,10 +1039,10 @@ define <32 x i8> @constant_rotr_v32i8(<32 x i8> %a) nounwind 
{ ; GFNIAVX2-LABEL: constant_rotr_v32i8: ; GFNIAVX2: # %bb.0: ; GFNIAVX2-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] -; GFNIAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; GFNIAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2] ; GFNIAVX2-NEXT: vpsrlw $8, %ymm1, %ymm1 ; GFNIAVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] -; GFNIAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; GFNIAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] ; GFNIAVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 ; GFNIAVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 ; GFNIAVX2-NEXT: retq @@ -1050,10 +1050,10 @@ define <32 x i8> @constant_rotr_v32i8(<32 x i8> %a) nounwind { ; GFNIAVX512VL-LABEL: constant_rotr_v32i8: ; GFNIAVX512VL: # %bb.0: ; GFNIAVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] -; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2] ; GFNIAVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1 ; GFNIAVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] -; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] ; GFNIAVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0 ; GFNIAVX512VL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 ; GFNIAVX512VL-NEXT: retq diff --git a/llvm/test/CodeGen/X86/gfni-shifts.ll b/llvm/test/CodeGen/X86/gfni-shifts.ll index 6232488bea71b4..dab0432889cf9f 100644 --- 
a/llvm/test/CodeGen/X86/gfni-shifts.ll +++ b/llvm/test/CodeGen/X86/gfni-shifts.ll @@ -387,10 +387,10 @@ define <16 x i8> @constant_shl_v16i8(<16 x i8> %a) nounwind { ; GFNISSE: # %bb.0: ; GFNISSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; GFNISSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; GFNISSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; GFNISSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [128,64,32,16,8,4,2,1] ; GFNISSE-NEXT: pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] ; GFNISSE-NEXT: pand %xmm2, %xmm0 -; GFNISSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; GFNISSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,2,4,8,16,32,64,128] ; GFNISSE-NEXT: pand %xmm2, %xmm1 ; GFNISSE-NEXT: packuswb %xmm0, %xmm1 ; GFNISSE-NEXT: movdqa %xmm1, %xmm0 @@ -399,11 +399,11 @@ define <16 x i8> @constant_shl_v16i8(<16 x i8> %a) nounwind { ; GFNIAVX1-LABEL: constant_shl_v16i8: ; GFNIAVX1: # %bb.0: ; GFNIAVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; GFNIAVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; GFNIAVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [128,64,32,16,8,4,2,1] ; GFNIAVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] ; GFNIAVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 ; GFNIAVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; GFNIAVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,4,8,16,32,64,128] ; GFNIAVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 ; GFNIAVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; GFNIAVX1-NEXT: retq @@ -411,7 +411,7 @@ define <16 x i8> @constant_shl_v16i8(<16 x i8> %a) nounwind { ; GFNIAVX2-LABEL: 
constant_shl_v16i8: ; GFNIAVX2: # %bb.0: ; GFNIAVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; GFNIAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; GFNIAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,128,64,32,16,8,4,2,1] ; GFNIAVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; GFNIAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; GFNIAVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 @@ -443,9 +443,9 @@ define <16 x i8> @constant_lshr_v16i8(<16 x i8> %a) nounwind { ; GFNISSE-NEXT: pxor %xmm2, %xmm2 ; GFNISSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; GFNISSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15] -; GFNISSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; GFNISSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2,4,8,16,32,64,128,256] ; GFNISSE-NEXT: psrlw $8, %xmm0 -; GFNISSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; GFNISSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [256,128,64,32,16,8,4,2] ; GFNISSE-NEXT: psrlw $8, %xmm1 ; GFNISSE-NEXT: packuswb %xmm0, %xmm1 ; GFNISSE-NEXT: movdqa %xmm1, %xmm0 @@ -455,10 +455,10 @@ define <16 x i8> @constant_lshr_v16i8(<16 x i8> %a) nounwind { ; GFNIAVX1: # %bb.0: ; GFNIAVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; GFNIAVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] -; GFNIAVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; GFNIAVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), 
%xmm1, %xmm1 # [2,4,8,16,32,64,128,256] ; GFNIAVX1-NEXT: vpsrlw $8, %xmm1, %xmm1 ; GFNIAVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; GFNIAVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [256,128,64,32,16,8,4,2] ; GFNIAVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 ; GFNIAVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; GFNIAVX1-NEXT: retq @@ -466,7 +466,7 @@ define <16 x i8> @constant_lshr_v16i8(<16 x i8> %a) nounwind { ; GFNIAVX2-LABEL: constant_lshr_v16i8: ; GFNIAVX2: # %bb.0: ; GFNIAVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; GFNIAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; GFNIAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [256,128,64,32,16,8,4,2,2,4,8,16,32,64,128,256] ; GFNIAVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 ; GFNIAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; GFNIAVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 @@ -498,11 +498,11 @@ define <16 x i8> @constant_ashr_v16i8(<16 x i8> %a) nounwind { ; GFNISSE-NEXT: movdqa %xmm0, %xmm1 ; GFNISSE-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] ; GFNISSE-NEXT: psraw $8, %xmm1 -; GFNISSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; GFNISSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2,4,8,16,32,64,128,256] ; GFNISSE-NEXT: psrlw $8, %xmm1 ; GFNISSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; GFNISSE-NEXT: psraw $8, %xmm0 -; GFNISSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; GFNISSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # 
[256,128,64,32,16,8,4,2] ; GFNISSE-NEXT: psrlw $8, %xmm0 ; GFNISSE-NEXT: packuswb %xmm1, %xmm0 ; GFNISSE-NEXT: retq @@ -511,11 +511,11 @@ define <16 x i8> @constant_ashr_v16i8(<16 x i8> %a) nounwind { ; GFNIAVX1: # %bb.0: ; GFNIAVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; GFNIAVX1-NEXT: vpsraw $8, %xmm1, %xmm1 -; GFNIAVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; GFNIAVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2,4,8,16,32,64,128,256] ; GFNIAVX1-NEXT: vpsrlw $8, %xmm1, %xmm1 ; GFNIAVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; GFNIAVX1-NEXT: vpsraw $8, %xmm0, %xmm0 -; GFNIAVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; GFNIAVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [256,128,64,32,16,8,4,2] ; GFNIAVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 ; GFNIAVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; GFNIAVX1-NEXT: retq @@ -523,7 +523,7 @@ define <16 x i8> @constant_ashr_v16i8(<16 x i8> %a) nounwind { ; GFNIAVX2-LABEL: constant_ashr_v16i8: ; GFNIAVX2: # %bb.0: ; GFNIAVX2-NEXT: vpmovsxbw %xmm0, %ymm0 -; GFNIAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; GFNIAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [256,128,64,32,16,8,4,2,2,4,8,16,32,64,128,256] ; GFNIAVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 ; GFNIAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; GFNIAVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 @@ -1271,11 +1271,11 @@ define <32 x i8> @constant_shl_v32i8(<32 x i8> %a) nounwind { ; GFNIAVX2-LABEL: constant_shl_v32i8: ; GFNIAVX2: # %bb.0: ; GFNIAVX2-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] -; GFNIAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; GFNIAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1] ; GFNIAVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = 
[255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] ; GFNIAVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 ; GFNIAVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] -; GFNIAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; GFNIAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] ; GFNIAVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 ; GFNIAVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 ; GFNIAVX2-NEXT: retq @@ -1283,11 +1283,11 @@ define <32 x i8> @constant_shl_v32i8(<32 x i8> %a) nounwind { ; GFNIAVX512VL-LABEL: constant_shl_v32i8: ; GFNIAVX512VL: # %bb.0: ; GFNIAVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] -; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1] ; GFNIAVX512VL-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] ; GFNIAVX512VL-NEXT: vpand %ymm2, %ymm1, %ymm1 ; GFNIAVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] -; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] ; GFNIAVX512VL-NEXT: vpand %ymm2, %ymm0, %ymm0 ; GFNIAVX512VL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 ; GFNIAVX512VL-NEXT: retq @@ -1353,10 +1353,10 @@ define <32 x i8> @constant_lshr_v32i8(<32 x i8> %a) nounwind { ; GFNIAVX2: # %bb.0: ; GFNIAVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; GFNIAVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = 
ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] -; GFNIAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; GFNIAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [2,4,8,16,32,64,128,256,2,4,8,16,32,64,128,256] ; GFNIAVX2-NEXT: vpsrlw $8, %ymm2, %ymm2 ; GFNIAVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] -; GFNIAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; GFNIAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [256,128,64,32,16,8,4,2,256,128,64,32,16,8,4,2] ; GFNIAVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 ; GFNIAVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 ; GFNIAVX2-NEXT: retq @@ -1365,10 +1365,10 @@ define <32 x i8> @constant_lshr_v32i8(<32 x i8> %a) nounwind { ; GFNIAVX512VL: # %bb.0: ; GFNIAVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; GFNIAVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] -; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [2,4,8,16,32,64,128,256,2,4,8,16,32,64,128,256] ; GFNIAVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2 ; GFNIAVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm0 = 
ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] -; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [256,128,64,32,16,8,4,2,256,128,64,32,16,8,4,2] ; GFNIAVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0 ; GFNIAVX512VL-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 ; GFNIAVX512VL-NEXT: retq @@ -1440,11 +1440,11 @@ define <32 x i8> @constant_ashr_v32i8(<32 x i8> %a) nounwind { ; GFNIAVX2: # %bb.0: ; GFNIAVX2-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] ; GFNIAVX2-NEXT: vpsraw $8, %ymm1, %ymm1 -; GFNIAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; GFNIAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [2,4,8,16,32,64,128,256,2,4,8,16,32,64,128,256] ; GFNIAVX2-NEXT: vpsrlw $8, %ymm1, %ymm1 ; GFNIAVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] ; GFNIAVX2-NEXT: vpsraw $8, %ymm0, %ymm0 -; GFNIAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; GFNIAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [256,128,64,32,16,8,4,2,256,128,64,32,16,8,4,2] ; GFNIAVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 ; GFNIAVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 ; GFNIAVX2-NEXT: retq @@ -1453,11 +1453,11 @@ define <32 x i8> @constant_ashr_v32i8(<32 x i8> %a) nounwind { ; GFNIAVX512VL: # %bb.0: ; GFNIAVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] ; GFNIAVX512VL-NEXT: vpsraw $8, %ymm1, %ymm1 -; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; GFNIAVX512VL-NEXT: vpmullw 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [2,4,8,16,32,64,128,256,2,4,8,16,32,64,128,256] ; GFNIAVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1 ; GFNIAVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] ; GFNIAVX512VL-NEXT: vpsraw $8, %ymm0, %ymm0 -; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; GFNIAVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [256,128,64,32,16,8,4,2,256,128,64,32,16,8,4,2] ; GFNIAVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0 ; GFNIAVX512VL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 ; GFNIAVX512VL-NEXT: retq diff --git a/llvm/test/CodeGen/X86/known-never-zero.ll b/llvm/test/CodeGen/X86/known-never-zero.ll index f0504e7dbdb652..df11a44626e381 100644 --- a/llvm/test/CodeGen/X86/known-never-zero.ll +++ b/llvm/test/CodeGen/X86/known-never-zero.ll @@ -1466,7 +1466,7 @@ define i32 @bitcast_known_nonzero(<2 x i16> %xx) { ; X86-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 ; X86-NEXT: cvttps2dq %xmm0, %xmm0 ; X86-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] -; X86-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [256,256,u,u,u,u,u,u] ; X86-NEXT: movd %xmm0, %eax ; X86-NEXT: bsfl %eax, %ecx ; X86-NEXT: movl $32, %eax @@ -1480,7 +1480,7 @@ define i32 @bitcast_known_nonzero(<2 x i16> %xx) { ; X64-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; X64-NEXT: vcvttps2dq %xmm0, %xmm0 ; X64-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 -; X64-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [256,256,u,u,u,u,u,u] ; X64-NEXT: vmovd %xmm0, %eax ; X64-NEXT: bsfl %eax, %ecx ; X64-NEXT: movl $32, %eax diff --git a/llvm/test/CodeGen/X86/lower-vec-shift.ll b/llvm/test/CodeGen/X86/lower-vec-shift.ll index 832655c79d17c8..67e0c1b3cf2b3e 100644 --- a/llvm/test/CodeGen/X86/lower-vec-shift.ll +++ b/llvm/test/CodeGen/X86/lower-vec-shift.ll 
@@ -258,8 +258,8 @@ define <8 x i32> @test10(ptr %a) { define <16 x i16> @test11(<16 x i16> %a) { ; SSE-LABEL: test11: ; SSE: # %bb.0: -; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2,8,2,2,2,8,8,8] +; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [8,8,8,2,2,2,8,2] ; SSE-NEXT: retq ; ; AVX1-LABEL: test11: @@ -276,7 +276,7 @@ define <16 x i16> @test11(<16 x i16> %a) { ; ; AVX2-LABEL: test11: ; AVX2: # %bb.0: -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [2,8,2,2,2,8,8,8,8,8,8,2,2,2,8,2] ; AVX2-NEXT: retq %lshr = shl <16 x i16> %a, ret <16 x i16> %lshr diff --git a/llvm/test/CodeGen/X86/madd.ll b/llvm/test/CodeGen/X86/madd.ll index df4155845f37bf..f3b117c626c8cd 100644 --- a/llvm/test/CodeGen/X86/madd.ll +++ b/llvm/test/CodeGen/X86/madd.ll @@ -1988,12 +1988,12 @@ define <16 x i32> @pmaddwd_32(<32 x i16> %A, <32 x i16> %B) { define <4 x i32> @pmaddwd_const(<8 x i16> %A) { ; SSE2-LABEL: pmaddwd_const: ; SSE2: # %bb.0: -; SSE2-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [32767,32768,0,0,1,7,42,32] ; SSE2-NEXT: retq ; ; AVX-LABEL: pmaddwd_const: ; AVX: # %bb.0: -; AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [32767,32768,0,0,1,7,42,32] ; AVX-NEXT: retq %a = sext <8 x i16> %A to <8 x i32> %m = mul nsw <8 x i32> %a, @@ -2059,7 +2059,7 @@ define <4 x i32> @pmaddwd_negative2(<8 x i16> %A) { ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] ; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] -; SSE2-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # 
[1,0,7,0,42,0,32,0] ; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[0,2] ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[1,3] diff --git a/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll b/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll index 6d66ef7c589575..886c3ae10324de 100644 --- a/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll +++ b/llvm/test/CodeGen/X86/omit-urem-of-power-of-two-or-zero-when-comparing-with-zero.ll @@ -239,7 +239,7 @@ define <4 x i1> @p7_vector_urem_by_const__nonsplat_undef2(<4 x i32> %x, <4 x i32 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3] ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] ; SSE2-NEXT: psrld $2, %xmm2 -; SSE2-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE2-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [6,0,6,0,6,0,6,0] ; SSE2-NEXT: psubd %xmm2, %xmm0 ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 @@ -255,7 +255,7 @@ define <4 x i1> @p7_vector_urem_by_const__nonsplat_undef2(<4 x i32> %x, <4 x i32 ; SSE4-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] ; SSE4-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7] ; SSE4-NEXT: psrld $2, %xmm2 -; SSE4-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE4-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [6,0,6,0,6,0,6,0] ; SSE4-NEXT: psubd %xmm2, %xmm0 ; SSE4-NEXT: pxor %xmm1, %xmm1 ; SSE4-NEXT: pcmpeqd %xmm1, %xmm0 @@ -272,7 +272,7 @@ define <4 x i1> @p7_vector_urem_by_const__nonsplat_undef2(<4 x i32> %x, <4 x i32 ; AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3] ; AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3] ; AVX2-NEXT: vpsrld $2, %xmm1, %xmm1 -; AVX2-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX2-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [6,0,6,0,6,0,6,0] ; 
AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/pmul.ll b/llvm/test/CodeGen/X86/pmul.ll index 1f82c4a5a2d92b..7715188642dd27 100644 --- a/llvm/test/CodeGen/X86/pmul.ll +++ b/llvm/test/CodeGen/X86/pmul.ll @@ -37,7 +37,7 @@ define <16 x i8> @mul_v16i8c(<16 x i8> %i) nounwind { ; AVX2-LABEL: mul_v16i8c: ; AVX2: # %bb.0: # %entry ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117] ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 @@ -47,7 +47,7 @@ define <16 x i8> @mul_v16i8c(<16 x i8> %i) nounwind { ; AVX512F-LABEL: mul_v16i8c: ; AVX512F: # %bb.0: # %entry ; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117] ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512F-NEXT: vzeroupper @@ -56,7 +56,7 @@ define <16 x i8> 
@mul_v16i8c(<16 x i8> %i) nounwind { ; AVX512BW-LABEL: mul_v16i8c: ; AVX512BW: # %bb.0: # %entry ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117] ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512BW-NEXT: vzeroupper @@ -69,12 +69,12 @@ entry: define <8 x i16> @mul_v8i16c(<8 x i16> %i) nounwind { ; SSE-LABEL: mul_v8i16c: ; SSE: # %bb.0: # %entry -; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [117,117,117,117,117,117,117,117] ; SSE-NEXT: retq ; ; AVX-LABEL: mul_v8i16c: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [117,117,117,117,117,117,117,117] ; AVX-NEXT: retq entry: %A = mul <8 x i16> %i, < i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117 > @@ -454,7 +454,7 @@ define <32 x i8> @mul_v32i8c(<32 x i8> %i) nounwind { ; AVX512BW-LABEL: mul_v32i8c: ; AVX512BW: # %bb.0: # %entry ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero -; AVX512BW-NEXT: vpmullw 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117] ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512BW-NEXT: retq entry: @@ -479,7 +479,7 @@ define <16 x i16> @mul_v16i16c(<16 x i16> %i) nounwind { ; ; AVX-LABEL: mul_v16i16c: ; AVX: # %bb.0: # %entry -; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [117,117,117,117,117,117,117,117,117,117,117,117,117,117,117,117] ; AVX-NEXT: retq entry: %A = mul <16 x i16> %i, < i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117, i16 117 > diff --git a/llvm/test/CodeGen/X86/prefer-avx256-wide-mul.ll b/llvm/test/CodeGen/X86/prefer-avx256-wide-mul.ll index f627560f9f3823..c023f129d7565a 100644 --- a/llvm/test/CodeGen/X86/prefer-avx256-wide-mul.ll +++ b/llvm/test/CodeGen/X86/prefer-avx256-wide-mul.ll @@ -27,7 +27,7 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) { ; AVX512BWVL-LABEL: test_div7_32i8: ; AVX512BWVL: # %bb.0: ; AVX512BWVL-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero -; AVX512BWVL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512BWVL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 # [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37] ; AVX512BWVL-NEXT: vpsrlw $8, %zmm1, 
%zmm1 ; AVX512BWVL-NEXT: vpmovwb %zmm1, %ymm1 ; AVX512BWVL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 @@ -41,7 +41,7 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) { ; AVX512BW-LABEL: test_div7_32i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 # [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37] ; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1 ; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1 ; AVX512BW-NEXT: vpsubb %ymm1, %ymm0, %ymm0 diff --git a/llvm/test/CodeGen/X86/rotate-extract-vector.ll b/llvm/test/CodeGen/X86/rotate-extract-vector.ll index 4c5a3c12fa3851..1ead3f98ab5d6c 100644 --- a/llvm/test/CodeGen/X86/rotate-extract-vector.ll +++ b/llvm/test/CodeGen/X86/rotate-extract-vector.ll @@ -127,7 +127,7 @@ define <4 x i32> @vrolw_extract_mul_with_mask(<4 x i32> %i) nounwind { define <32 x i16> @illegal_no_extract_mul(<32 x i16> %i) nounwind { ; X86-LABEL: illegal_no_extract_mul: ; X86: # %bb.0: -; X86-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm0 +; X86-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm0 # [10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10] ; X86-NEXT: vpsrlw $10, %zmm0, %zmm1 ; X86-NEXT: vpsllw $6, %zmm0, %zmm0 ; X86-NEXT: vporq %zmm1, %zmm0, %zmm0 @@ -135,7 +135,7 @@ define <32 x i16> @illegal_no_extract_mul(<32 x i16> %i) nounwind { ; ; X64-LABEL: illegal_no_extract_mul: ; X64: # 
%bb.0: -; X64-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 +; X64-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10] ; X64-NEXT: vpsrlw $10, %zmm0, %zmm1 ; X64-NEXT: vpsllw $6, %zmm0, %zmm0 ; X64-NEXT: vporq %zmm1, %zmm0, %zmm0 diff --git a/llvm/test/CodeGen/X86/shrink_vmul.ll b/llvm/test/CodeGen/X86/shrink_vmul.ll index f3f7f0515e3060..e53eed45877975 100644 --- a/llvm/test/CodeGen/X86/shrink_vmul.ll +++ b/llvm/test/CodeGen/X86/shrink_vmul.ll @@ -1365,7 +1365,7 @@ define void @mul_2xi8_varconst1(ptr nocapture readonly %a, i64 %index) { ; X86-SSE-NEXT: pxor %xmm1, %xmm1 ; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] ; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; X86-SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [0,0,255,0,u,u,u,u] ; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4) ; X86-SSE-NEXT: retl ; @@ -1377,7 +1377,7 @@ define void @mul_2xi8_varconst1(ptr nocapture readonly %a, i64 %index) { ; X86-AVX-NEXT: movzwl (%ecx,%eax), %ecx ; X86-AVX-NEXT: vmovd %ecx, %xmm0 ; X86-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; X86-AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # [0,0,255,0,u,u,u,u] ; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4) ; X86-AVX-NEXT: retl ; @@ -1389,7 +1389,7 @@ define void @mul_2xi8_varconst1(ptr nocapture readonly %a, i64 %index) { ; X64-SSE-NEXT: pxor %xmm1, %xmm1 ; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] ; 
X64-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; X64-SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,0,255,0,u,u,u,u] ; X64-SSE-NEXT: movq %xmm0, (%rax,%rsi,4) ; X64-SSE-NEXT: retq ; @@ -1399,7 +1399,7 @@ define void @mul_2xi8_varconst1(ptr nocapture readonly %a, i64 %index) { ; X64-AVX-NEXT: movzwl (%rdi,%rsi), %ecx ; X64-AVX-NEXT: vmovd %ecx, %xmm0 ; X64-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; X64-AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,0,255,0,u,u,u,u] ; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4) ; X64-AVX-NEXT: retq entry: @@ -1429,7 +1429,7 @@ define void @mul_2xi8_varconst2(ptr nocapture readonly %a, i64 %index) { ; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; X86-SSE-NEXT: psrad $24, %xmm0 -; X86-SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [65408,0,127,0,u,u,u,u] ; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4) ; X86-SSE-NEXT: retl ; @@ -1441,7 +1441,7 @@ define void @mul_2xi8_varconst2(ptr nocapture readonly %a, i64 %index) { ; X86-AVX-NEXT: movzwl (%ecx,%eax), %ecx ; X86-AVX-NEXT: vmovd %ecx, %xmm0 ; X86-AVX-NEXT: vpmovsxbd %xmm0, %xmm0 -; X86-AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # [65408,0,127,0,u,u,u,u] ; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4) ; X86-AVX-NEXT: retl ; @@ -1453,7 +1453,7 @@ define void @mul_2xi8_varconst2(ptr nocapture readonly %a, i64 %index) { ; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = 
xmm0[0,0,1,1,2,2,3,3] ; X64-SSE-NEXT: psrad $24, %xmm0 -; X64-SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [65408,0,127,0,u,u,u,u] ; X64-SSE-NEXT: movq %xmm0, (%rax,%rsi,4) ; X64-SSE-NEXT: retq ; @@ -1463,7 +1463,7 @@ define void @mul_2xi8_varconst2(ptr nocapture readonly %a, i64 %index) { ; X64-AVX-NEXT: movzwl (%rdi,%rsi), %ecx ; X64-AVX-NEXT: vmovd %ecx, %xmm0 ; X64-AVX-NEXT: vpmovsxbd %xmm0, %xmm0 -; X64-AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [65408,0,127,0,u,u,u,u] ; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4) ; X64-AVX-NEXT: retq entry: @@ -1493,7 +1493,7 @@ define void @mul_2xi8_varconst3(ptr nocapture readonly %a, i64 %index) { ; X86-SSE-NEXT: pxor %xmm1, %xmm1 ; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] ; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; X86-SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [0,0,256,0,u,u,u,u] ; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4) ; X86-SSE-NEXT: retl ; @@ -1505,7 +1505,7 @@ define void @mul_2xi8_varconst3(ptr nocapture readonly %a, i64 %index) { ; X86-AVX-NEXT: movzwl (%ecx,%eax), %ecx ; X86-AVX-NEXT: vmovd %ecx, %xmm0 ; X86-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; X86-AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # [0,0,256,0,u,u,u,u] ; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4) ; X86-AVX-NEXT: retl ; @@ -1517,7 +1517,7 @@ define void @mul_2xi8_varconst3(ptr nocapture readonly %a, i64 %index) { ; X64-SSE-NEXT: pxor %xmm1, %xmm1 ; X64-SSE-NEXT: 
punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] ; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; X64-SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,0,256,0,u,u,u,u] ; X64-SSE-NEXT: movq %xmm0, (%rax,%rsi,4) ; X64-SSE-NEXT: retq ; @@ -1527,7 +1527,7 @@ define void @mul_2xi8_varconst3(ptr nocapture readonly %a, i64 %index) { ; X64-AVX-NEXT: movzwl (%rdi,%rsi), %ecx ; X64-AVX-NEXT: vmovd %ecx, %xmm0 ; X64-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; X64-AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,0,256,0,u,u,u,u] ; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4) ; X64-AVX-NEXT: retq entry: @@ -1557,7 +1557,7 @@ define void @mul_2xi8_varconst4(ptr nocapture readonly %a, i64 %index) { ; X86-SSE-NEXT: pxor %xmm1, %xmm1 ; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] ; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; X86-SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [65535,0,255,0,u,u,u,u] ; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4) ; X86-SSE-NEXT: retl ; @@ -1569,7 +1569,7 @@ define void @mul_2xi8_varconst4(ptr nocapture readonly %a, i64 %index) { ; X86-AVX-NEXT: movzwl (%ecx,%eax), %ecx ; X86-AVX-NEXT: vmovd %ecx, %xmm0 ; X86-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; X86-AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; 
X86-AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # [65535,0,255,0,u,u,u,u] ; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4) ; X86-AVX-NEXT: retl ; @@ -1581,7 +1581,7 @@ define void @mul_2xi8_varconst4(ptr nocapture readonly %a, i64 %index) { ; X64-SSE-NEXT: pxor %xmm1, %xmm1 ; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] ; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; X64-SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [65535,0,255,0,u,u,u,u] ; X64-SSE-NEXT: movq %xmm0, (%rax,%rsi,4) ; X64-SSE-NEXT: retq ; @@ -1591,7 +1591,7 @@ define void @mul_2xi8_varconst4(ptr nocapture readonly %a, i64 %index) { ; X64-AVX-NEXT: movzwl (%rdi,%rsi), %ecx ; X64-AVX-NEXT: vmovd %ecx, %xmm0 ; X64-AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; X64-AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [65535,0,255,0,u,u,u,u] ; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4) ; X64-AVX-NEXT: retq entry: @@ -1621,7 +1621,7 @@ define void @mul_2xi8_varconst5(ptr nocapture readonly %a, i64 %index) { ; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; X86-SSE-NEXT: psrad $24, %xmm0 -; X86-SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [65407,0,127,0,u,u,u,u] ; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4) ; X86-SSE-NEXT: retl ; @@ -1633,7 +1633,7 @@ define void @mul_2xi8_varconst5(ptr nocapture readonly %a, i64 %index) { ; X86-AVX-NEXT: movzwl (%ecx,%eax), %ecx ; X86-AVX-NEXT: vmovd %ecx, %xmm0 ; X86-AVX-NEXT: vpmovsxbd %xmm0, 
%xmm0 -; X86-AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # [65407,0,127,0,u,u,u,u] ; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4) ; X86-AVX-NEXT: retl ; @@ -1645,7 +1645,7 @@ define void @mul_2xi8_varconst5(ptr nocapture readonly %a, i64 %index) { ; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; X64-SSE-NEXT: psrad $24, %xmm0 -; X64-SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [65407,0,127,0,u,u,u,u] ; X64-SSE-NEXT: movq %xmm0, (%rax,%rsi,4) ; X64-SSE-NEXT: retq ; @@ -1655,7 +1655,7 @@ define void @mul_2xi8_varconst5(ptr nocapture readonly %a, i64 %index) { ; X64-AVX-NEXT: movzwl (%rdi,%rsi), %ecx ; X64-AVX-NEXT: vmovd %ecx, %xmm0 ; X64-AVX-NEXT: vpmovsxbd %xmm0, %xmm0 -; X64-AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [65407,0,127,0,u,u,u,u] ; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4) ; X64-AVX-NEXT: retq entry: @@ -1685,7 +1685,7 @@ define void @mul_2xi8_varconst6(ptr nocapture readonly %a, i64 %index) { ; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; X86-SSE-NEXT: psrad $24, %xmm0 -; X86-SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [65408,0,128,0,u,u,u,u] ; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4) ; X86-SSE-NEXT: retl ; @@ -1697,7 +1697,7 @@ define void @mul_2xi8_varconst6(ptr nocapture readonly %a, i64 %index) { ; X86-AVX-NEXT: movzwl (%ecx,%eax), %ecx ; X86-AVX-NEXT: vmovd %ecx, %xmm0 ; X86-AVX-NEXT: vpmovsxbd %xmm0, %xmm0 -; X86-AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # 
[65408,0,128,0,u,u,u,u] ; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4) ; X86-AVX-NEXT: retl ; @@ -1709,7 +1709,7 @@ define void @mul_2xi8_varconst6(ptr nocapture readonly %a, i64 %index) { ; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; X64-SSE-NEXT: psrad $24, %xmm0 -; X64-SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [65408,0,128,0,u,u,u,u] ; X64-SSE-NEXT: movq %xmm0, (%rax,%rsi,4) ; X64-SSE-NEXT: retq ; @@ -1719,7 +1719,7 @@ define void @mul_2xi8_varconst6(ptr nocapture readonly %a, i64 %index) { ; X64-AVX-NEXT: movzwl (%rdi,%rsi), %ecx ; X64-AVX-NEXT: vmovd %ecx, %xmm0 ; X64-AVX-NEXT: vpmovsxbd %xmm0, %xmm0 -; X64-AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [65408,0,128,0,u,u,u,u] ; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4) ; X64-AVX-NEXT: retq entry: @@ -1808,7 +1808,7 @@ define void @mul_2xi16_varconst2(ptr nocapture readonly %a, i64 %index) { ; X86-SSE-NEXT: movl c, %edx ; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X86-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,3,4,5,6,7] -; X86-SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [32768,0,32767,0,u,u,u,u] ; X86-SSE-NEXT: movq %xmm0, (%edx,%eax,4) ; X86-SSE-NEXT: retl ; @@ -1819,7 +1819,7 @@ define void @mul_2xi16_varconst2(ptr nocapture readonly %a, i64 %index) { ; X86-AVX-NEXT: movl c, %edx ; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X86-AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; X86-AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # [32768,0,32767,0,u,u,u,u] ; X86-AVX-NEXT: vmovq %xmm0, (%edx,%eax,4) ; X86-AVX-NEXT: retl ; @@ -1828,7 
+1828,7 @@ define void @mul_2xi16_varconst2(ptr nocapture readonly %a, i64 %index) { ; X64-SSE-NEXT: movq c(%rip), %rax ; X64-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X64-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,3,4,5,6,7] -; X64-SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [32768,0,32767,0,u,u,u,u] ; X64-SSE-NEXT: movq %xmm0, (%rax,%rsi,4) ; X64-SSE-NEXT: retq ; @@ -1837,7 +1837,7 @@ define void @mul_2xi16_varconst2(ptr nocapture readonly %a, i64 %index) { ; X64-AVX-NEXT: movq c(%rip), %rax ; X64-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X64-AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; X64-AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [32768,0,32767,0,u,u,u,u] ; X64-AVX-NEXT: vmovq %xmm0, (%rax,%rsi,4) ; X64-AVX-NEXT: retq entry: diff --git a/llvm/test/CodeGen/X86/slow-pmulld.ll b/llvm/test/CodeGen/X86/slow-pmulld.ll index b8d3527070b877..59aa96520070e5 100644 --- a/llvm/test/CodeGen/X86/slow-pmulld.ll +++ b/llvm/test/CodeGen/X86/slow-pmulld.ll @@ -23,61 +23,61 @@ define <4 x i32> @test_mul_v4i32_v4i8(<4 x i8> %A) { ; SSE-32-LABEL: test_mul_v4i32_v4i8: ; SSE-32: # %bb.0: ; SSE-32-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; SSE-32-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; SSE-32-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [18778,0,18778,0,18778,0,18778,0] ; SSE-32-NEXT: retl ; ; SSE-64-LABEL: test_mul_v4i32_v4i8: ; SSE-64: # %bb.0: ; SSE-64-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; SSE-64-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-64-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [18778,0,18778,0,18778,0,18778,0] ; SSE-64-NEXT: 
retq ; ; AVX2-SLOW32-LABEL: test_mul_v4i32_v4i8: ; AVX2-SLOW32: # %bb.0: ; AVX2-SLOW32-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; AVX2-SLOW32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; AVX2-SLOW32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # [18778,0,18778,0,18778,0,18778,0] ; AVX2-SLOW32-NEXT: retl ; ; AVX2-SLOW64-LABEL: test_mul_v4i32_v4i8: ; AVX2-SLOW64: # %bb.0: ; AVX2-SLOW64-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; AVX2-SLOW64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-SLOW64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [18778,0,18778,0,18778,0,18778,0] ; AVX2-SLOW64-NEXT: retq ; ; AVX2-32-LABEL: test_mul_v4i32_v4i8: ; AVX2-32: # %bb.0: ; AVX2-32-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; AVX2-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; AVX2-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # [18778,0,18778,0,18778,0,18778,0] ; AVX2-32-NEXT: retl ; ; AVX2-64-LABEL: test_mul_v4i32_v4i8: ; AVX2-64: # %bb.0: ; AVX2-64-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; AVX2-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [18778,0,18778,0,18778,0,18778,0] ; AVX2-64-NEXT: retq ; ; AVX512DQ-32-LABEL: test_mul_v4i32_v4i8: ; AVX512DQ-32: # %bb.0: ; AVX512DQ-32-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; AVX512DQ-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; AVX512DQ-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # [18778,0,18778,0,18778,0,18778,0] ; 
AVX512DQ-32-NEXT: retl ; ; AVX512DQ-64-LABEL: test_mul_v4i32_v4i8: ; AVX512DQ-64: # %bb.0: ; AVX512DQ-64-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; AVX512DQ-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512DQ-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [18778,0,18778,0,18778,0,18778,0] ; AVX512DQ-64-NEXT: retq ; ; AVX512BW-32-LABEL: test_mul_v4i32_v4i8: ; AVX512BW-32: # %bb.0: ; AVX512BW-32-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; AVX512BW-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; AVX512BW-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # [18778,0,18778,0,18778,0,18778,0] ; AVX512BW-32-NEXT: retl ; ; AVX512BW-64-LABEL: test_mul_v4i32_v4i8: ; AVX512BW-64: # %bb.0: ; AVX512BW-64-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; AVX512BW-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512BW-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [18778,0,18778,0,18778,0,18778,0] ; AVX512BW-64-NEXT: retq ; ; KNL-32-LABEL: test_mul_v4i32_v4i8: @@ -132,49 +132,49 @@ define <8 x i32> @test_mul_v8i32_v8i8(<8 x i8> %A) { ; AVX2-SLOW32-LABEL: test_mul_v8i32_v8i8: ; AVX2-SLOW32: # %bb.0: ; AVX2-SLOW32-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; AVX2-SLOW32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 +; AVX2-SLOW32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # [18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0] ; AVX2-SLOW32-NEXT: retl ; ; AVX2-SLOW64-LABEL: test_mul_v8i32_v8i8: ; AVX2-SLOW64: # %bb.0: ; AVX2-SLOW64-NEXT: 
vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; AVX2-SLOW64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-SLOW64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0] ; AVX2-SLOW64-NEXT: retq ; ; AVX2-32-LABEL: test_mul_v8i32_v8i8: ; AVX2-32: # %bb.0: ; AVX2-32-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; AVX2-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 +; AVX2-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # [18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0] ; AVX2-32-NEXT: retl ; ; AVX2-64-LABEL: test_mul_v8i32_v8i8: ; AVX2-64: # %bb.0: ; AVX2-64-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; AVX2-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0] ; AVX2-64-NEXT: retq ; ; AVX512DQ-32-LABEL: test_mul_v8i32_v8i8: ; AVX512DQ-32: # %bb.0: ; AVX512DQ-32-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; AVX512DQ-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 +; AVX512DQ-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # [18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0] ; AVX512DQ-32-NEXT: 
retl ; ; AVX512DQ-64-LABEL: test_mul_v8i32_v8i8: ; AVX512DQ-64: # %bb.0: ; AVX512DQ-64-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; AVX512DQ-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512DQ-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0] ; AVX512DQ-64-NEXT: retq ; ; AVX512BW-32-LABEL: test_mul_v8i32_v8i8: ; AVX512BW-32: # %bb.0: ; AVX512BW-32-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; AVX512BW-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 +; AVX512BW-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # [18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0] ; AVX512BW-32-NEXT: retl ; ; AVX512BW-64-LABEL: test_mul_v8i32_v8i8: ; AVX512BW-64: # %bb.0: ; AVX512BW-64-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; AVX512BW-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512BW-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0] ; AVX512BW-64-NEXT: retq ; ; KNL-32-LABEL: test_mul_v8i32_v8i8: @@ -289,13 +289,13 @@ define <16 x i32> @test_mul_v16i32_v16i8(<16 x i8> %A) { ; AVX512BW-32-LABEL: test_mul_v16i32_v16i8: ; AVX512BW-32: # %bb.0: ; AVX512BW-32-NEXT: vpmovzxbd {{.*#+}} zmm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512BW-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm0 +; AVX512BW-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm0 # [18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0] ; AVX512BW-32-NEXT: retl ; ; AVX512BW-64-LABEL: test_mul_v16i32_v16i8: ; AVX512BW-64: # %bb.0: ; AVX512BW-64-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512BW-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 +; AVX512BW-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0] ; AVX512BW-64-NEXT: retq ; ; KNL-32-LABEL: test_mul_v16i32_v16i8: @@ -543,61 +543,61 @@ define <4 x i32> @test_mul_v4i32_v4i8_minsize(<4 x i8> %A) minsize { ; SSE-32-LABEL: test_mul_v4i32_v4i8_minsize: ; SSE-32: # %bb.0: ; SSE-32-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; SSE-32-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; SSE-32-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [18778,0,18778,0,18778,0,18778,0] ; SSE-32-NEXT: retl ; ; SSE-64-LABEL: 
test_mul_v4i32_v4i8_minsize: ; SSE-64: # %bb.0: ; SSE-64-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; SSE-64-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-64-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [18778,0,18778,0,18778,0,18778,0] ; SSE-64-NEXT: retq ; ; AVX2-SLOW32-LABEL: test_mul_v4i32_v4i8_minsize: ; AVX2-SLOW32: # %bb.0: ; AVX2-SLOW32-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; AVX2-SLOW32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; AVX2-SLOW32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # [18778,0,18778,0,18778,0,18778,0] ; AVX2-SLOW32-NEXT: retl ; ; AVX2-SLOW64-LABEL: test_mul_v4i32_v4i8_minsize: ; AVX2-SLOW64: # %bb.0: ; AVX2-SLOW64-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; AVX2-SLOW64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-SLOW64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [18778,0,18778,0,18778,0,18778,0] ; AVX2-SLOW64-NEXT: retq ; ; AVX2-32-LABEL: test_mul_v4i32_v4i8_minsize: ; AVX2-32: # %bb.0: ; AVX2-32-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; AVX2-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; AVX2-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # [18778,0,18778,0,18778,0,18778,0] ; AVX2-32-NEXT: retl ; ; AVX2-64-LABEL: test_mul_v4i32_v4i8_minsize: ; AVX2-64: # %bb.0: ; AVX2-64-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; AVX2-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [18778,0,18778,0,18778,0,18778,0] ; AVX2-64-NEXT: retq ; ; 
AVX512DQ-32-LABEL: test_mul_v4i32_v4i8_minsize: ; AVX512DQ-32: # %bb.0: ; AVX512DQ-32-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; AVX512DQ-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; AVX512DQ-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # [18778,0,18778,0,18778,0,18778,0] ; AVX512DQ-32-NEXT: retl ; ; AVX512DQ-64-LABEL: test_mul_v4i32_v4i8_minsize: ; AVX512DQ-64: # %bb.0: ; AVX512DQ-64-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; AVX512DQ-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512DQ-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [18778,0,18778,0,18778,0,18778,0] ; AVX512DQ-64-NEXT: retq ; ; AVX512BW-32-LABEL: test_mul_v4i32_v4i8_minsize: ; AVX512BW-32: # %bb.0: ; AVX512BW-32-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; AVX512BW-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; AVX512BW-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # [18778,0,18778,0,18778,0,18778,0] ; AVX512BW-32-NEXT: retl ; ; AVX512BW-64-LABEL: test_mul_v4i32_v4i8_minsize: ; AVX512BW-64: # %bb.0: ; AVX512BW-64-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; AVX512BW-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512BW-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [18778,0,18778,0,18778,0,18778,0] ; AVX512BW-64-NEXT: retq ; ; KNL-32-LABEL: test_mul_v4i32_v4i8_minsize: @@ -652,49 +652,49 @@ define <8 x i32> @test_mul_v8i32_v8i8_minsize(<8 x i8> %A) minsize { ; AVX2-SLOW32-LABEL: test_mul_v8i32_v8i8_minsize: ; AVX2-SLOW32: # %bb.0: ; AVX2-SLOW32-NEXT: vpmovzxbd {{.*#+}} ymm0 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; AVX2-SLOW32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 +; AVX2-SLOW32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # [18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0] ; AVX2-SLOW32-NEXT: retl ; ; AVX2-SLOW64-LABEL: test_mul_v8i32_v8i8_minsize: ; AVX2-SLOW64: # %bb.0: ; AVX2-SLOW64-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; AVX2-SLOW64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-SLOW64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0] ; AVX2-SLOW64-NEXT: retq ; ; AVX2-32-LABEL: test_mul_v8i32_v8i8_minsize: ; AVX2-32: # %bb.0: ; AVX2-32-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; AVX2-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 +; AVX2-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # [18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0] ; AVX2-32-NEXT: retl ; ; AVX2-64-LABEL: test_mul_v8i32_v8i8_minsize: ; AVX2-64: # %bb.0: ; AVX2-64-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; AVX2-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0] ; AVX2-64-NEXT: 
retq ; ; AVX512DQ-32-LABEL: test_mul_v8i32_v8i8_minsize: ; AVX512DQ-32: # %bb.0: ; AVX512DQ-32-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; AVX512DQ-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 +; AVX512DQ-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # [18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0] ; AVX512DQ-32-NEXT: retl ; ; AVX512DQ-64-LABEL: test_mul_v8i32_v8i8_minsize: ; AVX512DQ-64: # %bb.0: ; AVX512DQ-64-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; AVX512DQ-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512DQ-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0] ; AVX512DQ-64-NEXT: retq ; ; AVX512BW-32-LABEL: test_mul_v8i32_v8i8_minsize: ; AVX512BW-32: # %bb.0: ; AVX512BW-32-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; AVX512BW-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 +; AVX512BW-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # [18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0] ; AVX512BW-32-NEXT: retl ; ; AVX512BW-64-LABEL: test_mul_v8i32_v8i8_minsize: ; AVX512BW-64: # %bb.0: ; AVX512BW-64-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero -; AVX512BW-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), 
%ymm0, %ymm0 +; AVX512BW-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0] ; AVX512BW-64-NEXT: retq ; ; KNL-32-LABEL: test_mul_v8i32_v8i8_minsize: @@ -809,13 +809,13 @@ define <16 x i32> @test_mul_v16i32_v16i8_minsize(<16 x i8> %A) minsize { ; AVX512BW-32-LABEL: test_mul_v16i32_v16i8_minsize: ; AVX512BW-32: # %bb.0: ; AVX512BW-32-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512BW-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm0 +; AVX512BW-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm0 # [18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0] ; AVX512BW-32-NEXT: retl ; ; AVX512BW-64-LABEL: test_mul_v16i32_v16i8_minsize: ; AVX512BW-64: # %bb.0: ; AVX512BW-64-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero -; AVX512BW-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 +; AVX512BW-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0,18778,0] ; AVX512BW-64-NEXT: retq ; ; KNL-32-LABEL: test_mul_v16i32_v16i8_minsize: diff --git 
a/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll b/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll index 3dde5c1c8a40c1..5531c262bd7726 100644 --- a/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll +++ b/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll @@ -2217,21 +2217,21 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) { ; CHECK-SSE2-NEXT: movq %rdi, %rax ; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm5 ; CHECK-SSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5 +; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5 # [9,0,41,183,1,1,161,221] ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255] ; CHECK-SSE2-NEXT: pand %xmm4, %xmm5 ; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm6 ; CHECK-SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6 +; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6 # [171,103,183,171,61,1,127,183] ; CHECK-SSE2-NEXT: pand %xmm4, %xmm6 ; CHECK-SSE2-NEXT: packuswb %xmm5, %xmm6 ; CHECK-SSE2-NEXT: paddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6 ; CHECK-SSE2-NEXT: movdqa %xmm6, %xmm5 ; CHECK-SSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm6[8],xmm5[9],xmm6[9],xmm5[10],xmm6[10],xmm5[11],xmm6[11],xmm5[12],xmm6[12],xmm5[13],xmm6[13],xmm5[14],xmm6[14],xmm5[15],xmm6[15] -; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5 +; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5 # [128,1,128,1,128,32,1,1] ; CHECK-SSE2-NEXT: psrlw $8, %xmm5 ; CHECK-SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6 +; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6 # [1,1,1,128,64,2,1,32] ; CHECK-SSE2-NEXT: psrlw $8, %xmm6 ; CHECK-SSE2-NEXT: packuswb %xmm5, %xmm6 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm7 = 
[84,2,36,42,2,1,2,4,2,255,4,36,127,31,2,2] @@ -2246,19 +2246,19 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) { ; CHECK-SSE2-NEXT: por %xmm7, %xmm5 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1 ; CHECK-SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [223,223,205,183,161,1,171,239] ; CHECK-SSE2-NEXT: pand %xmm4, %xmm1 ; CHECK-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [197,205,27,241,1,1,1,163] ; CHECK-SSE2-NEXT: pand %xmm4, %xmm0 ; CHECK-SSE2-NEXT: packuswb %xmm1, %xmm0 ; CHECK-SSE2-NEXT: paddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1 ; CHECK-SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [128,128,1,1,1,128,1,64] ; CHECK-SSE2-NEXT: psrlw $8, %xmm1 ; CHECK-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,1,1,128,128,32,128,32] ; CHECK-SSE2-NEXT: psrlw $8, %xmm0 ; CHECK-SSE2-NEXT: packuswb %xmm1, %xmm0 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [19,51,13,7,128,32,128,3,5,5,51,37,3,128,85,5] @@ -2281,20 +2281,20 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) { ; CHECK-SSE41-NEXT: movq %rdi, %rax ; CHECK-SSE41-NEXT: movdqa %xmm1, %xmm0 ; CHECK-SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; CHECK-SSE41-NEXT: pmullw 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [9,0,41,183,1,1,161,221] ; CHECK-SSE41-NEXT: pmovzxbw {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255] ; CHECK-SSE41-NEXT: pand %xmm5, %xmm0 ; CHECK-SSE41-NEXT: pmovzxbw {{.*#+}} xmm6 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero -; CHECK-SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6 +; CHECK-SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6 # [171,103,183,171,61,1,127,183] ; CHECK-SSE41-NEXT: pand %xmm5, %xmm6 ; CHECK-SSE41-NEXT: packuswb %xmm0, %xmm6 ; CHECK-SSE41-NEXT: paddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6 ; CHECK-SSE41-NEXT: movdqa %xmm6, %xmm0 ; CHECK-SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm6[8],xmm0[9],xmm6[9],xmm0[10],xmm6[10],xmm0[11],xmm6[11],xmm0[12],xmm6[12],xmm0[13],xmm6[13],xmm0[14],xmm6[14],xmm0[15],xmm6[15] -; CHECK-SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [128,1,128,1,128,32,1,1] ; CHECK-SSE41-NEXT: psrlw $8, %xmm0 ; CHECK-SSE41-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; CHECK-SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6 +; CHECK-SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6 # [1,1,1,128,64,2,1,32] ; CHECK-SSE41-NEXT: psrlw $8, %xmm6 ; CHECK-SSE41-NEXT: packuswb %xmm0, %xmm6 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm0 = [84,2,36,42,2,1,2,4,2,255,4,36,127,31,2,2] @@ -2309,18 +2309,18 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) { ; CHECK-SSE41-NEXT: pblendvb %xmm0, %xmm7, %xmm1 ; CHECK-SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero ; CHECK-SSE41-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; CHECK-SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 +; CHECK-SSE41-NEXT: pmullw 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 # [223,223,205,183,161,1,171,239] ; CHECK-SSE41-NEXT: pand %xmm5, %xmm4 -; CHECK-SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [197,205,27,241,1,1,1,163] ; CHECK-SSE41-NEXT: pand %xmm5, %xmm0 ; CHECK-SSE41-NEXT: packuswb %xmm4, %xmm0 ; CHECK-SSE41-NEXT: paddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm4 ; CHECK-SSE41-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm0[8],xmm4[9],xmm0[9],xmm4[10],xmm0[10],xmm4[11],xmm0[11],xmm4[12],xmm0[12],xmm4[13],xmm0[13],xmm4[14],xmm0[14],xmm4[15],xmm0[15] -; CHECK-SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 +; CHECK-SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 # [128,128,1,1,1,128,1,64] ; CHECK-SSE41-NEXT: psrlw $8, %xmm4 ; CHECK-SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; CHECK-SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,1,1,128,128,32,128,32] ; CHECK-SSE41-NEXT: psrlw $8, %xmm0 ; CHECK-SSE41-NEXT: packuswb %xmm4, %xmm0 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm4 = [19,51,13,7,128,32,128,3,5,5,51,37,3,128,85,5] @@ -2341,21 +2341,21 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) { ; CHECK-AVX1: # %bb.0: ; CHECK-AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; CHECK-AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15] -; CHECK-AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 +; CHECK-AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [34048,34048,26368,37632,21760,33024,22016,35072] ; CHECK-AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3 ; CHECK-AVX1-NEXT: vpunpcklbw {{.*#+}} xmm4 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; 
CHECK-AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 +; CHECK-AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [20224,26368,6912,30976,33024,33024,33024,12032] ; CHECK-AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4 ; CHECK-AVX1-NEXT: vpackuswb %xmm3, %xmm4, %xmm3 ; CHECK-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm4 ; CHECK-AVX1-NEXT: vpaddb %xmm4, %xmm3, %xmm3 ; CHECK-AVX1-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; CHECK-AVX1-NEXT: vpsraw $8, %xmm4, %xmm4 -; CHECK-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 +; CHECK-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [8,8,128,64,8,256,256,8] ; CHECK-AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4 ; CHECK-AVX1-NEXT: vpunpcklbw {{.*#+}} xmm5 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; CHECK-AVX1-NEXT: vpsraw $8, %xmm5, %xmm5 -; CHECK-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 +; CHECK-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 # [64,128,128,16,256,64,256,16] ; CHECK-AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5 ; CHECK-AVX1-NEXT: vpackuswb %xmm4, %xmm5, %xmm5 ; CHECK-AVX1-NEXT: vpsrlw $7, %xmm3, %xmm3 @@ -2363,35 +2363,35 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) { ; CHECK-AVX1-NEXT: vpand %xmm4, %xmm3, %xmm3 ; CHECK-AVX1-NEXT: vpaddb %xmm3, %xmm5, %xmm5 ; CHECK-AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm5[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; CHECK-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm6 +; CHECK-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm6 # [62,62,5,7,97,2,3,60] ; CHECK-AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] ; CHECK-AVX1-NEXT: vpand %xmm3, %xmm6, %xmm6 ; CHECK-AVX1-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero -; CHECK-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 +; 
CHECK-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 # [13,5,19,34,2,8,2,88] ; CHECK-AVX1-NEXT: vpand %xmm3, %xmm5, %xmm5 ; CHECK-AVX1-NEXT: vpackuswb %xmm6, %xmm5, %xmm5 ; CHECK-AVX1-NEXT: vpsubb %xmm5, %xmm0, %xmm5 ; CHECK-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; CHECK-AVX1-NEXT: vpunpckhbw {{.*#+}} xmm6 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15] -; CHECK-AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6, %xmm6 +; CHECK-AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6, %xmm6 # [2304,0,10496,37632,33024,33024,21760,36096] ; CHECK-AVX1-NEXT: vpsrlw $8, %xmm6, %xmm6 ; CHECK-AVX1-NEXT: vpunpcklbw {{.*#+}} xmm7 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; CHECK-AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm7, %xmm7 +; CHECK-AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm7, %xmm7 # [22016,24320,37632,11008,12544,32512,16640,37632] ; CHECK-AVX1-NEXT: vpsrlw $8, %xmm7, %xmm7 ; CHECK-AVX1-NEXT: vpackuswb %xmm6, %xmm7, %xmm6 ; CHECK-AVX1-NEXT: vpmovzxbw {{.*#+}} xmm7 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; CHECK-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm7, %xmm7 +; CHECK-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm7, %xmm7 # [0,0,1,0,0,255,0,1] ; CHECK-AVX1-NEXT: vpand %xmm3, %xmm7, %xmm7 ; CHECK-AVX1-NEXT: vpshufb {{.*#+}} xmm8 = zero,zero,xmm0[9],zero,zero,zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,zero,zero,xmm0[15],zero ; CHECK-AVX1-NEXT: vpackuswb %xmm8, %xmm7, %xmm7 ; CHECK-AVX1-NEXT: vpaddb %xmm7, %xmm6, %xmm6 ; CHECK-AVX1-NEXT: vpunpckhbw {{.*#+}} xmm7 = xmm6[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; CHECK-AVX1-NEXT: vpsraw $8, %xmm7, %xmm7 -; CHECK-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm7, %xmm7 +; CHECK-AVX1-NEXT: 
vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm7, %xmm7 # [64,256,32,64,256,64,8,4] ; CHECK-AVX1-NEXT: vpsrlw $8, %xmm7, %xmm7 ; CHECK-AVX1-NEXT: vpunpcklbw {{.*#+}} xmm8 = xmm6[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; CHECK-AVX1-NEXT: vpsraw $8, %xmm8, %xmm8 -; CHECK-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm8, %xmm8 +; CHECK-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm8, %xmm8 # [256,8,64,256,16,4,8,8] ; CHECK-AVX1-NEXT: vpsrlw $8, %xmm8, %xmm8 ; CHECK-AVX1-NEXT: vpackuswb %xmm7, %xmm8, %xmm7 ; CHECK-AVX1-NEXT: vpsrlw $7, %xmm6, %xmm6 @@ -2399,10 +2399,10 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) { ; CHECK-AVX1-NEXT: vpand %xmm4, %xmm6, %xmm4 ; CHECK-AVX1-NEXT: vpaddb %xmm4, %xmm7, %xmm4 ; CHECK-AVX1-NEXT: vpunpckhbw {{.*#+}} xmm6 = xmm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; CHECK-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6, %xmm6 +; CHECK-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6, %xmm6 # [114,1,50,7,2,8,97,117] ; CHECK-AVX1-NEXT: vpand %xmm3, %xmm6, %xmm6 ; CHECK-AVX1-NEXT: vpmovzxbw {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero -; CHECK-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 +; CHECK-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [3,87,7,6,84,128,127,56] ; CHECK-AVX1-NEXT: vpand %xmm3, %xmm4, %xmm3 ; CHECK-AVX1-NEXT: vpackuswb %xmm6, %xmm3, %xmm3 ; CHECK-AVX1-NEXT: vpsubb %xmm3, %xmm0, %xmm0 @@ -2423,14 +2423,14 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) { ; CHECK-AVX2: # %bb.0: ; CHECK-AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; CHECK-AVX2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31] -; 
CHECK-AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 +; CHECK-AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [34048,34048,26368,37632,21760,33024,22016,35072,2304,0,10496,37632,33024,33024,21760,36096] ; CHECK-AVX2-NEXT: vpsrlw $8, %ymm3, %ymm3 ; CHECK-AVX2-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23] -; CHECK-AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 +; CHECK-AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [20224,26368,6912,30976,33024,33024,33024,12032,22016,24320,37632,11008,12544,32512,16640,37632] ; CHECK-AVX2-NEXT: vpsrlw $8, %ymm4, %ymm4 ; CHECK-AVX2-NEXT: vpackuswb %ymm3, %ymm4, %ymm3 ; CHECK-AVX2-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] -; CHECK-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 +; CHECK-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [0,0,0,0,1,1,1,0,0,0,1,0,0,255,0,1] ; CHECK-AVX2-NEXT: vpbroadcastw {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] ; CHECK-AVX2-NEXT: vpand %ymm5, %ymm4, %ymm4 ; CHECK-AVX2-NEXT: vpshufb {{.*#+}} ymm6 = ymm0[8],zero,ymm0[9],zero,zero,zero,ymm0[11],zero,zero,zero,ymm0[13],zero,zero,zero,ymm0[15],zero,zero,zero,ymm0[25],zero,zero,zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,zero,zero,ymm0[31],zero @@ -2438,11 +2438,11 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) { ; CHECK-AVX2-NEXT: vpaddb %ymm4, %ymm3, %ymm3 ; CHECK-AVX2-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] ; CHECK-AVX2-NEXT: vpsraw $8, %ymm4, %ymm4 -; CHECK-AVX2-NEXT: vpmullw 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 +; CHECK-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [8,8,128,64,8,256,256,8,64,256,32,64,256,64,8,4] ; CHECK-AVX2-NEXT: vpsrlw $8, %ymm4, %ymm4 ; CHECK-AVX2-NEXT: vpunpcklbw {{.*#+}} ymm6 = ymm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] ; CHECK-AVX2-NEXT: vpsraw $8, %ymm6, %ymm6 -; CHECK-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm6, %ymm6 +; CHECK-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm6, %ymm6 # [64,128,128,16,256,64,256,16,256,8,64,256,16,4,8,8] ; CHECK-AVX2-NEXT: vpsrlw $8, %ymm6, %ymm6 ; CHECK-AVX2-NEXT: vpackuswb %ymm4, %ymm6, %ymm4 ; CHECK-AVX2-NEXT: vpsrlw $7, %ymm3, %ymm3 @@ -2450,10 +2450,10 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) { ; CHECK-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 ; CHECK-AVX2-NEXT: vpaddb %ymm3, %ymm4, %ymm3 ; CHECK-AVX2-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] -; CHECK-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 +; CHECK-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [62,62,5,7,97,2,3,60,114,1,50,7,2,8,97,117] ; CHECK-AVX2-NEXT: vpand %ymm5, %ymm4, %ymm4 ; CHECK-AVX2-NEXT: vpunpcklbw {{.*#+}} ymm3 = ymm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] -; CHECK-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 +; CHECK-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [13,5,19,34,2,8,2,88,3,87,7,6,84,128,127,56] ; CHECK-AVX2-NEXT: vpand %ymm5, %ymm3, %ymm3 ; CHECK-AVX2-NEXT: vpackuswb %ymm4, %ymm3, %ymm3 ; CHECK-AVX2-NEXT: vpsubb %ymm3, %ymm0, %ymm0 @@ -2467,19 +2467,19 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) { ; CHECK-AVX512VL-LABEL: pr51133: ; CHECK-AVX512VL: # %bb.0: ; CHECK-AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm2 = 
ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] -; CHECK-AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; CHECK-AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [223,223,205,183,161,1,171,239,9,0,41,183,1,1,161,221] ; CHECK-AVX512VL-NEXT: vpbroadcastd {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] ; CHECK-AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2 ; CHECK-AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] -; CHECK-AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 +; CHECK-AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [197,205,27,241,1,1,1,163,171,103,183,171,61,1,127,183] ; CHECK-AVX512VL-NEXT: vpand %ymm3, %ymm4, %ymm3 ; CHECK-AVX512VL-NEXT: vpackuswb %ymm2, %ymm3, %ymm2 ; CHECK-AVX512VL-NEXT: vpaddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 ; CHECK-AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] -; CHECK-AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 +; CHECK-AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [128,128,1,1,1,128,1,64,128,1,128,1,128,32,1,1] ; CHECK-AVX512VL-NEXT: vpsrlw $8, %ymm3, %ymm3 ; CHECK-AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] -; CHECK-AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; CHECK-AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [1,1,1,128,128,32,128,32,1,1,1,128,64,2,1,32] ; CHECK-AVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2 ; CHECK-AVX512VL-NEXT: vpackuswb %ymm3, %ymm2, %ymm2 ; CHECK-AVX512VL-NEXT: vpminub {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm3 diff --git a/llvm/test/CodeGen/X86/srem-vector-lkk.ll 
b/llvm/test/CodeGen/X86/srem-vector-lkk.ll index c8de34f63dd85d..e936e1ef81b74f 100644 --- a/llvm/test/CodeGen/X86/srem-vector-lkk.ll +++ b/llvm/test/CodeGen/X86/srem-vector-lkk.ll @@ -118,18 +118,18 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) { ; SSE-NEXT: psrlw $15, %xmm2 ; SSE-NEXT: psraw $6, %xmm1 ; SSE-NEXT: paddw %xmm2, %xmm1 -; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [95,95,95,95,95,95,95,95] ; SSE-NEXT: psubw %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: fold_srem_vec_2: ; AVX: # %bb.0: -; AVX-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [44151,44151,44151,44151,44151,44151,44151,44151] ; AVX-NEXT: vpaddw %xmm0, %xmm1, %xmm1 ; AVX-NEXT: vpsrlw $15, %xmm1, %xmm2 ; AVX-NEXT: vpsraw $6, %xmm1, %xmm1 ; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1 -; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [95,95,95,95,95,95,95,95] ; AVX-NEXT: vpsubw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq %1 = srem <4 x i16> %x, @@ -156,12 +156,12 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) { ; ; AVX-LABEL: combine_srem_sdiv: ; AVX: # %bb.0: -; AVX-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [44151,44151,44151,44151,44151,44151,44151,44151] ; AVX-NEXT: vpaddw %xmm0, %xmm1, %xmm1 ; AVX-NEXT: vpsrlw $15, %xmm1, %xmm2 ; AVX-NEXT: vpsraw $6, %xmm1, %xmm1 ; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1 -; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 +; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 # [95,95,95,95,95,95,95,95] ; AVX-NEXT: vpsubw %xmm2, %xmm0, %xmm0 ; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq diff --git a/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll b/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll index 
cdeca96732dc31..a17b5a1e8f3e0b 100644 --- a/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll +++ b/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll @@ -133,7 +133,7 @@ define <4 x i1> @t1_all_odd_ne(<4 x i32> %X) nounwind { define <8 x i1> @t2_narrow(<8 x i16> %X) nounwind { ; CHECK-SSE2-LABEL: t2_narrow: ; CHECK-SSE2: # %bb.0: -; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [43691,43691,43691,43691,43691,43691,43691,43691] ; CHECK-SSE2-NEXT: psubusw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1 ; CHECK-SSE2-NEXT: pcmpeqw %xmm1, %xmm0 @@ -142,7 +142,7 @@ define <8 x i1> @t2_narrow(<8 x i16> %X) nounwind { ; ; CHECK-SSE41-LABEL: t2_narrow: ; CHECK-SSE41: # %bb.0: -; CHECK-SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [43691,43691,43691,43691,43691,43691,43691,43691] ; CHECK-SSE41-NEXT: pmovsxdq {{.*#+}} xmm1 = [18446744073709507925,18446744073709507925] ; CHECK-SSE41-NEXT: pminuw %xmm0, %xmm1 ; CHECK-SSE41-NEXT: pcmpeqw %xmm1, %xmm0 @@ -152,7 +152,7 @@ define <8 x i1> @t2_narrow(<8 x i16> %X) nounwind { ; ; CHECK-AVX1-LABEL: t2_narrow: ; CHECK-AVX1: # %bb.0: -; CHECK-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [43691,43691,43691,43691,43691,43691,43691,43691] ; CHECK-AVX1-NEXT: vpminuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -161,7 +161,7 @@ define <8 x i1> @t2_narrow(<8 x i16> %X) nounwind { ; ; CHECK-AVX2-LABEL: t2_narrow: ; CHECK-AVX2: # %bb.0: -; CHECK-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [43691,43691,43691,43691,43691,43691,43691,43691] ; CHECK-AVX2-NEXT: vpminuw 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 @@ -170,7 +170,7 @@ define <8 x i1> @t2_narrow(<8 x i16> %X) nounwind { ; ; CHECK-AVX512VL-LABEL: t2_narrow: ; CHECK-AVX512VL: # %bb.0: -; CHECK-AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; CHECK-AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [43691,43691,43691,43691,43691,43691,43691,43691] ; CHECK-AVX512VL-NEXT: vpminuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; CHECK-AVX512VL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 diff --git a/llvm/test/CodeGen/X86/urem-seteq.ll b/llvm/test/CodeGen/X86/urem-seteq.ll index b606e118334315..72e91ce80d1a58 100644 --- a/llvm/test/CodeGen/X86/urem-seteq.ll +++ b/llvm/test/CodeGen/X86/urem-seteq.ll @@ -362,7 +362,16 @@ define i32 @test_urem_allones(i32 %X) nounwind { ; https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=34366 define void @ossfuzz34366() { ; X86-LABEL: ossfuzz34366: +; X86: # %bb.0: +; X86-NEXT: cmpl $0, (%eax) +; X86-NEXT: sete (%eax) +; X86-NEXT: retl +; ; X64-LABEL: ossfuzz34366: +; X64: # %bb.0: +; X64-NEXT: cmpq $0, (%rax) +; X64-NEXT: sete (%rax) +; X64-NEXT: retq %L10 = load i448, ptr undef, align 4 %B18 = urem i448 %L10, -363419362147803445274661903944002267176820680343659030140745099590319644056698961663095525356881782780381260803133088966767300814307328 %C13 = icmp ule i448 %B18, 0 diff --git a/llvm/test/CodeGen/X86/urem-vector-lkk.ll b/llvm/test/CodeGen/X86/urem-vector-lkk.ll index 3873f04b8307ed..94c7892795c2b6 100644 --- a/llvm/test/CodeGen/X86/urem-vector-lkk.ll +++ b/llvm/test/CodeGen/X86/urem-vector-lkk.ll @@ -92,15 +92,15 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) { ; SSE-NEXT: movdqa {{.*#+}} xmm1 = [44151,44151,44151,44151,44151,44151,44151,44151] ; SSE-NEXT: pmulhuw %xmm0, %xmm1 ; SSE-NEXT: psrlw $6, %xmm1 -; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), 
%xmm1 +; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [95,95,95,95,95,95,95,95] ; SSE-NEXT: psubw %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: fold_urem_vec_2: ; AVX: # %bb.0: -; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [44151,44151,44151,44151,44151,44151,44151,44151] ; AVX-NEXT: vpsrlw $6, %xmm1, %xmm1 -; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [95,95,95,95,95,95,95,95] ; AVX-NEXT: vpsubw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq %1 = urem <4 x i16> %x, @@ -123,9 +123,9 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) { ; ; AVX-LABEL: combine_urem_udiv: ; AVX: # %bb.0: -; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [44151,44151,44151,44151,44151,44151,44151,44151] ; AVX-NEXT: vpsrlw $6, %xmm1, %xmm1 -; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 +; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 # [95,95,95,95,95,95,95,95] ; AVX-NEXT: vpsubw %xmm2, %xmm0, %xmm0 ; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq diff --git a/llvm/test/CodeGen/X86/var-permute-128.ll b/llvm/test/CodeGen/X86/var-permute-128.ll index f2240a94684427..584a7493ff9ad5 100644 --- a/llvm/test/CodeGen/X86/var-permute-128.ll +++ b/llvm/test/CodeGen/X86/var-permute-128.ll @@ -173,21 +173,21 @@ define <8 x i16> @var_shuffle_v8i16(<8 x i16> %v, <8 x i16> %indices) nounwind { ; ; SSSE3-LABEL: var_shuffle_v8i16: ; SSSE3: # %bb.0: -; SSSE3-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSSE3-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [514,514,514,514,514,514,514,514] ; SSSE3-NEXT: paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 ; SSSE3-NEXT: pshufb %xmm1, %xmm0 ; SSSE3-NEXT: retq ; ; SSE41-LABEL: var_shuffle_v8i16: ; SSE41: # %bb.0: -; SSE41-NEXT: pmullw 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [514,514,514,514,514,514,514,514] ; SSE41-NEXT: paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 ; SSE41-NEXT: pshufb %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVXNOVLBW-LABEL: var_shuffle_v8i16: ; AVXNOVLBW: # %bb.0: -; AVXNOVLBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVXNOVLBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [514,514,514,514,514,514,514,514] ; AVXNOVLBW-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVXNOVLBW-NEXT: vpshufb %xmm1, %xmm0, %xmm0 ; AVXNOVLBW-NEXT: retq diff --git a/llvm/test/CodeGen/X86/var-permute-256.ll b/llvm/test/CodeGen/X86/var-permute-256.ll index 56dc2f0571b170..5c82b9220a71ed 100644 --- a/llvm/test/CodeGen/X86/var-permute-256.ll +++ b/llvm/test/CodeGen/X86/var-permute-256.ll @@ -168,7 +168,7 @@ define <16 x i16> @var_shuffle_v16i16(<16 x i16> %v, <16 x i16> %indices) nounwi ; ; AVX2-LABEL: var_shuffle_v16i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514] ; AVX2-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,3,2,3] ; AVX2-NEXT: vpshufb %ymm1, %ymm2, %ymm2 @@ -180,7 +180,7 @@ define <16 x i16> @var_shuffle_v16i16(<16 x i16> %v, <16 x i16> %indices) nounwi ; ; AVX512-LABEL: var_shuffle_v16i16: ; AVX512: # %bb.0: -; AVX512-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514] ; AVX512-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX512-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,3,2,3] ; AVX512-NEXT: vpshufb %ymm1, %ymm2, %ymm2 @@ -192,7 +192,7 @@ define <16 x i16> @var_shuffle_v16i16(<16 x i16> %v, <16 x i16> %indices) nounwi 
; ; AVX512VLDQ-LABEL: var_shuffle_v16i16: ; AVX512VLDQ: # %bb.0: -; AVX512VLDQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512VLDQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514] ; AVX512VLDQ-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX512VLDQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2 ; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm2, %ymm2 @@ -713,7 +713,7 @@ define <16 x i16> @var_shuffle_v16i16_from_v8i16(<8 x i16> %v, <16 x i16> %indic ; AVX2-LABEL: var_shuffle_v16i16_from_v8i16: ; AVX2: # %bb.0: ; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514] ; AVX2-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm2 ; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 @@ -725,7 +725,7 @@ define <16 x i16> @var_shuffle_v16i16_from_v8i16(<8 x i16> %v, <16 x i16> %indic ; AVX512-LABEL: var_shuffle_v16i16_from_v8i16: ; AVX512: # %bb.0: ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 -; AVX512-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514] ; AVX512-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX512-NEXT: vpshufb %ymm1, %ymm0, %ymm2 ; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 @@ -737,7 +737,7 @@ define <16 x i16> @var_shuffle_v16i16_from_v8i16(<8 x i16> %v, <16 x i16> %indic ; AVX512VLDQ-LABEL: var_shuffle_v16i16_from_v8i16: ; AVX512VLDQ: # %bb.0: ; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 -; AVX512VLDQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512VLDQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # 
[514,514,514,514,514,514,514,514,514,514,514,514,514,514,514,514] ; AVX512VLDQ-NEXT: vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX512VLDQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 ; AVX512VLDQ-NEXT: vpshufb %ymm1, %ymm0, %ymm2 diff --git a/llvm/test/CodeGen/X86/vec_shift6.ll b/llvm/test/CodeGen/X86/vec_shift6.ll index a905f881742bde..59bc3940fcb31e 100644 --- a/llvm/test/CodeGen/X86/vec_shift6.ll +++ b/llvm/test/CodeGen/X86/vec_shift6.ll @@ -11,12 +11,12 @@ define <8 x i16> @test1(<8 x i16> %a) { ; SSE-LABEL: test1: ; SSE: # %bb.0: -; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2,2,4,8,128,1,512,2048] ; SSE-NEXT: retq ; ; AVX-LABEL: test1: ; AVX: # %bb.0: -; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2,2,4,8,128,1,512,2048] ; AVX-NEXT: retq %shl = shl <8 x i16> %a, ret <8 x i16> %shl @@ -25,12 +25,12 @@ define <8 x i16> @test1(<8 x i16> %a) { define <8 x i16> @test2(<8 x i16> %a) { ; SSE-LABEL: test2: ; SSE: # %bb.0: -; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,u,1,1,2,u,u,2] ; SSE-NEXT: retq ; ; AVX-LABEL: test2: ; AVX: # %bb.0: -; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,u,1,1,2,u,u,2] ; AVX-NEXT: retq %shl = shl <8 x i16> %a, ret <8 x i16> %shl @@ -101,7 +101,7 @@ define <16 x i16> @test5(<16 x i16> %a) { ; ; AVX-LABEL: test5: ; AVX: # %bb.0: -; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [2,2,4,8,128,1,512,2048,2,2,4,8,128,1,512,2048] ; AVX-NEXT: retq %shl = shl <16 x i16> %a, ret <16 x i16> %shl diff --git a/llvm/test/CodeGen/X86/vector-fshl-128.ll b/llvm/test/CodeGen/X86/vector-fshl-128.ll index 0459d47eed8178..c7cff092c5a4fe 100644 --- 
a/llvm/test/CodeGen/X86/vector-fshl-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-128.ll @@ -1950,33 +1950,33 @@ define <4 x i32> @constant_funnnel_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { define <8 x i16> @constant_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; SSE-LABEL: constant_funnnel_v8i16: ; SSE: # %bb.0: -; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2,4,8,16,32,64,128] ; SSE-NEXT: psrlw $1, %xmm1 -; SSE-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2,4,8,16,32,64,128,256] ; SSE-NEXT: por %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: constant_funnnel_v8i16: ; AVX: # %bb.0: -; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,4,8,16,32,64,128] ; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1 -; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2,4,8,16,32,64,128,256] ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX512F-LABEL: constant_funnnel_v8i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,4,8,16,32,64,128] ; AVX512F-NEXT: vpsrlw $1, %xmm1, %xmm1 -; AVX512F-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512F-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2,4,8,16,32,64,128,256] ; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: constant_funnnel_v8i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,4,8,16,32,64,128] ; AVX512VL-NEXT: vpsrlw $1, %xmm1, %xmm1 -; AVX512VL-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; 
AVX512VL-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2,4,8,16,32,64,128,256] ; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; @@ -2025,9 +2025,9 @@ define <8 x i16> @constant_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; ; X86-SSE2-LABEL: constant_funnnel_v8i16: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [1,2,4,8,16,32,64,128] ; X86-SSE2-NEXT: psrlw $1, %xmm1 -; X86-SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; X86-SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [2,4,8,16,32,64,128,256] ; X86-SSE2-NEXT: por %xmm1, %xmm0 ; X86-SSE2-NEXT: retl %res = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> ) @@ -2039,10 +2039,10 @@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm1, %xmm2 ; SSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15] -; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [1,128,64,32,16,8,4,2] ; SSE-NEXT: psrlw $8, %xmm2 ; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,2,4,8,16,32,64,128] ; SSE-NEXT: psrlw $8, %xmm1 ; SSE-NEXT: packuswb %xmm2, %xmm1 ; SSE-NEXT: movdqa %xmm1, %xmm0 @@ -2051,10 +2051,10 @@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; AVX-LABEL: constant_funnnel_v16i8: ; AVX: # %bb.0: ; AVX-NEXT: vpunpckhbw {{.*#+}} xmm2 = 
xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [1,128,64,32,16,8,4,2] ; AVX-NEXT: vpsrlw $8, %xmm2, %xmm2 ; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,4,8,16,32,64,128] ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0 ; AVX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 ; AVX-NEXT: retq @@ -2062,10 +2062,10 @@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; AVX512F-LABEL: constant_funnnel_v16i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [1,128,64,32,16,8,4,2] ; AVX512F-NEXT: vpsrlw $8, %xmm2, %xmm2 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,4,8,16,32,64,128] ; AVX512F-NEXT: vpsrlw $8, %xmm0, %xmm0 ; AVX512F-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 ; AVX512F-NEXT: retq @@ -2073,10 +2073,10 @@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; AVX512VL-LABEL: constant_funnnel_v16i8: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpunpckhbw {{.*#+}} xmm2 = 
xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [1,128,64,32,16,8,4,2] ; AVX512VL-NEXT: vpsrlw $8, %xmm2, %xmm2 ; AVX512VL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,4,8,16,32,64,128] ; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm0 ; AVX512VL-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 ; AVX512VL-NEXT: retq @@ -2146,10 +2146,10 @@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 ; X86-SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15] -; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 +; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 # [1,128,64,32,16,8,4,2] ; X86-SSE2-NEXT: psrlw $8, %xmm2 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [1,2,4,8,16,32,64,128] ; X86-SSE2-NEXT: psrlw $8, %xmm1 ; X86-SSE2-NEXT: packuswb %xmm2, %xmm1 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 diff --git a/llvm/test/CodeGen/X86/vector-fshl-256.ll b/llvm/test/CodeGen/X86/vector-fshl-256.ll index e81b9adfdd3e3d..b26580541fe401 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-256.ll @@ -1755,14 +1755,14 @@ define 
<16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin ; AVX1: # %bb.0: ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 ; AVX1-NEXT: vpsrlw $1, %xmm2, %xmm2 -; AVX1-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 +; AVX1-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3 # [512,1024,2048,4096,8192,16384,32768,u] ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3,4,5,6],xmm2[7] ; AVX1-NEXT: vpsrlw $1, %xmm1, %xmm1 -; AVX1-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2,4,8,16,32,64,128,256] ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 # [1,2,4,8,16,32,64,128] ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [256,512,1024,2048,4096,8192,16384,32768] ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 ; AVX1-NEXT: retq @@ -1770,30 +1770,30 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin ; AVX2-LABEL: constant_funnnel_v16i16: ; AVX2: # %bb.0: ; AVX2-NEXT: vpsrlw $1, %ymm1, %ymm1 -; AVX2-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 +; AVX2-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 # [2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768,u] ; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0,1,2,3,4,5,6],ymm1[7],ymm2[8,9,10,11,12,13,14],ymm1[15] ; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768] ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; AVX512F-LABEL: 
constant_funnnel_v16i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpsrlw $1, %ymm1, %ymm1 -; AVX512F-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 +; AVX512F-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 # [2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768,u] ; AVX512F-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0,1,2,3,4,5,6],ymm1[7],ymm2[8,9,10,11,12,13,14],ymm1[15] ; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768] ; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: constant_funnnel_v16i16: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsrlw $1, %ymm1, %ymm1 -; AVX512VL-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 +; AVX512VL-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 # [2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768,u] ; AVX512VL-NEXT: vpblendw {{.*#+}} ymm1 = ymm2[0,1,2,3,4,5,6],ymm1[7],ymm2[8,9,10,11,12,13,14],ymm1[15] ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] -; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768] ; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; @@ -1848,10 +1848,10 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin ; XOPAVX2-LABEL: constant_funnnel_v16i16: ; XOPAVX2: # %bb.0: ; XOPAVX2-NEXT: vpsrlw $1, %ymm1, %ymm1 -; XOPAVX2-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 +; XOPAVX2-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 # [2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768,u] ; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm1 = 
ymm2[0,1,2,3,4,5,6],ymm1[7],ymm2[8,9,10,11,12,13,14],ymm1[15] ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7] -; XOPAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; XOPAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768] ; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq %res = call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> ) @@ -1885,10 +1885,10 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind { ; AVX2-LABEL: constant_funnnel_v32i8: ; AVX2: # %bb.0: ; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31] -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2] ; AVX2-NEXT: vpsrlw $8, %ymm2, %ymm2 ; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23] -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] ; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: retq @@ -1896,10 +1896,10 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind { ; AVX512F-LABEL: constant_funnnel_v32i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm2 = 
ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2] ; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX512F-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 ; AVX512F-NEXT: retq @@ -1907,10 +1907,10 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind { ; AVX512VL-LABEL: constant_funnnel_v32i8: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31] -; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2] ; AVX512VL-NEXT: vpsrlw $8, %ymm2, %ymm2 ; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm0 = 
ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23] -; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] ; AVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX512VL-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 ; AVX512VL-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll index 6b5ba7042c5c51..6d02801cc04dbf 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-128.ll @@ -1505,10 +1505,10 @@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x) nounwind { ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm0, %xmm1 ; SSE-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,128,64,32,16,8,4,2] ; SSE-NEXT: psrlw $8, %xmm1 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2,4,8,16,32,64,128] ; SSE-NEXT: psrlw $8, %xmm0 ; SSE-NEXT: packuswb %xmm1, %xmm0 ; SSE-NEXT: retq @@ -1516,10 +1516,10 @@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x) nounwind { ; AVX-LABEL: constant_funnnel_v16i8: ; AVX: # %bb.0: ; AVX-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [1,128,64,32,16,8,4,2] ; 
AVX-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,4,8,16,32,64,128] ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0 ; AVX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq @@ -1527,10 +1527,10 @@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x) nounwind { ; AVX512F-LABEL: constant_funnnel_v16i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [1,128,64,32,16,8,4,2] ; AVX512F-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,4,8,16,32,64,128] ; AVX512F-NEXT: vpsrlw $8, %xmm0, %xmm0 ; AVX512F-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: retq @@ -1538,10 +1538,10 @@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x) nounwind { ; AVX512VL-LABEL: constant_funnnel_v16i8: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [1,128,64,32,16,8,4,2] ; AVX512VL-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX512VL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,4,8,16,32,64,128] ; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm0 ; AVX512VL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: retq @@ -1605,10 +1605,10 
@@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x) nounwind { ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 ; X86-SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [1,128,64,32,16,8,4,2] ; X86-SSE2-NEXT: psrlw $8, %xmm1 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [1,2,4,8,16,32,64,128] ; X86-SSE2-NEXT: psrlw $8, %xmm0 ; X86-SSE2-NEXT: packuswb %xmm1, %xmm0 ; X86-SSE2-NEXT: retl diff --git a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll index 731a9f4e403730..8071150c517a25 100644 --- a/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshl-rot-256.ll @@ -1284,10 +1284,10 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x) nounwind { ; AVX2-LABEL: constant_funnnel_v32i8: ; AVX2: # %bb.0: ; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2] ; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] ; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq @@ -1295,10 +1295,10 @@ define <32 x i8> 
@constant_funnnel_v32i8(<32 x i8> %x) nounwind { ; AVX512F-LABEL: constant_funnnel_v32i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2] ; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX512F-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: retq @@ -1306,10 +1306,10 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x) nounwind { ; AVX512VL-LABEL: constant_funnnel_v32i8: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] -; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2] ; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] -; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] ; AVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX512VL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll index 3aaa9268a8d888..ae031dae239517 100644 --- 
a/llvm/test/CodeGen/X86/vector-fshr-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll @@ -1875,7 +1875,7 @@ define <8 x i16> @constant_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 ; SSE2-NEXT: por %xmm1, %xmm2 ; SSE2-NEXT: paddw %xmm0, %xmm0 -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [32768,16384,8192,4096,2048,1024,512,256] ; SSE2-NEXT: por %xmm2, %xmm0 ; SSE2-NEXT: retq ; @@ -1885,34 +1885,34 @@ define <8 x i16> @constant_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; SSE41-NEXT: pmulhuw %xmm1, %xmm2 ; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3,4,5,6,7] ; SSE41-NEXT: paddw %xmm0, %xmm0 -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [32768,16384,8192,4096,2048,1024,512,256] ; SSE41-NEXT: por %xmm2, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: constant_funnnel_v8i16: ; AVX: # %bb.0: -; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 +; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 # [u,32768,16384,8192,4096,2048,1024,512] ; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] ; AVX-NEXT: vpaddw %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [32768,16384,8192,4096,2048,1024,512,256] ; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq ; ; AVX512F-LABEL: constant_funnnel_v8i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 +; AVX512F-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 # [u,32768,16384,8192,4096,2048,1024,512] ; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] ; AVX512F-NEXT: vpaddw %xmm0, %xmm0, %xmm0 -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vpmullw 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [32768,16384,8192,4096,2048,1024,512,256] ; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: constant_funnnel_v8i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 +; AVX512VL-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 # [u,32768,16384,8192,4096,2048,1024,512] ; AVX512VL-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] ; AVX512VL-NEXT: vpaddw %xmm0, %xmm0, %xmm0 -; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [32768,16384,8192,4096,2048,1024,512,256] ; AVX512VL-NEXT: vpor %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; @@ -1967,7 +1967,7 @@ define <8 x i16> @constant_funnnel_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; X86-SSE2-NEXT: pmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 ; X86-SSE2-NEXT: por %xmm1, %xmm2 ; X86-SSE2-NEXT: paddw %xmm0, %xmm0 -; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [32768,16384,8192,4096,2048,1024,512,256] ; X86-SSE2-NEXT: por %xmm2, %xmm0 ; X86-SSE2-NEXT: retl %res = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> ) @@ -1980,20 +1980,20 @@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; SSE2-NEXT: pxor %xmm2, %xmm2 ; SSE2-NEXT: movdqa %xmm1, %xmm3 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15] -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [256,2,4,8,16,32,64,128] ; SSE2-NEXT: psrlw $8, %xmm3 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] -; SSE2-NEXT: pmullw 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [256,128,64,32,16,8,4,2] ; SSE2-NEXT: psrlw $8, %xmm1 ; SSE2-NEXT: packuswb %xmm3, %xmm1 ; SSE2-NEXT: paddb %xmm0, %xmm0 ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [128,1,2,4,8,16,32,64] ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] ; SSE2-NEXT: pand %xmm3, %xmm2 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [128,64,32,16,8,4,2,1] ; SSE2-NEXT: pand %xmm3, %xmm0 ; SSE2-NEXT: packuswb %xmm2, %xmm0 ; SSE2-NEXT: por %xmm1, %xmm0 @@ -2004,18 +2004,18 @@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; SSE41-NEXT: paddb %xmm0, %xmm0 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [128,1,2,4,8,16,32,64] ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] ; SSE41-NEXT: pand %xmm3, %xmm0 -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [128,64,32,16,8,4,2,1] ; SSE41-NEXT: pand %xmm3, %xmm2 ; SSE41-NEXT: packuswb %xmm0, %xmm2 ; SSE41-NEXT: pxor %xmm3, %xmm3 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero ; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = 
xmm1[8],xmm3[8],xmm1[9],xmm3[9],xmm1[10],xmm3[10],xmm1[11],xmm3[11],xmm1[12],xmm3[12],xmm1[13],xmm3[13],xmm1[14],xmm3[14],xmm1[15],xmm3[15] -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [256,2,4,8,16,32,64,128] ; SSE41-NEXT: psrlw $8, %xmm1 -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [256,128,64,32,16,8,4,2] ; SSE41-NEXT: psrlw $8, %xmm0 ; SSE41-NEXT: packuswb %xmm1, %xmm0 ; SSE41-NEXT: por %xmm2, %xmm0 @@ -2025,19 +2025,19 @@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; AVX1: # %bb.0: ; AVX1-NEXT: vpaddb %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [128,1,2,4,8,16,32,64] ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] ; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [128,64,32,16,8,4,2,1] ; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [256,2,4,8,16,32,64,128] ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = 
xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [256,128,64,32,16,8,4,2] ; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0 @@ -2046,13 +2046,13 @@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; AVX2-LABEL: constant_funnnel_v16i8: ; AVX2: # %bb.0: ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [256,128,64,32,16,8,4,2,256,2,4,8,16,32,64,128] ; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX2-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 ; AVX2-NEXT: vpaddb %xmm0, %xmm0, %xmm0 ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [128,64,32,16,8,4,2,1,128,1,2,4,8,16,32,64] ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 ; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 @@ -2145,20 +2145,20 @@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; X86-SSE2-NEXT: pxor %xmm2, %xmm2 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm3 ; X86-SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = 
xmm3[8],xmm2[8],xmm3[9],xmm2[9],xmm3[10],xmm2[10],xmm3[11],xmm2[11],xmm3[12],xmm2[12],xmm3[13],xmm2[13],xmm3[14],xmm2[14],xmm3[15],xmm2[15] -; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm3 +; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm3 # [256,2,4,8,16,32,64,128] ; X86-SSE2-NEXT: psrlw $8, %xmm3 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] -; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [256,128,64,32,16,8,4,2] ; X86-SSE2-NEXT: psrlw $8, %xmm1 ; X86-SSE2-NEXT: packuswb %xmm3, %xmm1 ; X86-SSE2-NEXT: paddb %xmm0, %xmm0 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 ; X86-SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 +; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 # [128,1,2,4,8,16,32,64] ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] ; X86-SSE2-NEXT: pand %xmm3, %xmm2 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [128,64,32,16,8,4,2,1] ; X86-SSE2-NEXT: pand %xmm3, %xmm0 ; X86-SSE2-NEXT: packuswb %xmm2, %xmm0 ; X86-SSE2-NEXT: por %xmm1, %xmm0 diff --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll index fc65f759f5fbed..8d78afccddec32 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll @@ -1505,47 +1505,47 @@ define <8 x i32> @constant_funnnel_v8i32(<8 x i32> %x, <8 x i32> %y) nounwind { define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwind { ; AVX1-LABEL: constant_funnnel_v16i16: ; AVX1: # %bb.0: -; AVX1-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 +; 
AVX1-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2 # [u,32768,16384,8192,4096,2048,1024,512] ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3,4,5,6,7] ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 -; AVX1-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [256,128,64,32,16,8,4,2] ; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 ; AVX1-NEXT: vpaddw %xmm0, %xmm0, %xmm2 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [32768,16384,8192,4096,2048,1024,512,256] ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX1-NEXT: vpaddw %xmm0, %xmm0, %xmm0 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [128,64,32,16,8,4,2,1] ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 ; AVX1-NEXT: vorps %ymm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: constant_funnnel_v16i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 +; AVX2-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 # [u,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2] ; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] ; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7] ; AVX2-NEXT: vpaddw %ymm0, %ymm0, %ymm0 -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2,1] ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq ; ; AVX512F-LABEL: constant_funnnel_v16i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 +; AVX512F-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 # [u,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2] ; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = 
xmm1[0],xmm2[1,2,3,4,5,6,7] ; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7] ; AVX512F-NEXT: vpaddw %ymm0, %ymm0, %ymm0 -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2,1] ; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: constant_funnnel_v16i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 +; AVX512VL-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 # [u,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2] ; AVX512VL-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] ; AVX512VL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7] ; AVX512VL-NEXT: vpaddw %ymm0, %ymm0, %ymm0 -; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2,1] ; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; @@ -1600,11 +1600,11 @@ define <16 x i16> @constant_funnnel_v16i16(<16 x i16> %x, <16 x i16> %y) nounwin ; ; XOPAVX2-LABEL: constant_funnnel_v16i16: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 +; XOPAVX2-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2 # [u,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2] ; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3,4,5,6,7] ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm2[4,5,6,7] ; XOPAVX2-NEXT: vpaddw %ymm0, %ymm0, %ymm0 -; XOPAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; XOPAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2,1] ; XOPAVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 ; XOPAVX2-NEXT: retq %res = call <16 x i16> 
@llvm.fshr.v16i16(<16 x i16> %x, <16 x i16> %y, <16 x i16> ) @@ -1661,19 +1661,19 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind { ; AVX2: # %bb.0: ; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31] -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [256,2,4,8,16,32,64,128,256,2,4,8,16,32,64,128] ; AVX2-NEXT: vpsrlw $8, %ymm3, %ymm3 ; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[16],ymm2[16],ymm1[17],ymm2[17],ymm1[18],ymm2[18],ymm1[19],ymm2[19],ymm1[20],ymm2[20],ymm1[21],ymm2[21],ymm1[22],ymm2[22],ymm1[23],ymm2[23] -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [256,128,64,32,16,8,4,2,256,128,64,32,16,8,4,2] ; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX2-NEXT: vpackuswb %ymm3, %ymm1, %ymm1 ; AVX2-NEXT: vpaddb %ymm0, %ymm0, %ymm0 ; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64] ; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] ; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm2 ; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] -; AVX2-NEXT: vpmullw 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1] ; AVX2-NEXT: vpand %ymm3, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0 @@ -1683,19 +1683,19 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind { ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [256,2,4,8,16,32,64,128,256,2,4,8,16,32,64,128] ; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[16],ymm2[16],ymm1[17],ymm2[17],ymm1[18],ymm2[18],ymm1[19],ymm2[19],ymm1[20],ymm2[20],ymm1[21],ymm2[21],ymm1[22],ymm2[22],ymm1[23],ymm2[23] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [256,128,64,32,16,8,4,2,256,128,64,32,16,8,4,2] ; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX512F-NEXT: vpackuswb %ymm3, %ymm1, %ymm1 ; AVX512F-NEXT: vpaddb %ymm0, %ymm0, %ymm0 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64] ; AVX512F-NEXT: vpbroadcastw {{.*#+}} 
ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] ; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1] ; AVX512F-NEXT: vpand %ymm3, %ymm0, %ymm0 ; AVX512F-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 ; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 @@ -1705,19 +1705,19 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind { ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm1[8],ymm2[8],ymm1[9],ymm2[9],ymm1[10],ymm2[10],ymm1[11],ymm2[11],ymm1[12],ymm2[12],ymm1[13],ymm2[13],ymm1[14],ymm2[14],ymm1[15],ymm2[15],ymm1[24],ymm2[24],ymm1[25],ymm2[25],ymm1[26],ymm2[26],ymm1[27],ymm2[27],ymm1[28],ymm2[28],ymm1[29],ymm2[29],ymm1[30],ymm2[30],ymm1[31],ymm2[31] -; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 +; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [256,2,4,8,16,32,64,128,256,2,4,8,16,32,64,128] ; AVX512VL-NEXT: vpsrlw $8, %ymm3, %ymm3 ; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm2[0],ymm1[1],ymm2[1],ymm1[2],ymm2[2],ymm1[3],ymm2[3],ymm1[4],ymm2[4],ymm1[5],ymm2[5],ymm1[6],ymm2[6],ymm1[7],ymm2[7],ymm1[16],ymm2[16],ymm1[17],ymm2[17],ymm1[18],ymm2[18],ymm1[19],ymm2[19],ymm1[20],ymm2[20],ymm1[21],ymm2[21],ymm1[22],ymm2[22],ymm1[23],ymm2[23] -; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [256,128,64,32,16,8,4,2,256,128,64,32,16,8,4,2] ; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX512VL-NEXT: vpackuswb %ymm3, %ymm1, %ymm1 ; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm0 ; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm2 = 
ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] -; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [128,1,2,4,8,16,32,64,128,1,2,4,8,16,32,64] ; AVX512VL-NEXT: vpbroadcastd {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] ; AVX512VL-NEXT: vpand %ymm3, %ymm2, %ymm2 ; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] -; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1] ; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0 ; AVX512VL-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 ; AVX512VL-NEXT: vpor %ymm1, %ymm0, %ymm0 diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll index 01578d399b774f..e1292aee57138e 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-128.ll @@ -1574,10 +1574,10 @@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x) nounwind { ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm0, %xmm1 ; SSE-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,2,4,8,16,32,64,128] ; SSE-NEXT: psrlw $8, %xmm1 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,128,64,32,16,8,4,2] ; SSE-NEXT: psrlw $8, %xmm0 ; SSE-NEXT: packuswb %xmm1, %xmm0 ; SSE-NEXT: retq @@ -1585,10 +1585,10 @@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> 
%x) nounwind { ; AVX-LABEL: constant_funnnel_v16i8: ; AVX: # %bb.0: ; AVX-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [1,2,4,8,16,32,64,128] ; AVX-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,128,64,32,16,8,4,2] ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0 ; AVX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq @@ -1596,10 +1596,10 @@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x) nounwind { ; AVX512F-LABEL: constant_funnnel_v16i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [1,2,4,8,16,32,64,128] ; AVX512F-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,128,64,32,16,8,4,2] ; AVX512F-NEXT: vpsrlw $8, %xmm0, %xmm0 ; AVX512F-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: retq @@ -1607,10 +1607,10 @@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x) nounwind { ; AVX512VL-LABEL: constant_funnnel_v16i8: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [1,2,4,8,16,32,64,128] ; AVX512VL-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX512VL-NEXT: vpunpcklbw {{.*#+}} xmm0 = 
xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,128,64,32,16,8,4,2] ; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm0 ; AVX512VL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: retq @@ -1674,10 +1674,10 @@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x) nounwind { ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 ; X86-SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [1,2,4,8,16,32,64,128] ; X86-SSE2-NEXT: psrlw $8, %xmm1 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [1,128,64,32,16,8,4,2] ; X86-SSE2-NEXT: psrlw $8, %xmm0 ; X86-SSE2-NEXT: packuswb %xmm1, %xmm0 ; X86-SSE2-NEXT: retl diff --git a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll index 0aa91b74e12cab..504ba589f3294f 100644 --- a/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll +++ b/llvm/test/CodeGen/X86/vector-fshr-rot-256.ll @@ -1335,10 +1335,10 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x) nounwind { ; AVX2-LABEL: constant_funnnel_v32i8: ; AVX2: # %bb.0: ; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] ; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = 
ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2] ; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq @@ -1346,10 +1346,10 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x) nounwind { ; AVX512F-LABEL: constant_funnnel_v32i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] ; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2] ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX512F-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: retq @@ -1357,10 +1357,10 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x) nounwind { ; AVX512VL-LABEL: constant_funnnel_v32i8: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] -; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] ; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] -; AVX512VL-NEXT: vpmullw 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2] ; AVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX512VL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll b/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll index 3a54ed456008ba..baa70bb2be63ee 100644 --- a/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll +++ b/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll @@ -147,7 +147,7 @@ define <4 x i32> @test_div7_4i32(<4 x i32> %a) nounwind { define <8 x i16> @test_div7_8i16(<8 x i16> %a) nounwind { ; SSE-LABEL: test_div7_8i16: ; SSE: # %bb.0: -; SSE-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [18725,18725,18725,18725,18725,18725,18725,18725] ; SSE-NEXT: movdqa %xmm0, %xmm1 ; SSE-NEXT: psrlw $15, %xmm1 ; SSE-NEXT: psraw $1, %xmm0 @@ -156,7 +156,7 @@ define <8 x i16> @test_div7_8i16(<8 x i16> %a) nounwind { ; ; AVX-LABEL: test_div7_8i16: ; AVX: # %bb.0: -; AVX-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [18725,18725,18725,18725,18725,18725,18725,18725] ; AVX-NEXT: vpsrlw $15, %xmm0, %xmm1 ; AVX-NEXT: vpsraw $1, %xmm0, %xmm0 ; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 @@ -215,7 +215,7 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind { ; AVX2NOBW-LABEL: test_div7_16i8: ; AVX2NOBW: # %bb.0: ; AVX2NOBW-NEXT: vpmovsxbw %xmm0, %ymm1 -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX2NOBW-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX2NOBW-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 @@ -234,7 +234,7 @@ define <16 x i8> @test_div7_16i8(<16 x i8> 
%a) nounwind { ; AVX512BW-LABEL: test_div7_16i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm1 -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427] ; AVX512BW-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1 ; AVX512BW-NEXT: vpaddb %xmm0, %xmm1, %xmm0 @@ -262,10 +262,10 @@ define <16 x i8> @test_divconstant_16i8(<16 x i8> %a) nounwind { ; SSE-NEXT: pxor %xmm1, %xmm1 ; SSE-NEXT: pxor %xmm2, %xmm2 ; SSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15] -; SSE-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [37632,20224,11008,47872,26368,14592,14592,37632] ; SSE-NEXT: psrlw $8, %xmm2 ; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [37632,33024,14592,26368,47872,11008,20224,37632] ; SSE-NEXT: psrlw $8, %xmm1 ; SSE-NEXT: packuswb %xmm2, %xmm1 ; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 @@ -273,12 +273,12 @@ define <16 x i8> @test_divconstant_16i8(<16 x i8> %a) nounwind { ; SSE-NEXT: movdqa %xmm0, %xmm1 ; SSE-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] ; SSE-NEXT: psraw $8, %xmm1 -; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [32,64,128,32,64,128,128,64] ; SSE-NEXT: psrlw $8, %xmm1 ; SSE-NEXT: movdqa %xmm0, %xmm2 ; SSE-NEXT: 
punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] ; SSE-NEXT: psraw $8, %xmm2 -; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [64,64,128,64,32,128,64,32] ; SSE-NEXT: psrlw $8, %xmm2 ; SSE-NEXT: packuswb %xmm1, %xmm2 ; SSE-NEXT: psrlw $7, %xmm0 @@ -290,21 +290,21 @@ define <16 x i8> @test_divconstant_16i8(<16 x i8> %a) nounwind { ; AVX1: # %bb.0: ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [37632,20224,11008,47872,26368,14592,14592,37632] ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [37632,33024,14592,26368,47872,11008,20224,37632] ; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; AVX1-NEXT: vpsraw $8, %xmm1, %xmm1 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [32,64,128,32,64,128,128,64] ; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; AVX1-NEXT: vpsraw $8, %xmm2, %xmm2 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), 
%xmm2, %xmm2 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [64,64,128,64,32,128,64,32] ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 ; AVX1-NEXT: vpackuswb %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0 @@ -315,14 +315,14 @@ define <16 x i8> @test_divconstant_16i8(<16 x i8> %a) nounwind { ; AVX2NOBW-LABEL: test_divconstant_16i8: ; AVX2NOBW: # %bb.0: ; AVX2NOBW-NEXT: vpmovsxbw %xmm0, %ymm1 -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [65427,65409,57,103,65467,43,79,65427,65427,79,43,65467,103,57,57,65427] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX2NOBW-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX2NOBW-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 ; AVX2NOBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX2NOBW-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ; AVX2NOBW-NEXT: vpmovsxbw %xmm0, %ymm1 -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [64,64,128,64,32,128,64,32,32,64,128,32,64,128,128,64] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX2NOBW-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX2NOBW-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 @@ -336,7 +336,7 @@ define <16 x i8> @test_divconstant_16i8(<16 x i8> %a) nounwind { ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpmovsxbw {{.*#+}} ymm1 = [2,2,1,2,3,1,2,3,3,2,1,3,2,1,1,2] ; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm2 -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [65427,65409,57,103,65467,43,79,65427,65427,79,43,65467,103,57,57,65427] ; AVX512BW-NEXT: vpsrlw $8, %ymm2, %ymm2 ; AVX512BW-NEXT: vpmovwb %zmm2, %ymm2 ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 @@ -547,7 +547,7 @@ define <8 x i16> @test_rem7_8i16(<8 x i16> %a) nounwind { ; ; AVX-LABEL: test_rem7_8i16: ; AVX: # %bb.0: -; AVX-NEXT: vpmulhw 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [18725,18725,18725,18725,18725,18725,18725,18725] ; AVX-NEXT: vpsrlw $15, %xmm1, %xmm2 ; AVX-NEXT: vpsraw $1, %xmm1, %xmm1 ; AVX-NEXT: vpaddw %xmm2, %xmm1, %xmm1 @@ -618,7 +618,7 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind { ; AVX2NOBW-LABEL: test_rem7_16i8: ; AVX2NOBW: # %bb.0: ; AVX2NOBW-NEXT: vpmovsxbw %xmm0, %ymm1 -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX2NOBW-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX2NOBW-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 @@ -641,7 +641,7 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind { ; AVX512BW-LABEL: test_rem7_16i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm1 -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427] ; AVX512BW-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1 ; AVX512BW-NEXT: vpaddb %xmm0, %xmm1, %xmm1 @@ -673,10 +673,10 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind { ; SSE2-NEXT: pxor %xmm2, %xmm2 ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; SSE2-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE2-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [37632,20224,11008,47872,26368,14592,14592,37632] ; SSE2-NEXT: psrlw $8, %xmm1 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = 
xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSE2-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE2-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [37632,33024,14592,26368,47872,11008,20224,37632] ; SSE2-NEXT: psrlw $8, %xmm2 ; SSE2-NEXT: packuswb %xmm1, %xmm2 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [255,255,0,0,255,0,0,255,255,0,0,255,0,0,0,255] @@ -685,12 +685,12 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind { ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] ; SSE2-NEXT: psraw $8, %xmm2 -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [32,64,128,32,64,128,128,64] ; SSE2-NEXT: psrlw $8, %xmm2 ; SSE2-NEXT: movdqa %xmm1, %xmm3 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] ; SSE2-NEXT: psraw $8, %xmm3 -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [64,64,128,64,32,128,64,32] ; SSE2-NEXT: psrlw $8, %xmm3 ; SSE2-NEXT: packuswb %xmm2, %xmm3 ; SSE2-NEXT: psrlw $7, %xmm1 @@ -698,11 +698,11 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind { ; SSE2-NEXT: paddb %xmm3, %xmm1 ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [14,13,12,11,10,9,9,7] ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] ; SSE2-NEXT: pand %xmm3, %xmm2 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE2-NEXT: 
pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [7,8,9,10,11,12,13,14] ; SSE2-NEXT: pand %xmm3, %xmm1 ; SSE2-NEXT: packuswb %xmm2, %xmm1 ; SSE2-NEXT: psubb %xmm1, %xmm0 @@ -713,10 +713,10 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind { ; SSE41-NEXT: pxor %xmm2, %xmm2 ; SSE41-NEXT: pxor %xmm1, %xmm1 ; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; SSE41-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE41-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [37632,20224,11008,47872,26368,14592,14592,37632] ; SSE41-NEXT: psrlw $8, %xmm1 ; SSE41-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; SSE41-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE41-NEXT: pmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [37632,33024,14592,26368,47872,11008,20224,37632] ; SSE41-NEXT: psrlw $8, %xmm2 ; SSE41-NEXT: packuswb %xmm1, %xmm2 ; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [255,255,0,0,255,0,0,255,255,0,0,255,0,0,0,255] @@ -725,12 +725,12 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind { ; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] ; SSE41-NEXT: psraw $8, %xmm2 -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [32,64,128,32,64,128,128,64] ; SSE41-NEXT: psrlw $8, %xmm2 ; SSE41-NEXT: movdqa %xmm1, %xmm3 ; SSE41-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] ; SSE41-NEXT: psraw $8, %xmm3 -; 
SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [64,64,128,64,32,128,64,32] ; SSE41-NEXT: psrlw $8, %xmm3 ; SSE41-NEXT: packuswb %xmm2, %xmm3 ; SSE41-NEXT: psrlw $7, %xmm1 @@ -738,10 +738,10 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind { ; SSE41-NEXT: paddb %xmm3, %xmm1 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero ; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [14,13,12,11,10,9,9,7] ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] ; SSE41-NEXT: pand %xmm3, %xmm1 -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [7,8,9,10,11,12,13,14] ; SSE41-NEXT: pand %xmm3, %xmm2 ; SSE41-NEXT: packuswb %xmm1, %xmm2 ; SSE41-NEXT: psubb %xmm2, %xmm0 @@ -751,32 +751,32 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind { ; AVX1: # %bb.0: ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [37632,20224,11008,47872,26368,14592,14592,37632] ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [37632,33024,14592,26368,47872,11008,20224,37632] ; 
AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 ; AVX1-NEXT: vpaddb %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; AVX1-NEXT: vpsraw $8, %xmm2, %xmm2 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [32,64,128,32,64,128,128,64] ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; AVX1-NEXT: vpsraw $8, %xmm3, %xmm3 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [64,64,128,64,32,128,64,32] ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3 ; AVX1-NEXT: vpackuswb %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpsrlw $7, %xmm1, %xmm1 ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX1-NEXT: vpaddb %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [14,13,12,11,10,9,9,7] ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] ; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [7,8,9,10,11,12,13,14] ; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1 ; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm0 @@ -785,14 +785,14 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind { ; AVX2NOBW-LABEL: test_remconstant_16i8: ; AVX2NOBW: # %bb.0: ; AVX2NOBW-NEXT: vpmovsxbw %xmm0, %ymm1 -; 
AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [65427,65409,57,103,65467,43,79,65427,65427,79,43,65467,103,57,57,65427] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX2NOBW-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX2NOBW-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 ; AVX2NOBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 ; AVX2NOBW-NEXT: vpaddb %xmm2, %xmm1, %xmm1 ; AVX2NOBW-NEXT: vpmovsxbw %xmm1, %ymm2 -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [64,64,128,64,32,128,64,32,32,64,128,32,64,128,128,64] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm2, %ymm2 ; AVX2NOBW-NEXT: vextracti128 $1, %ymm2, %xmm3 ; AVX2NOBW-NEXT: vpackuswb %xmm3, %xmm2, %xmm2 @@ -800,7 +800,7 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind { ; AVX2NOBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 ; AVX2NOBW-NEXT: vpaddb %xmm1, %xmm2, %xmm1 ; AVX2NOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [7,8,9,10,11,12,13,14,14,13,12,11,10,9,9,7] ; AVX2NOBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX2NOBW-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX2NOBW-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 @@ -812,7 +812,7 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind { ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpmovsxbw {{.*#+}} ymm1 = [2,2,1,2,3,1,2,3,3,2,1,3,2,1,1,2] ; AVX512BW-NEXT: vpmovsxbw %xmm0, %ymm2 -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, 
%ymm2 # [65427,65409,57,103,65467,43,79,65427,65427,79,43,65467,103,57,57,65427] ; AVX512BW-NEXT: vpsrlw $8, %ymm2, %ymm2 ; AVX512BW-NEXT: vpmovwb %zmm2, %ymm2 ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm3 @@ -824,7 +824,7 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind { ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 ; AVX512BW-NEXT: vpaddb %xmm2, %xmm1, %xmm1 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [7,8,9,10,11,12,13,14,14,13,12,11,10,9,9,7] ; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1 ; AVX512BW-NEXT: vpsubb %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll b/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll index f15f5cba290307..40fc377dc037d0 100644 --- a/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll +++ b/llvm/test/CodeGen/X86/vector-idiv-sdiv-256.ll @@ -146,7 +146,7 @@ define <16 x i16> @test_div7_16i16(<16 x i16> %a) nounwind { ; ; AVX2-LABEL: test_div7_16i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725] ; AVX2-NEXT: vpsrlw $15, %ymm0, %ymm1 ; AVX2-NEXT: vpsraw $1, %ymm0, %ymm0 ; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm0 @@ -222,7 +222,7 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind { ; AVX512BW-LABEL: test_div7_32i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm1 -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpmullw 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 # [65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427] ; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1 ; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1 ; AVX512BW-NEXT: vpaddb %ymm0, %ymm1, %ymm0 @@ -249,10 +249,10 @@ define <32 x i8> @test_divconstant_32i8(<32 x i8> %a) nounwind { ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] -; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 +; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [37632,20224,11008,47872,26368,14592,33024,37632] ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] -; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [47872,12544,26368,6912,14592,30976,33024,35072] ; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX1-NEXT: vpackuswb %xmm3, %xmm1, %xmm1 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm3 @@ -260,11 +260,11 @@ define <32 x i8> @test_divconstant_32i8(<32 x i8> %a) nounwind { ; AVX1-NEXT: vpaddb %xmm4, %xmm1, %xmm1 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; AVX1-NEXT: vpsraw $8, %xmm4, %xmm4 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [32,64,128,32,64,128,64,64] ; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm5 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; AVX1-NEXT: vpsraw $8, %xmm5, 
%xmm5 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 # [16,64,32,128,64,32,32,32] ; AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5 ; AVX1-NEXT: vpackuswb %xmm4, %xmm5, %xmm4 ; AVX1-NEXT: vpsrlw $7, %xmm1, %xmm1 @@ -272,20 +272,20 @@ define <32 x i8> @test_divconstant_32i8(<32 x i8> %a) nounwind { ; AVX1-NEXT: vpand %xmm5, %xmm1, %xmm1 ; AVX1-NEXT: vpaddb %xmm1, %xmm4, %xmm1 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15] -; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 +; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [35072,33024,30976,14592,6912,26368,12544,47872] ; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [37632,33024,14592,26368,47872,11008,20224,37632] ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpaddb %xmm3, %xmm0, %xmm0 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; AVX1-NEXT: vpsraw $8, %xmm2, %xmm2 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [32,32,32,64,128,32,64,16] ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; AVX1-NEXT: vpsraw $8, %xmm3, %xmm3 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [64,64,128,64,32,128,64,32] ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3 ; AVX1-NEXT: vpackuswb 
%xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpsrlw $7, %xmm0, %xmm0 @@ -298,21 +298,21 @@ define <32 x i8> @test_divconstant_32i8(<32 x i8> %a) nounwind { ; AVX2NOBW: # %bb.0: ; AVX2NOBW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2NOBW-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31] -; AVX2NOBW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; AVX2NOBW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [35072,33024,30976,14592,6912,26368,12544,47872,37632,20224,11008,47872,26368,14592,33024,37632] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm2, %ymm2 ; AVX2NOBW-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23] -; AVX2NOBW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2NOBW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [37632,33024,14592,26368,47872,11008,20224,37632,47872,12544,26368,6912,14592,30976,33024,35072] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX2NOBW-NEXT: vpackuswb %ymm2, %ymm1, %ymm1 ; AVX2NOBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; AVX2NOBW-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ; AVX2NOBW-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] ; AVX2NOBW-NEXT: vpsraw $8, %ymm1, %ymm1 -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [32,32,32,64,128,32,64,16,32,64,128,32,64,128,64,64] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm1, %ymm1 ; 
AVX2NOBW-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] ; AVX2NOBW-NEXT: vpsraw $8, %ymm2, %ymm2 -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [64,64,128,64,32,128,64,32,16,64,32,128,64,32,32,32] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm2, %ymm2 ; AVX2NOBW-NEXT: vpackuswb %ymm1, %ymm2, %ymm1 ; AVX2NOBW-NEXT: vpsrlw $7, %ymm0, %ymm0 @@ -324,7 +324,7 @@ define <32 x i8> @test_divconstant_32i8(<32 x i8> %a) nounwind { ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 ; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0 -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [65427,65409,57,103,65467,43,79,65427,65417,65409,121,57,27,103,49,65467,65467,49,103,27,57,121,65409,65417,65427,79,43,65467,103,57,65409,65427] ; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512BW-NEXT: vpaddb %ymm1, %ymm0, %ymm0 @@ -529,7 +529,7 @@ define <16 x i16> @test_rem7_16i16(<16 x i16> %a) nounwind { ; ; AVX2-LABEL: test_rem7_16i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 +; AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 # [18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725] ; AVX2-NEXT: vpsrlw $15, %ymm1, %ymm2 ; AVX2-NEXT: vpsraw $1, %ymm1, %ymm1 ; AVX2-NEXT: vpaddw %ymm2, %ymm1, %ymm1 @@ -621,7 +621,7 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind { ; AVX512BW-LABEL: test_rem7_32i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm1 -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 # 
[65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427,65427] ; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1 ; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1 ; AVX512BW-NEXT: vpaddb %ymm0, %ymm1, %ymm1 @@ -652,10 +652,10 @@ define <32 x i8> @test_remconstant_32i8(<32 x i8> %a) nounwind { ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8],xmm4[8],xmm1[9],xmm4[9],xmm1[10],xmm4[10],xmm1[11],xmm4[11],xmm1[12],xmm4[12],xmm1[13],xmm4[13],xmm1[14],xmm4[14],xmm1[15],xmm4[15] -; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [37632,20224,11008,47872,26368,14592,33024,37632] ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] -; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 +; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [47872,12544,26368,6912,14592,30976,33024,35072] ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3 ; AVX1-NEXT: vpackuswb %xmm2, %xmm3, %xmm3 ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2 @@ -663,11 +663,11 @@ define <32 x i8> @test_remconstant_32i8(<32 x i8> %a) nounwind { ; AVX1-NEXT: vpaddb %xmm5, %xmm3, %xmm3 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; AVX1-NEXT: vpsraw $8, %xmm5, %xmm5 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 # [32,64,128,32,64,128,64,64] ; AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm6 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; AVX1-NEXT: vpsraw $8, %xmm6, %xmm6 -; AVX1-NEXT: vpmullw 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6, %xmm6 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6, %xmm6 # [16,64,32,128,64,32,32,32] ; AVX1-NEXT: vpsrlw $8, %xmm6, %xmm6 ; AVX1-NEXT: vpackuswb %xmm5, %xmm6, %xmm5 ; AVX1-NEXT: vpsrlw $7, %xmm3, %xmm3 @@ -675,39 +675,39 @@ define <32 x i8> @test_remconstant_32i8(<32 x i8> %a) nounwind { ; AVX1-NEXT: vpand %xmm6, %xmm3, %xmm3 ; AVX1-NEXT: vpaddb %xmm3, %xmm5, %xmm5 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm5[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm7 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm7 # [14,13,12,11,10,9,8,7] ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] ; AVX1-NEXT: vpand %xmm3, %xmm7, %xmm7 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 # [22,21,20,19,18,17,16,15] ; AVX1-NEXT: vpand %xmm3, %xmm5, %xmm5 ; AVX1-NEXT: vpackuswb %xmm7, %xmm5, %xmm5 ; AVX1-NEXT: vpsubb %xmm5, %xmm4, %xmm4 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 +; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 # [35072,33024,30976,14592,6912,26368,12544,47872] ; AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [37632,33024,14592,26368,47872,11008,20224,37632] ; AVX1-NEXT: vpsrlw $8, 
%xmm1, %xmm1 ; AVX1-NEXT: vpackuswb %xmm5, %xmm1, %xmm1 ; AVX1-NEXT: vpaddb %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; AVX1-NEXT: vpsraw $8, %xmm2, %xmm2 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [32,32,32,64,128,32,64,16] ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm5 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; AVX1-NEXT: vpsraw $8, %xmm5, %xmm5 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 # [64,64,128,64,32,128,64,32] ; AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5 ; AVX1-NEXT: vpackuswb %xmm2, %xmm5, %xmm2 ; AVX1-NEXT: vpsrlw $7, %xmm1, %xmm1 ; AVX1-NEXT: vpand %xmm6, %xmm1, %xmm1 ; AVX1-NEXT: vpaddb %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [15,16,17,18,19,20,21,22] ; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [7,8,9,10,11,12,13,14] ; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1 ; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm0 @@ -718,32 +718,32 @@ define <32 x i8> @test_remconstant_32i8(<32 x i8> %a) nounwind { ; AVX2NOBW: # %bb.0: ; AVX2NOBW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2NOBW-NEXT: vpunpckhbw {{.*#+}} ymm2 = 
ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31] -; AVX2NOBW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; AVX2NOBW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [35072,33024,30976,14592,6912,26368,12544,47872,37632,20224,11008,47872,26368,14592,33024,37632] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm2, %ymm2 ; AVX2NOBW-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23] -; AVX2NOBW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2NOBW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [37632,33024,14592,26368,47872,11008,20224,37632,47872,12544,26368,6912,14592,30976,33024,35072] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX2NOBW-NEXT: vpackuswb %ymm2, %ymm1, %ymm1 ; AVX2NOBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm2 ; AVX2NOBW-NEXT: vpaddb %ymm2, %ymm1, %ymm1 ; AVX2NOBW-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] ; AVX2NOBW-NEXT: vpsraw $8, %ymm2, %ymm2 -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [32,32,32,64,128,32,64,16,32,64,128,32,64,128,64,64] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm2, %ymm2 ; AVX2NOBW-NEXT: vpunpcklbw {{.*#+}} ymm3 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] ; AVX2NOBW-NEXT: vpsraw $8, %ymm3, %ymm3 -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 +; 
AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [64,64,128,64,32,128,64,32,16,64,32,128,64,32,32,32] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm3, %ymm3 ; AVX2NOBW-NEXT: vpackuswb %ymm2, %ymm3, %ymm2 ; AVX2NOBW-NEXT: vpsrlw $7, %ymm1, %ymm1 ; AVX2NOBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX2NOBW-NEXT: vpaddb %ymm1, %ymm2, %ymm1 ; AVX2NOBW-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [15,16,17,18,19,20,21,22,14,13,12,11,10,9,8,7] ; AVX2NOBW-NEXT: vpbroadcastw {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] ; AVX2NOBW-NEXT: vpand %ymm3, %ymm2, %ymm2 ; AVX2NOBW-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [7,8,9,10,11,12,13,14,22,21,20,19,18,17,16,15] ; AVX2NOBW-NEXT: vpand %ymm3, %ymm1, %ymm1 ; AVX2NOBW-NEXT: vpackuswb %ymm2, %ymm1, %ymm1 ; AVX2NOBW-NEXT: vpsubb %ymm1, %ymm0, %ymm0 @@ -753,7 +753,7 @@ define <32 x i8> @test_remconstant_32i8(<32 x i8> %a) nounwind { ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 ; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm2 -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2 # [65427,65409,57,103,65467,43,79,65427,65417,65409,121,57,27,103,49,65467,65467,49,103,27,57,121,65409,65417,65427,79,43,65467,103,57,65409,65427] ; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2 ; AVX512BW-NEXT: vpmovwb %zmm2, %ymm2 ; AVX512BW-NEXT: vpaddb %ymm1, %ymm2, %ymm1 @@ -764,7 +764,7 @@ define <32 x i8> @test_remconstant_32i8(<32 x 
i8> %a) nounwind { ; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1 ; AVX512BW-NEXT: vpaddb %ymm2, %ymm1, %ymm1 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 # [7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7] ; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1 ; AVX512BW-NEXT: vpsubb %ymm1, %ymm0, %ymm0 ; AVX512BW-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll b/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll index 7477029e2d7a54..35d5f0316e23d1 100644 --- a/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll +++ b/llvm/test/CodeGen/X86/vector-idiv-sdiv-512.ll @@ -117,7 +117,7 @@ define <32 x i16> @test_div7_32i16(<32 x i16> %a) nounwind { ; ; AVX512BW-LABEL: test_div7_32i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 +; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725] ; AVX512BW-NEXT: vpsrlw $15, %zmm0, %zmm1 ; AVX512BW-NEXT: vpsraw $1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0 @@ -202,10 +202,10 @@ define <64 x i8> @test_divconstant_64i8(<64 x i8> %a) nounwind { ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-NEXT: vpxor %xmm2, %xmm2, %xmm2 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = 
ymm2[8],ymm1[8],ymm2[9],ymm1[9],ymm2[10],ymm1[10],ymm2[11],ymm1[11],ymm2[12],ymm1[12],ymm2[13],ymm1[13],ymm2[14],ymm1[14],ymm2[15],ymm1[15],ymm2[24],ymm1[24],ymm2[25],ymm1[25],ymm2[26],ymm1[26],ymm2[27],ymm1[27],ymm2[28],ymm1[28],ymm2[29],ymm1[29],ymm2[30],ymm1[30],ymm2[31],ymm1[31] -; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 +; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [35072,18176,37632,4864,20224,10496,11008,45824,37632,20224,11008,47872,26368,14592,33024,37632] ; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm2[0],ymm1[0],ymm2[1],ymm1[1],ymm2[2],ymm1[2],ymm2[3],ymm1[3],ymm2[4],ymm1[4],ymm2[5],ymm1[5],ymm2[6],ymm1[6],ymm2[7],ymm1[7],ymm2[16],ymm1[16],ymm2[17],ymm1[17],ymm2[18],ymm1[18],ymm2[19],ymm1[19],ymm2[20],ymm1[20],ymm2[21],ymm1[21],ymm2[22],ymm1[22],ymm2[23],ymm1[23] -; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [6912,28416,14592,15104,30976,32000,33024,34048,47872,12544,26368,6912,14592,30976,33024,35072] ; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX512F-NEXT: vpackuswb %ymm3, %ymm1, %ymm1 ; AVX512F-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm3 @@ -213,11 +213,11 @@ define <64 x i8> @test_divconstant_64i8(<64 x i8> %a) nounwind { ; AVX512F-NEXT: vpaddb %ymm4, %ymm1, %ymm1 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] ; AVX512F-NEXT: vpsraw $8, %ymm4, %ymm4 -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [16,32,16,128,32,64,64,16,32,64,128,32,64,128,64,64] ; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm5 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] ; AVX512F-NEXT: vpsraw $8, %ymm5, %ymm5 -; 
AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 # [64,16,32,32,16,16,16,16,16,64,32,128,64,32,32,32] ; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5 ; AVX512F-NEXT: vpackuswb %ymm4, %ymm5, %ymm4 ; AVX512F-NEXT: vpsrlw $7, %ymm1, %ymm1 @@ -225,20 +225,20 @@ define <64 x i8> @test_divconstant_64i8(<64 x i8> %a) nounwind { ; AVX512F-NEXT: vpand %ymm5, %ymm1, %ymm1 ; AVX512F-NEXT: vpaddb %ymm1, %ymm4, %ymm1 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm2[8],ymm0[8],ymm2[9],ymm0[9],ymm2[10],ymm0[10],ymm2[11],ymm0[11],ymm2[12],ymm0[12],ymm2[13],ymm0[13],ymm2[14],ymm0[14],ymm2[15],ymm0[15],ymm2[24],ymm0[24],ymm2[25],ymm0[25],ymm2[26],ymm0[26],ymm2[27],ymm0[27],ymm2[28],ymm0[28],ymm2[29],ymm0[29],ymm2[30],ymm0[30],ymm2[31],ymm0[31] -; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 +; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [35072,33024,30976,14592,6912,26368,12544,47872,34048,33024,32000,30976,15104,14592,28416,6912] ; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm2[0],ymm0[0],ymm2[1],ymm0[1],ymm2[2],ymm0[2],ymm2[3],ymm0[3],ymm2[4],ymm0[4],ymm2[5],ymm0[5],ymm2[6],ymm0[6],ymm2[7],ymm0[7],ymm2[16],ymm0[16],ymm2[17],ymm0[17],ymm2[18],ymm0[18],ymm2[19],ymm0[19],ymm2[20],ymm0[20],ymm2[21],ymm0[21],ymm2[22],ymm0[22],ymm2[23],ymm0[23] -; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [37632,33024,14592,26368,47872,11008,20224,37632,45824,11008,10496,20224,4864,37632,18176,35072] ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX512F-NEXT: vpackuswb %ymm4, %ymm0, %ymm0 ; AVX512F-NEXT: vpaddb %ymm3, %ymm0, %ymm0 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] ; AVX512F-NEXT: vpsraw $8, %ymm2, %ymm2 -; AVX512F-NEXT: vpmullw 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [32,32,32,64,128,32,64,16,16,16,16,16,32,32,16,64] ; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm3 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] ; AVX512F-NEXT: vpsraw $8, %ymm3, %ymm3 -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [64,64,128,64,32,128,64,32,16,64,64,32,128,16,32,16] ; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3 ; AVX512F-NEXT: vpackuswb %ymm2, %ymm3, %ymm2 ; AVX512F-NEXT: vpsrlw $7, %ymm0, %ymm0 @@ -251,10 +251,10 @@ define <64 x i8> @test_divconstant_64i8(<64 x i8> %a) nounwind { ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm1[8],zmm0[8],zmm1[9],zmm0[9],zmm1[10],zmm0[10],zmm1[11],zmm0[11],zmm1[12],zmm0[12],zmm1[13],zmm0[13],zmm1[14],zmm0[14],zmm1[15],zmm0[15],zmm1[24],zmm0[24],zmm1[25],zmm0[25],zmm1[26],zmm0[26],zmm1[27],zmm0[27],zmm1[28],zmm0[28],zmm1[29],zmm0[29],zmm1[30],zmm0[30],zmm1[31],zmm0[31],zmm1[40],zmm0[40],zmm1[41],zmm0[41],zmm1[42],zmm0[42],zmm1[43],zmm0[43],zmm1[44],zmm0[44],zmm1[45],zmm0[45],zmm1[46],zmm0[46],zmm1[47],zmm0[47],zmm1[56],zmm0[56],zmm1[57],zmm0[57],zmm1[58],zmm0[58],zmm1[59],zmm0[59],zmm1[60],zmm0[60],zmm1[61],zmm0[61],zmm1[62],zmm0[62],zmm1[63],zmm0[63] -; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2 +; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2 # [35072,33024,30976,14592,6912,26368,12544,47872,34048,33024,32000,30976,15104,14592,28416,6912,35072,18176,37632,4864,20224,10496,11008,45824,37632,20224,11008,47872,26368,14592,33024,37632] ; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm1 = 
zmm1[0],zmm0[0],zmm1[1],zmm0[1],zmm1[2],zmm0[2],zmm1[3],zmm0[3],zmm1[4],zmm0[4],zmm1[5],zmm0[5],zmm1[6],zmm0[6],zmm1[7],zmm0[7],zmm1[16],zmm0[16],zmm1[17],zmm0[17],zmm1[18],zmm0[18],zmm1[19],zmm0[19],zmm1[20],zmm0[20],zmm1[21],zmm0[21],zmm1[22],zmm0[22],zmm1[23],zmm0[23],zmm1[32],zmm0[32],zmm1[33],zmm0[33],zmm1[34],zmm0[34],zmm1[35],zmm0[35],zmm1[36],zmm0[36],zmm1[37],zmm0[37],zmm1[38],zmm0[38],zmm1[39],zmm0[39],zmm1[48],zmm0[48],zmm1[49],zmm0[49],zmm1[50],zmm0[50],zmm1[51],zmm0[51],zmm1[52],zmm0[52],zmm1[53],zmm0[53],zmm1[54],zmm0[54],zmm1[55],zmm0[55] -; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 # [37632,33024,14592,26368,47872,11008,20224,37632,45824,11008,10496,20224,4864,37632,18176,35072,6912,28416,14592,15104,30976,32000,33024,34048,47872,12544,26368,6912,14592,30976,33024,35072] ; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1 ; AVX512BW-NEXT: vpackuswb %zmm2, %zmm1, %zmm1 ; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 @@ -432,7 +432,7 @@ define <32 x i16> @test_rem7_32i16(<32 x i16> %a) nounwind { ; ; AVX512BW-LABEL: test_rem7_32i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1 +; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1 # [18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725,18725] ; AVX512BW-NEXT: vpsrlw $15, %zmm1, %zmm2 ; AVX512BW-NEXT: vpsraw $1, %zmm1, %zmm1 ; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1 @@ -533,10 +533,10 @@ define <64 x i8> @test_remconstant_64i8(<64 x i8> %a) nounwind { ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm4 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm2 = 
ymm1[8],ymm4[8],ymm1[9],ymm4[9],ymm1[10],ymm4[10],ymm1[11],ymm4[11],ymm1[12],ymm4[12],ymm1[13],ymm4[13],ymm1[14],ymm4[14],ymm1[15],ymm4[15],ymm1[24],ymm4[24],ymm1[25],ymm4[25],ymm1[26],ymm4[26],ymm1[27],ymm4[27],ymm1[28],ymm4[28],ymm1[29],ymm4[29],ymm1[30],ymm4[30],ymm1[31],ymm4[31] -; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [35072,18176,37632,4864,20224,10496,11008,45824,37632,20224,11008,47872,26368,14592,33024,37632] ; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm3 = ymm1[0],ymm4[0],ymm1[1],ymm4[1],ymm1[2],ymm4[2],ymm1[3],ymm4[3],ymm1[4],ymm4[4],ymm1[5],ymm4[5],ymm1[6],ymm4[6],ymm1[7],ymm4[7],ymm1[16],ymm4[16],ymm1[17],ymm4[17],ymm1[18],ymm4[18],ymm1[19],ymm4[19],ymm1[20],ymm4[20],ymm1[21],ymm4[21],ymm1[22],ymm4[22],ymm1[23],ymm4[23] -; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 +; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [6912,28416,14592,15104,30976,32000,33024,34048,47872,12544,26368,6912,14592,30976,33024,35072] ; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3 ; AVX512F-NEXT: vpackuswb %ymm2, %ymm3, %ymm3 ; AVX512F-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm2 @@ -544,11 +544,11 @@ define <64 x i8> @test_remconstant_64i8(<64 x i8> %a) nounwind { ; AVX512F-NEXT: vpaddb %ymm5, %ymm3, %ymm3 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] ; AVX512F-NEXT: vpsraw $8, %ymm5, %ymm5 -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 # [16,32,16,128,32,64,64,16,32,64,128,32,64,128,64,64] ; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm6 = ymm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] ; AVX512F-NEXT: vpsraw $8, %ymm6, %ymm6 -; 
AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm6, %ymm6 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm6, %ymm6 # [64,16,32,32,16,16,16,16,16,64,32,128,64,32,32,32] ; AVX512F-NEXT: vpsrlw $8, %ymm6, %ymm6 ; AVX512F-NEXT: vpackuswb %ymm5, %ymm6, %ymm5 ; AVX512F-NEXT: vpsrlw $7, %ymm3, %ymm3 @@ -556,39 +556,39 @@ define <64 x i8> @test_remconstant_64i8(<64 x i8> %a) nounwind { ; AVX512F-NEXT: vpand %ymm6, %ymm3, %ymm3 ; AVX512F-NEXT: vpaddb %ymm3, %ymm5, %ymm5 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm5[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm7 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm7 # [30,29,28,27,26,25,24,23,14,13,12,11,10,9,8,7] ; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] ; AVX512F-NEXT: vpand %ymm3, %ymm7, %ymm7 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm5 = ymm5[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 # [38,37,36,35,34,33,32,31,22,21,20,19,18,17,16,15] ; AVX512F-NEXT: vpand %ymm3, %ymm5, %ymm5 ; AVX512F-NEXT: vpackuswb %ymm7, %ymm5, %ymm5 ; AVX512F-NEXT: vpsubb %ymm5, %ymm4, %ymm4 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31] -; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 +; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 # [35072,33024,30976,14592,6912,26368,12544,47872,34048,33024,32000,30976,15104,14592,28416,6912] ; 
AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23] -; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512F-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [37632,33024,14592,26368,47872,11008,20224,37632,45824,11008,10496,20224,4864,37632,18176,35072] ; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX512F-NEXT: vpackuswb %ymm5, %ymm1, %ymm1 ; AVX512F-NEXT: vpaddb %ymm2, %ymm1, %ymm1 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] ; AVX512F-NEXT: vpsraw $8, %ymm2, %ymm2 -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [32,32,32,64,128,32,64,16,16,16,16,16,32,32,16,64] ; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm5 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] ; AVX512F-NEXT: vpsraw $8, %ymm5, %ymm5 -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 # [64,64,128,64,32,128,64,32,16,64,64,32,128,16,32,16] ; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5 ; AVX512F-NEXT: vpackuswb %ymm2, %ymm5, %ymm2 ; AVX512F-NEXT: vpsrlw $7, %ymm1, %ymm1 ; AVX512F-NEXT: vpand %ymm6, %ymm1, %ymm1 ; AVX512F-NEXT: vpaddb %ymm1, %ymm2, %ymm1 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # 
[15,16,17,18,19,20,21,22,31,32,33,34,35,36,37,38] ; AVX512F-NEXT: vpand %ymm3, %ymm2, %ymm2 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [7,8,9,10,11,12,13,14,23,24,25,26,27,28,29,30] ; AVX512F-NEXT: vpand %ymm3, %ymm1, %ymm1 ; AVX512F-NEXT: vpackuswb %ymm2, %ymm1, %ymm1 ; AVX512F-NEXT: vpsubb %ymm1, %ymm0, %ymm0 @@ -599,10 +599,10 @@ define <64 x i8> @test_remconstant_64i8(<64 x i8> %a) nounwind { ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm1[8],zmm0[8],zmm1[9],zmm0[9],zmm1[10],zmm0[10],zmm1[11],zmm0[11],zmm1[12],zmm0[12],zmm1[13],zmm0[13],zmm1[14],zmm0[14],zmm1[15],zmm0[15],zmm1[24],zmm0[24],zmm1[25],zmm0[25],zmm1[26],zmm0[26],zmm1[27],zmm0[27],zmm1[28],zmm0[28],zmm1[29],zmm0[29],zmm1[30],zmm0[30],zmm1[31],zmm0[31],zmm1[40],zmm0[40],zmm1[41],zmm0[41],zmm1[42],zmm0[42],zmm1[43],zmm0[43],zmm1[44],zmm0[44],zmm1[45],zmm0[45],zmm1[46],zmm0[46],zmm1[47],zmm0[47],zmm1[56],zmm0[56],zmm1[57],zmm0[57],zmm1[58],zmm0[58],zmm1[59],zmm0[59],zmm1[60],zmm0[60],zmm1[61],zmm0[61],zmm1[62],zmm0[62],zmm1[63],zmm0[63] -; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2 +; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2 # [35072,33024,30976,14592,6912,26368,12544,47872,34048,33024,32000,30976,15104,14592,28416,6912,35072,18176,37632,4864,20224,10496,11008,45824,37632,20224,11008,47872,26368,14592,33024,37632] ; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm1 = 
zmm1[0],zmm0[0],zmm1[1],zmm0[1],zmm1[2],zmm0[2],zmm1[3],zmm0[3],zmm1[4],zmm0[4],zmm1[5],zmm0[5],zmm1[6],zmm0[6],zmm1[7],zmm0[7],zmm1[16],zmm0[16],zmm1[17],zmm0[17],zmm1[18],zmm0[18],zmm1[19],zmm0[19],zmm1[20],zmm0[20],zmm1[21],zmm0[21],zmm1[22],zmm0[22],zmm1[23],zmm0[23],zmm1[32],zmm0[32],zmm1[33],zmm0[33],zmm1[34],zmm0[34],zmm1[35],zmm0[35],zmm1[36],zmm0[36],zmm1[37],zmm0[37],zmm1[38],zmm0[38],zmm1[39],zmm0[39],zmm1[48],zmm0[48],zmm1[49],zmm0[49],zmm1[50],zmm0[50],zmm1[51],zmm0[51],zmm1[52],zmm0[52],zmm1[53],zmm0[53],zmm1[54],zmm0[54],zmm1[55],zmm0[55] -; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 # [37632,33024,14592,26368,47872,11008,20224,37632,45824,11008,10496,20224,4864,37632,18176,35072,6912,28416,14592,15104,30976,32000,33024,34048,47872,12544,26368,6912,14592,30976,33024,35072] ; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1 ; AVX512BW-NEXT: vpackuswb %zmm2, %zmm1, %zmm1 ; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm2 @@ -620,11 +620,11 @@ define <64 x i8> @test_remconstant_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm1, %zmm1 ; AVX512BW-NEXT: vpaddb %zmm1, %zmm2, %zmm1 ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31,40,40,41,41,42,42,43,43,44,44,45,45,46,46,47,47,56,56,57,57,58,58,59,59,60,60,61,61,62,62,63,63] -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2 # [15,16,17,18,19,20,21,22,31,32,33,34,35,36,37,38,30,29,28,27,26,25,24,23,14,13,12,11,10,9,8,7] ; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] ; AVX512BW-NEXT: vpandq %zmm3, %zmm2, %zmm2 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm1 = 
zmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23,32,32,33,33,34,34,35,35,36,36,37,37,38,38,39,39,48,48,49,49,50,50,51,51,52,52,53,53,54,54,55,55] -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 # [7,8,9,10,11,12,13,14,23,24,25,26,27,28,29,30,38,37,36,35,34,33,32,31,22,21,20,19,18,17,16,15] ; AVX512BW-NEXT: vpandq %zmm3, %zmm1, %zmm1 ; AVX512BW-NEXT: vpackuswb %zmm2, %zmm1, %zmm1 ; AVX512BW-NEXT: vpsubb %zmm1, %zmm0, %zmm0 diff --git a/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll b/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll index 2b05c9a7f18863..a8258441507db1 100644 --- a/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll +++ b/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll @@ -153,7 +153,7 @@ define <8 x i16> @test_div7_8i16(<8 x i16> %a) nounwind { ; ; AVX-LABEL: test_div7_8i16: ; AVX: # %bb.0: -; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [9363,9363,9363,9363,9363,9363,9363,9363] ; AVX-NEXT: vpsubw %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0 ; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 @@ -227,7 +227,7 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind { ; AVX2NOBW-LABEL: test_div7_16i8: ; AVX2NOBW: # %bb.0: ; AVX2NOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX2NOBW-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX2NOBW-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 @@ -243,7 +243,7 @@ define <16 x i8> 
@test_div7_16i8(<16 x i8> %a) nounwind { ; AVX512BW-LABEL: test_div7_16i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37] ; AVX512BW-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1 ; AVX512BW-NEXT: vpsubb %xmm1, %xmm0, %xmm0 @@ -268,33 +268,33 @@ define <16 x i8> @test_divconstant_16i8(<16 x i8> %a) nounwind { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [128,256,256,256,256,256,256,256] ; SSE2-NEXT: psrlw $8, %xmm2 -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [147,79,171,117,205,57,57,37] ; SSE2-NEXT: psrlw $8, %xmm2 ; SSE2-NEXT: movdqa %xmm0, %xmm3 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [256,256,256,256,256,256,256,128] ; SSE2-NEXT: psrlw $8, %xmm3 -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [37,32,57,205,117,171,79,147] ; SSE2-NEXT: psrlw $8, %xmm3 ; SSE2-NEXT: packuswb %xmm2, %xmm3 ; SSE2-NEXT: psubb %xmm3, %xmm0 ; 
SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [0,0,0,128,0,0,0,128] ; SSE2-NEXT: psrlw $8, %xmm2 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [128,0,0,0,128,0,0,0] ; SSE2-NEXT: psrlw $8, %xmm0 ; SSE2-NEXT: packuswb %xmm2, %xmm0 ; SSE2-NEXT: paddb %xmm3, %xmm0 ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [64,64,32,32,32,128,128,64] ; SSE2-NEXT: psrlw $8, %xmm2 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [64,256,128,32,32,32,64,64] ; SSE2-NEXT: psrlw $8, %xmm0 ; SSE2-NEXT: packuswb %xmm2, %xmm0 ; SSE2-NEXT: retq @@ -309,7 +309,7 @@ define <16 x i8> @test_divconstant_16i8(<16 x i8> %a) nounwind { ; SSE41-NEXT: psllw $7, %xmm3 ; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm2[1,2,3,4,5,6,7] ; SSE41-NEXT: psrlw $8, %xmm3 -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [147,79,171,117,205,57,57,37] ; SSE41-NEXT: psrlw $8, %xmm3 ; SSE41-NEXT: pxor %xmm2, %xmm2 ; 
SSE41-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] @@ -317,23 +317,23 @@ define <16 x i8> @test_divconstant_16i8(<16 x i8> %a) nounwind { ; SSE41-NEXT: psllw $7, %xmm4 ; SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm2[0,1,2,3,4,5,6],xmm4[7] ; SSE41-NEXT: psrlw $8, %xmm4 -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 # [37,32,57,205,117,171,79,147] ; SSE41-NEXT: psrlw $8, %xmm4 ; SSE41-NEXT: packuswb %xmm3, %xmm4 ; SSE41-NEXT: psubb %xmm4, %xmm0 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,0,0,128,0,0,0,128] ; SSE41-NEXT: psrlw $8, %xmm0 -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [128,0,0,0,128,0,0,0] ; SSE41-NEXT: psrlw $8, %xmm2 ; SSE41-NEXT: packuswb %xmm0, %xmm2 ; SSE41-NEXT: paddb %xmm4, %xmm2 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero ; SSE41-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [64,64,32,32,32,128,128,64] ; SSE41-NEXT: psrlw $8, %xmm2 -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # 
[64,256,128,32,32,32,64,64] ; SSE41-NEXT: psrlw $8, %xmm0 ; SSE41-NEXT: packuswb %xmm2, %xmm0 ; SSE41-NEXT: retq @@ -346,30 +346,30 @@ define <16 x i8> @test_divconstant_16i8(<16 x i8> %a) nounwind { ; AVX1-NEXT: vpsllw $7, %xmm3, %xmm3 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3,4,5,6,7] ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [147,79,171,117,205,57,57,37] ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX1-NEXT: vpsllw $7, %xmm4, %xmm4 ; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,5,6],xmm4[7] ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [37,32,57,205,117,171,79,147] ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3 ; AVX1-NEXT: vpackuswb %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpsubb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [0,0,0,128,0,0,0,128] ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [128,0,0,0,128,0,0,0] ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 ; 
AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0 ; AVX1-NEXT: vpaddb %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [64,64,32,32,32,128,128,64] ; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [64,256,128,32,32,32,64,64] ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq @@ -377,21 +377,21 @@ define <16 x i8> @test_divconstant_16i8(<16 x i8> %a) nounwind { ; AVX2NOBW-LABEL: test_divconstant_16i8: ; AVX2NOBW: # %bb.0: ; AVX2NOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [256,256,256,256,256,256,256,128,128,256,256,256,256,256,256,256] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm1, %ymm1 -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [37,32,57,205,117,171,79,147,147,79,171,117,205,57,57,37] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX2NOBW-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX2NOBW-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 ; AVX2NOBW-NEXT: vpsubb %xmm1, %xmm0, %xmm0 ; AVX2NOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [128,0,0,0,128,0,0,0,0,0,0,128,0,0,0,128] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX2NOBW-NEXT: vextracti128 $1, %ymm0, %xmm2 ; AVX2NOBW-NEXT: vpackuswb %xmm2, %xmm0, %xmm0 ; AVX2NOBW-NEXT: vpaddb %xmm1, %xmm0, %xmm0 ; AVX2NOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [64,256,128,32,32,32,64,64,64,64,32,32,32,128,128,64] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX2NOBW-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2NOBW-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 @@ -403,12 +403,12 @@ define <16 x i8> @test_divconstant_16i8(<16 x i8> %a) nounwind { ; AVX512BW-NEXT: vpmovsxbw {{.*#+}} ymm1 = [0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0] ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero ; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm2, %zmm1 -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [37,32,57,205,117,171,79,147,147,79,171,117,205,57,57,37] ; AVX512BW-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1 ; AVX512BW-NEXT: vpsubb %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} 
ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [128,0,0,0,128,0,0,0,0,0,0,128,0,0,0,128] ; AVX512BW-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512BW-NEXT: vpaddb %xmm1, %xmm0, %xmm0 @@ -619,7 +619,7 @@ define <8 x i16> @test_rem7_8i16(<8 x i16> %a) nounwind { ; ; AVX-LABEL: test_rem7_8i16: ; AVX: # %bb.0: -; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [9363,9363,9363,9363,9363,9363,9363,9363] ; AVX-NEXT: vpsubw %xmm1, %xmm0, %xmm2 ; AVX-NEXT: vpsrlw $1, %xmm2, %xmm2 ; AVX-NEXT: vpaddw %xmm1, %xmm2, %xmm1 @@ -712,7 +712,7 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind { ; AVX2NOBW-LABEL: test_rem7_16i8: ; AVX2NOBW: # %bb.0: ; AVX2NOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX2NOBW-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX2NOBW-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 @@ -732,7 +732,7 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind { ; AVX512BW-LABEL: test_rem7_16i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37] ; AVX512BW-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1 ; AVX512BW-NEXT: vpsubb %xmm1, %xmm0, %xmm2 @@ -761,39 +761,39 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [128,256,256,256,256,256,256,256] ; SSE2-NEXT: psrlw $8, %xmm2 -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [147,79,171,117,205,57,57,37] ; SSE2-NEXT: psrlw $8, %xmm2 ; SSE2-NEXT: movdqa %xmm0, %xmm3 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [256,256,256,256,256,256,256,128] ; SSE2-NEXT: psrlw $8, %xmm3 -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [37,32,57,205,117,171,79,147] ; SSE2-NEXT: psrlw $8, %xmm3 ; SSE2-NEXT: packuswb %xmm2, %xmm3 ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: psubb %xmm3, %xmm2 ; SSE2-NEXT: movdqa %xmm2, %xmm4 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = 
xmm4[8],xmm1[8],xmm4[9],xmm1[9],xmm4[10],xmm1[10],xmm4[11],xmm1[11],xmm4[12],xmm1[12],xmm4[13],xmm1[13],xmm4[14],xmm1[14],xmm4[15],xmm1[15] -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 # [0,0,0,128,0,0,0,128] ; SSE2-NEXT: psrlw $8, %xmm4 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [128,0,0,0,128,0,0,0] ; SSE2-NEXT: psrlw $8, %xmm2 ; SSE2-NEXT: packuswb %xmm4, %xmm2 ; SSE2-NEXT: paddb %xmm3, %xmm2 ; SSE2-NEXT: movdqa %xmm2, %xmm3 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm1[8],xmm3[9],xmm1[9],xmm3[10],xmm1[10],xmm3[11],xmm1[11],xmm3[12],xmm1[12],xmm3[13],xmm1[13],xmm3[14],xmm1[14],xmm3[15],xmm1[15] -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [64,64,32,32,32,128,128,64] ; SSE2-NEXT: psrlw $8, %xmm3 -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [14,13,12,11,10,9,9,7] ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255] ; SSE2-NEXT: pand %xmm4, %xmm3 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [64,256,128,32,32,32,64,64] ; SSE2-NEXT: psrlw $8, %xmm2 -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [7,8,9,10,11,12,13,14] ; SSE2-NEXT: pand %xmm4, %xmm2 ; SSE2-NEXT: packuswb %xmm3, %xmm2 ; SSE2-NEXT: psubb %xmm2, %xmm0 @@ -809,7 +809,7 @@ define <16 x i8> @test_remconstant_16i8(<16 x 
i8> %a) nounwind { ; SSE41-NEXT: psllw $7, %xmm3 ; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm2[1,2,3,4,5,6,7] ; SSE41-NEXT: psrlw $8, %xmm3 -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [147,79,171,117,205,57,57,37] ; SSE41-NEXT: psrlw $8, %xmm3 ; SSE41-NEXT: pxor %xmm2, %xmm2 ; SSE41-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] @@ -817,29 +817,29 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind { ; SSE41-NEXT: psllw $7, %xmm4 ; SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm2[0,1,2,3,4,5,6],xmm4[7] ; SSE41-NEXT: psrlw $8, %xmm4 -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4 # [37,32,57,205,117,171,79,147] ; SSE41-NEXT: psrlw $8, %xmm4 ; SSE41-NEXT: packuswb %xmm3, %xmm4 ; SSE41-NEXT: movdqa %xmm0, %xmm3 ; SSE41-NEXT: psubb %xmm4, %xmm3 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero ; SSE41-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm1[8],xmm3[9],xmm1[9],xmm3[10],xmm1[10],xmm3[11],xmm1[11],xmm3[12],xmm1[12],xmm3[13],xmm1[13],xmm3[14],xmm1[14],xmm3[15],xmm1[15] -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [0,0,0,128,0,0,0,128] ; SSE41-NEXT: psrlw $8, %xmm3 -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [128,0,0,0,128,0,0,0] ; SSE41-NEXT: psrlw $8, %xmm2 ; SSE41-NEXT: packuswb %xmm3, %xmm2 ; SSE41-NEXT: paddb %xmm4, %xmm2 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero ; SSE41-NEXT: punpckhbw {{.*#+}} xmm2 = 
xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [64,64,32,32,32,128,128,64] ; SSE41-NEXT: psrlw $8, %xmm2 -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [14,13,12,11,10,9,9,7] ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = [255,255,255,255,255,255,255,255] ; SSE41-NEXT: pand %xmm1, %xmm2 -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [64,256,128,32,32,32,64,64] ; SSE41-NEXT: psrlw $8, %xmm3 -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3 # [7,8,9,10,11,12,13,14] ; SSE41-NEXT: pand %xmm1, %xmm3 ; SSE41-NEXT: packuswb %xmm2, %xmm3 ; SSE41-NEXT: psubb %xmm3, %xmm0 @@ -853,35 +853,35 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind { ; AVX1-NEXT: vpsllw $7, %xmm3, %xmm3 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0],xmm2[1,2,3,4,5,6,7] ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [147,79,171,117,205,57,57,37] ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm3 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX1-NEXT: vpsllw $7, %xmm4, %xmm4 ; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,5,6],xmm4[7] ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), 
%xmm3, %xmm3 # [37,32,57,205,117,171,79,147] ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3 ; AVX1-NEXT: vpackuswb %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpsubb %xmm2, %xmm0, %xmm3 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm3[8],xmm1[8],xmm3[9],xmm1[9],xmm3[10],xmm1[10],xmm3[11],xmm1[11],xmm3[12],xmm1[12],xmm3[13],xmm1[13],xmm3[14],xmm1[14],xmm3[15],xmm1[15] -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [0,0,0,128,0,0,0,128] ; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [128,0,0,0,128,0,0,0] ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3 ; AVX1-NEXT: vpackuswb %xmm4, %xmm3, %xmm3 ; AVX1-NEXT: vpaddb %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [64,64,32,32,32,128,128,64] ; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [14,13,12,11,10,9,9,7] ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [255,255,255,255,255,255,255,255] ; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [64,256,128,32,32,32,64,64] ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 -; AVX1-NEXT: vpmullw 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [7,8,9,10,11,12,13,14] ; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vpackuswb %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm0 @@ -890,23 +890,23 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind { ; AVX2NOBW-LABEL: test_remconstant_16i8: ; AVX2NOBW: # %bb.0: ; AVX2NOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [256,256,256,256,256,256,256,128,128,256,256,256,256,256,256,256] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm1, %ymm1 -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [37,32,57,205,117,171,79,147,147,79,171,117,205,57,57,37] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX2NOBW-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX2NOBW-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 ; AVX2NOBW-NEXT: vpsubb %xmm1, %xmm0, %xmm2 ; AVX2NOBW-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [128,0,0,0,128,0,0,0,0,0,0,128,0,0,0,128] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm2, %ymm2 ; AVX2NOBW-NEXT: vextracti128 $1, %ymm2, %xmm3 ; AVX2NOBW-NEXT: vpackuswb %xmm3, %xmm2, %xmm2 ; AVX2NOBW-NEXT: vpaddb %xmm1, %xmm2, %xmm1 ; AVX2NOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = 
xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [64,256,128,32,32,32,64,64,64,64,32,32,32,128,128,64] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm1, %ymm1 -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [7,8,9,10,11,12,13,14,14,13,12,11,10,9,9,7] ; AVX2NOBW-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 ; AVX2NOBW-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX2NOBW-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 @@ -919,19 +919,19 @@ define <16 x i8> @test_remconstant_16i8(<16 x i8> %a) nounwind { ; AVX512BW-NEXT: vpmovsxbw {{.*#+}} ymm1 = [0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0] ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero ; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm2, %zmm1 -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [37,32,57,205,117,171,79,147,147,79,171,117,205,57,57,37] ; AVX512BW-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1 ; AVX512BW-NEXT: vpsubb %xmm1, %xmm0, %xmm2 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # 
[128,0,0,0,128,0,0,0,0,0,0,128,0,0,0,128] ; AVX512BW-NEXT: vpsrlw $8, %ymm2, %ymm2 ; AVX512BW-NEXT: vpmovwb %zmm2, %ymm2 ; AVX512BW-NEXT: vpaddb %xmm1, %xmm2, %xmm1 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero ; AVX512BW-NEXT: vpmovsxbw {{.*#+}} ymm2 = [2,0,1,3,3,3,2,2,2,2,3,3,3,1,1,2] ; AVX512BW-NEXT: vpsrlvw %zmm2, %zmm1, %zmm1 -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [7,8,9,10,11,12,13,14,14,13,12,11,10,9,9,7] ; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1 ; AVX512BW-NEXT: vpsubb %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll b/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll index d1e631eae7d4b4..ebb7814ac79fd6 100644 --- a/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll +++ b/llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll @@ -151,7 +151,7 @@ define <16 x i16> @test_div7_16i16(<16 x i16> %a) nounwind { ; ; AVX2-LABEL: test_div7_16i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 +; AVX2-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 # [9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363] ; AVX2-NEXT: vpsubw %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpsrlw $1, %ymm0, %ymm0 ; AVX2-NEXT: vpaddw %ymm1, %ymm0, %ymm0 @@ -220,7 +220,7 @@ define <32 x i8> @test_div7_32i8(<32 x i8> %a) nounwind { ; AVX512BW-LABEL: test_div7_32i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = 
ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 # [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37] ; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1 ; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1 ; AVX512BW-NEXT: vpsubb %ymm1, %ymm0, %ymm0 @@ -248,30 +248,30 @@ define <32 x i8> @test_divconstant_32i8(<32 x i8> %a) nounwind { ; AVX1-NEXT: vpsllw $7, %xmm4, %xmm4 ; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0],xmm3[1,2,3,4,5,6,7] ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [147,79,171,117,205,57,32,37] ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm4 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero ; AVX1-NEXT: vpsllw $7, %xmm5, %xmm5 ; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm5[0],xmm4[1,2,3,4,5,6,7] ; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [187,135,205,27,57,241,16,137] ; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4 ; AVX1-NEXT: vpackuswb %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vpsubb %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vpunpckhbw 
{{.*#+}} xmm4 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [0,0,0,128,0,0,0,128] ; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [0,128,0,0,0,0,0,0] ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 ; AVX1-NEXT: vpackuswb %xmm4, %xmm2, %xmm2 ; AVX1-NEXT: vpaddb %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [64,64,32,32,32,128,256,64] ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [32,16,16,128,64,16,256,32] ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 ; AVX1-NEXT: vpackuswb %xmm3, %xmm2, %xmm2 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm3 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] @@ -279,30 +279,30 @@ define <32 x i8> @test_divconstant_32i8(<32 x i8> %a) nounwind { ; AVX1-NEXT: vpsllw $7, %xmm4, %xmm4 ; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3,4,5,6],xmm4[7] ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), 
%xmm3, %xmm3 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3 # [137,16,241,57,27,205,135,187] ; AVX1-NEXT: vpsrlw $8, %xmm3, %xmm3 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm4 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX1-NEXT: vpsllw $7, %xmm5, %xmm5 ; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,5,6],xmm5[7] ; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [37,32,57,205,117,171,79,147] ; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4 ; AVX1-NEXT: vpackuswb %xmm3, %xmm4, %xmm3 ; AVX1-NEXT: vpsubb %xmm3, %xmm0, %xmm0 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm4 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [0,0,0,0,0,0,128,0] ; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [128,0,0,0,128,0,0,0] ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm4, %xmm0, %xmm0 ; AVX1-NEXT: vpaddb %xmm3, %xmm0, %xmm0 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # 
[32,256,16,64,128,16,16,32] ; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [64,256,128,32,32,32,64,64] ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 @@ -315,28 +315,28 @@ define <32 x i8> @test_divconstant_32i8(<32 x i8> %a) nounwind { ; AVX2NOBW-NEXT: vmovdqa {{.*#+}} ymm3 = [256,256,256,256,256,256,256,128,128,256,256,256,256,256,256,256] ; AVX2NOBW-NEXT: vpmullw %ymm3, %ymm2, %ymm2 ; AVX2NOBW-NEXT: vpsrlw $8, %ymm2, %ymm2 -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [137,16,241,57,27,205,135,187,147,79,171,117,205,57,32,37] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm2, %ymm2 ; AVX2NOBW-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] ; AVX2NOBW-NEXT: vpmullw %ymm3, %ymm4, %ymm3 ; AVX2NOBW-NEXT: vpsrlw $8, %ymm3, %ymm3 -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [37,32,57,205,117,171,79,147,187,135,205,27,57,241,16,137] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm3, %ymm3 ; AVX2NOBW-NEXT: vpackuswb %ymm2, %ymm3, %ymm2 ; AVX2NOBW-NEXT: vpsubb %ymm2, %ymm0, %ymm0 ; AVX2NOBW-NEXT: vpunpckhbw {{.*#+}} ymm3 = 
ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [0,0,0,0,0,0,128,0,0,0,0,128,0,0,0,128] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm3, %ymm3 ; AVX2NOBW-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [128,0,0,0,128,0,0,0,0,128,0,0,0,0,0,0] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX2NOBW-NEXT: vpackuswb %ymm3, %ymm0, %ymm0 ; AVX2NOBW-NEXT: vpaddb %ymm2, %ymm0, %ymm0 ; AVX2NOBW-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [32,256,16,64,128,16,16,32,64,64,32,32,32,128,256,64] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm2, %ymm2 ; AVX2NOBW-NEXT: vpunpcklbw {{.*#+}} ymm0 = 
ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [64,256,128,32,32,32,64,64,32,16,16,128,64,16,256,32] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX2NOBW-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 ; AVX2NOBW-NEXT: retq @@ -345,12 +345,12 @@ define <32 x i8> @test_divconstant_32i8(<32 x i8> %a) nounwind { ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero ; AVX512BW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 # [37,32,57,205,117,171,79,147,137,16,241,57,27,205,135,187,187,135,205,27,57,241,16,137,147,79,171,117,205,57,32,37] ; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1 ; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1 ; AVX512BW-NEXT: vpsubb %ymm1, %ymm0, %ymm0 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = 
ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [128,0,0,0,128,0,0,0,0,0,0,0,0,0,128,0,0,128,0,0,0,0,0,0,0,0,0,128,0,0,0,128] ; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0 ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512BW-NEXT: vpaddb %ymm1, %ymm0, %ymm0 @@ -562,7 +562,7 @@ define <16 x i16> @test_rem7_16i16(<16 x i16> %a) nounwind { ; ; AVX2-LABEL: test_rem7_16i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 +; AVX2-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 # [9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363] ; AVX2-NEXT: vpsubw %ymm1, %ymm0, %ymm2 ; AVX2-NEXT: vpsrlw $1, %ymm2, %ymm2 ; AVX2-NEXT: vpaddw %ymm1, %ymm2, %ymm1 @@ -647,7 +647,7 @@ define <32 x i8> @test_rem7_32i8(<32 x i8> %a) nounwind { ; AVX512BW-LABEL: test_rem7_32i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), 
%zmm1, %zmm1 # [37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37,37] ; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1 ; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1 ; AVX512BW-NEXT: vpsubb %ymm1, %ymm0, %ymm2 @@ -679,35 +679,35 @@ define <32 x i8> @test_remconstant_32i8(<32 x i8> %a) nounwind { ; AVX1-NEXT: vpsllw $7, %xmm4, %xmm4 ; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm4[0],xmm2[1,2,3,4,5,6,7] ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [147,79,171,117,205,57,32,37] ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm4 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero ; AVX1-NEXT: vpsllw $7, %xmm5, %xmm5 ; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm5[0],xmm4[1,2,3,4,5,6,7] ; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [187,135,205,27,57,241,16,137] ; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4 ; AVX1-NEXT: vpackuswb %xmm2, %xmm4, %xmm2 ; AVX1-NEXT: vpsubb %xmm2, %xmm3, %xmm4 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm5 = xmm4[8],xmm1[8],xmm4[9],xmm1[9],xmm4[10],xmm1[10],xmm4[11],xmm1[11],xmm4[12],xmm1[12],xmm4[13],xmm1[13],xmm4[14],xmm1[14],xmm4[15],xmm1[15] -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 # [0,0,0,128,0,0,0,128] ; AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 +; 
AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [0,128,0,0,0,0,0,0] ; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4 ; AVX1-NEXT: vpackuswb %xmm5, %xmm4, %xmm4 ; AVX1-NEXT: vpaddb %xmm2, %xmm4, %xmm4 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm4[8],xmm1[8],xmm4[9],xmm1[9],xmm4[10],xmm1[10],xmm4[11],xmm1[11],xmm4[12],xmm1[12],xmm4[13],xmm1[13],xmm4[14],xmm1[14],xmm4[15],xmm1[15] -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 # [64,64,32,32,32,128,256,64] ; AVX1-NEXT: vpsrlw $8, %xmm2, %xmm2 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm5 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm5 # [14,13,12,11,10,9,8,7] ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] ; AVX1-NEXT: vpand %xmm2, %xmm5, %xmm5 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [32,16,16,128,64,16,256,32] ; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [22,21,20,19,18,17,16,15] ; AVX1-NEXT: vpand %xmm2, %xmm4, %xmm4 ; AVX1-NEXT: vpackuswb %xmm5, %xmm4, %xmm4 ; AVX1-NEXT: vpsubb %xmm4, %xmm3, %xmm3 @@ -716,34 +716,34 @@ define <32 x i8> @test_remconstant_32i8(<32 x i8> %a) nounwind { ; AVX1-NEXT: vpsllw $7, %xmm5, %xmm5 ; AVX1-NEXT: vpblendw {{.*#+}} xmm4 = xmm4[0,1,2,3,4,5,6],xmm5[7] ; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [137,16,241,57,27,205,135,187] ; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm5 = 
xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm6 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX1-NEXT: vpsllw $7, %xmm6, %xmm6 ; AVX1-NEXT: vpblendw {{.*#+}} xmm5 = xmm5[0,1,2,3,4,5,6],xmm6[7] ; AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 # [37,32,57,205,117,171,79,147] ; AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5 ; AVX1-NEXT: vpackuswb %xmm4, %xmm5, %xmm4 ; AVX1-NEXT: vpsubb %xmm4, %xmm0, %xmm5 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm6 = xmm5[8],xmm1[8],xmm5[9],xmm1[9],xmm5[10],xmm1[10],xmm5[11],xmm1[11],xmm5[12],xmm1[12],xmm5[13],xmm1[13],xmm5[14],xmm1[14],xmm5[15],xmm1[15] -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6, %xmm6 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm6, %xmm6 # [0,0,0,0,0,0,128,0] ; AVX1-NEXT: vpsrlw $8, %xmm6, %xmm6 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm5 = xmm5[0],zero,xmm5[1],zero,xmm5[2],zero,xmm5[3],zero,xmm5[4],zero,xmm5[5],zero,xmm5[6],zero,xmm5[7],zero -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm5, %xmm5 # [128,0,0,0,128,0,0,0] ; AVX1-NEXT: vpsrlw $8, %xmm5, %xmm5 ; AVX1-NEXT: vpackuswb %xmm6, %xmm5, %xmm5 ; AVX1-NEXT: vpaddb %xmm4, %xmm5, %xmm4 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm4[8],xmm1[8],xmm4[9],xmm1[9],xmm4[10],xmm1[10],xmm4[11],xmm1[11],xmm4[12],xmm1[12],xmm4[13],xmm1[13],xmm4[14],xmm1[14],xmm4[15],xmm1[15] -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [32,256,16,64,128,16,16,32] ; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, 
%xmm1 # [15,16,17,18,19,20,21,22] ; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [64,256,128,32,32,32,64,64] ; AVX1-NEXT: vpsrlw $8, %xmm4, %xmm4 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm4, %xmm4 # [7,8,9,10,11,12,13,14] ; AVX1-NEXT: vpand %xmm2, %xmm4, %xmm2 ; AVX1-NEXT: vpackuswb %xmm1, %xmm2, %xmm1 ; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm0 @@ -757,33 +757,33 @@ define <32 x i8> @test_remconstant_32i8(<32 x i8> %a) nounwind { ; AVX2NOBW-NEXT: vmovdqa {{.*#+}} ymm3 = [256,256,256,256,256,256,256,128,128,256,256,256,256,256,256,256] ; AVX2NOBW-NEXT: vpmullw %ymm3, %ymm2, %ymm2 ; AVX2NOBW-NEXT: vpsrlw $8, %ymm2, %ymm2 -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [137,16,241,57,27,205,135,187,147,79,171,117,205,57,32,37] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm2, %ymm2 ; AVX2NOBW-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] ; AVX2NOBW-NEXT: vpmullw %ymm3, %ymm4, %ymm3 ; AVX2NOBW-NEXT: vpsrlw $8, %ymm3, %ymm3 -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [37,32,57,205,117,171,79,147,187,135,205,27,57,241,16,137] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm3, %ymm3 ; AVX2NOBW-NEXT: vpackuswb %ymm2, %ymm3, %ymm2 ; AVX2NOBW-NEXT: vpsubb %ymm2, %ymm0, %ymm3 ; AVX2NOBW-NEXT: vpunpckhbw {{.*#+}} 
ymm4 = ymm3[8],ymm1[8],ymm3[9],ymm1[9],ymm3[10],ymm1[10],ymm3[11],ymm1[11],ymm3[12],ymm1[12],ymm3[13],ymm1[13],ymm3[14],ymm1[14],ymm3[15],ymm1[15],ymm3[24],ymm1[24],ymm3[25],ymm1[25],ymm3[26],ymm1[26],ymm3[27],ymm1[27],ymm3[28],ymm1[28],ymm3[29],ymm1[29],ymm3[30],ymm1[30],ymm3[31],ymm1[31] -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [0,0,0,0,0,0,128,0,0,0,0,128,0,0,0,128] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm4, %ymm4 ; AVX2NOBW-NEXT: vpunpcklbw {{.*#+}} ymm3 = ymm3[0],ymm1[0],ymm3[1],ymm1[1],ymm3[2],ymm1[2],ymm3[3],ymm1[3],ymm3[4],ymm1[4],ymm3[5],ymm1[5],ymm3[6],ymm1[6],ymm3[7],ymm1[7],ymm3[16],ymm1[16],ymm3[17],ymm1[17],ymm3[18],ymm1[18],ymm3[19],ymm1[19],ymm3[20],ymm1[20],ymm3[21],ymm1[21],ymm3[22],ymm1[22],ymm3[23],ymm1[23] -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [128,0,0,0,128,0,0,0,0,128,0,0,0,0,0,0] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm3, %ymm3 ; AVX2NOBW-NEXT: vpackuswb %ymm4, %ymm3, %ymm3 ; AVX2NOBW-NEXT: vpaddb %ymm2, %ymm3, %ymm2 ; AVX2NOBW-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm2[8],ymm1[8],ymm2[9],ymm1[9],ymm2[10],ymm1[10],ymm2[11],ymm1[11],ymm2[12],ymm1[12],ymm2[13],ymm1[13],ymm2[14],ymm1[14],ymm2[15],ymm1[15],ymm2[24],ymm1[24],ymm2[25],ymm1[25],ymm2[26],ymm1[26],ymm2[27],ymm1[27],ymm2[28],ymm1[28],ymm2[29],ymm1[29],ymm2[30],ymm1[30],ymm2[31],ymm1[31] -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [32,256,16,64,128,16,16,32,64,64,32,32,32,128,256,64] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm3, %ymm3 -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [15,16,17,18,19,20,21,22,14,13,12,11,10,9,8,7] ; AVX2NOBW-NEXT: vpbroadcastw {{.*#+}} ymm4 = 
[255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] ; AVX2NOBW-NEXT: vpand %ymm4, %ymm3, %ymm3 ; AVX2NOBW-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm2[0],ymm1[0],ymm2[1],ymm1[1],ymm2[2],ymm1[2],ymm2[3],ymm1[3],ymm2[4],ymm1[4],ymm2[5],ymm1[5],ymm2[6],ymm1[6],ymm2[7],ymm1[7],ymm2[16],ymm1[16],ymm2[17],ymm1[17],ymm2[18],ymm1[18],ymm2[19],ymm1[19],ymm2[20],ymm1[20],ymm2[21],ymm1[21],ymm2[22],ymm1[22],ymm2[23],ymm1[23] -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [64,256,128,32,32,32,64,64,32,16,16,128,64,16,256,32] ; AVX2NOBW-NEXT: vpsrlw $8, %ymm1, %ymm1 -; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2NOBW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [7,8,9,10,11,12,13,14,22,21,20,19,18,17,16,15] ; AVX2NOBW-NEXT: vpand %ymm4, %ymm1, %ymm1 ; AVX2NOBW-NEXT: vpackuswb %ymm3, %ymm1, %ymm1 ; AVX2NOBW-NEXT: vpsubb %ymm1, %ymm0, %ymm0 @@ -793,18 +793,18 @@ define <32 x i8> @test_remconstant_32i8(<32 x i8> %a) nounwind { ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero ; AVX512BW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 # [37,32,57,205,117,171,79,147,137,16,241,57,27,205,135,187,187,135,205,27,57,241,16,137,147,79,171,117,205,57,32,37] ; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1 ; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1 ; 
AVX512BW-NEXT: vpsubb %ymm1, %ymm0, %ymm2 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm2 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero,ymm2[16],zero,ymm2[17],zero,ymm2[18],zero,ymm2[19],zero,ymm2[20],zero,ymm2[21],zero,ymm2[22],zero,ymm2[23],zero,ymm2[24],zero,ymm2[25],zero,ymm2[26],zero,ymm2[27],zero,ymm2[28],zero,ymm2[29],zero,ymm2[30],zero,ymm2[31],zero -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2 # [128,0,0,0,128,0,0,0,0,0,0,0,0,0,128,0,0,128,0,0,0,0,0,0,0,0,0,128,0,0,0,128] ; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2 ; AVX512BW-NEXT: vpmovwb %zmm2, %ymm2 ; AVX512BW-NEXT: vpaddb %ymm1, %ymm2, %ymm1 ; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero,ymm1[16],zero,ymm1[17],zero,ymm1[18],zero,ymm1[19],zero,ymm1[20],zero,ymm1[21],zero,ymm1[22],zero,ymm1[23],zero,ymm1[24],zero,ymm1[25],zero,ymm1[26],zero,ymm1[27],zero,ymm1[28],zero,ymm1[29],zero,ymm1[30],zero,ymm1[31],zero ; AVX512BW-NEXT: vpsrlvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 # [7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7] ; AVX512BW-NEXT: vpmovwb %zmm1, %ymm1 ; AVX512BW-NEXT: vpsubb %ymm1, %ymm0, %ymm0 ; AVX512BW-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll b/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll index b21c50d91447b8..8ed8fc1b65c19a 100644 --- a/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll +++ 
b/llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll @@ -120,7 +120,7 @@ define <32 x i16> @test_div7_32i16(<32 x i16> %a) nounwind { ; ; AVX512BW-LABEL: test_div7_32i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1 +; AVX512BW-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1 # [9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363] ; AVX512BW-NEXT: vpsubw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpsrlw $1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpaddw %zmm1, %zmm0, %zmm0 @@ -197,57 +197,57 @@ define <64 x i8> @test_divconstant_64i8(<64 x i8> %a) nounwind { ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm2 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm2[8],ymm1[8],ymm2[9],ymm1[9],ymm2[10],ymm1[10],ymm2[11],ymm1[11],ymm2[12],ymm1[12],ymm2[13],ymm1[13],ymm2[14],ymm1[14],ymm2[15],ymm1[15],ymm2[24],ymm1[24],ymm2[25],ymm1[25],ymm2[26],ymm1[26],ymm2[27],ymm1[27],ymm2[28],ymm1[28],ymm2[29],ymm1[29],ymm2[30],ymm1[30],ymm2[31],ymm1[31] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [256,256,64,256,256,256,256,256,128,256,256,256,256,256,256,256] ; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3 -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [137,27,37,19,79,41,171,101,147,79,171,117,205,57,32,37] ; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm2[0],ymm1[0],ymm2[1],ymm1[1],ymm2[2],ymm1[2],ymm2[3],ymm1[3],ymm2[4],ymm1[4],ymm2[5],ymm1[5],ymm2[6],ymm1[6],ymm2[7],ymm1[7],ymm2[16],ymm1[16],ymm2[17],ymm1[17],ymm2[18],ymm1[18],ymm2[19],ymm1[19],ymm2[20],ymm1[20],ymm2[21],ymm1[21],ymm2[22],ymm1[22],ymm2[23],ymm1[23] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 
+; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [256,256,256,256,256,256,256,256,128,256,256,256,256,256,256,256] ; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4 -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [27,111,57,235,241,249,8,9,187,135,205,27,57,241,16,137] ; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4 ; AVX512F-NEXT: vpackuswb %ymm3, %ymm4, %ymm3 ; AVX512F-NEXT: vpsubb %ymm3, %ymm2, %ymm2 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm2[8],ymm1[8],ymm2[9],ymm1[9],ymm2[10],ymm1[10],ymm2[11],ymm1[11],ymm2[12],ymm1[12],ymm2[13],ymm1[13],ymm2[14],ymm1[14],ymm2[15],ymm1[15],ymm2[24],ymm1[24],ymm2[25],ymm1[25],ymm2[26],ymm1[26],ymm2[27],ymm1[27],ymm2[28],ymm1[28],ymm2[29],ymm1[29],ymm2[30],ymm1[30],ymm2[31],ymm1[31] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [0,128,0,0,0,0,0,128,0,0,0,128,0,0,0,128] ; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0],ymm1[0],ymm2[1],ymm1[1],ymm2[2],ymm1[2],ymm2[3],ymm1[3],ymm2[4],ymm1[4],ymm2[5],ymm1[5],ymm2[6],ymm1[6],ymm2[7],ymm1[7],ymm2[16],ymm1[16],ymm2[17],ymm1[17],ymm2[18],ymm1[18],ymm2[19],ymm1[19],ymm2[20],ymm1[20],ymm2[21],ymm1[21],ymm2[22],ymm1[22],ymm2[23],ymm1[23] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [0,0,0,0,0,0,0,128,0,128,0,0,0,0,0,0] ; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2 ; AVX512F-NEXT: vpackuswb %ymm4, %ymm2, %ymm2 ; AVX512F-NEXT: vpaddb %ymm3, %ymm2, %ymm2 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = 
ymm2[8],ymm1[8],ymm2[9],ymm1[9],ymm2[10],ymm1[10],ymm2[11],ymm1[11],ymm2[12],ymm1[12],ymm2[13],ymm1[13],ymm2[14],ymm1[14],ymm2[15],ymm1[15],ymm2[24],ymm1[24],ymm2[25],ymm1[25],ymm2[26],ymm1[26],ymm2[27],ymm1[27],ymm2[28],ymm1[28],ymm2[29],ymm1[29],ymm2[30],ymm1[30],ymm2[31],ymm1[31] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [16,16,256,128,32,64,16,16,64,64,32,32,32,128,256,64] ; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm2 = ymm2[0],ymm1[0],ymm2[1],ymm1[1],ymm2[2],ymm1[2],ymm2[3],ymm1[3],ymm2[4],ymm1[4],ymm2[5],ymm1[5],ymm2[6],ymm1[6],ymm2[7],ymm1[7],ymm2[16],ymm1[16],ymm2[17],ymm1[17],ymm2[18],ymm1[18],ymm2[19],ymm1[19],ymm2[20],ymm1[20],ymm2[21],ymm1[21],ymm2[22],ymm1[22],ymm2[23],ymm1[23] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [64,16,32,8,8,8,256,16,32,16,16,128,64,16,256,32] ; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2 ; AVX512F-NEXT: vpackuswb %ymm3, %ymm2, %ymm2 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [256,256,256,256,256,256,256,128,256,256,256,256,256,256,256,256] ; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3 -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [137,16,241,57,27,205,135,187,9,8,249,241,235,57,111,27] ; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm4 = 
ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [256,256,256,256,256,256,256,128,256,256,256,256,256,64,256,256] ; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4 -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [37,32,57,205,117,171,79,147,101,171,41,79,19,37,27,137] ; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4 ; AVX512F-NEXT: vpackuswb %ymm3, %ymm4, %ymm3 ; AVX512F-NEXT: vpsubb %ymm3, %ymm0, %ymm0 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [0,0,0,0,0,0,128,0,128,0,0,0,0,0,0,0] ; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [128,0,0,0,128,0,0,0,128,0,0,0,0,0,128,0] ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX512F-NEXT: vpackuswb %ymm4, %ymm0, %ymm0 ; AVX512F-NEXT: 
vpaddb %ymm3, %ymm0, %ymm0 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm3, %ymm3 # [32,256,16,64,128,16,16,32,16,256,8,8,8,32,16,64] ; AVX512F-NEXT: vpsrlw $8, %ymm3, %ymm3 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [64,256,128,32,32,32,64,64,16,16,64,32,128,256,16,16] ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX512F-NEXT: vpackuswb %ymm3, %ymm0, %ymm0 ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 @@ -259,20 +259,20 @@ define <64 x i8> @test_divconstant_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63] ; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), 
%zmm2, %zmm2 ; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2 -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2 # [137,16,241,57,27,205,135,187,9,8,249,241,235,57,111,27,137,27,37,19,79,41,171,101,147,79,171,117,205,57,32,37] ; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55] ; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3 ; AVX512BW-NEXT: vpsrlw $8, %zmm3, %zmm3 -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3 # [37,32,57,205,117,171,79,147,101,171,41,79,19,37,27,137,27,111,57,235,241,249,8,9,187,135,205,27,57,241,16,137] ; AVX512BW-NEXT: vpsrlw $8, %zmm3, %zmm3 ; AVX512BW-NEXT: vpackuswb %zmm2, %zmm3, %zmm2 ; AVX512BW-NEXT: vpsubb %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm3 = 
zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63] -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3 # [0,0,0,0,0,0,128,0,128,0,0,0,0,0,0,0,0,128,0,0,0,0,0,128,0,0,0,128,0,0,0,128] ; AVX512BW-NEXT: vpsrlw $8, %zmm3, %zmm3 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55] -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0 # [128,0,0,0,128,0,0,0,128,0,0,0,0,0,128,0,0,0,0,0,0,0,0,128,0,128,0,0,0,0,0,0] ; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0 ; AVX512BW-NEXT: vpackuswb %zmm3, %zmm0, %zmm0 ; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 @@ -454,7 +454,7 @@ define <32 x i16> @test_rem7_32i16(<32 x i16> %a) nounwind { ; ; AVX512BW-LABEL: test_rem7_32i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, 
%zmm1 +; AVX512BW-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm1 # [9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363,9363] ; AVX512BW-NEXT: vpsubw %zmm1, %zmm0, %zmm2 ; AVX512BW-NEXT: vpsrlw $1, %zmm2, %zmm2 ; AVX512BW-NEXT: vpaddw %zmm1, %zmm2, %zmm1 @@ -549,67 +549,67 @@ define <64 x i8> @test_remconstant_64i8(<64 x i8> %a) nounwind { ; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm3[8],ymm1[8],ymm3[9],ymm1[9],ymm3[10],ymm1[10],ymm3[11],ymm1[11],ymm3[12],ymm1[12],ymm3[13],ymm1[13],ymm3[14],ymm1[14],ymm3[15],ymm1[15],ymm3[24],ymm1[24],ymm3[25],ymm1[25],ymm3[26],ymm1[26],ymm3[27],ymm1[27],ymm3[28],ymm1[28],ymm3[29],ymm1[29],ymm3[30],ymm1[30],ymm3[31],ymm1[31] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [256,256,64,256,256,256,256,256,128,256,256,256,256,256,256,256] ; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2 -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [137,27,37,19,79,41,171,101,147,79,171,117,205,57,32,37] ; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm3[0],ymm1[0],ymm3[1],ymm1[1],ymm3[2],ymm1[2],ymm3[3],ymm1[3],ymm3[4],ymm1[4],ymm3[5],ymm1[5],ymm3[6],ymm1[6],ymm3[7],ymm1[7],ymm3[16],ymm1[16],ymm3[17],ymm1[17],ymm3[18],ymm1[18],ymm3[19],ymm1[19],ymm3[20],ymm1[20],ymm3[21],ymm1[21],ymm3[22],ymm1[22],ymm3[23],ymm1[23] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [256,256,256,256,256,256,256,256,128,256,256,256,256,256,256,256] ; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4 -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 +; 
AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [27,111,57,235,241,249,8,9,187,135,205,27,57,241,16,137] ; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4 ; AVX512F-NEXT: vpackuswb %ymm2, %ymm4, %ymm2 ; AVX512F-NEXT: vpsubb %ymm2, %ymm3, %ymm4 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm4[8],ymm1[8],ymm4[9],ymm1[9],ymm4[10],ymm1[10],ymm4[11],ymm1[11],ymm4[12],ymm1[12],ymm4[13],ymm1[13],ymm4[14],ymm1[14],ymm4[15],ymm1[15],ymm4[24],ymm1[24],ymm4[25],ymm1[25],ymm4[26],ymm1[26],ymm4[27],ymm1[27],ymm4[28],ymm1[28],ymm4[29],ymm1[29],ymm4[30],ymm1[30],ymm4[31],ymm1[31] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 # [0,128,0,0,0,0,0,128,0,0,0,128,0,0,0,128] ; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm4[0],ymm1[0],ymm4[1],ymm1[1],ymm4[2],ymm1[2],ymm4[3],ymm1[3],ymm4[4],ymm1[4],ymm4[5],ymm1[5],ymm4[6],ymm1[6],ymm4[7],ymm1[7],ymm4[16],ymm1[16],ymm4[17],ymm1[17],ymm4[18],ymm1[18],ymm4[19],ymm1[19],ymm4[20],ymm1[20],ymm4[21],ymm1[21],ymm4[22],ymm1[22],ymm4[23],ymm1[23] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [0,0,0,0,0,0,0,128,0,128,0,0,0,0,0,0] ; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4 ; AVX512F-NEXT: vpackuswb %ymm5, %ymm4, %ymm4 ; AVX512F-NEXT: vpaddb %ymm2, %ymm4, %ymm4 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm4[8],ymm1[8],ymm4[9],ymm1[9],ymm4[10],ymm1[10],ymm4[11],ymm1[11],ymm4[12],ymm1[12],ymm4[13],ymm1[13],ymm4[14],ymm1[14],ymm4[15],ymm1[15],ymm4[24],ymm1[24],ymm4[25],ymm1[25],ymm4[26],ymm1[26],ymm4[27],ymm1[27],ymm4[28],ymm1[28],ymm4[29],ymm1[29],ymm4[30],ymm1[30],ymm4[31],ymm1[31] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [16,16,256,128,32,64,16,16,64,64,32,32,32,128,256,64] ; AVX512F-NEXT: vpsrlw $8, %ymm2, %ymm2 -; 
AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm5 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm5 # [30,29,28,27,26,25,24,23,14,13,12,11,10,9,8,7] ; AVX512F-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] ; AVX512F-NEXT: vpand %ymm2, %ymm5, %ymm5 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm4[0],ymm1[0],ymm4[1],ymm1[1],ymm4[2],ymm1[2],ymm4[3],ymm1[3],ymm4[4],ymm1[4],ymm4[5],ymm1[5],ymm4[6],ymm1[6],ymm4[7],ymm1[7],ymm4[16],ymm1[16],ymm4[17],ymm1[17],ymm4[18],ymm1[18],ymm4[19],ymm1[19],ymm4[20],ymm1[20],ymm4[21],ymm1[21],ymm4[22],ymm1[22],ymm4[23],ymm1[23] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [64,16,32,8,8,8,256,16,32,16,16,128,64,16,256,32] ; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4 -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [38,37,36,35,34,33,32,31,22,21,20,19,18,17,16,15] ; AVX512F-NEXT: vpand %ymm2, %ymm4, %ymm4 ; AVX512F-NEXT: vpackuswb %ymm5, %ymm4, %ymm4 ; AVX512F-NEXT: vpsubb %ymm4, %ymm3, %ymm3 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [256,256,256,256,256,256,256,128,256,256,256,256,256,256,256,256] ; AVX512F-NEXT: vpsrlw $8, %ymm4, %ymm4 -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm4, %ymm4 # [137,16,241,57,27,205,135,187,9,8,249,241,235,57,111,27] ; AVX512F-NEXT: 
vpsrlw $8, %ymm4, %ymm4 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm5 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 # [256,256,256,256,256,256,256,128,256,256,256,256,256,64,256,256] ; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5 -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 # [37,32,57,205,117,171,79,147,101,171,41,79,19,37,27,137] ; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5 ; AVX512F-NEXT: vpackuswb %ymm4, %ymm5, %ymm4 ; AVX512F-NEXT: vpsubb %ymm4, %ymm0, %ymm5 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm6 = ymm5[8],ymm1[8],ymm5[9],ymm1[9],ymm5[10],ymm1[10],ymm5[11],ymm1[11],ymm5[12],ymm1[12],ymm5[13],ymm1[13],ymm5[14],ymm1[14],ymm5[15],ymm1[15],ymm5[24],ymm1[24],ymm5[25],ymm1[25],ymm5[26],ymm1[26],ymm5[27],ymm1[27],ymm5[28],ymm1[28],ymm5[29],ymm1[29],ymm5[30],ymm1[30],ymm5[31],ymm1[31] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm6, %ymm6 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm6, %ymm6 # [0,0,0,0,0,0,128,0,128,0,0,0,0,0,0,0] ; AVX512F-NEXT: vpsrlw $8, %ymm6, %ymm6 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm5 = ymm5[0],ymm1[0],ymm5[1],ymm1[1],ymm5[2],ymm1[2],ymm5[3],ymm1[3],ymm5[4],ymm1[4],ymm5[5],ymm1[5],ymm5[6],ymm1[6],ymm5[7],ymm1[7],ymm5[16],ymm1[16],ymm5[17],ymm1[17],ymm5[18],ymm1[18],ymm5[19],ymm1[19],ymm5[20],ymm1[20],ymm5[21],ymm1[21],ymm5[22],ymm1[22],ymm5[23],ymm1[23] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 # [128,0,0,0,128,0,0,0,128,0,0,0,0,0,128,0] ; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5 
; AVX512F-NEXT: vpackuswb %ymm6, %ymm5, %ymm5 ; AVX512F-NEXT: vpaddb %ymm4, %ymm5, %ymm4 ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm4[8],ymm1[8],ymm4[9],ymm1[9],ymm4[10],ymm1[10],ymm4[11],ymm1[11],ymm4[12],ymm1[12],ymm4[13],ymm1[13],ymm4[14],ymm1[14],ymm4[15],ymm1[15],ymm4[24],ymm1[24],ymm4[25],ymm1[25],ymm4[26],ymm1[26],ymm4[27],ymm1[27],ymm4[28],ymm1[28],ymm4[29],ymm1[29],ymm4[30],ymm1[30],ymm4[31],ymm1[31] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 # [32,256,16,64,128,16,16,32,16,256,8,8,8,32,16,64] ; AVX512F-NEXT: vpsrlw $8, %ymm5, %ymm5 -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm5, %ymm5 # [15,16,17,18,19,20,21,22,31,32,33,34,35,36,37,38] ; AVX512F-NEXT: vpand %ymm2, %ymm5, %ymm5 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm4[0],ymm1[0],ymm4[1],ymm1[1],ymm4[2],ymm1[2],ymm4[3],ymm1[3],ymm4[4],ymm1[4],ymm4[5],ymm1[5],ymm4[6],ymm1[6],ymm4[7],ymm1[7],ymm4[16],ymm1[16],ymm4[17],ymm1[17],ymm4[18],ymm1[18],ymm4[19],ymm1[19],ymm4[20],ymm1[20],ymm4[21],ymm1[21],ymm4[22],ymm1[22],ymm4[23],ymm1[23] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [64,256,128,32,32,32,64,64,16,16,64,32,128,256,16,16] ; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1 -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [7,8,9,10,11,12,13,14,23,24,25,26,27,28,29,30] ; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1 ; AVX512F-NEXT: vpackuswb %ymm5, %ymm1, %ymm1 ; AVX512F-NEXT: vpsubb %ymm1, %ymm0, %ymm0 @@ -622,33 +622,33 @@ define <64 x i8> @test_remconstant_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm2 = 
zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63] ; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2 ; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2 -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm2 # [137,16,241,57,27,205,135,187,9,8,249,241,235,57,111,27,137,27,37,19,79,41,171,101,147,79,171,117,205,57,32,37] ; AVX512BW-NEXT: vpsrlw $8, %zmm2, %zmm2 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55] ; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3 ; AVX512BW-NEXT: vpsrlw $8, %zmm3, %zmm3 -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3 # [37,32,57,205,117,171,79,147,101,171,41,79,19,37,27,137,27,111,57,235,241,249,8,9,187,135,205,27,57,241,16,137] ; AVX512BW-NEXT: vpsrlw $8, %zmm3, %zmm3 ; 
AVX512BW-NEXT: vpackuswb %zmm2, %zmm3, %zmm2 ; AVX512BW-NEXT: vpsubb %zmm2, %zmm0, %zmm3 ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm4 = zmm3[8],zmm1[8],zmm3[9],zmm1[9],zmm3[10],zmm1[10],zmm3[11],zmm1[11],zmm3[12],zmm1[12],zmm3[13],zmm1[13],zmm3[14],zmm1[14],zmm3[15],zmm1[15],zmm3[24],zmm1[24],zmm3[25],zmm1[25],zmm3[26],zmm1[26],zmm3[27],zmm1[27],zmm3[28],zmm1[28],zmm3[29],zmm1[29],zmm3[30],zmm1[30],zmm3[31],zmm1[31],zmm3[40],zmm1[40],zmm3[41],zmm1[41],zmm3[42],zmm1[42],zmm3[43],zmm1[43],zmm3[44],zmm1[44],zmm3[45],zmm1[45],zmm3[46],zmm1[46],zmm3[47],zmm1[47],zmm3[56],zmm1[56],zmm3[57],zmm1[57],zmm3[58],zmm1[58],zmm3[59],zmm1[59],zmm3[60],zmm1[60],zmm3[61],zmm1[61],zmm3[62],zmm1[62],zmm3[63],zmm1[63] -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm4, %zmm4 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm4, %zmm4 # [0,0,0,0,0,0,128,0,128,0,0,0,0,0,0,0,0,128,0,0,0,0,0,128,0,0,0,128,0,0,0,128] ; AVX512BW-NEXT: vpsrlw $8, %zmm4, %zmm4 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm3 = zmm3[0],zmm1[0],zmm3[1],zmm1[1],zmm3[2],zmm1[2],zmm3[3],zmm1[3],zmm3[4],zmm1[4],zmm3[5],zmm1[5],zmm3[6],zmm1[6],zmm3[7],zmm1[7],zmm3[16],zmm1[16],zmm3[17],zmm1[17],zmm3[18],zmm1[18],zmm3[19],zmm1[19],zmm3[20],zmm1[20],zmm3[21],zmm1[21],zmm3[22],zmm1[22],zmm3[23],zmm1[23],zmm3[32],zmm1[32],zmm3[33],zmm1[33],zmm3[34],zmm1[34],zmm3[35],zmm1[35],zmm3[36],zmm1[36],zmm3[37],zmm1[37],zmm3[38],zmm1[38],zmm3[39],zmm1[39],zmm3[48],zmm1[48],zmm3[49],zmm1[49],zmm3[50],zmm1[50],zmm3[51],zmm1[51],zmm3[52],zmm1[52],zmm3[53],zmm1[53],zmm3[54],zmm1[54],zmm3[55],zmm1[55] -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3 # [128,0,0,0,128,0,0,0,128,0,0,0,0,0,128,0,0,0,0,0,0,0,0,128,0,128,0,0,0,0,0,0] ; AVX512BW-NEXT: vpsrlw $8, %zmm3, %zmm3 ; AVX512BW-NEXT: vpackuswb %zmm4, %zmm3, %zmm3 ; AVX512BW-NEXT: vpaddb %zmm2, %zmm3, %zmm2 ; AVX512BW-NEXT: vpunpckhbw {{.*#+}} zmm3 = 
zmm2[8],zmm1[8],zmm2[9],zmm1[9],zmm2[10],zmm1[10],zmm2[11],zmm1[11],zmm2[12],zmm1[12],zmm2[13],zmm1[13],zmm2[14],zmm1[14],zmm2[15],zmm1[15],zmm2[24],zmm1[24],zmm2[25],zmm1[25],zmm2[26],zmm1[26],zmm2[27],zmm1[27],zmm2[28],zmm1[28],zmm2[29],zmm1[29],zmm2[30],zmm1[30],zmm2[31],zmm1[31],zmm2[40],zmm1[40],zmm2[41],zmm1[41],zmm2[42],zmm1[42],zmm2[43],zmm1[43],zmm2[44],zmm1[44],zmm2[45],zmm1[45],zmm2[46],zmm1[46],zmm2[47],zmm1[47],zmm2[56],zmm1[56],zmm2[57],zmm1[57],zmm2[58],zmm1[58],zmm2[59],zmm1[59],zmm2[60],zmm1[60],zmm2[61],zmm1[61],zmm2[62],zmm1[62],zmm2[63],zmm1[63] ; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3 ; AVX512BW-NEXT: vpsrlw $8, %zmm3, %zmm3 -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm3, %zmm3 # [15,16,17,18,19,20,21,22,31,32,33,34,35,36,37,38,30,29,28,27,26,25,24,23,14,13,12,11,10,9,8,7] ; AVX512BW-NEXT: vpbroadcastw {{.*#+}} zmm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] ; AVX512BW-NEXT: vpandq %zmm4, %zmm3, %zmm3 ; AVX512BW-NEXT: vpunpcklbw {{.*#+}} zmm1 = zmm2[0],zmm1[0],zmm2[1],zmm1[1],zmm2[2],zmm1[2],zmm2[3],zmm1[3],zmm2[4],zmm1[4],zmm2[5],zmm1[5],zmm2[6],zmm1[6],zmm2[7],zmm1[7],zmm2[16],zmm1[16],zmm2[17],zmm1[17],zmm2[18],zmm1[18],zmm2[19],zmm1[19],zmm2[20],zmm1[20],zmm2[21],zmm1[21],zmm2[22],zmm1[22],zmm2[23],zmm1[23],zmm2[32],zmm1[32],zmm2[33],zmm1[33],zmm2[34],zmm1[34],zmm2[35],zmm1[35],zmm2[36],zmm1[36],zmm2[37],zmm1[37],zmm2[38],zmm1[38],zmm2[39],zmm1[39],zmm2[48],zmm1[48],zmm2[49],zmm1[49],zmm2[50],zmm1[50],zmm2[51],zmm1[51],zmm2[52],zmm1[52],zmm2[53],zmm1[53],zmm2[54],zmm1[54],zmm2[55],zmm1[55] ; AVX512BW-NEXT: vpsllvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 ; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1 -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm1 
# [7,8,9,10,11,12,13,14,23,24,25,26,27,28,29,30,38,37,36,35,34,33,32,31,22,21,20,19,18,17,16,15] ; AVX512BW-NEXT: vpandq %zmm4, %zmm1, %zmm1 ; AVX512BW-NEXT: vpackuswb %zmm3, %zmm1, %zmm1 ; AVX512BW-NEXT: vpsubb %zmm1, %zmm0, %zmm0 diff --git a/llvm/test/CodeGen/X86/vector-mul.ll b/llvm/test/CodeGen/X86/vector-mul.ll index b94e2107c943d1..24ce707b496c8c 100644 --- a/llvm/test/CodeGen/X86/vector-mul.ll +++ b/llvm/test/CodeGen/X86/vector-mul.ll @@ -219,12 +219,12 @@ define <4 x i32> @mul_v4i32_1_2_4_8_optsize(<4 x i32> %a0) nounwind optsize { define <8 x i16> @mul_v8i16_1_2_4_8_16_32_64_128(<8 x i16> %a0) nounwind { ; X86-SSE-LABEL: mul_v8i16_1_2_4_8_16_32_64_128: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [1,2,4,8,16,32,64,128] ; X86-SSE-NEXT: retl ; ; X64-SSE-LABEL: mul_v8i16_1_2_4_8_16_32_64_128: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2,4,8,16,32,64,128] ; X64-SSE-NEXT: retq ; ; X64-XOP-LABEL: mul_v8i16_1_2_4_8_16_32_64_128: @@ -234,12 +234,12 @@ define <8 x i16> @mul_v8i16_1_2_4_8_16_32_64_128(<8 x i16> %a0) nounwind { ; ; X64-AVX2-LABEL: mul_v8i16_1_2_4_8_16_32_64_128: ; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,4,8,16,32,64,128] ; X64-AVX2-NEXT: retq ; ; X64-AVX512DQ-LABEL: mul_v8i16_1_2_4_8_16_32_64_128: ; X64-AVX512DQ: # %bb.0: -; X64-AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,4,8,16,32,64,128] ; X64-AVX512DQ-NEXT: retq %1 = mul <8 x i16> %a0, ret <8 x i16> %1 @@ -282,7 +282,7 @@ define <16 x i8> @mul_v16i8_1_2_4_8_1_2_4_8_1_2_4_8_1_2_4_8(<16 x i8> %a0) nounw ; X64-AVX2-LABEL: mul_v16i8_1_2_4_8_1_2_4_8_1_2_4_8_1_2_4_8: ; X64-AVX2: # 
%bb.0: ; X64-AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; X64-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; X64-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,1,2,4,8,1,2,4,8,1,2,4,8] ; X64-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; X64-AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 @@ -984,17 +984,17 @@ define <4 x i32> @mul_v4i32_5_17_33_65(<4 x i32> %a0) nounwind { define <8 x i16> @mul_v8i16_2_3_9_17_33_65_129_257(<8 x i16> %a0) nounwind { ; X86-SSE-LABEL: mul_v8i16_2_3_9_17_33_65_129_257: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [2,3,9,17,33,65,129,257] ; X86-SSE-NEXT: retl ; ; X64-SSE-LABEL: mul_v8i16_2_3_9_17_33_65_129_257: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2,3,9,17,33,65,129,257] ; X64-SSE-NEXT: retq ; ; X64-AVX-LABEL: mul_v8i16_2_3_9_17_33_65_129_257: ; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2,3,9,17,33,65,129,257] ; X64-AVX-NEXT: retq %1 = mul <8 x i16> %a0, ret <8 x i16> %1 @@ -1005,11 +1005,11 @@ define <16 x i8> @mul_v16i8_2_3_9_17_33_65_129_2_3_9_17_33_65_129_2_3(<16 x i8> ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 ; X86-SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [3,9,17,33,65,129,2,3] ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = 
[255,255,255,255,255,255,255,255] ; X86-SSE2-NEXT: pand %xmm2, %xmm1 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [2,3,9,17,33,65,129,2] ; X86-SSE2-NEXT: pand %xmm2, %xmm0 ; X86-SSE2-NEXT: packuswb %xmm1, %xmm0 ; X86-SSE2-NEXT: retl @@ -1018,10 +1018,10 @@ define <16 x i8> @mul_v16i8_2_3_9_17_33_65_129_2_3_9_17_33_65_129_2_3(<16 x i8> ; X86-SSE4: # %bb.0: ; X86-SSE4-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; X86-SSE4-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; X86-SSE4-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE4-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [3,9,17,33,65,129,2,3] ; X86-SSE4-NEXT: pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] ; X86-SSE4-NEXT: pand %xmm2, %xmm0 -; X86-SSE4-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; X86-SSE4-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [2,3,9,17,33,65,129,2] ; X86-SSE4-NEXT: pand %xmm2, %xmm1 ; X86-SSE4-NEXT: packuswb %xmm0, %xmm1 ; X86-SSE4-NEXT: movdqa %xmm1, %xmm0 @@ -1031,11 +1031,11 @@ define <16 x i8> @mul_v16i8_2_3_9_17_33_65_129_2_3_9_17_33_65_129_2_3(<16 x i8> ; X64-SSE2: # %bb.0: ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1 ; X64-SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; X64-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; X64-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [3,9,17,33,65,129,2,3] ; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] ; X64-SSE2-NEXT: pand %xmm2, %xmm1 ; X64-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; X64-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2,3,9,17,33,65,129,2] ; 
X64-SSE2-NEXT: pand %xmm2, %xmm0 ; X64-SSE2-NEXT: packuswb %xmm1, %xmm0 ; X64-SSE2-NEXT: retq @@ -1044,10 +1044,10 @@ define <16 x i8> @mul_v16i8_2_3_9_17_33_65_129_2_3_9_17_33_65_129_2_3(<16 x i8> ; X64-SSE4: # %bb.0: ; X64-SSE4-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; X64-SSE4-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; X64-SSE4-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE4-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [3,9,17,33,65,129,2,3] ; X64-SSE4-NEXT: pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] ; X64-SSE4-NEXT: pand %xmm2, %xmm0 -; X64-SSE4-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; X64-SSE4-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2,3,9,17,33,65,129,2] ; X64-SSE4-NEXT: pand %xmm2, %xmm1 ; X64-SSE4-NEXT: packuswb %xmm0, %xmm1 ; X64-SSE4-NEXT: movdqa %xmm1, %xmm0 @@ -1056,16 +1056,16 @@ define <16 x i8> @mul_v16i8_2_3_9_17_33_65_129_2_3_9_17_33_65_129_2_3(<16 x i8> ; X64-XOP-LABEL: mul_v16i8_2_3_9_17_33_65_129_2_3_9_17_33_65_129_2_3: ; X64-XOP: # %bb.0: ; X64-XOP-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; X64-XOP-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; X64-XOP-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [3,9,17,33,65,129,2,3] ; X64-XOP-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; X64-XOP-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-XOP-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [2,3,9,17,33,65,129,2] ; X64-XOP-NEXT: vpperm {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],xmm1[0,2,4,6,8,10,12,14] ; X64-XOP-NEXT: retq ; ; X64-AVX2-LABEL: mul_v16i8_2_3_9_17_33_65_129_2_3_9_17_33_65_129_2_3: ; X64-AVX2: # %bb.0: ; X64-AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; X64-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; X64-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [2,3,9,17,33,65,129,2,3,9,17,33,65,129,2,3] ; X64-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; X64-AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 @@ -1075,7 +1075,7 @@ define <16 x i8> @mul_v16i8_2_3_9_17_33_65_129_2_3_9_17_33_65_129_2_3(<16 x i8> ; X64-AVX512DQ-LABEL: mul_v16i8_2_3_9_17_33_65_129_2_3_9_17_33_65_129_2_3: ; X64-AVX512DQ: # %bb.0: ; X64-AVX512DQ-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; X64-AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; X64-AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [2,3,9,17,33,65,129,2,3,9,17,33,65,129,2,3] ; X64-AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; X64-AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; X64-AVX512DQ-NEXT: vzeroupper @@ -1794,17 +1794,17 @@ define <4 x i32> @mul_v4i32_0_15_31_7(<4 x i32> %a0) nounwind { define <8 x i16> @mul_v8i16_0_1_7_15_31_63_127_255(<8 x i16> %a0) nounwind { ; X86-SSE-LABEL: mul_v8i16_0_1_7_15_31_63_127_255: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [0,1,7,15,31,63,127,255] ; X86-SSE-NEXT: retl ; ; X64-SSE-LABEL: 
mul_v8i16_0_1_7_15_31_63_127_255: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,1,7,15,31,63,127,255] ; X64-SSE-NEXT: retq ; ; X64-AVX-LABEL: mul_v8i16_0_1_7_15_31_63_127_255: ; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,1,7,15,31,63,127,255] ; X64-AVX-NEXT: retq %1 = mul <8 x i16> %a0, ret <8 x i16> %1 @@ -1852,7 +1852,7 @@ define <16 x i8> @mul_v16i8_0_1_3_7_15_31_63_127_0_1_3_7_15_31_63_127(<16 x i8> ; X64-AVX2-LABEL: mul_v16i8_0_1_3_7_15_31_63_127_0_1_3_7_15_31_63_127: ; X64-AVX2: # %bb.0: ; X64-AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; X64-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; X64-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [0,1,3,7,15,31,63,127,0,1,3,7,15,31,63,127] ; X64-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; X64-AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 @@ -1862,7 +1862,7 @@ define <16 x i8> @mul_v16i8_0_1_3_7_15_31_63_127_0_1_3_7_15_31_63_127(<16 x i8> ; X64-AVX512DQ-LABEL: mul_v16i8_0_1_3_7_15_31_63_127_0_1_3_7_15_31_63_127: ; X64-AVX512DQ: # %bb.0: ; X64-AVX512DQ-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; X64-AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; X64-AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [0,1,3,7,15,31,63,127,0,1,3,7,15,31,63,127] ; 
X64-AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; X64-AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; X64-AVX512DQ-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll index 2e21f8d0aa32a1..f9b903406e30fd 100644 --- a/llvm/test/CodeGen/X86/vector-rotate-128.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll @@ -1271,10 +1271,10 @@ define <16 x i8> @constant_rotate_v16i8(<16 x i8> %a) nounwind { ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm0, %xmm1 ; SSE-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,128,64,32,16,8,4,2] ; SSE-NEXT: psrlw $8, %xmm1 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2,4,8,16,32,64,128] ; SSE-NEXT: psrlw $8, %xmm0 ; SSE-NEXT: packuswb %xmm1, %xmm0 ; SSE-NEXT: retq @@ -1282,10 +1282,10 @@ define <16 x i8> @constant_rotate_v16i8(<16 x i8> %a) nounwind { ; AVX-LABEL: constant_rotate_v16i8: ; AVX: # %bb.0: ; AVX-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [1,128,64,32,16,8,4,2] ; AVX-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # 
[1,2,4,8,16,32,64,128] ; AVX-NEXT: vpsrlw $8, %xmm0, %xmm0 ; AVX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq @@ -1293,10 +1293,10 @@ define <16 x i8> @constant_rotate_v16i8(<16 x i8> %a) nounwind { ; AVX512F-LABEL: constant_rotate_v16i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [1,128,64,32,16,8,4,2] ; AVX512F-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,4,8,16,32,64,128] ; AVX512F-NEXT: vpsrlw $8, %xmm0, %xmm0 ; AVX512F-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: retq @@ -1304,10 +1304,10 @@ define <16 x i8> @constant_rotate_v16i8(<16 x i8> %a) nounwind { ; AVX512VL-LABEL: constant_rotate_v16i8: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [1,128,64,32,16,8,4,2] ; AVX512VL-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX512VL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,4,8,16,32,64,128] ; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm0 ; AVX512VL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX512VL-NEXT: retq @@ -1371,10 +1371,10 @@ define <16 x i8> @constant_rotate_v16i8(<16 x i8> %a) nounwind { ; X86-SSE2: # %bb.0: ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1 ; X86-SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = 
xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] -; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [1,128,64,32,16,8,4,2] ; X86-SSE2-NEXT: psrlw $8, %xmm1 ; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [1,2,4,8,16,32,64,128] ; X86-SSE2-NEXT: psrlw $8, %xmm0 ; X86-SSE2-NEXT: packuswb %xmm1, %xmm0 ; X86-SSE2-NEXT: retl diff --git a/llvm/test/CodeGen/X86/vector-rotate-256.ll b/llvm/test/CodeGen/X86/vector-rotate-256.ll index b5f0522327a448..e54d235973c79b 100644 --- a/llvm/test/CodeGen/X86/vector-rotate-256.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-256.ll @@ -1075,10 +1075,10 @@ define <32 x i8> @constant_rotate_v32i8(<32 x i8> %a) nounwind { ; AVX2-LABEL: constant_rotate_v32i8: ; AVX2: # %bb.0: ; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2] ; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] ; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq @@ -1086,10 +1086,10 @@ define <32 x i8> @constant_rotate_v32i8(<32 x i8> %a) nounwind { ; AVX512F-LABEL: constant_rotate_v32i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: vpunpckhbw {{.*#+}} ymm1 = 
ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2] ; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX512F-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] ; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX512F-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 ; AVX512F-NEXT: retq @@ -1097,10 +1097,10 @@ define <32 x i8> @constant_rotate_v32i8(<32 x i8> %a) nounwind { ; AVX512VL-LABEL: constant_rotate_v32i8: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] -; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [1,128,64,32,16,8,4,2,1,128,64,32,16,8,4,2] ; AVX512VL-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX512VL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] -; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512VL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] ; AVX512VL-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX512VL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll index 53b6aca3e9fcba..0fe862aade95c9 100644 --- a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll @@ -1429,7 +1429,7 @@ define <8 x i16> 
@constant_shift_v8i16(<8 x i16> %a) nounwind { ; ; AVX-LABEL: constant_shift_v8i16: ; AVX: # %bb.0: -; AVX-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [u,u,16384,8192,4096,2048,1024,512] ; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; AVX-NEXT: vpsraw $1, %xmm0, %xmm0 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3,4,5,6,7] @@ -1498,11 +1498,11 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind { ; SSE-NEXT: movdqa %xmm0, %xmm1 ; SSE-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] ; SSE-NEXT: psraw $8, %xmm1 -; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [2,4,8,16,32,64,128,256] ; SSE-NEXT: psrlw $8, %xmm1 ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSE-NEXT: psraw $8, %xmm0 -; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [256,128,64,32,16,8,4,2] ; SSE-NEXT: psrlw $8, %xmm0 ; SSE-NEXT: packuswb %xmm1, %xmm0 ; SSE-NEXT: retq @@ -1511,11 +1511,11 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind { ; AVX1: # %bb.0: ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; AVX1-NEXT: vpsraw $8, %xmm1, %xmm1 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2,4,8,16,32,64,128,256] ; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; AVX1-NEXT: vpsraw $8, %xmm0, %xmm0 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [256,128,64,32,16,8,4,2] ; AVX1-NEXT: vpsrlw $8, 
%xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq @@ -1523,7 +1523,7 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind { ; AVX2-LABEL: constant_shift_v16i8: ; AVX2: # %bb.0: ; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0 -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [256,128,64,32,16,8,4,2,2,4,8,16,32,64,128,256] ; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 @@ -1574,11 +1574,11 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind { ; X86-SSE-NEXT: movdqa %xmm0, %xmm1 ; X86-SSE-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] ; X86-SSE-NEXT: psraw $8, %xmm1 -; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [2,4,8,16,32,64,128,256] ; X86-SSE-NEXT: psrlw $8, %xmm1 ; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; X86-SSE-NEXT: psraw $8, %xmm0 -; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [256,128,64,32,16,8,4,2] ; X86-SSE-NEXT: psrlw $8, %xmm0 ; X86-SSE-NEXT: packuswb %xmm1, %xmm0 ; X86-SSE-NEXT: retl diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll index 9a483c345f92cb..07dd662d2e8b15 100644 --- a/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll +++ b/llvm/test/CodeGen/X86/vector-shift-ashr-256.ll @@ -1539,18 +1539,18 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) nounwind { define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind { ; AVX1-LABEL: constant_shift_v16i16: ; AVX1: # %bb.0: -; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), 
%xmm0, %xmm1 # [u,u,16384,8192,4096,2048,1024,512] ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; AVX1-NEXT: vpsraw $1, %xmm0, %xmm2 ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2,3,4,5,6,7] ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [256,128,64,32,16,8,4,2] ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: constant_shift_v16i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 +; AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 # [u,u,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2] ; AVX2-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; AVX2-NEXT: vpsraw $1, %xmm0, %xmm0 ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2,3,4,5,6,7] @@ -1602,18 +1602,18 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind { ; ; X86-AVX1-LABEL: constant_shift_v16i16: ; X86-AVX1: # %bb.0: -; X86-AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1 +; X86-AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1 # [u,u,16384,8192,4096,2048,1024,512] ; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; X86-AVX1-NEXT: vpsraw $1, %xmm0, %xmm2 ; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2,3,4,5,6,7] ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; X86-AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # [256,128,64,32,16,8,4,2] ; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; X86-AVX1-NEXT: retl ; ; X86-AVX2-LABEL: constant_shift_v16i16: ; X86-AVX2: # %bb.0: -; X86-AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm1 +; X86-AVX2-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm1 # [u,u,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2] ; X86-AVX2-NEXT: vpblendw 
{{.*#+}} xmm2 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; X86-AVX2-NEXT: vpsraw $1, %xmm0, %xmm0 ; X86-AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2,3,4,5,6,7] @@ -1654,11 +1654,11 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind { ; AVX2: # %bb.0: ; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] ; AVX2-NEXT: vpsraw $8, %ymm1, %ymm1 -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [2,4,8,16,32,64,128,256,2,4,8,16,32,64,128,256] ; AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] ; AVX2-NEXT: vpsraw $8, %ymm0, %ymm0 -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [256,128,64,32,16,8,4,2,256,128,64,32,16,8,4,2] ; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq @@ -1685,11 +1685,11 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind { ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] ; AVX512DQ-NEXT: vpsraw $8, %ymm1, %ymm1 -; AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [2,4,8,16,32,64,128,256,2,4,8,16,32,64,128,256] ; AVX512DQ-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] ; AVX512DQ-NEXT: vpsraw $8, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [256,128,64,32,16,8,4,2,256,128,64,32,16,8,4,2] ; 
AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 ; AVX512DQ-NEXT: retq @@ -1705,11 +1705,11 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind { ; AVX512DQVL: # %bb.0: ; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] ; AVX512DQVL-NEXT: vpsraw $8, %ymm1, %ymm1 -; AVX512DQVL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512DQVL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [2,4,8,16,32,64,128,256,2,4,8,16,32,64,128,256] ; AVX512DQVL-NEXT: vpsrlw $8, %ymm1, %ymm1 ; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] ; AVX512DQVL-NEXT: vpsraw $8, %ymm0, %ymm0 -; AVX512DQVL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512DQVL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [256,128,64,32,16,8,4,2,256,128,64,32,16,8,4,2] ; AVX512DQVL-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX512DQVL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 ; AVX512DQVL-NEXT: retq @@ -1751,11 +1751,11 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind { ; X86-AVX2: # %bb.0: ; X86-AVX2-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] ; X86-AVX2-NEXT: vpsraw $8, %ymm1, %ymm1 -; X86-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1 +; X86-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1 # [2,4,8,16,32,64,128,256,2,4,8,16,32,64,128,256] ; X86-AVX2-NEXT: vpsrlw $8, %ymm1, %ymm1 ; X86-AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] ; X86-AVX2-NEXT: vpsraw $8, %ymm0, %ymm0 -; X86-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 +; X86-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # [256,128,64,32,16,8,4,2,256,128,64,32,16,8,4,2] ; 
X86-AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 ; X86-AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 ; X86-AVX2-NEXT: retl diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll index 36a6226f8f4b9c..2ec9de0cb447f5 100644 --- a/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-ashr-sub128.ll @@ -1795,7 +1795,7 @@ define <4 x i16> @constant_shift_v4i16(<4 x i16> %a) nounwind { ; ; AVX-LABEL: constant_shift_v4i16: ; AVX: # %bb.0: -; AVX-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vpmulhw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [u,u,16384,8192,u,u,u,u] ; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; AVX-NEXT: vpsraw $1, %xmm0, %xmm0 ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1],xmm1[2,3,4,5,6,7] @@ -1936,7 +1936,7 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind { ; SSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSE-NEXT: psraw $8, %xmm0 -; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [256,128,64,32,16,8,4,2] ; SSE-NEXT: psrlw $8, %xmm0 ; SSE-NEXT: packuswb %xmm2, %xmm0 ; SSE-NEXT: retq @@ -1947,7 +1947,7 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind { ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; AVX1-NEXT: vpsraw $8, %xmm0, %xmm0 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [256,128,64,32,16,8,4,2] ; AVX1-NEXT: vpsrlw $8, 
%xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq @@ -1955,7 +1955,7 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind { ; AVX2-LABEL: constant_shift_v8i8: ; AVX2: # %bb.0: ; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0 -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [256,128,64,32,16,8,4,2,256,256,256,256,256,256,256,256] ; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 @@ -2008,7 +2008,7 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind { ; X86-SSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] ; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; X86-SSE-NEXT: psraw $8, %xmm0 -; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [256,128,64,32,16,8,4,2] ; X86-SSE-NEXT: psrlw $8, %xmm0 ; X86-SSE-NEXT: packuswb %xmm2, %xmm0 ; X86-SSE-NEXT: retl @@ -2024,7 +2024,7 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind { ; SSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSE-NEXT: psraw $8, %xmm0 -; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [256,128,64,32,256,256,256,256] ; SSE-NEXT: psrlw $8, %xmm0 ; SSE-NEXT: packuswb %xmm2, %xmm0 ; SSE-NEXT: retq @@ -2035,7 +2035,7 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind { ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = 
xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; AVX1-NEXT: vpsraw $8, %xmm0, %xmm0 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [256,128,64,32,256,256,256,256] ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq @@ -2043,7 +2043,7 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind { ; AVX2-LABEL: constant_shift_v4i8: ; AVX2: # %bb.0: ; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0 -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [256,128,64,32,256,256,256,256,256,256,256,256,256,256,256,256] ; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 @@ -2096,7 +2096,7 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind { ; X86-SSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] ; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; X86-SSE-NEXT: psraw $8, %xmm0 -; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [256,128,64,32,256,256,256,256] ; X86-SSE-NEXT: psrlw $8, %xmm0 ; X86-SSE-NEXT: packuswb %xmm2, %xmm0 ; X86-SSE-NEXT: retl @@ -2112,7 +2112,7 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind { ; SSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] ; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSE-NEXT: psraw $8, %xmm0 -; 
SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [64,32,256,256,256,256,256,256] ; SSE-NEXT: psrlw $8, %xmm0 ; SSE-NEXT: packuswb %xmm2, %xmm0 ; SSE-NEXT: retq @@ -2123,7 +2123,7 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind { ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; AVX1-NEXT: vpsraw $8, %xmm0, %xmm0 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [64,32,256,256,256,256,256,256] ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq @@ -2131,7 +2131,7 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind { ; AVX2-LABEL: constant_shift_v2i8: ; AVX2: # %bb.0: ; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0 -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [64,32,256,256,256,256,256,256,256,256,256,256,256,256,256,256] ; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 @@ -2184,7 +2184,7 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind { ; X86-SSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] ; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; X86-SSE-NEXT: psraw $8, %xmm0 -; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [64,32,256,256,256,256,256,256] ; X86-SSE-NEXT: psrlw $8, %xmm0 ; X86-SSE-NEXT: packuswb %xmm2, %xmm0 ; X86-SSE-NEXT: retl diff --git 
a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll index ca8343cd4812cd..62b95eedc9d4f1 100644 --- a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll @@ -1180,7 +1180,7 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind { ; ; AVX-LABEL: constant_shift_v8i16: ; AVX: # %bb.0: -; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [u,32768,16384,8192,4096,2048,1024,512] ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; AVX-NEXT: retq ; @@ -1191,7 +1191,7 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind { ; ; AVX512DQ-LABEL: constant_shift_v8i16: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX512DQ-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [u,32768,16384,8192,4096,2048,1024,512] ; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; AVX512DQ-NEXT: retq ; @@ -1206,7 +1206,7 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind { ; ; AVX512DQVL-LABEL: constant_shift_v8i16: ; AVX512DQVL: # %bb.0: -; AVX512DQVL-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX512DQVL-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [u,32768,16384,8192,4096,2048,1024,512] ; AVX512DQVL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; AVX512DQVL-NEXT: retq ; @@ -1232,10 +1232,10 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind { ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 # [2,4,8,16,32,64,128,256] ; 
SSE2-NEXT: psrlw $8, %xmm2 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [256,128,64,32,16,8,4,2] ; SSE2-NEXT: psrlw $8, %xmm0 ; SSE2-NEXT: packuswb %xmm2, %xmm0 ; SSE2-NEXT: retq @@ -1245,9 +1245,9 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind { ; SSE41-NEXT: pxor %xmm2, %xmm2 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15] -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2,4,8,16,32,64,128,256] ; SSE41-NEXT: psrlw $8, %xmm0 -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [256,128,64,32,16,8,4,2] ; SSE41-NEXT: psrlw $8, %xmm1 ; SSE41-NEXT: packuswb %xmm0, %xmm1 ; SSE41-NEXT: movdqa %xmm1, %xmm0 @@ -1257,10 +1257,10 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind { ; AVX1: # %bb.0: ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [2,4,8,16,32,64,128,256] ; AVX1-NEXT: vpsrlw $8, %xmm1, %xmm1 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX1-NEXT: vpmullw 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [256,128,64,32,16,8,4,2] ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq @@ -1268,7 +1268,7 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind { ; AVX2-LABEL: constant_shift_v16i8: ; AVX2: # %bb.0: ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [256,128,64,32,16,8,4,2,2,4,8,16,32,64,128,256] ; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 @@ -1319,10 +1319,10 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind { ; X86-SSE-NEXT: pxor %xmm1, %xmm1 ; X86-SSE-NEXT: movdqa %xmm0, %xmm2 ; X86-SSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] -; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 +; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm2 # [2,4,8,16,32,64,128,256] ; X86-SSE-NEXT: psrlw $8, %xmm2 ; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [256,128,64,32,16,8,4,2] ; X86-SSE-NEXT: psrlw $8, %xmm0 ; X86-SSE-NEXT: packuswb %xmm2, %xmm0 ; X86-SSE-NEXT: retl diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll index e65f78e49dc8d2..0ef5d650535d23 100644 
--- a/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll +++ b/llvm/test/CodeGen/X86/vector-shift-lshr-256.ll @@ -1270,16 +1270,16 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) nounwind { define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind { ; AVX1-LABEL: constant_shift_v16i16: ; AVX1: # %bb.0: -; AVX1-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX1-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [u,32768,16384,8192,4096,2048,1024,512] ; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [256,128,64,32,16,8,4,2] ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: constant_shift_v16i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 +; AVX2-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 # [u,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2] ; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] ; AVX2-NEXT: retq @@ -1294,14 +1294,14 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind { ; ; XOPAVX2-LABEL: constant_shift_v16i16: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 +; XOPAVX2-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 # [u,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2] ; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] ; XOPAVX2-NEXT: retq ; ; AVX512DQ-LABEL: constant_shift_v16i16: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 +; AVX512DQ-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 # 
[u,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2] ; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] ; AVX512DQ-NEXT: retq @@ -1316,7 +1316,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind { ; ; AVX512DQVL-LABEL: constant_shift_v16i16: ; AVX512DQVL: # %bb.0: -; AVX512DQVL-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 +; AVX512DQVL-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm1 # [u,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2] ; AVX512DQVL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; AVX512DQVL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] ; AVX512DQVL-NEXT: retq @@ -1328,16 +1328,16 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind { ; ; X86-AVX1-LABEL: constant_shift_v16i16: ; X86-AVX1: # %bb.0: -; X86-AVX1-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1 +; X86-AVX1-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1 # [u,32768,16384,8192,4096,2048,1024,512] ; X86-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; X86-AVX1-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # [256,128,64,32,16,8,4,2] ; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; X86-AVX1-NEXT: retl ; ; X86-AVX2-LABEL: constant_shift_v16i16: ; X86-AVX2: # %bb.0: -; X86-AVX2-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm1 +; X86-AVX2-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm1 # [u,32768,16384,8192,4096,2048,1024,512,256,128,64,32,16,8,4,2] ; X86-AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; X86-AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] ; X86-AVX2-NEXT: retl @@ -1373,10 +1373,10 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind { ; AVX2: # %bb.0: ; AVX2-NEXT: vpxor %xmm1, 
%xmm1, %xmm1 ; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [2,4,8,16,32,64,128,256,2,4,8,16,32,64,128,256] ; AVX2-NEXT: vpsrlw $8, %ymm2, %ymm2 ; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [256,128,64,32,16,8,4,2,256,128,64,32,16,8,4,2] ; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: retq @@ -1403,10 +1403,10 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind { ; AVX512DQ: # %bb.0: ; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] -; AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [2,4,8,16,32,64,128,256,2,4,8,16,32,64,128,256] ; AVX512DQ-NEXT: vpsrlw $8, %ymm2, %ymm2 ; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm0 = 
ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] -; AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [256,128,64,32,16,8,4,2,256,128,64,32,16,8,4,2] ; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 ; AVX512DQ-NEXT: retq @@ -1422,10 +1422,10 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind { ; AVX512DQVL: # %bb.0: ; AVX512DQVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] -; AVX512DQVL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 +; AVX512DQVL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2 # [2,4,8,16,32,64,128,256,2,4,8,16,32,64,128,256] ; AVX512DQVL-NEXT: vpsrlw $8, %ymm2, %ymm2 ; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] -; AVX512DQVL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512DQVL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [256,128,64,32,16,8,4,2,256,128,64,32,16,8,4,2] ; AVX512DQVL-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX512DQVL-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 ; AVX512DQVL-NEXT: retq @@ -1464,10 +1464,10 @@ define <32 x i8> @constant_shift_v32i8(<32 x 
i8> %a) nounwind { ; X86-AVX2: # %bb.0: ; X86-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X86-AVX2-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] -; X86-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm2, %ymm2 +; X86-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm2, %ymm2 # [2,4,8,16,32,64,128,256,2,4,8,16,32,64,128,256] ; X86-AVX2-NEXT: vpsrlw $8, %ymm2, %ymm2 ; X86-AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23] -; X86-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 +; X86-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # [256,128,64,32,16,8,4,2,256,128,64,32,16,8,4,2] ; X86-AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 ; X86-AVX2-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 ; X86-AVX2-NEXT: retl diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll index 74ba1d04161f87..fe349e9ff995d9 100644 --- a/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-lshr-sub128.ll @@ -1493,7 +1493,7 @@ define <4 x i16> @constant_shift_v4i16(<4 x i16> %a) nounwind { ; ; AVX-LABEL: constant_shift_v4i16: ; AVX: # %bb.0: -; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [u,32768,16384,8192,u,u,u,u] ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; AVX-NEXT: retq ; @@ -1504,7 +1504,7 @@ define <4 x i16> @constant_shift_v4i16(<4 x i16> %a) nounwind { ; ; 
AVX512DQ-LABEL: constant_shift_v4i16: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX512DQ-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [u,32768,16384,8192,u,u,u,u] ; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; AVX512DQ-NEXT: retq ; @@ -1519,7 +1519,7 @@ define <4 x i16> @constant_shift_v4i16(<4 x i16> %a) nounwind { ; ; AVX512DQVL-LABEL: constant_shift_v4i16: ; AVX512DQVL: # %bb.0: -; AVX512DQVL-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX512DQVL-NEXT: vpmulhuw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [u,32768,16384,8192,u,u,u,u] ; AVX512DQVL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7] ; AVX512DQVL-NEXT: retq ; @@ -1620,7 +1620,7 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind { ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [256,128,64,32,16,8,4,2] ; SSE2-NEXT: psrlw $8, %xmm0 ; SSE2-NEXT: packuswb %xmm2, %xmm0 ; SSE2-NEXT: retq @@ -1630,7 +1630,7 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind { ; SSE41-NEXT: pxor %xmm2, %xmm2 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15] -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), 
%xmm1 # [256,128,64,32,16,8,4,2] ; SSE41-NEXT: psrlw $8, %xmm1 ; SSE41-NEXT: packuswb %xmm0, %xmm1 ; SSE41-NEXT: movdqa %xmm1, %xmm0 @@ -1641,7 +1641,7 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind { ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [256,128,64,32,16,8,4,2] ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq @@ -1649,7 +1649,7 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind { ; AVX2-LABEL: constant_shift_v8i8: ; AVX2: # %bb.0: ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [256,128,64,32,16,8,4,2,256,256,256,256,256,256,256,256] ; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 @@ -1701,7 +1701,7 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind { ; X86-SSE-NEXT: movdqa %xmm0, %xmm2 ; X86-SSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] ; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [256,128,64,32,16,8,4,2] ; X86-SSE-NEXT: psrlw $8, %xmm0 ; X86-SSE-NEXT: packuswb %xmm2, %xmm0 ; X86-SSE-NEXT: retl @@ -1716,7 +1716,7 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind { ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [256,128,64,32,256,256,256,256] ; SSE2-NEXT: psrlw $8, %xmm0 ; SSE2-NEXT: packuswb %xmm2, %xmm0 ; SSE2-NEXT: retq @@ -1726,7 +1726,7 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind { ; SSE41-NEXT: pxor %xmm2, %xmm2 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15] -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [256,128,64,32,256,256,256,256] ; SSE41-NEXT: psrlw $8, %xmm1 ; SSE41-NEXT: packuswb %xmm0, %xmm1 ; SSE41-NEXT: movdqa %xmm1, %xmm0 @@ -1737,7 +1737,7 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind { ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = 
xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [256,128,64,32,256,256,256,256] ; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq @@ -1745,7 +1745,7 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind { ; AVX2-LABEL: constant_shift_v4i8: ; AVX2: # %bb.0: ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [256,128,64,32,256,256,256,256,256,256,256,256,256,256,256,256] ; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 @@ -1797,7 +1797,7 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind { ; X86-SSE-NEXT: movdqa %xmm0, %xmm2 ; X86-SSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] ; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [256,128,64,32,256,256,256,256] ; X86-SSE-NEXT: psrlw $8, %xmm0 ; X86-SSE-NEXT: packuswb %xmm2, %xmm0 ; X86-SSE-NEXT: retl @@ -1812,7 +1812,7 @@ define <2 
x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind { ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [64,32,256,256,256,256,256,256] ; SSE2-NEXT: psrlw $8, %xmm0 ; SSE2-NEXT: packuswb %xmm2, %xmm0 ; SSE2-NEXT: retq @@ -1822,7 +1822,7 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind { ; SSE41-NEXT: pxor %xmm2, %xmm2 ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm2[8],xmm0[9],xmm2[9],xmm0[10],xmm2[10],xmm0[11],xmm2[11],xmm0[12],xmm2[12],xmm0[13],xmm2[13],xmm0[14],xmm2[14],xmm0[15],xmm2[15] -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [64,32,256,256,256,256,256,256] ; SSE41-NEXT: psrlw $8, %xmm1 ; SSE41-NEXT: packuswb %xmm0, %xmm1 ; SSE41-NEXT: movdqa %xmm1, %xmm0 @@ -1833,7 +1833,7 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind { ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [64,32,256,256,256,256,256,256] ; AVX1-NEXT: vpsrlw $8, %xmm0, 
%xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq @@ -1841,7 +1841,7 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind { ; AVX2-LABEL: constant_shift_v2i8: ; AVX2: # %bb.0: ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [64,32,256,256,256,256,256,256,256,256,256,256,256,256,256,256] ; AVX2-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 @@ -1893,7 +1893,7 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind { ; X86-SSE-NEXT: movdqa %xmm0, %xmm2 ; X86-SSE-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] ; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [64,32,256,256,256,256,256,256] ; X86-SSE-NEXT: psrlw $8, %xmm0 ; X86-SSE-NEXT: packuswb %xmm2, %xmm0 ; X86-SSE-NEXT: retl diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll index 75baba5f35f792..aee6d707f9343e 100644 --- a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll @@ -1045,12 +1045,12 @@ define <4 x i32> @constant_shift_v4i32(<4 x i32> %a) nounwind { define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind { ; SSE-LABEL: constant_shift_v8i16: ; SSE: # %bb.0: -; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), 
%xmm0 +; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2,4,8,16,32,64,128] ; SSE-NEXT: retq ; ; AVX-LABEL: constant_shift_v8i16: ; AVX: # %bb.0: -; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,4,8,16,32,64,128] ; AVX-NEXT: retq ; ; XOP-LABEL: constant_shift_v8i16: @@ -1060,7 +1060,7 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind { ; ; AVX512DQ-LABEL: constant_shift_v8i16: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,4,8,16,32,64,128] ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: constant_shift_v8i16: @@ -1074,7 +1074,7 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind { ; ; AVX512DQVL-LABEL: constant_shift_v8i16: ; AVX512DQVL: # %bb.0: -; AVX512DQVL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512DQVL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,4,8,16,32,64,128] ; AVX512DQVL-NEXT: retq ; ; AVX512BWVL-LABEL: constant_shift_v8i16: @@ -1084,7 +1084,7 @@ define <8 x i16> @constant_shift_v8i16(<8 x i16> %a) nounwind { ; ; X86-SSE-LABEL: constant_shift_v8i16: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [1,2,4,8,16,32,64,128] ; X86-SSE-NEXT: retl %shift = shl <8 x i16> %a, ret <8 x i16> %shift @@ -1095,11 +1095,11 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind { ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [128,64,32,16,8,4,2,1] ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] ; SSE2-NEXT: pand %xmm2, %xmm1 ; SSE2-NEXT: punpcklbw {{.*#+}} 
xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2,4,8,16,32,64,128] ; SSE2-NEXT: pand %xmm2, %xmm0 ; SSE2-NEXT: packuswb %xmm1, %xmm0 ; SSE2-NEXT: retq @@ -1108,10 +1108,10 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind { ; SSE41: # %bb.0: ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; SSE41-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [128,64,32,16,8,4,2,1] ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] ; SSE41-NEXT: pand %xmm2, %xmm0 -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [1,2,4,8,16,32,64,128] ; SSE41-NEXT: pand %xmm2, %xmm1 ; SSE41-NEXT: packuswb %xmm0, %xmm1 ; SSE41-NEXT: movdqa %xmm1, %xmm0 @@ -1120,11 +1120,11 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind { ; AVX1-LABEL: constant_shift_v16i8: ; AVX1: # %bb.0: ; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 # [128,64,32,16,8,4,2,1] ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] ; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,4,8,16,32,64,128] ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 ; 
AVX1-NEXT: retq @@ -1132,7 +1132,7 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind { ; AVX2-LABEL: constant_shift_v16i8: ; AVX2: # %bb.0: ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,128,64,32,16,8,4,2,1] ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 @@ -1182,11 +1182,11 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind { ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: movdqa %xmm0, %xmm1 ; X86-SSE-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] -; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 +; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 # [128,64,32,16,8,4,2,1] ; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] ; X86-SSE-NEXT: pand %xmm2, %xmm1 ; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [1,2,4,8,16,32,64,128] ; X86-SSE-NEXT: pand %xmm2, %xmm0 ; X86-SSE-NEXT: packuswb %xmm1, %xmm0 ; X86-SSE-NEXT: retl diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll index 6dde209e94d811..f43477f0502dee 100644 --- a/llvm/test/CodeGen/X86/vector-shift-shl-256.ll +++ b/llvm/test/CodeGen/X86/vector-shift-shl-256.ll @@ -1170,15 +1170,15 @@ define <8 x i32> @constant_shift_v8i32(<8 x i32> %a) nounwind { define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind { ; AVX1-LABEL: constant_shift_v16i16: ; AVX1: # %bb.0: -; 
AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [1,2,4,8,16,32,64,128] ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [256,512,1024,2048,4096,8192,16384,32768] ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: constant_shift_v16i16: ; AVX2: # %bb.0: -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768] ; AVX2-NEXT: retq ; ; XOPAVX1-LABEL: constant_shift_v16i16: @@ -1191,12 +1191,12 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind { ; ; XOPAVX2-LABEL: constant_shift_v16i16: ; XOPAVX2: # %bb.0: -; XOPAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; XOPAVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768] ; XOPAVX2-NEXT: retq ; ; AVX512DQ-LABEL: constant_shift_v16i16: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768] ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: constant_shift_v16i16: @@ -1209,7 +1209,7 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind { ; ; AVX512DQVL-LABEL: constant_shift_v16i16: ; AVX512DQVL: # %bb.0: -; AVX512DQVL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512DQVL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768] ; AVX512DQVL-NEXT: retq ; ; AVX512BWVL-LABEL: constant_shift_v16i16: @@ -1219,15 +1219,15 @@ define <16 x i16> @constant_shift_v16i16(<16 x i16> %a) nounwind 
{ ; ; X86-AVX1-LABEL: constant_shift_v16i16: ; X86-AVX1: # %bb.0: -; X86-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1 +; X86-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1 # [1,2,4,8,16,32,64,128] ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; X86-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 # [256,512,1024,2048,4096,8192,16384,32768] ; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; X86-AVX1-NEXT: retl ; ; X86-AVX2-LABEL: constant_shift_v16i16: ; X86-AVX2: # %bb.0: -; X86-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 +; X86-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768] ; X86-AVX2-NEXT: retl %shift = shl <16 x i16> %a, ret <16 x i16> %shift @@ -1260,11 +1260,11 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind { ; AVX2-LABEL: constant_shift_v32i8: ; AVX2: # %bb.0: ; AVX2-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1] ; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] ; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 ; AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] ; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: retq @@ -1290,11 +1290,11 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind { ; AVX512DQ-LABEL: constant_shift_v32i8: ; AVX512DQ: 
# %bb.0: ; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] -; AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1] ; AVX512DQ-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] ; AVX512DQ-NEXT: vpand %ymm2, %ymm1, %ymm1 ; AVX512DQ-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] -; AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] ; AVX512DQ-NEXT: vpand %ymm2, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 ; AVX512DQ-NEXT: retq @@ -1309,11 +1309,11 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind { ; AVX512DQVL-LABEL: constant_shift_v32i8: ; AVX512DQVL: # %bb.0: ; AVX512DQVL-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] -; AVX512DQVL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 +; AVX512DQVL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1 # [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1] ; AVX512DQVL-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] ; AVX512DQVL-NEXT: vpand %ymm2, %ymm1, %ymm1 ; AVX512DQVL-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] -; AVX512DQVL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512DQVL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] ; AVX512DQVL-NEXT: vpand %ymm2, %ymm0, %ymm0 ; AVX512DQVL-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 ; 
AVX512DQVL-NEXT: retq @@ -1351,11 +1351,11 @@ define <32 x i8> @constant_shift_v32i8(<32 x i8> %a) nounwind { ; X86-AVX2-LABEL: constant_shift_v32i8: ; X86-AVX2: # %bb.0: ; X86-AVX2-NEXT: vpunpckhbw {{.*#+}} ymm1 = ymm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,24,24,25,25,26,26,27,27,28,28,29,29,30,30,31,31] -; X86-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1 +; X86-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1 # [128,64,32,16,8,4,2,1,128,64,32,16,8,4,2,1] ; X86-AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] ; X86-AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 ; X86-AVX2-NEXT: vpunpcklbw {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,16,16,17,17,18,18,19,19,20,20,21,21,22,22,23,23] -; X86-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 +; X86-AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] ; X86-AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 ; X86-AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 ; X86-AVX2-NEXT: retl diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-sub128.ll b/llvm/test/CodeGen/X86/vector-shift-shl-sub128.ll index d545cb77cba2e4..a44120b6d038ce 100644 --- a/llvm/test/CodeGen/X86/vector-shift-shl-sub128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-shl-sub128.ll @@ -1318,12 +1318,12 @@ define <2 x i32> @constant_shift_v2i32(<2 x i32> %a) nounwind { define <4 x i16> @constant_shift_v4i16(<4 x i16> %a) nounwind { ; SSE-LABEL: constant_shift_v4i16: ; SSE: # %bb.0: -; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2,4,8,u,u,u,u] ; SSE-NEXT: retq ; ; AVX-LABEL: constant_shift_v4i16: ; AVX: # %bb.0: -; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,4,8,u,u,u,u] ; AVX-NEXT: retq ; ; XOP-LABEL: constant_shift_v4i16: @@ -1333,7 +1333,7 @@ define <4 x i16> 
@constant_shift_v4i16(<4 x i16> %a) nounwind { ; ; AVX512DQ-LABEL: constant_shift_v4i16: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,4,8,u,u,u,u] ; AVX512DQ-NEXT: retq ; ; AVX512BW-LABEL: constant_shift_v4i16: @@ -1347,7 +1347,7 @@ define <4 x i16> @constant_shift_v4i16(<4 x i16> %a) nounwind { ; ; AVX512DQVL-LABEL: constant_shift_v4i16: ; AVX512DQVL: # %bb.0: -; AVX512DQVL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512DQVL-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,4,8,u,u,u,u] ; AVX512DQVL-NEXT: retq ; ; AVX512BWVL-LABEL: constant_shift_v4i16: @@ -1357,7 +1357,7 @@ define <4 x i16> @constant_shift_v4i16(<4 x i16> %a) nounwind { ; ; X86-SSE-LABEL: constant_shift_v4i16: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [1,2,4,8,u,u,u,u] ; X86-SSE-NEXT: retl %shift = shl <4 x i16> %a, ret <4 x i16> %shift @@ -1366,7 +1366,7 @@ define <4 x i16> @constant_shift_v4i16(<4 x i16> %a) nounwind { define <2 x i16> @constant_shift_v2i16(<2 x i16> %a) nounwind { ; SSE2-LABEL: constant_shift_v2i16: ; SSE2: # %bb.0: -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [4,8,u,u,u,u,u,u] ; SSE2-NEXT: retq ; ; SSE41-LABEL: constant_shift_v2i16: @@ -1419,7 +1419,7 @@ define <2 x i16> @constant_shift_v2i16(<2 x i16> %a) nounwind { ; ; X86-SSE-LABEL: constant_shift_v2i16: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [4,8,u,u,u,u,u,u] ; X86-SSE-NEXT: retl %shift = shl <2 x i16> %a, ret <2 x i16> %shift @@ -1429,7 +1429,7 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind { ; SSE2-LABEL: constant_shift_v8i8: ; SSE2: # %bb.0: ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = 
xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2,4,8,16,32,64,128] ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: packuswb %xmm1, %xmm0 @@ -1438,7 +1438,7 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind { ; SSE41-LABEL: constant_shift_v8i8: ; SSE41: # %bb.0: ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2,4,8,16,32,64,128] ; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE41-NEXT: pxor %xmm1, %xmm1 ; SSE41-NEXT: packuswb %xmm1, %xmm0 @@ -1447,7 +1447,7 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind { ; AVX1-LABEL: constant_shift_v8i8: ; AVX1: # %bb.0: ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,4,8,16,32,64,128] ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 @@ -1456,7 +1456,7 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind { ; AVX2-LABEL: constant_shift_v8i8: ; AVX2: # %bb.0: ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,16,32,64,128,u,u,u,u,u,u,u,u] 
; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 @@ -1505,7 +1505,7 @@ define <8 x i8> @constant_shift_v8i8(<8 x i8> %a) nounwind { ; X86-SSE-LABEL: constant_shift_v8i8: ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [1,2,4,8,16,32,64,128] ; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 ; X86-SSE-NEXT: pxor %xmm1, %xmm1 ; X86-SSE-NEXT: packuswb %xmm1, %xmm0 @@ -1518,7 +1518,7 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind { ; SSE2-LABEL: constant_shift_v4i8: ; SSE2: # %bb.0: ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2,4,8,u,u,u,u] ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: packuswb %xmm1, %xmm0 @@ -1527,7 +1527,7 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind { ; SSE41-LABEL: constant_shift_v4i8: ; SSE41: # %bb.0: ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,2,4,8,u,u,u,u] ; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE41-NEXT: pxor %xmm1, %xmm1 ; SSE41-NEXT: packuswb %xmm1, %xmm0 @@ -1536,7 +1536,7 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind { ; AVX1-LABEL: constant_shift_v4i8: ; AVX1: # %bb.0: ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; 
AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,2,4,8,u,u,u,u] ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 @@ -1545,7 +1545,7 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind { ; AVX2-LABEL: constant_shift_v4i8: ; AVX2: # %bb.0: ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [1,2,4,8,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 @@ -1594,7 +1594,7 @@ define <4 x i8> @constant_shift_v4i8(<4 x i8> %a) nounwind { ; X86-SSE-LABEL: constant_shift_v4i8: ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [1,2,4,8,u,u,u,u] ; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 ; X86-SSE-NEXT: pxor %xmm1, %xmm1 ; X86-SSE-NEXT: packuswb %xmm1, %xmm0 @@ -1607,7 +1607,7 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind { ; SSE2-LABEL: constant_shift_v2i8: ; SSE2: # %bb.0: ; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [4,8,u,u,u,u,u,u] ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: packuswb %xmm1, %xmm0 @@ -1616,7 +1616,7 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind { ; SSE41-LABEL: 
constant_shift_v2i8: ; SSE41: # %bb.0: ; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE41-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [4,8,u,u,u,u,u,u] ; SSE41-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE41-NEXT: pxor %xmm1, %xmm1 ; SSE41-NEXT: packuswb %xmm1, %xmm0 @@ -1625,7 +1625,7 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind { ; AVX1-LABEL: constant_shift_v2i8: ; AVX1: # %bb.0: ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [4,8,u,u,u,u,u,u] ; AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 @@ -1634,7 +1634,7 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind { ; AVX2-LABEL: constant_shift_v2i8: ; AVX2: # %bb.0: ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [4,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 @@ -1683,7 +1683,7 @@ define <2 x i8> @constant_shift_v2i8(<2 x i8> %a) nounwind { ; X86-SSE-LABEL: constant_shift_v2i8: ; X86-SSE: # %bb.0: ; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; X86-SSE-NEXT: pmullw 
{{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [4,8,u,u,u,u,u,u] ; X86-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 ; X86-SSE-NEXT: pxor %xmm1, %xmm1 ; X86-SSE-NEXT: packuswb %xmm1, %xmm0 diff --git a/llvm/test/CodeGen/X86/vector-trunc-math.ll b/llvm/test/CodeGen/X86/vector-trunc-math.ll index 2dcbcb0225cdfc..1af75424365011 100644 --- a/llvm/test/CodeGen/X86/vector-trunc-math.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-math.ll @@ -2169,7 +2169,7 @@ define <8 x i16> @trunc_mul_const_v8i64_v8i16(<8 x i64> %a0) nounwind { ; SSE-NEXT: pslld $16, %xmm0 ; SSE-NEXT: psrad $16, %xmm0 ; SSE-NEXT: packssdw %xmm2, %xmm0 -; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,1,2,3,4,5,6,7] ; SSE-NEXT: retq ; ; AVX1-LABEL: trunc_mul_const_v8i64_v8i16: @@ -2182,7 +2182,7 @@ define <8 x i16> @trunc_mul_const_v8i64_v8i16(<8 x i64> %a0) nounwind { ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 ; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,1,2,3,4,5,6,7] ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; @@ -2195,14 +2195,14 @@ define <8 x i16> @trunc_mul_const_v8i64_v8i16(<8 x i64> %a0) nounwind { ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,1,2,3,4,5,6,7] ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; ; AVX512-LABEL: trunc_mul_const_v8i64_v8i16: ; AVX512: # %bb.0: ; AVX512-NEXT: vpmovqw %zmm0, %xmm0 -; AVX512-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,1,2,3,4,5,6,7] ; AVX512-NEXT: vzeroupper ; 
AVX512-NEXT: retq %1 = mul <8 x i64> %a0, @@ -2218,7 +2218,7 @@ define <8 x i16> @trunc_mul_const_v8i32_v8i16(<8 x i32> %a0) nounwind { ; SSE-NEXT: pslld $16, %xmm0 ; SSE-NEXT: psrad $16, %xmm0 ; SSE-NEXT: packssdw %xmm1, %xmm0 -; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,1,2,3,4,5,6,7] ; SSE-NEXT: retq ; ; AVX1-LABEL: trunc_mul_const_v8i32_v8i16: @@ -2226,7 +2226,7 @@ define <8 x i16> @trunc_mul_const_v8i32_v8i16(<8 x i32> %a0) nounwind { ; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,1,2,3,4,5,6,7] ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq ; @@ -2234,7 +2234,7 @@ define <8 x i16> @trunc_mul_const_v8i32_v8i16(<8 x i32> %a0) nounwind { ; AVX2: # %bb.0: ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u] ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,1,2,3,4,5,6,7] ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; @@ -2242,7 +2242,7 @@ define <8 x i16> @trunc_mul_const_v8i32_v8i16(<8 x i32> %a0) nounwind { ; AVX512: # %bb.0: ; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512-NEXT: vpmovdw %zmm0, %ymm0 -; AVX512-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [0,1,2,3,4,5,6,7] ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %1 = mul <8 x i32> %a0, @@ -2451,8 +2451,8 @@ define <16 x i8> @trunc_mul_const_v16i32_v16i8(<16 x i32> %a0) nounwind { define <16 x i8> @trunc_mul_const_v16i16_v16i8(<16 x i16> %a0) nounwind { ; SSE-LABEL: trunc_mul_const_v16i16_v16i8: ; SSE: # 
%bb.0: -; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [0,1,2,3,4,5,6,7] +; SSE-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [8,9,10,11,12,13,14,15] ; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] ; SSE-NEXT: pand %xmm2, %xmm1 ; SSE-NEXT: pand %xmm2, %xmm0 @@ -2461,9 +2461,9 @@ define <16 x i8> @trunc_mul_const_v16i16_v16i8(<16 x i16> %a0) nounwind { ; ; AVX1-LABEL: trunc_mul_const_v16i16_v16i8: ; AVX1: # %bb.0: -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 # [0,1,2,3,4,5,6,7] ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [8,9,10,11,12,13,14,15] ; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] ; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 @@ -2473,7 +2473,7 @@ define <16 x i8> @trunc_mul_const_v16i16_v16i8(<16 x i16> %a0) nounwind { ; ; AVX2-LABEL: trunc_mul_const_v16i16_v16i8: ; AVX2: # %bb.0: -; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 @@ -2482,7 +2482,7 @@ define <16 x i8> @trunc_mul_const_v16i16_v16i8(<16 x i16> %a0) nounwind { ; ; AVX512F-LABEL: trunc_mul_const_v16i16_v16i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512F-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = 
ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512F-NEXT: vzeroupper @@ -2490,7 +2490,7 @@ define <16 x i8> @trunc_mul_const_v16i16_v16i8(<16 x i16> %a0) nounwind { ; ; AVX512BW-LABEL: trunc_mul_const_v16i16_v16i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512BW-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512BW-NEXT: vzeroupper @@ -2498,7 +2498,7 @@ define <16 x i8> @trunc_mul_const_v16i16_v16i8(<16 x i16> %a0) nounwind { ; ; AVX512DQ-LABEL: trunc_mul_const_v16i16_v16i8: ; AVX512DQ: # %bb.0: -; AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX512DQ-NEXT: vpmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 # [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; AVX512DQ-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512DQ-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/x86-shifts.ll b/llvm/test/CodeGen/X86/x86-shifts.ll index a1d9eebbe9d789..8d469a39a5700e 100644 --- a/llvm/test/CodeGen/X86/x86-shifts.ll +++ b/llvm/test/CodeGen/X86/x86-shifts.ll @@ -131,7 +131,7 @@ define <8 x i16> @sll8_nosplat(<8 x i16> %A) nounwind { ; X86: # %bb.0: # %entry ; X86-NEXT: movdqa {{.*#+}} xmm1 = [2,4,8,64,4,4,4,4] ; X86-NEXT: pmullw %xmm0, %xmm1 -; X86-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 # [512,128,32,2,16,2,2,2] ; X86-NEXT: pxor %xmm1, %xmm0 ; 
X86-NEXT: retl ; @@ -139,7 +139,7 @@ define <8 x i16> @sll8_nosplat(<8 x i16> %A) nounwind { ; X64: # %bb.0: # %entry ; X64-NEXT: movdqa {{.*#+}} xmm1 = [2,4,8,64,4,4,4,4] ; X64-NEXT: pmullw %xmm0, %xmm1 -; X64-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-NEXT: pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [512,128,32,2,16,2,2,2] ; X64-NEXT: pxor %xmm1, %xmm0 ; X64-NEXT: retq entry: From 9b7b1bee07ea583af7a90ed29634e3f9af22a284 Mon Sep 17 00:00:00 2001 From: Alexander Yermolovich <43973793+ayermolo@users.noreply.github.com> Date: Fri, 14 Jun 2024 07:44:37 -0700 Subject: [PATCH 106/155] [CLANG][DWARF] Handle DIE offset collision in DW_IDX_parent (#95339) This fixes https://github.com/llvm/llvm-project/issues/93886. The UnitID is not unique between CUs and TUs. This led to DW_IDX_parent to point ot an entry for a DIE in CU if it had the same relative offset as TU die. Added a IsTU to the hash for parent chain. --- llvm/include/llvm/CodeGen/AccelTable.h | 45 ++++++++---- .../Classic/DWARFLinkerCompileUnit.h | 2 + llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 3 +- llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp | 9 ++- .../DWARFLinker/Parallel/DWARFLinkerImpl.cpp | 3 +- .../DWARFLinker/Parallel/DWARFLinkerUnit.h | 10 ++- .../debug-names-types-die-offset-collision.ll | 69 +++++++++++++++++++ 7 files changed, 120 insertions(+), 21 deletions(-) create mode 100644 llvm/test/DebugInfo/X86/debug-names-types-die-offset-collision.ll diff --git a/llvm/include/llvm/CodeGen/AccelTable.h b/llvm/include/llvm/CodeGen/AccelTable.h index cff8fcbaf2cd7b..f98e994b3f9987 100644 --- a/llvm/include/llvm/CodeGen/AccelTable.h +++ b/llvm/include/llvm/CodeGen/AccelTable.h @@ -257,17 +257,33 @@ class AppleAccelTableData : public AccelTableData { /// Helper class to identify an entry in DWARF5AccelTable based on their DIE /// offset and UnitID. 
-struct OffsetAndUnitID : std::pair { - using Base = std::pair; - OffsetAndUnitID(Base B) : Base(B) {} - - OffsetAndUnitID(uint64_t Offset, uint32_t UnitID) : Base(Offset, UnitID) {} - uint64_t offset() const { return first; }; - uint32_t unitID() const { return second; }; +struct OffsetAndUnitID { + uint64_t Offset = 0; + uint32_t UnitID = 0; + bool IsTU = false; + OffsetAndUnitID() = delete; + OffsetAndUnitID(uint64_t Offset, uint32_t UnitID, bool IsTU) + : Offset(Offset), UnitID(UnitID), IsTU(IsTU) {} + uint64_t offset() const { return Offset; }; + uint32_t unitID() const { return UnitID; }; + bool isTU() const { return IsTU; } }; -template <> -struct DenseMapInfo : DenseMapInfo {}; +template <> struct DenseMapInfo { + static inline OffsetAndUnitID getEmptyKey() { + return OffsetAndUnitID(-1, -1, false); + } + static inline OffsetAndUnitID getTombstoneKey() { + return OffsetAndUnitID(-2, -2, false); + } + static unsigned getHashValue(const OffsetAndUnitID &Val) { + return (unsigned)llvm::hash_combine(Val.offset(), Val.unitID(), Val.IsTU); + } + static bool isEqual(const OffsetAndUnitID &LHS, const OffsetAndUnitID &RHS) { + return LHS.offset() == RHS.offset() && LHS.unitID() == RHS.unitID() && + LHS.IsTU == RHS.isTU(); + } +}; /// The Data class implementation for DWARF v5 accelerator table. 
Unlike the /// Apple Data classes, this class is just a DIE wrapper, and does not know to @@ -277,12 +293,11 @@ class DWARF5AccelTableData : public AccelTableData { public: static uint32_t hash(StringRef Name) { return caseFoldingDjbHash(Name); } - DWARF5AccelTableData(const DIE &Die, const uint32_t UnitID, - const bool IsTU = false); + DWARF5AccelTableData(const DIE &Die, const uint32_t UnitID, const bool IsTU); DWARF5AccelTableData(const uint64_t DieOffset, const std::optional DefiningParentOffset, const unsigned DieTag, const unsigned UnitID, - const bool IsTU = false) + const bool IsTU) : OffsetVal(DieOffset), ParentOffset(DefiningParentOffset), DieTag(DieTag), AbbrevNumber(0), IsTU(IsTU), UnitID(UnitID) {} @@ -296,7 +311,7 @@ class DWARF5AccelTableData : public AccelTableData { } OffsetAndUnitID getDieOffsetAndUnitID() const { - return {getDieOffset(), UnitID}; + return {getDieOffset(), getUnitID(), isTU()}; } unsigned getDieTag() const { return DieTag; } @@ -322,7 +337,7 @@ class DWARF5AccelTableData : public AccelTableData { assert(isNormalized() && "Accessing DIE Offset before normalizing."); if (!ParentOffset) return std::nullopt; - return OffsetAndUnitID(*ParentOffset, getUnitID()); + return OffsetAndUnitID(*ParentOffset, getUnitID(), isTU()); } /// Sets AbbrevIndex for an Entry. 
@@ -416,7 +431,7 @@ class DWARF5AccelTable : public AccelTable { for (auto *Data : Entry.second.getValues()) { addName(Entry.second.Name, Data->getDieOffset(), Data->getParentDieOffset(), Data->getDieTag(), - Data->getUnitID(), true); + Data->getUnitID(), Data->isTU()); } } } diff --git a/llvm/include/llvm/DWARFLinker/Classic/DWARFLinkerCompileUnit.h b/llvm/include/llvm/DWARFLinker/Classic/DWARFLinkerCompileUnit.h index bfe544946fd903..cdb6f4a4443ab7 100644 --- a/llvm/include/llvm/DWARFLinker/Classic/DWARFLinkerCompileUnit.h +++ b/llvm/include/llvm/DWARFLinker/Classic/DWARFLinkerCompileUnit.h @@ -137,6 +137,8 @@ class CompileUnit { return nullptr; } + dwarf::Tag getTag() const { return OrigUnit.getUnitDIE().getTag(); } + bool hasODR() const { return HasODR; } bool isClangModule() const { return !ClangModuleName.empty(); } uint16_t getLanguage(); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index b9c02aed848cce..7de9432325d8a5 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -3592,7 +3592,8 @@ void DwarfDebug::addAccelNameImpl( "Kind is TU but CU is being processed."); // The type unit can be discarded, so need to add references to final // acceleration table once we know it's complete and we emit it. 
- Current.addName(Ref, Die, Unit.getUniqueID()); + Current.addName(Ref, Die, Unit.getUniqueID(), + Unit.getUnitDie().getTag() == dwarf::DW_TAG_type_unit); break; } case AccelTableKind::Default: diff --git a/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp b/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp index 2544d97eaafd06..c6312c387744aa 100644 --- a/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp +++ b/llvm/lib/DWARFLinker/Classic/DWARFLinker.cpp @@ -2247,17 +2247,20 @@ void DWARFLinker::emitAcceleratorEntriesForUnit(CompileUnit &Unit) { DebugNames.addName( Namespace.Name, Namespace.Die->getOffset(), DWARF5AccelTableData::getDefiningParentDieOffset(*Namespace.Die), - Namespace.Die->getTag(), Unit.getUniqueID()); + Namespace.Die->getTag(), Unit.getUniqueID(), + Unit.getTag() == dwarf::DW_TAG_type_unit); for (const auto &Pubname : Unit.getPubnames()) DebugNames.addName( Pubname.Name, Pubname.Die->getOffset(), DWARF5AccelTableData::getDefiningParentDieOffset(*Pubname.Die), - Pubname.Die->getTag(), Unit.getUniqueID()); + Pubname.Die->getTag(), Unit.getUniqueID(), + Unit.getTag() == dwarf::DW_TAG_type_unit); for (const auto &Pubtype : Unit.getPubtypes()) DebugNames.addName( Pubtype.Name, Pubtype.Die->getOffset(), DWARF5AccelTableData::getDefiningParentDieOffset(*Pubtype.Die), - Pubtype.Die->getTag(), Unit.getUniqueID()); + Pubtype.Die->getTag(), Unit.getUniqueID(), + Unit.getTag() == dwarf::DW_TAG_type_unit); } break; } } diff --git a/llvm/lib/DWARFLinker/Parallel/DWARFLinkerImpl.cpp b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerImpl.cpp index e68bf0c227a0a0..c060f8f4c1718b 100644 --- a/llvm/lib/DWARFLinker/Parallel/DWARFLinkerImpl.cpp +++ b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerImpl.cpp @@ -1358,7 +1358,8 @@ void DWARFLinkerImpl::emitDWARFv5DebugNamesSection(const Triple &TargetTriple) { case DwarfUnit::AccelType::Type: { DebugNames->addName(*DebugStrStrings.getExistingEntry(Info.String), Info.OutOffset, std::nullopt /*ParentDIEOffset*/, - Info.Tag, CU->getUniqueID()); + 
Info.Tag, CU->getUniqueID(), + CU->getTag() == dwarf::DW_TAG_type_unit); } break; default: diff --git a/llvm/lib/DWARFLinker/Parallel/DWARFLinkerUnit.h b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerUnit.h index e16b82f696a2d8..84757aea7045d8 100644 --- a/llvm/lib/DWARFLinker/Parallel/DWARFLinkerUnit.h +++ b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerUnit.h @@ -77,10 +77,15 @@ class DwarfUnit : public OutputSections { void setOutUnitDIE(DIE *UnitDie) { OutUnitDIE = UnitDie; - if (OutUnitDIE != nullptr) + if (OutUnitDIE != nullptr) { UnitSize = getDebugInfoHeaderSize() + OutUnitDIE->getSize(); + UnitTag = OutUnitDIE->getTag(); + } } + /// Returns unit DWARF tag. + dwarf::Tag getTag() const { return UnitTag; } + /// \defgroup Methods used to emit unit's debug info: /// /// @{ @@ -180,6 +185,9 @@ class DwarfUnit : public OutputSections { uint64_t UnitSize = 0; + /// DWARF unit tag. + dwarf::Tag UnitTag = dwarf::DW_TAG_null; + /// true if current unit references_to/is_referenced by other unit. std::atomic IsInterconnectedCU = {false}; diff --git a/llvm/test/DebugInfo/X86/debug-names-types-die-offset-collision.ll b/llvm/test/DebugInfo/X86/debug-names-types-die-offset-collision.ll new file mode 100644 index 00000000000000..104a166ffd3ad9 --- /dev/null +++ b/llvm/test/DebugInfo/X86/debug-names-types-die-offset-collision.ll @@ -0,0 +1,69 @@ +; UNSUPPORTED: system-windows + +;; This test checks that DW_IDX_parent is generated correctly when there is DIE relative offset collision between CU and TU. 
+ +; RUN: llc -mtriple=x86_64 -generate-type-units -dwarf-version=5 -filetype=obj %s -o %t +; RUN: llvm-dwarfdump -debug-info -debug-names %t | FileCheck %s + +; CHECK: .debug_info contents: +; CHECK: 0x00000023: DW_TAG_namespace +; CHECK-NEXT: DW_AT_name ("B") +; CHECK: 0x00000023: DW_TAG_subprogram +; CHECK-NEXT: DW_AT_low_pc +; CHECK-NEXT: DW_AT_high_pc +; CHECK-NEXT: DW_AT_frame_base +; CHECK-NEXT: DW_AT_linkage_name ("_Z9get_statev") +; CHECK-NEXT: DW_AT_name ("get_state") + +; CHECK: .debug_names contents: +; CHECK: String: {{.*}} "B" +; CHECK: Entry @ [[ENTRY:0x[0-9a-f]*]] +; CHECK: String: {{.*}} "State" +; CHECK: Entry @ 0xd3 { +; CHECK: Abbrev: 0x4 +; CHECK: Tag: DW_TAG_structure_type +; CHECK: DW_IDX_type_unit: 0x00 +; CHECK: DW_IDX_die_offset: 0x00000025 +; CHECK: DW_IDX_parent: Entry @ [[ENTRY:0x[0-9a-f]*]] +; CHECK: } + + +;; namespace B { struct State { class InnerState{}; }; } +;; B::State::InnerState get_state() { return B::State::InnerState(); } +;; clang++ main.cpp -g2 -O0 -fdebug-types-section -gpubnames + +; ModuleID = 'main.cpp' +source_filename = "main.cpp" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: mustprogress noinline nounwind optnone uwtable +define dso_local void @_Z9get_statev() #0 !dbg !10 { +entry: + ret void, !dbg !17 +} + +attributes #0 = { mustprogress noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8} +!llvm.ident = !{!9} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 19.0.0git", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false) +!1 = 
!DIFile(filename: "main.cpp", directory: "/folder", checksumkind: CSK_MD5, checksum: "a84fe2e4ecb77633f6c33f3b6833b9e7") +!2 = !{i32 7, !"Dwarf Version", i32 5} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 8, !"PIC Level", i32 2} +!6 = !{i32 7, !"PIE Level", i32 2} +!7 = !{i32 7, !"uwtable", i32 2} +!8 = !{i32 7, !"frame-pointer", i32 2} +!9 = !{!"clang version 19.0.0git"} +!10 = distinct !DISubprogram(name: "get_state", linkageName: "_Z9get_statev", scope: !1, file: !1, line: 2, type: !11, scopeLine: 2, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0) +!11 = !DISubroutineType(types: !12) +!12 = !{!13} +!13 = distinct !DICompositeType(tag: DW_TAG_class_type, name: "InnerState", scope: !14, file: !1, line: 1, size: 8, flags: DIFlagTypePassByValue, elements: !16, identifier: "_ZTSN1B5State10InnerStateE") +!14 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "State", scope: !15, file: !1, line: 1, size: 8, flags: DIFlagTypePassByValue, elements: !16, identifier: "_ZTSN1B5StateE") +!15 = !DINamespace(name: "B", scope: null) +!16 = !{} +!17 = !DILocation(line: 2, column: 36, scope: !10) From 0a57a20aa506c5a5a8b0a8eb45446d0747493d7c Mon Sep 17 00:00:00 2001 From: Scott Egerton <9487234+ScottEgerton@users.noreply.github.com> Date: Fri, 14 Jun 2024 15:52:28 +0100 Subject: [PATCH 107/155] [AMDGPU] NFC: Remove duplicate VOP_DPP_Pseudo TableGen definitions (#95370) After recent changes, VOP_DPP_Pseudo now inherits from VOP_Pseudo. This commit removes some on the duplicate definitions in VOP_DPP_Pseudo that are exactly the same as definitions inherited from VOP_Pseudo. 
--- llvm/lib/Target/AMDGPU/VOPInstructions.td | 7 ------- 1 file changed, 7 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index eb260610e37ffc..2b05165cc94bc4 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -831,13 +831,9 @@ class VOP_DPP_Pseudo pattern=[], dag Ins = P.InsDPP, string asmOps = P.AsmDPP> : VOP_Pseudo { - let isPseudo = 1; - let isCodeGenOnly = 1; - let mayLoad = 0; let mayStore = 0; let hasSideEffects = 0; - let UseNamedOperandTable = 1; let VALU = 1; let DPP = 1; @@ -850,7 +846,6 @@ class VOP_DPP_Pseudo pattern=[], let Uses = !if(ReadsModeReg, [MODE, EXEC], [EXEC]); let isConvergent = 1; - string Mnemonic = OpName; string AsmOperands = asmOps; let AsmMatchConverter = !if(P.HasModifiers, "cvtDPP", ""); @@ -863,8 +858,6 @@ class VOP_DPP_Pseudo pattern=[], let IsInvalidSingleUseConsumer = !not(VINTERP); let IsInvalidSingleUseProducer = !not(VINTERP); - - VOPProfile Pfl = P; } class VOP3_DPP_Pseudo : From d462bf687548a5630f60a8afaa66120df8319e88 Mon Sep 17 00:00:00 2001 From: Rolf Morel Date: Fri, 14 Jun 2024 17:02:47 +0200 Subject: [PATCH 108/155] [mlir][Transform] Extend transform.foreach to take multiple arguments (#93705) Changes transform.foreach's interface to take multiple arguments, e.g. transform.foreach %ops1, %ops2, %params : ... { ^bb0(%op1, %op2, %param): BODY } The semantics are that the payloads for these handles get iterated over as if the payloads have been zipped-up together - BODY gets executed once for each such tuple. The documentation explains that this implementation requires that the payloads have the same length. This change also enables the target argument(s) to be any op/value/param handle. The added test cases demonstrate some use cases for this change. 
--- .../mlir/Dialect/Transform/IR/TransformOps.td | 57 +++++---- .../lib/Dialect/Transform/IR/TransformOps.cpp | 120 +++++++++++++----- .../Dialect/Linalg/multisize-tiling-full.mlir | 21 +-- .../SCF/transform-loop-fuse-sibling.mlir | 73 +++++++++++ mlir/test/Dialect/Transform/ops-invalid.mlir | 49 +++++++ mlir/test/Dialect/Transform/ops.mlir | 22 +++- .../Dialect/Transform/test-interpreter.mlir | 85 +++++++++++++ 7 files changed, 354 insertions(+), 73 deletions(-) diff --git a/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td b/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td index 77048a28d75108..3bb297cbf91d21 100644 --- a/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td +++ b/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td @@ -614,43 +614,48 @@ def ForeachOp : TransformDialectOp<"foreach", "getSuccessorRegions", "getEntrySuccessorOperands"]>, SingleBlockImplicitTerminator<"::mlir::transform::YieldOp"> ]> { - let summary = "Executes the body for each payload op"; + let summary = "Executes the body for each element of the payload"; let description = [{ - This op has exactly one region with exactly one block ("body"). The body is - executed for each payload op that is associated to the target operand in an - unbatched fashion. I.e., the block argument ("iteration variable") is always - mapped to exactly one payload op. - - This op always reads the target handle. Furthermore, it consumes the handle - if there is a transform op in the body that consumes the iteration variable. - This op does not return anything. - - The transformations inside the body are applied in order of their - appearance. During application, if any transformation in the sequence fails, - the entire sequence fails immediately leaving the payload IR in potentially - invalid state, i.e., this operation offers no transformation rollback - capabilities. - - This op generates as many handles as the terminating YieldOp has operands. 
- For each result, the payload ops of the corresponding YieldOp operand are - merged and mapped to the same resulting handle. + Execute the op's body - its single region block - exactly once per + element of the payload associated to a target handle. The body's + transformations are applied in order of appearance until reaching the + (implicit) YieldOp terminator. + + Each iteration gets executed by co-indexing the payloads of the arguments + and mapping the body's arguments to these tuples, as though iterating over + the zipped together `targets`. As such, in each iteration, the size of the + payload of each of the body's block arguments is exactly one. + + This op always reads the target handles. Furthermore, it consumes a handle + if there is a transform op in the body that consumes the corresponding + block argument. Handles can point to ops, values, or parameters. + + #### Return Modes + + This op produces as many result handles as the body's terminating YieldOp + has operands. For each result, the payloads of the corresponding YieldOp + operand are merged and mapped to the same resulting handle. + + If the target handles do not associate payloads of the same size, a + silencable failure will be generated. + + During application, if any transformation in the sequence fails, the entire + sequence fails immediately with the same failure, leaving the payload IR in + a potentially invalid state, i.e., this operation offers no transformation + rollback capabilities. }]; - let arguments = (ins TransformHandleTypeInterface:$target); - let results = (outs Variadic:$results); + let arguments = (ins Variadic:$targets); + let results = (outs Variadic:$results); let regions = (region SizedRegion<1>:$body); let assemblyFormat = - "$target `:` type($target) (`->` type($results)^)? $body attr-dict"; + "$targets `:` type($targets) (`->` type($results)^)? $body attr-dict"; let hasVerifier = 1; let extraClassDeclaration = [{ /// Allow the dialect prefix to be omitted. 
static StringRef getDefaultDialect() { return "transform"; } - BlockArgument getIterationVariable() { - return getBody().front().getArgument(0); - } - transform::YieldOp getYieldOp(); }]; } diff --git a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp index 247759e21efb16..1a7ec030f0eb1e 100644 --- a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp +++ b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp @@ -1391,46 +1391,83 @@ DiagnosedSilenceableFailure transform::ForeachOp::apply(transform::TransformRewriter &rewriter, transform::TransformResults &results, transform::TransformState &state) { - SmallVector> resultOps(getNumResults(), {}); - // Store payload ops in a vector because ops may be removed from the mapping - // by the TrackingRewriter while the iteration is in progress. - SmallVector targets = - llvm::to_vector(state.getPayloadOps(getTarget())); - for (Operation *op : targets) { + // We store the payloads before executing the body as ops may be removed from + // the mapping by the TrackingRewriter while iteration is in progress. + SmallVector> payloads; + detail::prepareValueMappings(payloads, getTargets(), state); + size_t numIterations = payloads.empty() ? 0 : payloads.front().size(); + + // As we will be "zipping" over them, check all payloads have the same size. + for (size_t argIdx = 1; argIdx < payloads.size(); argIdx++) { + if (payloads[argIdx].size() != numIterations) { + return emitSilenceableError() + << "prior targets' payload size (" << numIterations + << ") differs from payload size (" << payloads[argIdx].size() + << ") of target " << getTargets()[argIdx]; + } + } + + // Start iterating, indexing into payloads to obtain the right arguments to + // call the body with - each slice of payloads at the same argument index + // corresponding to a tuple to use as the body's block arguments. 
+ ArrayRef blockArguments = getBody().front().getArguments(); + SmallVector> zippedResults(getNumResults(), {}); + for (size_t iterIdx = 0; iterIdx < numIterations; iterIdx++) { auto scope = state.make_region_scope(getBody()); - if (failed(state.mapBlockArguments(getIterationVariable(), {op}))) - return DiagnosedSilenceableFailure::definiteFailure(); + // Set up arguments to the region's block. + for (auto &&[argIdx, blockArg] : llvm::enumerate(blockArguments)) { + MappedValue argument = payloads[argIdx][iterIdx]; + // Note that each blockArg's handle gets associated with just a single + // element from the corresponding target's payload. + if (failed(state.mapBlockArgument(blockArg, {argument}))) + return DiagnosedSilenceableFailure::definiteFailure(); + } // Execute loop body. for (Operation &transform : getBody().front().without_terminator()) { DiagnosedSilenceableFailure result = state.applyTransform( - cast(transform)); + llvm::cast(transform)); if (!result.succeeded()) return result; } - // Append yielded payload ops to result list (if any). - for (unsigned i = 0; i < getNumResults(); ++i) { - auto yieldedOps = state.getPayloadOps(getYieldOp().getOperand(i)); - resultOps[i].append(yieldedOps.begin(), yieldedOps.end()); - } - } - - for (unsigned i = 0; i < getNumResults(); ++i) - results.set(llvm::cast(getResult(i)), resultOps[i]); + // Append yielded payloads to corresponding results from prior iterations. + OperandRange yieldOperands = getYieldOp().getOperands(); + for (auto &&[result, yieldOperand, resTuple] : + llvm::zip_equal(getResults(), yieldOperands, zippedResults)) + // NB: each iteration we add any number of ops/vals/params to a result. 
+ if (isa(result.getType())) + llvm::append_range(resTuple, state.getPayloadOps(yieldOperand)); + else if (isa(result.getType())) + llvm::append_range(resTuple, state.getPayloadValues(yieldOperand)); + else if (isa(result.getType())) + llvm::append_range(resTuple, state.getParams(yieldOperand)); + else + assert(false && "unhandled handle type"); + } + + // Associate the accumulated result payloads to the op's actual results. + for (auto &&[result, resPayload] : zip_equal(getResults(), zippedResults)) + results.setMappedValues(llvm::cast(result), resPayload); return DiagnosedSilenceableFailure::success(); } void transform::ForeachOp::getEffects( SmallVectorImpl &effects) { - BlockArgument iterVar = getIterationVariable(); - if (any_of(getBody().front().without_terminator(), [&](Operation &op) { - return isHandleConsumed(iterVar, cast(&op)); - })) { - consumesHandle(getTarget(), effects); - } else { - onlyReadsHandle(getTarget(), effects); + // NB: this `zip` should be `zip_equal` - while this op's verifier catches + // arity errors, this method might get called before/in absence of `verify()`. + for (auto &&[target, blockArg] : + llvm::zip(getTargets(), getBody().front().getArguments())) { + BlockArgument blockArgument = blockArg; + if (any_of(getBody().front().without_terminator(), [&](Operation &op) { + return isHandleConsumed(blockArgument, + cast(&op)); + })) { + consumesHandle(target, effects); + } else { + onlyReadsHandle(target, effects); + } } if (any_of(getBody().front().without_terminator(), [&](Operation &op) { @@ -1463,8 +1500,8 @@ void transform::ForeachOp::getSuccessorRegions( OperandRange transform::ForeachOp::getEntrySuccessorOperands(RegionBranchPoint point) { - // The iteration variable op handle is mapped to a subset (one op to be - // precise) of the payload ops of the ForeachOp operand. + // Each block argument handle is mapped to a subset (one op to be precise) + // of the payload of the corresponding `targets` operand of ForeachOp. 
assert(point == getBody() && "unexpected region index"); return getOperation()->getOperands(); } @@ -1474,14 +1511,27 @@ transform::YieldOp transform::ForeachOp::getYieldOp() { } LogicalResult transform::ForeachOp::verify() { - auto yieldOp = getYieldOp(); - if (getNumResults() != yieldOp.getNumOperands()) - return emitOpError() << "expects the same number of results as the " - "terminator has operands"; - for (Value v : yieldOp.getOperands()) - if (!llvm::isa(v.getType())) - return yieldOp->emitOpError("expects operands to have types implementing " - "TransformHandleTypeInterface"); + for (auto [targetOpt, bodyArgOpt] : + llvm::zip_longest(getTargets(), getBody().front().getArguments())) { + if (!targetOpt || !bodyArgOpt) + return emitOpError() << "expects the same number of targets as the body " + "has block arguments"; + if (targetOpt.value().getType() != bodyArgOpt.value().getType()) + return emitOpError( + "expects co-indexed targets and the body's " + "block arguments to have the same op/value/param type"); + } + + for (auto [resultOpt, yieldOperandOpt] : + llvm::zip_longest(getResults(), getYieldOp().getOperands())) { + if (!resultOpt || !yieldOperandOpt) + return emitOpError() << "expects the same number of results as the " + "yield terminator has operands"; + if (resultOpt.value().getType() != yieldOperandOpt.value().getType()) + return emitOpError("expects co-indexed results and yield " + "operands to have the same op/value/param type"); + } + return success(); } diff --git a/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir b/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir index 15b24b56608e33..51332ffce03d1d 100644 --- a/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir +++ b/mlir/test/Dialect/Linalg/multisize-tiling-full.mlir @@ -6,15 +6,17 @@ module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { %0 = transform.structured.match ops{["linalg.generic"]} in 
%arg1 : (!transform.any_op) -> !transform.any_op %1:3 = transform.structured.multitile_sizes %0 { dimension = 0, target_size = 3} : (!transform.any_op) -> !transform.any_op - %t:3 = transform.structured.multitile_sizes %0 { dimension = 1, target_size = 10} : (!transform.any_op) -> !transform.any_op %2:2 = transform.structured.split %0 after %1#2 { dimension = 0 } : !transform.any_op, !transform.any_op %3:2 = transform.structured.tile_using_for %2#0 tile_sizes [%1#0] : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op) %4:2 = transform.structured.tile_using_for %2#1 tile_sizes [%1#1] : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op) %5 = transform.merge_handles %3#0, %4#0 : !transform.any_op - %tt:3 = transform.replicate num(%5) %t#0, %t#1, %t#2 : !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op - %6:2 = transform.structured.split %5 after %tt#2 { dimension = 1 } : !transform.any_op, !transform.any_op - transform.structured.tile_using_for %6#0 tile_sizes [0, %tt#0] : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op) - transform.structured.tile_using_for %6#1 tile_sizes [0, %tt#1] : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op) + transform.foreach %5 : !transform.any_op { + ^bb0(%inner_linalg: !transform.any_op): + %low, %high, %split_point = transform.structured.multitile_sizes %inner_linalg { dimension = 1, target_size = 10} : (!transform.any_op) -> !transform.any_op + %inner_linalg_low, %inner_linalg_high = transform.structured.split %inner_linalg after %split_point { dimension = 1 } : !transform.any_op, !transform.any_op + transform.structured.tile_using_for %inner_linalg_low tile_sizes [0, %low] : (!transform.any_op, !transform.any_op) -> (!transform.any_op, !transform.any_op) + transform.structured.tile_using_for %inner_linalg_high tile_sizes [0, %high] : (!transform.any_op, !transform.any_op) -> 
(!transform.any_op, !transform.any_op) + } transform.yield } } @@ -114,9 +116,12 @@ module attributes {transform.with_named_sequence} { %4:2 = transform.structured.tile_using_for %2#1 tile_sizes [%1#1] : (!transform.any_op, !transform.param) -> (!transform.any_op, !transform.any_op) %5 = transform.merge_handles %3#0, %4#0 : !transform.any_op %tt:3 = transform.replicate num(%5) %t#0, %t#1, %t#2 : !transform.any_op, !transform.param, !transform.param, !transform.param - %6:2 = transform.structured.split %5 after %tt#2 { dimension = 1 } : !transform.any_op, !transform.param - transform.structured.tile_using_for %6#0 tile_sizes [0, %tt#0] : (!transform.any_op, !transform.param) -> (!transform.any_op, !transform.any_op) - transform.structured.tile_using_for %6#1 tile_sizes [0, %tt#1] : (!transform.any_op, !transform.param) -> (!transform.any_op, !transform.any_op) + transform.foreach %5, %tt#0, %tt#1, %tt#2 : !transform.any_op, !transform.param, !transform.param, !transform.param { + ^bb0(%inner_linalg: !transform.any_op, %low: !transform.param, %high: !transform.param, %split_point: !transform.param): + %inner_linalg_low, %inner_linalg_high = transform.structured.split %inner_linalg after %split_point { dimension = 1 } : !transform.any_op, !transform.param + transform.structured.tile_using_for %inner_linalg_low tile_sizes [0, %low] : (!transform.any_op, !transform.param) -> (!transform.any_op, !transform.any_op) + transform.structured.tile_using_for %inner_linalg_high tile_sizes [0, %high] : (!transform.any_op, !transform.param) -> (!transform.any_op, !transform.any_op) + } transform.yield } } diff --git a/mlir/test/Dialect/SCF/transform-loop-fuse-sibling.mlir b/mlir/test/Dialect/SCF/transform-loop-fuse-sibling.mlir index 0f51b1cdbe0cf1..54dd2bdf953cae 100644 --- a/mlir/test/Dialect/SCF/transform-loop-fuse-sibling.mlir +++ b/mlir/test/Dialect/SCF/transform-loop-fuse-sibling.mlir @@ -328,3 +328,76 @@ module attributes {transform.with_named_sequence} { transform.yield } 
} +// ----- + +// CHECK: func.func @foreach_loop_pair_fuse([[A:%.*]]: {{.*}}, [[B:%.*]]: {{.*}} +func.func @foreach_loop_pair_fuse(%arg1: tensor<128xf32>, %arg2: tensor<128xf32>) -> (tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32>) { + // CHECK-DAG: [[C0:%.*]] = arith.constant 0 : index + // CHECK-DAG: [[C16:%.*]] = arith.constant 16 : index + // CHECK-DAG: [[C128:%.*]] = arith.constant 128 : index + // CHECK-DAG: [[ZERO:%.*]] = arith.constant 0.000000e+00 : f32 + %c0 = arith.constant 0 : index + %c16 = arith.constant 16 : index + %c32 = arith.constant 32 : index + %c128 = arith.constant 128 : index + %cst = arith.constant 0.000000e+00 : f32 + // CHECK: [[RST:%.*]]:2 = scf.for [[IV:%.*]] = [[C0]] to [[C128]] step [[C16]] iter_args([[IB0:%.*]] = [[B]], [[IB1:%.*]] = [[B]]) {{.*}} + %1 = scf.for %arg3 = %c0 to %c128 step %c16 iter_args(%arg4 = %arg2) -> (tensor<128xf32>) { + // CHECK-DAG: [[ASLICE:%.*]] = vector.transfer_read [[A]][[[IV]]], [[ZERO]] + // CHECK-DAG: [[SLICE0:%.*]] = vector.transfer_read [[IB0]][[[IV]]], [[ZERO]] + // CHECK: [[OUT1:%.*]] = arith.addf [[SLICE0]], [[ASLICE]] + // CHECK-NEXT: [[WRT0:%.*]] = vector.transfer_write [[OUT1]], [[IB0]][[[IV]]] + %2 = vector.transfer_read %arg1[%arg3], %cst {in_bounds = [true]} : tensor<128xf32>, vector<16xf32> + %3 = vector.transfer_read %arg4[%arg3], %cst {in_bounds = [true]} : tensor<128xf32>, vector<16xf32> + %5 = arith.addf %3, %2 : vector<16xf32> + %6 = vector.transfer_write %5, %arg4[%arg3] {in_bounds = [true]} : vector<16xf32>, tensor<128xf32> + scf.yield %6 : tensor<128xf32> + } {target_loops} + %dup1 = scf.for %arg3 = %c0 to %c128 step %c16 iter_args(%arg4 = %arg2) -> (tensor<128xf32>) { + // CHECK-DAG: [[SLICE1:%.*]] = vector.transfer_read [[IB1]][[[IV]]], [[ZERO]] + // CHECK: [[OUT2:%.*]] = arith.addf [[SLICE1]], [[ASLICE]] + // CHECK-NEXT: [[WRT1:%.*]] = vector.transfer_write [[OUT2]], [[IB1]][[[IV]]] + %dup2 = vector.transfer_read %arg1[%arg3], %cst {in_bounds = [true]} : 
tensor<128xf32>, vector<16xf32> + %dup3 = vector.transfer_read %arg4[%arg3], %cst {in_bounds = [true]} : tensor<128xf32>, vector<16xf32> + %dup5 = arith.addf %dup3, %dup2 : vector<16xf32> + %dup6 = vector.transfer_write %dup5, %arg4[%arg3] {in_bounds = [true]} : vector<16xf32>, tensor<128xf32> + // CHECK: scf.yield [[WRT0]], [[WRT1]] : {{.*}} + scf.yield %dup6 : tensor<128xf32> + } {source_loops} + %2 = scf.for %arg3 = %c0 to %c128 step %c32 iter_args(%arg4 = %arg2) -> (tensor<128xf32>) { + // CHECK-DAG: [[ASLICE:%.*]] = vector.transfer_read [[A]][[[IV]]], [[ZERO]] + // CHECK-DAG: [[SLICE0:%.*]] = vector.transfer_read [[IB0]][[[IV]]], [[ZERO]] + // CHECK: [[OUT1:%.*]] = arith.addf [[SLICE0]], [[ASLICE]] + // CHECK-NEXT: [[WRT0:%.*]] = vector.transfer_write [[OUT1]], [[IB0]][[[IV]]] + %2 = vector.transfer_read %arg1[%arg3], %cst {in_bounds = [true]} : tensor<128xf32>, vector<32xf32> + %3 = vector.transfer_read %arg4[%arg3], %cst {in_bounds = [true]} : tensor<128xf32>, vector<32xf32> + %5 = arith.addf %3, %2 : vector<32xf32> + %6 = vector.transfer_write %5, %arg4[%arg3] {in_bounds = [true]} : vector<32xf32>, tensor<128xf32> + scf.yield %6 : tensor<128xf32> + } {target_loops} + %dup2 = scf.for %arg3 = %c0 to %c128 step %c32 iter_args(%arg4 = %arg2) -> (tensor<128xf32>) { + // CHECK-DAG: [[SLICE1:%.*]] = vector.transfer_read [[IB1]][[[IV]]], [[ZERO]] + // CHECK: [[OUT2:%.*]] = arith.addf [[SLICE1]], [[ASLICE]] + // CHECK-NEXT: [[WRT1:%.*]] = vector.transfer_write [[OUT2]], [[IB1]][[[IV]]] + %dup2 = vector.transfer_read %arg1[%arg3], %cst {in_bounds = [true]} : tensor<128xf32>, vector<32xf32> + %dup3 = vector.transfer_read %arg4[%arg3], %cst {in_bounds = [true]} : tensor<128xf32>, vector<32xf32> + %dup5 = arith.addf %dup3, %dup2 : vector<32xf32> + %dup6 = vector.transfer_write %dup5, %arg4[%arg3] {in_bounds = [true]} : vector<32xf32>, tensor<128xf32> + // CHECK: scf.yield [[WRT0]], [[WRT1]] : {{.*}} + scf.yield %dup6 : tensor<128xf32> + } {source_loops} + return %1, 
%dup1, %2, %dup2 : tensor<128xf32>, tensor<128xf32>, tensor<128xf32>, tensor<128xf32> +} + + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { + %target_loops = transform.structured.match ops{["scf.for"]} attributes {target_loops} in %arg0 : (!transform.any_op) -> !transform.any_op + %source_loops = transform.structured.match ops{["scf.for"]} attributes {source_loops} in %arg0 : (!transform.any_op) -> !transform.any_op + transform.foreach %target_loops, %source_loops : !transform.any_op, !transform.any_op { + ^bb0(%target_loop: !transform.any_op, %source_loop: !transform.any_op): + %fused = transform.loop.fuse_sibling %target_loop into %source_loop : (!transform.any_op,!transform.any_op) -> !transform.any_op + } + transform.yield + } +} diff --git a/mlir/test/Dialect/Transform/ops-invalid.mlir b/mlir/test/Dialect/Transform/ops-invalid.mlir index 30a68cc5f3c448..71a260f1196e94 100644 --- a/mlir/test/Dialect/Transform/ops-invalid.mlir +++ b/mlir/test/Dialect/Transform/ops-invalid.mlir @@ -279,6 +279,55 @@ transform.sequence failures(propagate) { // ----- +transform.sequence failures(propagate) { + ^bb0(%root: !transform.any_op): + %op = test_produce_self_handle_or_forward_operand : () -> !transform.any_op + // expected-error @below {{op expects the same number of targets as the body has block arguments}} + transform.foreach %op : !transform.any_op -> !transform.any_op, !transform.any_value { + ^bb1(%op_arg: !transform.any_op, %val_arg: !transform.any_value): + transform.yield %op_arg, %val_arg : !transform.any_op, !transform.any_value + } +} + +// ----- + +transform.sequence failures(propagate) { + ^bb0(%root: !transform.any_op): + %op = test_produce_self_handle_or_forward_operand : () -> !transform.any_op + // expected-error @below {{op expects co-indexed targets and the body's block arguments to have the same op/value/param type}} + transform.foreach %op : 
!transform.any_op -> !transform.any_value { + ^bb1(%val_arg: !transform.any_value): + transform.yield %val_arg : !transform.any_value + } +} + +// ----- + +transform.sequence failures(propagate) { + ^bb0(%root: !transform.any_op): + %op = test_produce_self_handle_or_forward_operand : () -> !transform.any_op + // expected-error @below {{op expects the same number of results as the yield terminator has operands}} + transform.foreach %op : !transform.any_op -> !transform.any_op, !transform.any_op { + ^bb1(%arg_op: !transform.any_op): + transform.yield %arg_op : !transform.any_op + } +} + +// ----- + +transform.sequence failures(propagate) { + ^bb0(%root: !transform.any_op): + %op = test_produce_self_handle_or_forward_operand : () -> !transform.any_op + %val = transform.test_produce_value_handle_to_self_operand %op : (!transform.any_op) -> !transform.any_value + // expected-error @below {{expects co-indexed results and yield operands to have the same op/value/param type}} + transform.foreach %op, %val : !transform.any_op, !transform.any_value -> !transform.any_op, !transform.any_value { + ^bb1(%op_arg: !transform.any_op, %val_arg: !transform.any_value): + transform.yield %val_arg, %op_arg : !transform.any_value, !transform.any_op + } +} + +// ----- + transform.sequence failures(suppress) { ^bb0(%arg0: !transform.any_op): // expected-error @below {{TransformOpInterface requires memory effects on operands to be specified}} diff --git a/mlir/test/Dialect/Transform/ops.mlir b/mlir/test/Dialect/Transform/ops.mlir index b03a9f4d760d21..e9baffde262faf 100644 --- a/mlir/test/Dialect/Transform/ops.mlir +++ b/mlir/test/Dialect/Transform/ops.mlir @@ -68,11 +68,25 @@ transform.sequence failures(propagate) { } // CHECK: transform.sequence -// CHECK: foreach transform.sequence failures(propagate) { -^bb0(%arg0: !transform.any_op): - transform.foreach %arg0 : !transform.any_op { - ^bb1(%arg1: !transform.any_op): +^bb0(%op0: !transform.any_op, %val0: !transform.any_value, %par0: 
!transform.any_param): + // CHECK: foreach %{{.*}} : !transform.any_op + transform.foreach %op0 : !transform.any_op { + ^bb1(%op1: !transform.any_op): + } + // CHECK: foreach %{{.*}} : !transform.any_op, !transform.any_value, !transform.any_param + transform.foreach %op0, %val0, %par0 : !transform.any_op, !transform.any_value, !transform.any_param { + ^bb1(%op1: !transform.any_op, %val1: !transform.any_value, %par1: !transform.any_param): + } + // CHECK: foreach %{{.*}} : !transform.any_op, !transform.any_value, !transform.any_param -> !transform.any_op + transform.foreach %op0, %val0, %par0 : !transform.any_op, !transform.any_value, !transform.any_param -> !transform.any_op { + ^bb1(%op1: !transform.any_op, %val1: !transform.any_value, %par1: !transform.any_param): + transform.yield %op1 : !transform.any_op + } + // CHECK: foreach %{{.*}} : !transform.any_op, !transform.any_value, !transform.any_param -> !transform.any_param, !transform.any_value + transform.foreach %op0, %val0, %par0 : !transform.any_op, !transform.any_value, !transform.any_param -> !transform.any_param, !transform.any_value { + ^bb1(%op1: !transform.any_op, %val1: !transform.any_value, %par1: !transform.any_param): + transform.yield %par1, %val1 : !transform.any_param, !transform.any_value } } diff --git a/mlir/test/Dialect/Transform/test-interpreter.mlir b/mlir/test/Dialect/Transform/test-interpreter.mlir index b6850e2024d53d..4fe2dbedff56e3 100644 --- a/mlir/test/Dialect/Transform/test-interpreter.mlir +++ b/mlir/test/Dialect/Transform/test-interpreter.mlir @@ -830,6 +830,91 @@ module attributes {transform.with_named_sequence} { // ----- +module attributes { transform.with_named_sequence } { + transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["linalg.matmul"]} in %arg0 : (!transform.any_op) -> !transform.any_op + %results, %types = transform.foreach %0 : !transform.any_op -> !transform.any_value, 
!transform.any_param { + ^bb0(%op0 : !transform.any_op): + %result = transform.get_result %op0[0] : (!transform.any_op) -> !transform.any_value + %type = transform.get_type elemental %result : (!transform.any_value) -> !transform.any_param + transform.yield %result, %type : !transform.any_value, !transform.any_param + } + transform.debug.emit_remark_at %results, "result selected" : !transform.any_value + transform.debug.emit_param_as_remark %types, "elemental types" at %0 : !transform.any_param, !transform.any_op + + transform.yield + } +} + +func.func @payload(%lhs: tensor<10x20xf16>, + %rhs: tensor<20x15xf32>) -> (tensor<10x15xf64>, tensor<10x15xf32>) { + %cst64 = arith.constant 0.0 : f64 + %empty64 = tensor.empty() : tensor<10x15xf64> + %fill64 = linalg.fill ins(%cst64 : f64) outs(%empty64 : tensor<10x15xf64>) -> tensor<10x15xf64> + // expected-remark @below {{result selected}} + // expected-note @below {{value handle points to an op result #0}} + // expected-remark @below {{elemental types f64, f32}} + %result64 = linalg.matmul ins(%lhs, %rhs: tensor<10x20xf16>, tensor<20x15xf32>) + outs(%fill64: tensor<10x15xf64>) -> tensor<10x15xf64> + + %cst32 = arith.constant 0.0 : f32 + %empty32 = tensor.empty() : tensor<10x15xf32> + %fill32 = linalg.fill ins(%cst32 : f32) outs(%empty32 : tensor<10x15xf32>) -> tensor<10x15xf32> + // expected-remark @below {{result selected}} + // expected-note @below {{value handle points to an op result #0}} + // expected-remark @below {{elemental types f64, f32}} + %result32 = linalg.matmul ins(%lhs, %rhs: tensor<10x20xf16>, tensor<20x15xf32>) + outs(%fill32: tensor<10x15xf32>) -> tensor<10x15xf32> + + return %result64, %result32 : tensor<10x15xf64>, tensor<10x15xf32> + +} + +// ----- + +func.func @two_const_ops() { + %0 = arith.constant 0 : index + %1 = arith.constant 1 : index + return +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op) { + %two_ops = 
transform.structured.match ops{["arith.constant"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %one_param = transform.param.constant 1 : i32 -> !transform.test_dialect_param + // expected-error @below {{prior targets' payload size (2) differs from payload size (1) of target}} + transform.foreach %two_ops, %one_param : !transform.any_op, !transform.test_dialect_param { + ^bb2(%op: !transform.any_op, %param: !transform.test_dialect_param): + } + transform.yield + } +} + +// ----- + +func.func @one_const_op() { + %0 = arith.constant 0 : index + return +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op) { + %one_op = transform.structured.match ops{["arith.constant"]} in %arg1 : (!transform.any_op) -> !transform.any_op + %one_val = transform.test_produce_value_handle_to_self_operand %one_op : (!transform.any_op) -> !transform.any_value + %param_one = transform.param.constant 1 : i32 -> !transform.test_dialect_param + %param_two = transform.param.constant 2 : i32 -> !transform.test_dialect_param + %two_params = transform.merge_handles %param_one, %param_two : !transform.test_dialect_param + + // expected-error @below {{prior targets' payload size (1) differs from payload size (2) of target}} + transform.foreach %one_val, %one_op, %two_params : !transform.any_value, !transform.any_op, !transform.test_dialect_param { + ^bb2(%val: !transform.any_value, %op: !transform.any_op, %param: !transform.test_dialect_param): + } + transform.yield + } +} + +// ----- + // CHECK-LABEL: func @consume_in_foreach() // CHECK-NEXT: return func.func @consume_in_foreach() { From 9afb09e674d6195faf09431dda8a3a08886ab27d Mon Sep 17 00:00:00 2001 From: David Tenty Date: Fri, 14 Jun 2024 11:13:54 -0400 Subject: [PATCH 109/155] [libcxx][test][AIX] address more platform differences in locale tests (#94826) This is a follow on to https://github.com/llvm/llvm-project/pull/92312, where we address some more locale 
platform differences. These are: - for locale fr_FR AIX libc expects `U202F` as `LC_MONETARY` `thousands_sep` - for locale zh_CN AIX libc `LC_MONETARY` has `n_sign_posn == 1`, indicating the `negative_sign` should come before the `currency_symbol` string --- .../get_long_double_fr_FR.pass.cpp | 267 +++++++++--------- .../get_long_double_zh_CN.pass.cpp | 238 ++++++++-------- .../put_long_double_fr_FR.pass.cpp | 37 +-- .../put_long_double_zh_CN.pass.cpp | 61 +++- 4 files changed, 326 insertions(+), 277 deletions(-) diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp index 3effa80e7d6f79..bbb67d694970a6 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp @@ -11,8 +11,6 @@ // NetBSD does not support LC_MONETARY at the moment // XFAIL: netbsd -// XFAIL: LIBCXX-AIX-FIXME - // REQUIRES: locale.fr_FR.UTF-8 // @@ -32,6 +30,13 @@ #include "platform_support.h" // locale name macros #include "test_macros.h" +#ifdef _AIX +// the AIX libc expects U202F as LC_MONETARY thousands_sep +# define THOUSANDS_SEP L"\u202F" +#else +# define THOUSANDS_SEP L" " +#endif + typedef std::money_get > Fn; class my_facet @@ -432,26 +437,24 @@ int main(int, char**) assert(ex == -1); } { // positive - std::wstring v = convert_thousands_sep(L"1 234 567,89 "); - typedef cpp17_input_iterator I; - long double ex; - std::ios_base::iostate err = std::ios_base::goodbit; - I iter = f.get(I(v.data()), I(v.data() + v.size()), - false, ios, err, ex); - assert(base(iter) == v.data() + v.size()); - assert(err == std::ios_base::eofbit); - 
assert(ex == 123456789); + std::wstring v = convert_thousands_sep(L"1" THOUSANDS_SEP "234" THOUSANDS_SEP "567,89 "); + typedef cpp17_input_iterator I; + long double ex; + std::ios_base::iostate err = std::ios_base::goodbit; + I iter = f.get(I(v.data()), I(v.data() + v.size()), false, ios, err, ex); + assert(base(iter) == v.data() + v.size()); + assert(err == std::ios_base::eofbit); + assert(ex == 123456789); } { // negative - std::wstring v = convert_thousands_sep(L"-1 234 567,89"); - typedef cpp17_input_iterator I; - long double ex; - std::ios_base::iostate err = std::ios_base::goodbit; - I iter = f.get(I(v.data()), I(v.data() + v.size()), - false, ios, err, ex); - assert(base(iter) == v.data() + v.size()); - assert(err == std::ios_base::eofbit); - assert(ex == -123456789); + std::wstring v = convert_thousands_sep(L"-1" THOUSANDS_SEP "234" THOUSANDS_SEP "567,89"); + typedef cpp17_input_iterator I; + long double ex; + std::ios_base::iostate err = std::ios_base::goodbit; + I iter = f.get(I(v.data()), I(v.data() + v.size()), false, ios, err, ex); + assert(base(iter) == v.data() + v.size()); + assert(err == std::ios_base::eofbit); + assert(ex == -123456789); } { // negative std::wstring v = L"-1234567,89"; @@ -512,64 +515,59 @@ int main(int, char**) assert(ex == -1); } { // positive, showbase - std::wstring v = convert_thousands_sep(L"1 234 567,89 \u20ac"); // EURO SIGN - typedef cpp17_input_iterator I; - long double ex; - std::ios_base::iostate err = std::ios_base::goodbit; - I iter = f.get(I(v.data()), I(v.data() + v.size()), - false, ios, err, ex); - assert(base(iter) == v.data() + v.size()); - assert(err == std::ios_base::eofbit); - assert(ex == 123456789); + std::wstring v = convert_thousands_sep(L"1" THOUSANDS_SEP "234" THOUSANDS_SEP "567,89 \u20ac"); // EURO SIGN + typedef cpp17_input_iterator I; + long double ex; + std::ios_base::iostate err = std::ios_base::goodbit; + I iter = f.get(I(v.data()), I(v.data() + v.size()), false, ios, err, ex); + 
assert(base(iter) == v.data() + v.size()); + assert(err == std::ios_base::eofbit); + assert(ex == 123456789); } { // positive, showbase - std::wstring v = convert_thousands_sep(L"1 234 567,89 \u20ac"); // EURO SIGN - std::showbase(ios); - typedef cpp17_input_iterator I; - long double ex; - std::ios_base::iostate err = std::ios_base::goodbit; - I iter = f.get(I(v.data()), I(v.data() + v.size()), - false, ios, err, ex); - assert(base(iter) == v.data() + v.size()); - assert(err == std::ios_base::eofbit); - assert(ex == 123456789); - std::noshowbase(ios); + std::wstring v = convert_thousands_sep(L"1" THOUSANDS_SEP "234" THOUSANDS_SEP "567,89 \u20ac"); // EURO SIGN + std::showbase(ios); + typedef cpp17_input_iterator I; + long double ex; + std::ios_base::iostate err = std::ios_base::goodbit; + I iter = f.get(I(v.data()), I(v.data() + v.size()), false, ios, err, ex); + assert(base(iter) == v.data() + v.size()); + assert(err == std::ios_base::eofbit); + assert(ex == 123456789); + std::noshowbase(ios); } { // negative, showbase - std::wstring v = convert_thousands_sep(L"-1 234 567,89 \u20ac"); // EURO SIGN - std::showbase(ios); - typedef cpp17_input_iterator I; - long double ex; - std::ios_base::iostate err = std::ios_base::goodbit; - I iter = f.get(I(v.data()), I(v.data() + v.size()), - false, ios, err, ex); - assert(base(iter) == v.data() + v.size()); - assert(err == std::ios_base::eofbit); - assert(ex == -123456789); - std::noshowbase(ios); + std::wstring v = convert_thousands_sep(L"-1" THOUSANDS_SEP "234" THOUSANDS_SEP "567,89 \u20ac"); // EURO SIGN + std::showbase(ios); + typedef cpp17_input_iterator I; + long double ex; + std::ios_base::iostate err = std::ios_base::goodbit; + I iter = f.get(I(v.data()), I(v.data() + v.size()), false, ios, err, ex); + assert(base(iter) == v.data() + v.size()); + assert(err == std::ios_base::eofbit); + assert(ex == -123456789); + std::noshowbase(ios); } { // negative, showbase - std::wstring v = convert_thousands_sep(L"1 234 567,89 EUR 
-"); - std::showbase(ios); - typedef cpp17_input_iterator I; - long double ex; - std::ios_base::iostate err = std::ios_base::goodbit; - I iter = f.get(I(v.data()), I(v.data() + v.size()), - false, ios, err, ex); - assert(base(iter) == v.data() + 13); - assert(err == std::ios_base::failbit); - std::noshowbase(ios); + std::wstring v = convert_thousands_sep(L"1" THOUSANDS_SEP "234" THOUSANDS_SEP "567,89 EUR -"); + std::showbase(ios); + typedef cpp17_input_iterator I; + long double ex; + std::ios_base::iostate err = std::ios_base::goodbit; + I iter = f.get(I(v.data()), I(v.data() + v.size()), false, ios, err, ex); + assert(base(iter) == v.data() + 13); + assert(err == std::ios_base::failbit); + std::noshowbase(ios); } { // negative, showbase - std::wstring v = convert_thousands_sep(L"1 234 567,89 EUR -"); - typedef cpp17_input_iterator I; - long double ex; - std::ios_base::iostate err = std::ios_base::goodbit; - I iter = f.get(I(v.data()), I(v.data() + v.size()), - false, ios, err, ex); - assert(base(iter) == v.data() + 13); - assert(err == std::ios_base::goodbit); - assert(ex == 123456789); + std::wstring v = convert_thousands_sep(L"1" THOUSANDS_SEP "234" THOUSANDS_SEP "567,89 EUR -"); + typedef cpp17_input_iterator I; + long double ex; + std::ios_base::iostate err = std::ios_base::goodbit; + I iter = f.get(I(v.data()), I(v.data() + v.size()), false, ios, err, ex); + assert(base(iter) == v.data() + 13); + assert(err == std::ios_base::goodbit); + assert(ex == 123456789); } } { @@ -598,26 +596,24 @@ int main(int, char**) assert(ex == -1); } { // positive - std::wstring v = convert_thousands_sep(L"1 234 567,89 "); - typedef cpp17_input_iterator I; - long double ex; - std::ios_base::iostate err = std::ios_base::goodbit; - I iter = f.get(I(v.data()), I(v.data() + v.size()), - true, ios, err, ex); - assert(base(iter) == v.data() + v.size()); - assert(err == std::ios_base::eofbit); - assert(ex == 123456789); + std::wstring v = convert_thousands_sep(L"1" THOUSANDS_SEP "234" 
THOUSANDS_SEP "567,89 "); + typedef cpp17_input_iterator I; + long double ex; + std::ios_base::iostate err = std::ios_base::goodbit; + I iter = f.get(I(v.data()), I(v.data() + v.size()), true, ios, err, ex); + assert(base(iter) == v.data() + v.size()); + assert(err == std::ios_base::eofbit); + assert(ex == 123456789); } { // negative - std::wstring v = convert_thousands_sep(L"-1 234 567,89"); - typedef cpp17_input_iterator I; - long double ex; - std::ios_base::iostate err = std::ios_base::goodbit; - I iter = f.get(I(v.data()), I(v.data() + v.size()), - true, ios, err, ex); - assert(base(iter) == v.data() + v.size()); - assert(err == std::ios_base::eofbit); - assert(ex == -123456789); + std::wstring v = convert_thousands_sep(L"-1" THOUSANDS_SEP "234" THOUSANDS_SEP "567,89"); + typedef cpp17_input_iterator I; + long double ex; + std::ios_base::iostate err = std::ios_base::goodbit; + I iter = f.get(I(v.data()), I(v.data() + v.size()), true, ios, err, ex); + assert(base(iter) == v.data() + v.size()); + assert(err == std::ios_base::eofbit); + assert(ex == -123456789); } { // negative std::wstring v = L"-1234567,89"; @@ -678,64 +674,59 @@ int main(int, char**) assert(ex == -1); } { // positive, showbase - std::wstring v = convert_thousands_sep(L"1 234 567,89 EUR"); - typedef cpp17_input_iterator I; - long double ex; - std::ios_base::iostate err = std::ios_base::goodbit; - I iter = f.get(I(v.data()), I(v.data() + v.size()), - true, ios, err, ex); - assert(base(iter) == v.data() + v.size()); - assert(err == std::ios_base::eofbit); - assert(ex == 123456789); + std::wstring v = convert_thousands_sep(L"1" THOUSANDS_SEP "234" THOUSANDS_SEP "567,89 EUR"); + typedef cpp17_input_iterator I; + long double ex; + std::ios_base::iostate err = std::ios_base::goodbit; + I iter = f.get(I(v.data()), I(v.data() + v.size()), true, ios, err, ex); + assert(base(iter) == v.data() + v.size()); + assert(err == std::ios_base::eofbit); + assert(ex == 123456789); } { // positive, showbase - 
std::wstring v = convert_thousands_sep(L"1 234 567,89 EUR"); - std::showbase(ios); - typedef cpp17_input_iterator I; - long double ex; - std::ios_base::iostate err = std::ios_base::goodbit; - I iter = f.get(I(v.data()), I(v.data() + v.size()), - true, ios, err, ex); - assert(base(iter) == v.data() + v.size()); - assert(err == std::ios_base::eofbit); - assert(ex == 123456789); - std::noshowbase(ios); + std::wstring v = convert_thousands_sep(L"1" THOUSANDS_SEP "234" THOUSANDS_SEP "567,89 EUR"); + std::showbase(ios); + typedef cpp17_input_iterator I; + long double ex; + std::ios_base::iostate err = std::ios_base::goodbit; + I iter = f.get(I(v.data()), I(v.data() + v.size()), true, ios, err, ex); + assert(base(iter) == v.data() + v.size()); + assert(err == std::ios_base::eofbit); + assert(ex == 123456789); + std::noshowbase(ios); } { // negative, showbase - std::wstring v = convert_thousands_sep(L"-1 234 567,89 EUR"); - std::showbase(ios); - typedef cpp17_input_iterator I; - long double ex; - std::ios_base::iostate err = std::ios_base::goodbit; - I iter = f.get(I(v.data()), I(v.data() + v.size()), - true, ios, err, ex); - assert(base(iter) == v.data() + v.size()); - assert(err == std::ios_base::eofbit); - assert(ex == -123456789); - std::noshowbase(ios); + std::wstring v = convert_thousands_sep(L"-1" THOUSANDS_SEP "234" THOUSANDS_SEP "567,89 EUR"); + std::showbase(ios); + typedef cpp17_input_iterator I; + long double ex; + std::ios_base::iostate err = std::ios_base::goodbit; + I iter = f.get(I(v.data()), I(v.data() + v.size()), true, ios, err, ex); + assert(base(iter) == v.data() + v.size()); + assert(err == std::ios_base::eofbit); + assert(ex == -123456789); + std::noshowbase(ios); } { // negative, showbase - std::wstring v = convert_thousands_sep(L"1 234 567,89 Eu-"); - std::showbase(ios); - typedef cpp17_input_iterator I; - long double ex; - std::ios_base::iostate err = std::ios_base::goodbit; - I iter = f.get(I(v.data()), I(v.data() + v.size()), - true, ios, err, 
ex); - assert(base(iter) == v.data() + 14); - assert(err == std::ios_base::failbit); - std::noshowbase(ios); + std::wstring v = convert_thousands_sep(L"1" THOUSANDS_SEP "234" THOUSANDS_SEP "567,89 Eu-"); + std::showbase(ios); + typedef cpp17_input_iterator I; + long double ex; + std::ios_base::iostate err = std::ios_base::goodbit; + I iter = f.get(I(v.data()), I(v.data() + v.size()), true, ios, err, ex); + assert(base(iter) == v.data() + 14); + assert(err == std::ios_base::failbit); + std::noshowbase(ios); } { // negative, showbase - std::wstring v = convert_thousands_sep(L"1 234 567,89 Eu-"); - typedef cpp17_input_iterator I; - long double ex; - std::ios_base::iostate err = std::ios_base::goodbit; - I iter = f.get(I(v.data()), I(v.data() + v.size()), - true, ios, err, ex); - assert(base(iter) == v.data() + 13); - assert(err == std::ios_base::goodbit); - assert(ex == 123456789); + std::wstring v = convert_thousands_sep(L"1" THOUSANDS_SEP "234" THOUSANDS_SEP "567,89 Eu-"); + typedef cpp17_input_iterator I; + long double ex; + std::ios_base::iostate err = std::ios_base::goodbit; + I iter = f.get(I(v.data()), I(v.data() + v.size()), true, ios, err, ex); + assert(base(iter) == v.data() + 13); + assert(err == std::ios_base::goodbit); + assert(ex == 123456789); } } #endif // TEST_HAS_NO_WIDE_CHARACTERS diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_zh_CN.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_zh_CN.pass.cpp index 4cdb25728af7d2..05c09b26969f9c 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_zh_CN.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_zh_CN.pass.cpp @@ -9,7 +9,6 @@ // NetBSD does not support LC_MONETARY at 
the moment // XFAIL: netbsd -// XFAIL: LIBCXX-AIX-FIXME // XFAIL: LIBCXX-FREEBSD-FIXME // REQUIRES: locale.zh_CN.UTF-8 @@ -156,28 +155,34 @@ int main(int, char**) std::noshowbase(ios); } { // negative one, showbase - std::string v = currency_symbol + "-0.01"; - typedef cpp17_input_iterator I; - long double ex; - std::ios_base::iostate err = std::ios_base::goodbit; - I iter = f.get(I(v.data()), I(v.data() + v.size()), - false, ios, err, ex); - assert(base(iter) == v.data() + v.size()); - assert(err == std::ios_base::eofbit); - assert(ex == -1); +#ifdef _AIX + std::string v = "-" + currency_symbol + "0.01"; +#else + std::string v = currency_symbol + "-0.01"; +#endif + typedef cpp17_input_iterator I; + long double ex; + std::ios_base::iostate err = std::ios_base::goodbit; + I iter = f.get(I(v.data()), I(v.data() + v.size()), false, ios, err, ex); + assert(base(iter) == v.data() + v.size()); + assert(err == std::ios_base::eofbit); + assert(ex == -1); } { // negative one, showbase - std::string v = currency_symbol + "-0.01"; - std::showbase(ios); - typedef cpp17_input_iterator I; - long double ex; - std::ios_base::iostate err = std::ios_base::goodbit; - I iter = f.get(I(v.data()), I(v.data() + v.size()), - false, ios, err, ex); - assert(base(iter) == v.data() + v.size()); - assert(err == std::ios_base::eofbit); - assert(ex == -1); - std::noshowbase(ios); +#ifdef _AIX + std::string v = "-" + currency_symbol + "0.01"; +#else + std::string v = currency_symbol + "-0.01"; +#endif + std::showbase(ios); + typedef cpp17_input_iterator I; + long double ex; + std::ios_base::iostate err = std::ios_base::goodbit; + I iter = f.get(I(v.data()), I(v.data() + v.size()), false, ios, err, ex); + assert(base(iter) == v.data() + v.size()); + assert(err == std::ios_base::eofbit); + assert(ex == -1); + std::noshowbase(ios); } { // positive, showbase std::string v = currency_symbol + "1,234,567.89"; @@ -204,17 +209,20 @@ int main(int, char**) std::noshowbase(ios); } { // negative, showbase - 
std::string v = currency_symbol + "-1,234,567.89"; - std::showbase(ios); - typedef cpp17_input_iterator I; - long double ex; - std::ios_base::iostate err = std::ios_base::goodbit; - I iter = f.get(I(v.data()), I(v.data() + v.size()), - false, ios, err, ex); - assert(base(iter) == v.data() + v.size()); - assert(err == std::ios_base::eofbit); - assert(ex == -123456789); - std::noshowbase(ios); +#ifdef _AIX + std::string v = "-" + currency_symbol + "1,234,567.89"; +#else + std::string v = currency_symbol + "-1,234,567.89"; +#endif + std::showbase(ios); + typedef cpp17_input_iterator I; + long double ex; + std::ios_base::iostate err = std::ios_base::goodbit; + I iter = f.get(I(v.data()), I(v.data() + v.size()), false, ios, err, ex); + assert(base(iter) == v.data() + v.size()); + assert(err == std::ios_base::eofbit); + assert(ex == -123456789); + std::noshowbase(ios); } { // negative, showbase std::string v = "CNY -1,234,567.89"; @@ -322,8 +330,8 @@ int main(int, char**) std::noshowbase(ios); } { // negative one, showbase -#ifdef TEST_HAS_GLIBC - std::string v = "-" + currency_name + "0.01"; +#if defined(TEST_HAS_GLIBC) || defined(_AIX) + std::string v = "-" + currency_name + "0.01"; #else std::string v = currency_name + "-0.01"; #endif @@ -337,8 +345,8 @@ int main(int, char**) assert(ex == -1); } { // negative one, showbase -#ifdef TEST_HAS_GLIBC - std::string v = "-" + currency_name + "0.01"; +#if defined(TEST_HAS_GLIBC) || defined(_AIX) + std::string v = "-" + currency_name + "0.01"; #else std::string v = currency_name + "-0.01"; #endif @@ -378,8 +386,8 @@ int main(int, char**) std::noshowbase(ios); } { // negative, showbase -#ifdef TEST_HAS_GLIBC - std::string v = "-" + currency_name + "1,234,567.89"; +#if defined(TEST_HAS_GLIBC) || defined(_AIX) + std::string v = "-" + currency_name + "1,234,567.89"; #else std::string v = currency_name + "-1,234,567.89"; #endif @@ -507,28 +515,34 @@ int main(int, char**) std::noshowbase(ios); } { // negative one, showbase - 
std::wstring v = w_currency_symbol + L"-0.01"; - typedef cpp17_input_iterator I; - long double ex; - std::ios_base::iostate err = std::ios_base::goodbit; - I iter = f.get(I(v.data()), I(v.data() + v.size()), - false, ios, err, ex); - assert(base(iter) == v.data() + v.size()); - assert(err == std::ios_base::eofbit); - assert(ex == -1); +# ifdef _AIX + std::wstring v = L"-" + w_currency_symbol + L"0.01"; +# else + std::wstring v = w_currency_symbol + L"-0.01"; +# endif + typedef cpp17_input_iterator I; + long double ex; + std::ios_base::iostate err = std::ios_base::goodbit; + I iter = f.get(I(v.data()), I(v.data() + v.size()), false, ios, err, ex); + assert(base(iter) == v.data() + v.size()); + assert(err == std::ios_base::eofbit); + assert(ex == -1); } { // negative one, showbase - std::wstring v = w_currency_symbol + L"-0.01"; - std::showbase(ios); - typedef cpp17_input_iterator I; - long double ex; - std::ios_base::iostate err = std::ios_base::goodbit; - I iter = f.get(I(v.data()), I(v.data() + v.size()), - false, ios, err, ex); - assert(base(iter) == v.data() + v.size()); - assert(err == std::ios_base::eofbit); - assert(ex == -1); - std::noshowbase(ios); +# ifdef _AIX + std::wstring v = L"-" + w_currency_symbol + L"0.01"; +# else + std::wstring v = w_currency_symbol + L"-0.01"; +# endif + std::showbase(ios); + typedef cpp17_input_iterator I; + long double ex; + std::ios_base::iostate err = std::ios_base::goodbit; + I iter = f.get(I(v.data()), I(v.data() + v.size()), false, ios, err, ex); + assert(base(iter) == v.data() + v.size()); + assert(err == std::ios_base::eofbit); + assert(ex == -1); + std::noshowbase(ios); } { // positive, showbase std::wstring v = w_currency_symbol + L"1,234,567.89"; @@ -555,17 +569,20 @@ int main(int, char**) std::noshowbase(ios); } { // negative, showbase - std::wstring v = w_currency_symbol + L"-1,234,567.89"; - std::showbase(ios); - typedef cpp17_input_iterator I; - long double ex; - std::ios_base::iostate err = 
std::ios_base::goodbit; - I iter = f.get(I(v.data()), I(v.data() + v.size()), - false, ios, err, ex); - assert(base(iter) == v.data() + v.size()); - assert(err == std::ios_base::eofbit); - assert(ex == -123456789); - std::noshowbase(ios); +# ifdef _AIX + std::wstring v = L"-" + w_currency_symbol + L"1,234,567.89"; +# else + std::wstring v = w_currency_symbol + L"-1,234,567.89"; +# endif + std::showbase(ios); + typedef cpp17_input_iterator I; + long double ex; + std::ios_base::iostate err = std::ios_base::goodbit; + I iter = f.get(I(v.data()), I(v.data() + v.size()), false, ios, err, ex); + assert(base(iter) == v.data() + v.size()); + assert(err == std::ios_base::eofbit); + assert(ex == -123456789); + std::noshowbase(ios); } { // negative, showbase std::wstring v = L"CNY -1,234,567.89"; @@ -673,36 +690,34 @@ int main(int, char**) std::noshowbase(ios); } { // negative one, showbase -#ifdef TEST_HAS_GLIBC - std::wstring v = L"-" + w_currency_name + L"0.01"; -#else - std::wstring v = w_currency_name + L"-0.01"; -#endif - typedef cpp17_input_iterator I; - long double ex; - std::ios_base::iostate err = std::ios_base::goodbit; - I iter = f.get(I(v.data()), I(v.data() + v.size()), - true, ios, err, ex); - assert(base(iter) == v.data() + v.size()); - assert(err == std::ios_base::eofbit); - assert(ex == -1); +# if defined(TEST_HAS_GLIBC) || defined(_AIX) + std::wstring v = L"-" + w_currency_name + L"0.01"; +# else + std::wstring v = w_currency_name + L"-0.01"; +# endif + typedef cpp17_input_iterator I; + long double ex; + std::ios_base::iostate err = std::ios_base::goodbit; + I iter = f.get(I(v.data()), I(v.data() + v.size()), true, ios, err, ex); + assert(base(iter) == v.data() + v.size()); + assert(err == std::ios_base::eofbit); + assert(ex == -1); } { // negative one, showbase -#ifdef TEST_HAS_GLIBC - std::wstring v = L"-" + w_currency_name + L"0.01"; -#else - std::wstring v = w_currency_name + L"-0.01"; -#endif - std::showbase(ios); - typedef cpp17_input_iterator I; - 
long double ex; - std::ios_base::iostate err = std::ios_base::goodbit; - I iter = f.get(I(v.data()), I(v.data() + v.size()), - true, ios, err, ex); - assert(base(iter) == v.data() + v.size()); - assert(err == std::ios_base::eofbit); - assert(ex == -1); - std::noshowbase(ios); +# if defined(TEST_HAS_GLIBC) || defined(_AIX) + std::wstring v = L"-" + w_currency_name + L"0.01"; +# else + std::wstring v = w_currency_name + L"-0.01"; +# endif + std::showbase(ios); + typedef cpp17_input_iterator I; + long double ex; + std::ios_base::iostate err = std::ios_base::goodbit; + I iter = f.get(I(v.data()), I(v.data() + v.size()), true, ios, err, ex); + assert(base(iter) == v.data() + v.size()); + assert(err == std::ios_base::eofbit); + assert(ex == -1); + std::noshowbase(ios); } { // positive, showbase std::wstring v = w_currency_name + L"1,234,567.89"; @@ -729,21 +744,20 @@ int main(int, char**) std::noshowbase(ios); } { // negative, showbase -#ifdef TEST_HAS_GLIBC - std::wstring v = L"-" + w_currency_name + L"1,234,567.89"; -#else - std::wstring v = w_currency_name + L"-1,234,567.89"; -#endif - std::showbase(ios); - typedef cpp17_input_iterator I; - long double ex; - std::ios_base::iostate err = std::ios_base::goodbit; - I iter = f.get(I(v.data()), I(v.data() + v.size()), - true, ios, err, ex); - assert(base(iter) == v.data() + v.size()); - assert(err == std::ios_base::eofbit); - assert(ex == -123456789); - std::noshowbase(ios); +# if defined(TEST_HAS_GLIBC) || defined(_AIX) + std::wstring v = L"-" + w_currency_name + L"1,234,567.89"; +# else + std::wstring v = w_currency_name + L"-1,234,567.89"; +# endif + std::showbase(ios); + typedef cpp17_input_iterator I; + long double ex; + std::ios_base::iostate err = std::ios_base::goodbit; + I iter = f.get(I(v.data()), I(v.data() + v.size()), true, ios, err, ex); + assert(base(iter) == v.data() + v.size()); + assert(err == std::ios_base::eofbit); + assert(ex == -123456789); + std::noshowbase(ios); } { // negative, showbase 
std::wstring v = w_currency_symbol + L"-1,234,567.89"; diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp index 05b4ee474944af..47a48deb3368c0 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp @@ -11,8 +11,6 @@ // NetBSD does not support LC_MONETARY at the moment // XFAIL: netbsd -// XFAIL: LIBCXX-AIX-FIXME - // REQUIRES: locale.fr_FR.UTF-8 // @@ -32,6 +30,13 @@ #include "platform_support.h" // locale name macros #include "test_macros.h" +#ifdef _AIX +// the AIX libc expects U202F as LC_MONETARY thousands_sep +# define THOUSANDS_SEP L"\u202F" +#else +# define THOUSANDS_SEP L" " +#endif + typedef std::money_put > Fn; class my_facet @@ -291,14 +296,14 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), false, ios, '*', v); std::wstring ex(str, base(iter)); - assert(ex == convert_thousands_sep(L"1 234 567,89")); + assert(ex == convert_thousands_sep(L"1" THOUSANDS_SEP "234" THOUSANDS_SEP "567,89")); } { // negative long double v = -123456789; wchar_t str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), false, ios, '*', v); std::wstring ex(str, base(iter)); - assert(ex == convert_thousands_sep(L"-1 234 567,89")); + assert(ex == convert_thousands_sep(L"-1" THOUSANDS_SEP "234" THOUSANDS_SEP "567,89")); } { // zero, showbase long double v = 0; @@ -322,7 +327,7 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), false, ios, '*', v); std::wstring ex(str, base(iter)); - 
assert(ex == convert_thousands_sep(L"1 234 567,89 \u20ac")); + assert(ex == convert_thousands_sep(L"1" THOUSANDS_SEP "234" THOUSANDS_SEP "567,89 \u20ac")); } { // negative, showbase long double v = -123456789; @@ -330,7 +335,7 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), false, ios, '*', v); std::wstring ex(str, base(iter)); - assert(ex == convert_thousands_sep(L"-1 234 567,89 \u20ac")); + assert(ex == convert_thousands_sep(L"-1" THOUSANDS_SEP "234" THOUSANDS_SEP "567,89 \u20ac")); } { // negative, showbase, left long double v = -123456789; @@ -340,7 +345,7 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), false, ios, ' ', v); std::wstring ex(str, base(iter)); - assert(ex == convert_thousands_sep(L"-1 234 567,89 \u20ac ")); + assert(ex == convert_thousands_sep(L"-1" THOUSANDS_SEP "234" THOUSANDS_SEP "567,89 \u20ac ")); assert(ios.width() == 0); } { // negative, showbase, internal @@ -351,7 +356,7 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), false, ios, ' ', v); std::wstring ex(str, base(iter)); - assert(ex == convert_thousands_sep(L"-1 234 567,89 \u20ac")); + assert(ex == convert_thousands_sep(L"-1" THOUSANDS_SEP "234" THOUSANDS_SEP "567,89 \u20ac")); assert(ios.width() == 0); } { // negative, showbase, right @@ -362,7 +367,7 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), false, ios, ' ', v); std::wstring ex(str, base(iter)); - assert(ex == convert_thousands_sep(L" -1 234 567,89 \u20ac")); + assert(ex == convert_thousands_sep(L" -1" THOUSANDS_SEP "234" THOUSANDS_SEP "567,89 \u20ac")); assert(ios.width() == 0); } @@ -388,14 +393,14 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), true, ios, '*', v); std::wstring ex(str, base(iter)); - assert(ex == convert_thousands_sep(L"1 234 
567,89")); + assert(ex == convert_thousands_sep(L"1" THOUSANDS_SEP "234" THOUSANDS_SEP "567,89")); } { // negative long double v = -123456789; wchar_t str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), true, ios, '*', v); std::wstring ex(str, base(iter)); - assert(ex == convert_thousands_sep(L"-1 234 567,89")); + assert(ex == convert_thousands_sep(L"-1" THOUSANDS_SEP "234" THOUSANDS_SEP "567,89")); } { // zero, showbase long double v = 0; @@ -419,7 +424,7 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), true, ios, '*', v); std::wstring ex(str, base(iter)); - assert(ex == convert_thousands_sep(L"1 234 567,89 EUR")); + assert(ex == convert_thousands_sep(L"1" THOUSANDS_SEP "234" THOUSANDS_SEP "567,89 EUR")); } { // negative, showbase long double v = -123456789; @@ -427,7 +432,7 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), true, ios, '*', v); std::wstring ex(str, base(iter)); - assert(ex == convert_thousands_sep(L"-1 234 567,89 EUR")); + assert(ex == convert_thousands_sep(L"-1" THOUSANDS_SEP "234" THOUSANDS_SEP "567,89 EUR")); } { // negative, showbase, left long double v = -123456789; @@ -437,7 +442,7 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), true, ios, ' ', v); std::wstring ex(str, base(iter)); - assert(ex == convert_thousands_sep(L"-1 234 567,89 EUR ")); + assert(ex == convert_thousands_sep(L"-1" THOUSANDS_SEP "234" THOUSANDS_SEP "567,89 EUR ")); assert(ios.width() == 0); } { // negative, showbase, internal @@ -448,7 +453,7 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), true, ios, ' ', v); std::wstring ex(str, base(iter)); - assert(ex == convert_thousands_sep(L"-1 234 567,89 EUR")); + assert(ex == convert_thousands_sep(L"-1" THOUSANDS_SEP "234" THOUSANDS_SEP "567,89 EUR")); assert(ios.width() == 0); } { // 
negative, showbase, right @@ -459,7 +464,7 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), true, ios, ' ', v); std::wstring ex(str, base(iter)); - assert(ex == convert_thousands_sep(L" -1 234 567,89 EUR")); + assert(ex == convert_thousands_sep(L" -1" THOUSANDS_SEP "234" THOUSANDS_SEP "567,89 EUR")); assert(ios.width() == 0); } } diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_zh_CN.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_zh_CN.pass.cpp index 1aba05992a960b..4d581032d5642b 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_zh_CN.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_zh_CN.pass.cpp @@ -9,7 +9,6 @@ // NetBSD does not support LC_MONETARY at the moment // XFAIL: netbsd -// XFAIL: LIBCXX-AIX-FIXME // XFAIL: LIBCXX-FREEBSD-FIXME // REQUIRES: locale.zh_CN.UTF-8 @@ -120,7 +119,11 @@ int main(int, char**) char str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), false, ios, '*', v); std::string ex(str, base(iter)); +#ifdef _AIX + assert(ex == "-" + currency_symbol + "0.01"); +#else assert(ex == currency_symbol + "-0.01"); +#endif } { // positive, showbase long double v = 123456789; @@ -136,7 +139,11 @@ int main(int, char**) char str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), false, ios, '*', v); std::string ex(str, base(iter)); +#ifdef _AIX + assert(ex == "-" + currency_symbol + "1,234,567.89"); +#else assert(ex == currency_symbol + "-1,234,567.89"); +#endif } { // negative, showbase, left long double v = -123456789; @@ -146,7 +153,11 @@ int main(int, char**) char str[100]; cpp17_output_iterator iter = 
f.put(cpp17_output_iterator(str), false, ios, ' ', v); std::string ex(str, base(iter)); +#ifdef _AIX + assert(ex == "-" + currency_symbol + "1,234,567.89" + currency_symbol_padding); +#else assert(ex == currency_symbol + "-1,234,567.89" + currency_symbol_padding); +#endif assert(ios.width() == 0); } { // negative, showbase, internal @@ -157,7 +168,11 @@ int main(int, char**) char str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), false, ios, ' ', v); std::string ex(str, base(iter)); +#ifdef _AIX + assert(ex == "-" + currency_symbol + currency_symbol_padding + "1,234,567.89"); +#else assert(ex == currency_symbol + "-" + currency_symbol_padding + "1,234,567.89"); +#endif assert(ios.width() == 0); } { // negative, showbase, right @@ -168,7 +183,11 @@ int main(int, char**) char str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), false, ios, ' ', v); std::string ex(str, base(iter)); +#ifdef _AIX + assert(ex == currency_symbol_padding + "-" + currency_symbol + "1,234,567.89"); +#else assert(ex == currency_symbol_padding + currency_symbol + "-1,234,567.89"); +#endif assert(ios.width() == 0); } @@ -217,7 +236,7 @@ int main(int, char**) char str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), true, ios, '*', v); std::string ex(str, base(iter)); -#ifdef TEST_HAS_GLIBC +#if defined(TEST_HAS_GLIBC) || defined(_AIX) assert(ex == "-" + currency_name + "0.01"); #else assert(ex == currency_name + "-0.01"); @@ -237,7 +256,7 @@ int main(int, char**) char str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), true, ios, '*', v); std::string ex(str, base(iter)); -#ifdef TEST_HAS_GLIBC +#if defined(TEST_HAS_GLIBC) || defined(_AIX) assert(ex == "-" + currency_name + "1,234,567.89"); #else assert(ex == currency_name + "-1,234,567.89"); @@ -251,7 +270,7 @@ int main(int, char**) char str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), true, ios, ' ', v); std::string ex(str, base(iter)); 
-#ifdef TEST_HAS_GLIBC +#if defined(TEST_HAS_GLIBC) || defined(_AIX) assert(ex == "-" + currency_name + "1,234,567.89" + currency_name_padding); #else assert(ex == currency_name + "-1,234,567.89" + currency_name_padding); @@ -266,7 +285,7 @@ int main(int, char**) char str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), true, ios, ' ', v); std::string ex(str, base(iter)); -#ifdef TEST_HAS_GLIBC +#if defined(TEST_HAS_GLIBC) || defined(_AIX) assert(ex == "-" + currency_name + currency_name_padding + "1,234,567.89"); #else assert(ex == currency_name + "-" + currency_name_padding + "1,234,567.89"); @@ -281,7 +300,7 @@ int main(int, char**) char str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), true, ios, ' ', v); std::string ex(str, base(iter)); -#ifdef TEST_HAS_GLIBC +#if defined(TEST_HAS_GLIBC) || defined(_AIX) assert(ex == currency_name_padding + "-" + currency_name + "1,234,567.89"); #else assert(ex == currency_name_padding + currency_name + "-1,234,567.89"); @@ -344,7 +363,11 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), false, ios, '*', v); std::wstring ex(str, base(iter)); +# ifdef _AIX + assert(ex == L"-" + currency_symbol + L"0.01"); +# else assert(ex == currency_symbol + L"-0.01"); +# endif } { // positive, showbase long double v = 123456789; @@ -360,7 +383,11 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), false, ios, '*', v); std::wstring ex(str, base(iter)); +# ifdef _AIX + assert(ex == L"-" + currency_symbol + L"1,234,567.89"); +# else assert(ex == currency_symbol + L"-1,234,567.89"); +# endif } { // negative, showbase, left long double v = -123456789; @@ -370,7 +397,11 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), false, ios, ' ', v); std::wstring ex(str, base(iter)); +# ifdef _AIX + assert(ex == L"-" + currency_symbol + L"1,234,567.89 
"); +# else assert(ex == currency_symbol + L"-1,234,567.89 "); +# endif assert(ios.width() == 0); } { // negative, showbase, internal @@ -381,7 +412,11 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), false, ios, ' ', v); std::wstring ex(str, base(iter)); +# ifdef _AIX + assert(ex == L"-" + currency_symbol + L" 1,234,567.89"); +# else assert(ex == currency_symbol + L"- 1,234,567.89"); +# endif assert(ios.width() == 0); } { // negative, showbase, right @@ -392,7 +427,11 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), false, ios, ' ', v); std::wstring ex(str, base(iter)); +# ifdef _AIX + assert(ex == L" -" + currency_symbol + L"1,234,567.89"); +# else assert(ex == L" " + currency_symbol + L"-1,234,567.89"); +# endif assert(ios.width() == 0); } @@ -441,7 +480,7 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), true, ios, '*', v); std::wstring ex(str, base(iter)); -#ifdef TEST_HAS_GLIBC +# if defined(TEST_HAS_GLIBC) || defined(_AIX) assert(ex == L"-" + currency_name + L"0.01"); #else assert(ex == currency_name + L"-0.01"); @@ -461,7 +500,7 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), true, ios, '*', v); std::wstring ex(str, base(iter)); -#ifdef TEST_HAS_GLIBC +# if defined(TEST_HAS_GLIBC) || defined(_AIX) assert(ex == L"-" + currency_name + L"1,234,567.89"); #else assert(ex == currency_name + L"-1,234,567.89"); @@ -475,7 +514,7 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), true, ios, ' ', v); std::wstring ex(str, base(iter)); -#ifdef TEST_HAS_GLIBC +# if defined(TEST_HAS_GLIBC) || defined(_AIX) assert(ex == L"-" + currency_name + L"1,234,567.89" + currency_name_padding); #else assert(ex == currency_name + L"-1,234,567.89" + currency_name_padding); @@ -490,7 +529,7 @@ int main(int, 
char**) wchar_t str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), true, ios, ' ', v); std::wstring ex(str, base(iter)); -#ifdef TEST_HAS_GLIBC +# if defined(TEST_HAS_GLIBC) || defined(_AIX) assert(ex == L"-" + currency_name + currency_name_padding + L"1,234,567.89"); #else assert(ex == currency_name + L"-" + currency_name_padding + L"1,234,567.89"); @@ -505,7 +544,7 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), true, ios, ' ', v); std::wstring ex(str, base(iter)); -#ifdef TEST_HAS_GLIBC +# if defined(TEST_HAS_GLIBC) || defined(_AIX) assert(ex == currency_name_padding + L"-" + currency_name + L"1,234,567.89"); #else assert(ex == currency_name_padding + currency_name + L"-1,234,567.89"); From a5985ca51dd7e0759d65fac9cb2b6a4448ebc404 Mon Sep 17 00:00:00 2001 From: klensy Date: Fri, 14 Jun 2024 18:16:02 +0300 Subject: [PATCH 110/155] [mlir][test] Fix filecheck annotation typos [2/n] (#93476) Few more fixes previous: https://github.com/llvm/llvm-project/pull/92897 pr Issues from https://github.com/llvm/llvm-project/issues/93154 unfixed. 
--------- Co-authored-by: klensy --- mlir/test/Analysis/test-liveness.mlir | 8 ++--- .../Conversion/GPUToNVVM/gpu-to-nvvm.mlir | 2 +- .../convert-dynamic-memref-ops.mlir | 4 +-- .../Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir | 10 +++--- mlir/test/Dialect/AMX/roundtrip.mlir | 2 +- mlir/test/Dialect/Affine/loop-fusion-3.mlir | 2 +- mlir/test/Dialect/Affine/unroll.mlir | 2 +- .../ArmSME/tile-allocation-liveness.mlir | 2 +- .../Dialect/Linalg/drop-unit-extent-dims.mlir | 2 +- .../Linalg/fusion-elementwise-ops.mlir | 2 +- mlir/test/Dialect/Linalg/transform-ops.mlir | 6 ++-- mlir/test/Dialect/OpenMP/ops.mlir | 2 +- .../Dialect/SparseTensor/sparse_reshape.mlir | 12 +++---- mlir/test/Dialect/Tensor/canonicalize.mlir | 4 +-- mlir/test/Dialect/Tosa/canonicalize.mlir | 2 +- mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir | 6 ++-- .../vector-dropleadunitdim-transforms.mlir | 2 +- mlir/test/IR/parser.mlir | 30 ++++++++--------- mlir/test/Target/LLVMIR/llvmir.mlir | 28 ++++++++++------ mlir/test/Target/LLVMIR/openmp-llvm.mlir | 32 +++++++++---------- mlir/test/Transforms/canonicalize.mlir | 2 +- mlir/test/mlir-cpu-runner/copy.mlir | 2 +- mlir/test/mlir-tblgen/bytecode-reserved.td | 2 +- mlir/test/python/ir/attributes.py | 4 +-- 24 files changed, 89 insertions(+), 81 deletions(-) diff --git a/mlir/test/Analysis/test-liveness.mlir b/mlir/test/Analysis/test-liveness.mlir index 61a1e5fffa8884..47805e5d95dae3 100644 --- a/mlir/test/Analysis/test-liveness.mlir +++ b/mlir/test/Analysis/test-liveness.mlir @@ -40,7 +40,7 @@ func.func @func_simpleBranch(%arg0: i32, %arg1 : i32) -> i32 { // CHECK-SAME: arg0@0 arg1@0 val_2 // CHECK: return // CHECK-SAME: val_2 - // CHECK-NEXT EndCurrentlyLive + // CHECK-NEXT:EndCurrentlyLive %result = arith.addi %arg0, %arg1 : i32 return %result : i32 } @@ -197,9 +197,9 @@ func.func @func_ranges(%cond : i1, %arg1 : i32, %arg2 : i32, %arg3 : i32) -> i32 // CHECK-NEXT: %2 = arith.addi // CHECK-NEXT: %3 = arith.muli // CHECK-NEXT: val_7 - // CHECK-NEXT %2 = 
arith.addi - // CHECK-NEXT %3 = arith.muli - // CHECK-NEXT %4 = arith.muli + // CHECK-NEXT: %2 = arith.addi + // CHECK-NEXT: %3 = arith.muli + // CHECK-NEXT: %4 = arith.muli // CHECK: val_8 // CHECK-NEXT: %3 = arith.muli // CHECK-NEXT: %4 = arith.muli diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir index 8877ee083286b4..cd701095d8e649 100644 --- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir +++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir @@ -638,7 +638,7 @@ gpu.module @test_module_30 { } // CHECK-LABEL: @subgroup_reduce_xor gpu.func @subgroup_reduce_xor(%arg0 : i32) { - // CHECK nvvm.redux.sync xor {{.*}} + // CHECK: nvvm.redux.sync xor {{.*}} %result = gpu.subgroup_reduce xor %arg0 uniform {} : (i32) -> (i32) gpu.return } diff --git a/mlir/test/Conversion/MemRefToLLVM/convert-dynamic-memref-ops.mlir b/mlir/test/Conversion/MemRefToLLVM/convert-dynamic-memref-ops.mlir index 609fcb10b992c6..9d8f4266adf27f 100644 --- a/mlir/test/Conversion/MemRefToLLVM/convert-dynamic-memref-ops.mlir +++ b/mlir/test/Conversion/MemRefToLLVM/convert-dynamic-memref-ops.mlir @@ -378,14 +378,14 @@ func.func @memref_cast_ranked_to_unranked(%arg : memref<42x2x?xf32>) { // CHECK-DAG: %[[p:.*]] = llvm.alloca %[[c]] x !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)> : (i64) -> !llvm.ptr // CHECK-DAG: llvm.store %{{.*}}, %[[p]] : !llvm.struct<(ptr, ptr, i64, array<3 x i64>, array<3 x i64>)>, !llvm.ptr // CHECK-DAG: %[[r:.*]] = llvm.mlir.constant(3 : index) : i64 -// CHECK : llvm.mlir.undef : !llvm.struct<(i64, ptr)> +// CHECK: llvm.mlir.undef : !llvm.struct<(i64, ptr)> // CHECK-DAG: llvm.insertvalue %[[r]], %{{.*}}[0] : !llvm.struct<(i64, ptr)> // CHECK-DAG: llvm.insertvalue %[[p]], %{{.*}}[1] : !llvm.struct<(i64, ptr)> // CHECK32-DAG: %[[c:.*]] = llvm.mlir.constant(1 : index) : i64 // CHECK32-DAG: %[[p:.*]] = llvm.alloca %[[c]] x !llvm.struct<(ptr, ptr, i32, array<3 x i32>, array<3 x i32>)> : (i64) -> 
!llvm.ptr // CHECK32-DAG: llvm.store %{{.*}}, %[[p]] : !llvm.struct<(ptr, ptr, i32, array<3 x i32>, array<3 x i32>)>, !llvm.ptr // CHECK32-DAG: %[[r:.*]] = llvm.mlir.constant(3 : index) : i32 -// CHECK32 : llvm.mlir.undef : !llvm.struct<(i32, ptr)> +// CHECK32: llvm.mlir.undef : !llvm.struct<(i32, ptr)> // CHECK32-DAG: llvm.insertvalue %[[r]], %{{.*}}[0] : !llvm.struct<(i32, ptr)> // CHECK32-DAG: llvm.insertvalue %[[p]], %{{.*}}[1] : !llvm.struct<(i32, ptr)> %0 = memref.cast %arg : memref<42x2x?xf32> to memref<*xf32> diff --git a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir index 1b046d32f163af..73d23679152848 100644 --- a/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir +++ b/mlir/test/Conversion/NVGPUToNVVM/nvgpu-to-nvvm.mlir @@ -11,7 +11,7 @@ func.func @m16n8k16_fp16(%arg0: vector<4x2xf16>, %arg1: vector<2x2xf16>, %arg2: // CHECK: llvm.extractvalue %{{.*}}[1] : !llvm.array<2 x vector<2xf16>> // CHECK: llvm.extractvalue %{{.*}}[0] : !llvm.array<2 x vector<2xf16>> // CHECK: llvm.extractvalue %{{.*}}[1] : !llvm.array<2 x vector<2xf16>> - // CHECK-NOT llvm.extractvalue + // CHECK-NOT: llvm.extractvalue // CHECK: [[d:%.+]] = nvvm.mma.sync // CHECK-SAME: shape = #nvvm.shape %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [16, 8, 16]} : (vector<4x2xf16>, vector<2x2xf16>, vector<2x2xf16>) -> vector<2x2xf16> @@ -56,7 +56,7 @@ func.func @m16n8k8_fp16(%arg0: vector<2x2xf16>, %arg1: vector<1x2xf16>, %arg2: v // CHECK: llvm.extractvalue %{{.*}}[0] : !llvm.array<1 x vector<2xf16>> // CHECK: llvm.extractvalue %{{.*}}[0] : !llvm.array<2 x vector<2xf16>> // CHECK: llvm.extractvalue %{{.*}}[1] : !llvm.array<2 x vector<2xf16>> - // CHECK-NOT llvm.extractvalue + // CHECK-NOT: llvm.extractvalue // CHECK: [[d:%.+]] = nvvm.mma.sync // CHECK-SAME: shape = #nvvm.shape %d = nvgpu.mma.sync (%arg0, %arg1, %arg2) {mmaShape = [16, 8, 8]} : (vector<2x2xf16>, vector<1x2xf16>, vector<2x2xf16>) -> vector<2x2xf16> @@ -360,7 
+360,7 @@ func.func @mma_sp_sync_f16_16832(%arg0: vector<4x2xf16>, // CHECK: llvm.extractvalue %{{.*}}[0] : !llvm.array<2 x vector<2xf16>> // CHECK: llvm.extractvalue %{{.*}}[1] : !llvm.array<2 x vector<2xf16>> - // CHECK-NOT llvm.extractvalue + // CHECK-NOT: llvm.extractvalue // CHECK: %[[sparseMetadata:.+]] = llvm.bitcast %{{.+}} : vector<2xi16> to i32 @@ -396,7 +396,7 @@ func.func @mma_sp_sync_f16_16816(%arg0: vector<2x2xf16>, // CHECK: llvm.extractvalue %{{.*}}[0] : !llvm.array<2 x vector<2xf16>> // CHECK: llvm.extractvalue %{{.*}}[1] : !llvm.array<2 x vector<2xf16>> - // CHECK-NOT llvm.extractvalue + // CHECK-NOT: llvm.extractvalue // CHECK: %[[sparseMetadata:.+]] = llvm.bitcast %{{.+}} : vector<2xi16> to i32 @@ -455,7 +455,7 @@ func.func @mma_sp_sync_i8_16864(%arg0: vector<4x4xi8>, // CHECK: llvm.extractvalue %{{.*}}[{{.*}}] : !llvm.array<2 x vector<2xi32>> // CHECK: llvm.extractvalue %{{.*}}[{{.*}}] : !llvm.array<2 x vector<2xi32>> - // CHECK-NOT llvm.extractvalue + // CHECK-NOT: llvm.extractvalue // CHECK: %[[sparseMetadata:.+]] = llvm.bitcast %{{.+}} : vector<2xi16> to i32 diff --git a/mlir/test/Dialect/AMX/roundtrip.mlir b/mlir/test/Dialect/AMX/roundtrip.mlir index 8018962f1092d8..f2ac5e47f6c357 100644 --- a/mlir/test/Dialect/AMX/roundtrip.mlir +++ b/mlir/test/Dialect/AMX/roundtrip.mlir @@ -2,7 +2,7 @@ // CHECK-LABEL: tzero // CHECK: amx.tile_zero : vector<16x16xbf16> -// CHECK amx.tile_store %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} : memref, vector<16x16xbf16> +// CHECK: amx.tile_store %{{.*}}[%{{.*}}, %{{.*}}], %{{.*}} : memref, vector<16x16xbf16> func.func @tzero(%arg0: memref) { %0 = arith.constant 0 : index %1 = amx.tile_zero : vector<16x16xbf16> diff --git a/mlir/test/Dialect/Affine/loop-fusion-3.mlir b/mlir/test/Dialect/Affine/loop-fusion-3.mlir index 37ad178235dc9c..6bc4feadb8c98f 100644 --- a/mlir/test/Dialect/Affine/loop-fusion-3.mlir +++ b/mlir/test/Dialect/Affine/loop-fusion-3.mlir @@ -889,7 +889,7 @@ func.func @reduce_add_non_innermost(%arg0: 
memref<64x64xf32, 1>, %arg1: memref<1 // CHECK: affine.for // CHECK-NEXT: affine.for // CHECK-NEXT: affine.for -// CHECK affine.for +// CHECK: affine.for diff --git a/mlir/test/Dialect/Affine/unroll.mlir b/mlir/test/Dialect/Affine/unroll.mlir index eb90d10362f9b4..e398c3fe2011dd 100644 --- a/mlir/test/Dialect/Affine/unroll.mlir +++ b/mlir/test/Dialect/Affine/unroll.mlir @@ -616,7 +616,7 @@ func.func @loop_nest_non_trivial_multiple_upper_bound_alt(%M : index, %N : index // UNROLL-BY-4-NEXT: "foo" // UNROLL-BY-4-NEXT: "foo" // UNROLL-BY-4-NEXT: "foo" - // UNROLL-BY-4-NOT for + // UNROLL-BY-4-NOT: for // UNROLL-BY-4: return return } diff --git a/mlir/test/Dialect/ArmSME/tile-allocation-liveness.mlir b/mlir/test/Dialect/ArmSME/tile-allocation-liveness.mlir index fe4c005c7c42ff..9c22b29ac22e77 100644 --- a/mlir/test/Dialect/ArmSME/tile-allocation-liveness.mlir +++ b/mlir/test/Dialect/ArmSME/tile-allocation-liveness.mlir @@ -406,7 +406,7 @@ func.func @avoidable_spill(%a: vector<[4]xf32>, %b: vector<[4]xf32>, %c: vector< // CHECK: arm_sme.get_tile {tile_id = 2 : i32} : vector<[4]x[4]xf32> // CHECK: arm_sme.get_tile {tile_id = 3 : i32} : vector<[4]x[4]xf32> // CHECK: arm_sme.move_vector_to_tile_slice {{.*}} {tile_id = 0 : i32} : vector<[4]xf32> into vector<[4]x[4]xf32> -// CHECK-NOT tile_id = 16 +// CHECK-NOT: tile_id = 16 func.func @cond_branch_with_backedge(%slice: vector<[4]xf32>) { %tileA = arm_sme.get_tile : vector<[4]x[4]xf32> %tileB = arm_sme.get_tile : vector<[4]x[4]xf32> diff --git a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir index 8f9b12880adcf7..9a00b19aae400f 100644 --- a/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir +++ b/mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir @@ -881,7 +881,7 @@ func.func @input_stays_same(%arg0 : memref>, %arg1 // CHECK: func @input_stays_same( // CHECK-SAME: %[[ARG0:.*]]: memref>, // CHECK-SAME: %[[ARG1:.*]]: f32, %[[ARG2:.*]]: memref) -// CHECK-SAME -> memref 
{ +// CHECK-SAME: -> memref { // CHECK: %[[OUT:.*]] = memref.collapse_shape %[[ARG2]] {{\[}}[0, 1], [2, 3], [4]] // CHECK-SAME: : memref into memref // CHECK: linalg.generic diff --git a/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir b/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir index e45a9fbb1052c1..28e1291bce1fad 100644 --- a/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir +++ b/mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir @@ -532,7 +532,7 @@ func.func @scalar_generic_fusion // CHECK-SAME: ins(%[[ARG1]] : tensor) // CHECK: tensor.extract %[[ARG0]] // CHECK: linalg.yield -// CHECK return %[[T0]] +// CHECK: return %[[T0]] // ----- diff --git a/mlir/test/Dialect/Linalg/transform-ops.mlir b/mlir/test/Dialect/Linalg/transform-ops.mlir index 733f305f850c60..c152fc887a3a39 100644 --- a/mlir/test/Dialect/Linalg/transform-ops.mlir +++ b/mlir/test/Dialect/Linalg/transform-ops.mlir @@ -2,7 +2,7 @@ transform.sequence failures(propagate) { ^bb1(%arg0: !transform.any_op): - // CHECK %{{.*}}, %{{.*}}:2 = transform.structured.tile + // CHECK: %{{.*}}, %{{.*}}:2 = transform.structured.tile %0, %1:2 = transform.structured.tile_using_for %arg0 tile_sizes [2, 0, 3] : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) } @@ -10,9 +10,9 @@ transform.sequence failures(propagate) { // and parsing with and without use of the optional `interchange` Attribute. 
transform.sequence failures(propagate) { ^bb1(%arg0: !transform.any_op): - // CHECK %{{.*}}, %{{.*}}:2 = transform.structured.tile %arg0 [2, 0, 3] interchange = [2, 1] {test_attr1 = 1 : i64, test_attr2} + // CHECK: %{{.*}}, %{{.*}}:2 = transform.structured.tile_using_for %arg0 tile_sizes [2, 0, 3] interchange = [2, 1] {test_attr1 = 1 : i64, test_attr2} %0, %1:2 = transform.structured.tile_using_for %arg0 tile_sizes [2, 0, 3] interchange = [2, 1] {test_attr1 = 1 : i64, test_attr2}: (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) - // CHECK %{{.*}}, %{{.*}}:2 = transform.structured.tile %arg0 [4, 5, 3] {test_attr3 = 1 : i64, test_attr4} + // CHECK: %{{.*}}, %{{.*}}:2 = transform.structured.tile_using_for %tiled_linalg_op tile_sizes [0, 5, 3] {test_attr3 = 1 : i64, test_attr4} %2, %3:2 = transform.structured.tile_using_for %0 tile_sizes [0, 5, 3] {test_attr3 = 1 : i64, test_attr4}: (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op) } diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir index caf25a3cb59f0c..af66d0c65dab87 100644 --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -142,7 +142,7 @@ func.func @omp_parallel_pretty(%data_var : memref, %if_cond : i1, %num_thre omp.terminator } - // CHECK omp.parallel if(%{{.*}}) num_threads(%{{.*}} : i32) private(%{{.*}} : memref) proc_bind(close) + // CHECK: omp.parallel if(%{{.*}}) num_threads(%{{.*}} : i32) proc_bind(close) omp.parallel num_threads(%num_threads : i32) if(%if_cond: i1) proc_bind(close) { omp.terminator } diff --git a/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir b/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir index c96f9c31443db3..492dcd05dc909f 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir @@ -76,9 +76,9 @@ func.func @sparse_expand(%arg0: tensor<100xf64, #SparseVector>) -> tensor<10x10x // CHECK: 
%[[T:.*]] = arith.muli %[[SI0]], %[[C10]] : index // CHECK: %[[DI:.*]] = arith.addi %[[T]], %[[SI1]] : index // CHECK: %[[R1:.*]] = tensor.insert %[[SV]] into %[[A1]]{{\[}}%[[DI]]] -// CHECK scf.yield %[[R1]] -// CHECK } -// CHECK scf.yield %[[RET_1]] +// CHECK: scf.yield %[[R1]] +// CHECK: } +// CHECK: scf.yield %[[RET_1]] // CHECK: } // CHECK: %[[NT1:.*]] = sparse_tensor.load %[[RET]] hasInserts // CHECK-NOT: sparse_tensor.convert @@ -170,9 +170,9 @@ func.func @dynamic_sparse_expand(%arg0: tensor, %sz0: inde // CHECK: %[[T4:.*]] = arith.muli %[[SI1]], %[[T3]] : index // CHECK: %[[DI:.*]] = arith.addi %[[T2]], %[[T4]] : index // CHECK: %[[NT:.*]] = tensor.insert %[[SV]] into %[[R1]]{{\[}}%[[DI]]] -// CHECK scf.yield %[[NT]] -// CHECK } -// CHECK scf.yield %[[RET_1]] +// CHECK: scf.yield %[[NT]] +// CHECK: } +// CHECK: scf.yield %[[RET_1]] // CHECK: } // CHECK: %[[NT1:.*]] = sparse_tensor.load %[[RET]] hasInserts // CHECK-NOT: sparse_tensor.convert diff --git a/mlir/test/Dialect/Tensor/canonicalize.mlir b/mlir/test/Dialect/Tensor/canonicalize.mlir index df2bea08577e22..baa205b9f42c64 100644 --- a/mlir/test/Dialect/Tensor/canonicalize.mlir +++ b/mlir/test/Dialect/Tensor/canonicalize.mlir @@ -1120,8 +1120,8 @@ func.func @compose_expand_of_expand_of_zero_dim(%arg0 : tensor) // CHECK-LABEL: func.func @collapse_of_cast( // CHECK-SAME: %[[IN:.*]]: tensor<8x12x32xf32>) -> tensor { // CHECK-NEXT: %[[COLLAPSE:.*]] = tensor.collapse_shape %[[IN]] {{\[}}[0, 1], [2]] : tensor<8x12x32xf32> into tensor<96x32xf32> -// CHECK-NEXT %[[CAST:.*]] = tensor.cast %[[COLLAPSE]] : tensor<96x32xf32> to tensor -// CHECK-NEXT return %[[CAST]] : tensor +// CHECK-NEXT: %[[CAST:.*]] = tensor.cast %[[COLLAPSE]] : tensor<96x32xf32> to tensor +// CHECK-NEXT: return %[[CAST]] : tensor func.func @collapse_of_cast(%t: tensor<8x12x32xf32>) -> tensor { %0 = tensor.cast %t : tensor<8x12x32xf32> to tensor %1 = tensor.collapse_shape %0 [[0, 1], [2]] : tensor into tensor diff --git 
a/mlir/test/Dialect/Tosa/canonicalize.mlir b/mlir/test/Dialect/Tosa/canonicalize.mlir index 077d79ad320c8d..accc792c8f2aca 100644 --- a/mlir/test/Dialect/Tosa/canonicalize.mlir +++ b/mlir/test/Dialect/Tosa/canonicalize.mlir @@ -684,7 +684,7 @@ func.func @canonicalize_concat_slice_on_non_concat_axis(%arg0 : tensor<1x12x12xf // ----- -// CHECK-LABEL +// CHECK-LABEL: @fold_log_exp func.func @fold_log_exp(%arg0: tensor) -> tensor { // CHECK: return %arg{{.*}} : tensor %0 = tosa.exp %arg0 : (tensor) -> tensor diff --git a/mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir b/mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir index 2be120439ed68e..5f8afa57bc7478 100644 --- a/mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir +++ b/mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir @@ -222,13 +222,13 @@ func.func @test_binary_i32(%arg0 : tensor<4xi32>, %arg1 : tensor) -> () { // CHECK-LABEL: @test_binary_i1 func.func @test_binary_i1(%arg0 : tensor<4xi1>, %arg1 : tensor) -> () { - // CHECK tosa.logical_and %arg0, %arg1 : (tensor<4xi1>, tensor) -> tensor<4xi1> + // CHECK: tosa.logical_and %arg0, %arg1 : (tensor<4xi1>, tensor) -> tensor<4xi1> %0 = tosa.logical_and %arg0, %arg1 : (tensor<4xi1>, tensor) -> tensor<*xi1> - // CHECK tosa.logical_or %arg0, %arg1 : (tensor<4xi1>, tensor) -> tensor<4xi1> + // CHECK: tosa.logical_or %arg0, %arg1 : (tensor<4xi1>, tensor) -> tensor<4xi1> %1 = tosa.logical_or %arg0, %arg1 : (tensor<4xi1>, tensor) -> tensor<*xi1> - // CHECK tosa.logical_xor %arg0, %arg1 : (tensor<4xi1>, tensor) -> tensor<*4i1> + // CHECK: tosa.logical_xor %arg0, %arg1 : (tensor<4xi1>, tensor) -> tensor<4xi1> %2 = tosa.logical_xor %arg0, %arg1 : (tensor<4xi1>, tensor) -> tensor<*xi1> return diff --git a/mlir/test/Dialect/Vector/vector-dropleadunitdim-transforms.mlir b/mlir/test/Dialect/Vector/vector-dropleadunitdim-transforms.mlir index 252aeb0c15cbeb..9526d610e490e7 100644 --- a/mlir/test/Dialect/Vector/vector-dropleadunitdim-transforms.mlir +++ 
b/mlir/test/Dialect/Vector/vector-dropleadunitdim-transforms.mlir @@ -241,7 +241,7 @@ func.func @cast_away_contraction_leading_one_dims_nonleadingunitdim_rank4_acctra // ----- // CHECK-LABEL: func.func @cast_away_contraction_does_not_transpose_leading_unit_dims -// CHECK-NOT vector.transpose +// CHECK-NOT: vector.transpose // CHECK: vector.contract func.func @cast_away_contraction_does_not_transpose_leading_unit_dims(%lhs: vector<1x1x8xi32>, %rhs: vector<1x8x8xi32>, diff --git a/mlir/test/IR/parser.mlir b/mlir/test/IR/parser.mlir index bcc146ea0101b7..c8c4025945f7ac 100644 --- a/mlir/test/IR/parser.mlir +++ b/mlir/test/IR/parser.mlir @@ -342,15 +342,15 @@ func.func @loop_bounds(%N : index) { // CHECK-LABEL: func @ifinst(%{{.*}}: index) { func.func @ifinst(%N: index) { - %c = arith.constant 200 : index // CHECK %{{.*}} = arith.constant 200 - affine.for %i = 1 to 10 { // CHECK affine.for %{{.*}} = 1 to 10 { - affine.if #set0(%i)[%N, %c] { // CHECK affine.if #set0(%{{.*}})[%{{.*}}, %{{.*}}] { + %c = arith.constant 200 : index // CHECK: %{{.*}} = arith.constant 200 + affine.for %i = 1 to 10 { // CHECK: affine.for %{{.*}} = 1 to 10 { + affine.if #set0(%i)[%N, %c] { // CHECK: affine.if #set(%{{.*}})[%{{.*}}, %{{.*}}] { %x = arith.constant 1 : i32 // CHECK: %{{.*}} = arith.constant 1 : i32 %y = "add"(%x, %i) : (i32, index) -> i32 // CHECK: %{{.*}} = "add"(%{{.*}}, %{{.*}}) : (i32, index) -> i32 %z = "mul"(%y, %y) : (i32, i32) -> i32 // CHECK: %{{.*}} = "mul"(%{{.*}}, %{{.*}}) : (i32, i32) -> i32 } else { // CHECK } else { - affine.if affine_set<(i)[N] : (i - 2 >= 0, 4 - i >= 0)>(%i)[%N] { // CHECK affine.if (#set1(%{{.*}})[%{{.*}}]) { + affine.if affine_set<(i)[N] : (i - 2 >= 0, 4 - i >= 0)>(%i)[%N] { // CHECK: affine.if #set1(%{{.*}})[%{{.*}}] { // CHECK: %{{.*}} = arith.constant 1 : index %u = arith.constant 1 : index // CHECK: %{{.*}} = affine.apply #map{{.*}}(%{{.*}}, %{{.*}})[%{{.*}}] @@ -358,24 +358,24 @@ func.func @ifinst(%N: index) { } else { // CHECK } else { %v 
= arith.constant 3 : i32 // %c3_i32 = arith.constant 3 : i32 } - } // CHECK } - } // CHECK } - return // CHECK return -} // CHECK } + } // CHECK: } + } // CHECK: } + return // CHECK: return +} // CHECK:} // CHECK-LABEL: func @simple_ifinst(%{{.*}}: index) { func.func @simple_ifinst(%N: index) { - %c = arith.constant 200 : index // CHECK %{{.*}} = arith.constant 200 - affine.for %i = 1 to 10 { // CHECK affine.for %{{.*}} = 1 to 10 { - affine.if #set0(%i)[%N, %c] { // CHECK affine.if #set0(%{{.*}})[%{{.*}}, %{{.*}}] { + %c = arith.constant 200 : index // CHECK: %{{.*}} = arith.constant 200 + affine.for %i = 1 to 10 { // CHECK: affine.for %{{.*}} = 1 to 10 { + affine.if #set0(%i)[%N, %c] { // CHECK: affine.if #set(%{{.*}})[%{{.*}}, %{{.*}}] { %x = arith.constant 1 : i32 // CHECK: %{{.*}} = arith.constant 1 : i32 %y = "add"(%x, %i) : (i32, index) -> i32 // CHECK: %{{.*}} = "add"(%{{.*}}, %{{.*}}) : (i32, index) -> i32 %z = "mul"(%y, %y) : (i32, i32) -> i32 // CHECK: %{{.*}} = "mul"(%{{.*}}, %{{.*}}) : (i32, i32) -> i32 - } // CHECK } - } // CHECK } - return // CHECK return -} // CHECK } + } // CHECK: } + } // CHECK: } + return // CHECK: return +} // CHECK:} // CHECK-LABEL: func @attributes() { func.func @attributes() { diff --git a/mlir/test/Target/LLVMIR/llvmir.mlir b/mlir/test/Target/LLVMIR/llvmir.mlir index 7efc3ab7faba24..7116f3b64d7f50 100644 --- a/mlir/test/Target/LLVMIR/llvmir.mlir +++ b/mlir/test/Target/LLVMIR/llvmir.mlir @@ -2330,39 +2330,47 @@ llvm.func @streaming_compatible_func() attributes {arm_streaming_compatible} { // ----- // CHECK-LABEL: @new_za_func -// CHECK: #[[ATTR:[0-9]*]] +// CHECK-SAME: #[[ATTR:[0-9]*]] llvm.func @new_za_func() attributes {arm_new_za} { llvm.return } -// CHECK #[[ATTR]] = { "aarch64_new_za" } +// CHECK: #[[ATTR]] = { "aarch64_new_za" } + +// ----- // CHECK-LABEL: @in_za_func -// CHECK: #[[ATTR:[0-9]*]] +// CHECK-SAME: #[[ATTR:[0-9]*]] llvm.func @in_za_func() attributes {arm_in_za } { llvm.return } -// CHECK #[[ATTR]] = { 
"aarch64_in_za" } +// CHECK: #[[ATTR]] = { "aarch64_in_za" } + +// ----- // CHECK-LABEL: @out_za_func -// CHECK: #[[ATTR:[0-9]*]] +// CHECK-SAME: #[[ATTR:[0-9]*]] llvm.func @out_za_func() attributes {arm_out_za } { llvm.return } -// CHECK #[[ATTR]] = { "aarch64_out_za" } +// CHECK: #[[ATTR]] = { "aarch64_out_za" } + +// ----- // CHECK-LABEL: @inout_za_func -// CHECK: #[[ATTR:[0-9]*]] +// CHECK-SAME: #[[ATTR:[0-9]*]] llvm.func @inout_za_func() attributes {arm_inout_za } { llvm.return } -// CHECK #[[ATTR]] = { "aarch64_inout_za" } +// CHECK: #[[ATTR]] = { "aarch64_inout_za" } + +// ----- // CHECK-LABEL: @preserves_za_func -// CHECK: #[[ATTR:[0-9]*]] +// CHECK-SAME: #[[ATTR:[0-9]*]] llvm.func @preserves_za_func() attributes {arm_preserves_za} { llvm.return } -// CHECK #[[ATTR]] = { "aarch64_preserves_za" } +// CHECK: #[[ATTR]] = { "aarch64_preserves_za" } // ----- diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir index 8654899efefd29..a1cc76f9ab7708 100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -549,7 +549,7 @@ llvm.func @test_omp_wsloop_auto(%lb : i64, %ub : i64, %step : i64) -> () { // CHECK: call void @__kmpc_dispatch_init_8u // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 - // CHECK br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} + // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} llvm.call @body(%iv) : (i64) -> () omp.yield } @@ -568,7 +568,7 @@ llvm.func @test_omp_wsloop_runtime(%lb : i64, %ub : i64, %step : i64) -> () { // CHECK: call void @__kmpc_dispatch_init_8u // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 - // CHECK br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} + // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label 
%omp_loop.exit{{.*}} llvm.call @body(%iv) : (i64) -> () omp.yield } @@ -587,7 +587,7 @@ llvm.func @test_omp_wsloop_guided(%lb : i64, %ub : i64, %step : i64) -> () { // CHECK: call void @__kmpc_dispatch_init_8u // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 - // CHECK br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} + // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} llvm.call @body(%iv) : (i64) -> () omp.yield } @@ -606,7 +606,7 @@ llvm.func @test_omp_wsloop_dynamic_nonmonotonic(%lb : i64, %ub : i64, %step : i6 // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741859 // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 - // CHECK br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} + // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} llvm.call @body(%iv) : (i64) -> () omp.yield } @@ -625,7 +625,7 @@ llvm.func @test_omp_wsloop_dynamic_monotonic(%lb : i64, %ub : i64, %step : i64) // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 536870947 // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 - // CHECK br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} + // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} llvm.call @body(%iv) : (i64) -> () omp.yield } @@ -644,7 +644,7 @@ llvm.func @test_omp_wsloop_runtime_simd(%lb : i64, %ub : i64, %step : i64) -> () // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741871 // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 - // CHECK br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} + // CHECK: br i1 
%[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} llvm.call @body(%iv) : (i64) -> () omp.yield } @@ -663,7 +663,7 @@ llvm.func @test_omp_wsloop_guided_simd(%lb : i64, %ub : i64, %step : i64) -> () // CHECK: call void @__kmpc_dispatch_init_8u(ptr @{{.*}}, i32 %{{.*}}, i32 1073741870 // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 - // CHECK br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} + // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} llvm.call @body(%iv) : (i64) -> () omp.yield } @@ -835,7 +835,7 @@ llvm.func @test_omp_wsloop_ordered(%lb : i64, %ub : i64, %step : i64) -> () { // CHECK: call void @__kmpc_dispatch_fini_8u // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 - // CHECK br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} + // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} llvm.call @body(%iv) : (i64) -> () omp.yield } @@ -855,7 +855,7 @@ llvm.func @test_omp_wsloop_static_ordered(%lb : i64, %ub : i64, %step : i64) -> // CHECK: call void @__kmpc_dispatch_fini_8u // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 - // CHECK br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} + // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} llvm.call @body(%iv) : (i64) -> () omp.yield } @@ -876,7 +876,7 @@ llvm.func @test_omp_wsloop_static_chunk_ordered(%lb : i32, %ub : i32, %step : i3 // CHECK: call void @__kmpc_dispatch_fini_4u // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_4u // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 - // CHECK br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} + // CHECK: br i1 %[[cond]], label 
%omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} llvm.call @body(%iv) : (i32) -> () omp.yield } @@ -896,7 +896,7 @@ llvm.func @test_omp_wsloop_dynamic_ordered(%lb : i64, %ub : i64, %step : i64) -> // CHECK: call void @__kmpc_dispatch_fini_8u // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 - // CHECK br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} + // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} llvm.call @body(%iv) : (i64) -> () omp.yield } @@ -916,7 +916,7 @@ llvm.func @test_omp_wsloop_auto_ordered(%lb : i64, %ub : i64, %step : i64) -> () // CHECK: call void @__kmpc_dispatch_fini_8u // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 - // CHECK br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} + // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} llvm.call @body(%iv) : (i64) -> () omp.yield } @@ -936,7 +936,7 @@ llvm.func @test_omp_wsloop_runtime_ordered(%lb : i64, %ub : i64, %step : i64) -> // CHECK: call void @__kmpc_dispatch_fini_8u // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 - // CHECK br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} + // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} llvm.call @body(%iv) : (i64) -> () omp.yield } @@ -956,7 +956,7 @@ llvm.func @test_omp_wsloop_guided_ordered(%lb : i64, %ub : i64, %step : i64) -> // CHECK: call void @__kmpc_dispatch_fini_8u // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 - // CHECK br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} + // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} llvm.call @body(%iv) : 
(i64) -> () omp.yield } @@ -976,7 +976,7 @@ llvm.func @test_omp_wsloop_dynamic_nonmonotonic_ordered(%lb : i64, %ub : i64, %s // CHECK: call void @__kmpc_dispatch_fini_8u // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 - // CHECK br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} + // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} llvm.call @body(%iv) : (i64) -> () omp.yield } @@ -996,7 +996,7 @@ llvm.func @test_omp_wsloop_dynamic_monotonic_ordered(%lb : i64, %ub : i64, %step // CHECK: call void @__kmpc_dispatch_fini_8u // CHECK: %[[continue:.*]] = call i32 @__kmpc_dispatch_next_8u // CHECK: %[[cond:.*]] = icmp ne i32 %[[continue]], 0 - // CHECK br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} + // CHECK: br i1 %[[cond]], label %omp_loop.header{{.*}}, label %omp_loop.exit{{.*}} llvm.call @body(%iv) : (i64) -> () omp.yield } diff --git a/mlir/test/Transforms/canonicalize.mlir b/mlir/test/Transforms/canonicalize.mlir index d2c2c12d323892..b34b87610a0e4d 100644 --- a/mlir/test/Transforms/canonicalize.mlir +++ b/mlir/test/Transforms/canonicalize.mlir @@ -719,7 +719,7 @@ func.func @view(%arg0 : index) -> (f32, f32, f32, f32) { %r2 = memref.load %3[%c0, %c0] : memref // Test: folding static alloc and memref.cast into a view. 
- // CHECK memref.view %[[ALLOC_MEM]][%[[C15]]][] : memref<2048xi8> to memref<15x7xf32> + // CHECK: memref.view %[[ALLOC_MEM]][%[[C15]]][] : memref<2048xi8> to memref<15x7xf32> %4 = memref.cast %0 : memref<2048xi8> to memref %5 = memref.view %4[%c15][%c15, %c7] : memref to memref %r3 = memref.load %5[%c0, %c0] : memref diff --git a/mlir/test/mlir-cpu-runner/copy.mlir b/mlir/test/mlir-cpu-runner/copy.mlir index 8766f51050e094..7fa35fac70e4dc 100644 --- a/mlir/test/mlir-cpu-runner/copy.mlir +++ b/mlir/test/mlir-cpu-runner/copy.mlir @@ -64,7 +64,7 @@ func.func @main() -> () { %unranked_scalar_copy = memref.cast %scalar_copy : memref to memref<*xf32> call @printMemrefF32(%unranked_scalar_copy) : (memref<*xf32>) -> () // CHECK: rank = 0 offset = 0 sizes = [] strides = [] - // CHECK-NEXT [42] + // CHECK-NEXT: [42] memref.dealloc %copy_empty : memref<3x0x1xf32> memref.dealloc %copy_empty_casted : memref<0x3x1xf32> diff --git a/mlir/test/mlir-tblgen/bytecode-reserved.td b/mlir/test/mlir-tblgen/bytecode-reserved.td index 1a7bcd306d075e..5162a6b36317c7 100644 --- a/mlir/test/mlir-tblgen/bytecode-reserved.td +++ b/mlir/test/mlir-tblgen/bytecode-reserved.td @@ -2,7 +2,7 @@ include "mlir/IR/BuiltinDialectBytecode.td" -// CHECK static ::mlir::Type readIntegerType +// CHECK: static ::mlir::Type readIntegerType def TestDialectTypes : DialectTypes<"Test"> { // CHECK: static Type readType diff --git a/mlir/test/python/ir/attributes.py b/mlir/test/python/ir/attributes.py index 0f2c8e7b7252ac..8a4e19463c3ee2 100644 --- a/mlir/test/python/ir/attributes.py +++ b/mlir/test/python/ir/attributes.py @@ -514,7 +514,7 @@ def testDictAttr(): a = DictAttr.get(dict_attr) - # CHECK attr: {integerattr = 42 : i32, stringattr = "string"} + # CHECK: attr: {integerattr = 42 : i32, stringattr = "string"} print("attr:", a) assert len(a) == 2 @@ -546,7 +546,7 @@ def testDictAttr(): else: assert False, "expected IndexError on accessing an out-of-bounds attribute" - # CHECK "empty: {}" + # CHECK: empty: {} 
print("empty: ", DictAttr.get()) From 2d9b6a01c7a77ee76a5c279901bca1659a550499 Mon Sep 17 00:00:00 2001 From: Tobias Stadler Date: Fri, 14 Jun 2024 17:51:09 +0200 Subject: [PATCH 111/155] [GlobalISel][AArch64] AArch64O0PreLegalizerCombiner: Disable fixed-point iteration (#94291) This adds an option to CombinerInfo to limit the number of iterations in the Combiner. This option is then used to disable fixed-point iteration for the AArch64O0PreLegalizerCombiner. The combines there are simple enough that code quality impact should be minimal with the current heuristics (instructions are processed from top to bottom of the basic block, new/changed instructions are added back to the worklist). Test changes are due to some instructions not being DCE'd, which has no actual impact because InstructionSelect performs DCE as well. AArch64 CTMark O0: -0.9% geomean compile-time (instruction count) no change in size..text for any of the benchmarks --- .../llvm/CodeGen/GlobalISel/CombinerInfo.h | 4 ++ llvm/lib/CodeGen/GlobalISel/Combiner.cpp | 35 +++++++++++- .../GISel/AArch64O0PreLegalizerCombiner.cpp | 4 ++ .../AArch64/GlobalISel/localizer-arm64-tti.ll | 54 ++++++++++--------- 4 files changed, 71 insertions(+), 26 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h index 13a8faf955a7e4..2b0eb71f880827 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerInfo.h @@ -49,6 +49,10 @@ struct CombinerInfo { bool EnableOptSize; /// Whether we're optimizing for minsize (-Oz). bool EnableMinSize; + + /// The maximum number of times the Combiner will iterate over the + /// MachineFunction. Setting this to 0 enables fixed-point iteration. 
+ unsigned MaxIterations = 0; }; } // namespace llvm diff --git a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp index d18e65a83484f6..3310ce5455c978 100644 --- a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp @@ -13,6 +13,7 @@ #include "llvm/CodeGen/GlobalISel/Combiner.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/GlobalISel/CSEInfo.h" #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h" #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" @@ -27,6 +28,11 @@ using namespace llvm; +STATISTIC(NumOneIteration, "Number of functions with one iteration"); +STATISTIC(NumTwoIterations, "Number of functions with two iterations"); +STATISTIC(NumThreeOrMoreIterations, + "Number of functions with three or more iterations"); + namespace llvm { cl::OptionCategory GICombinerOptionCategory( "GlobalISel Combiner", @@ -135,7 +141,11 @@ bool Combiner::combineMachineInstrs() { bool MFChanged = false; bool Changed; - do { + unsigned Iteration = 0; + while (true) { + ++Iteration; + LLVM_DEBUG(dbgs() << "\n\nCombiner iteration #" << Iteration << '\n'); + WorkList.clear(); // Collect all instructions. Do a post order traversal for basic blocks and @@ -166,7 +176,28 @@ bool Combiner::combineMachineInstrs() { WLObserver->reportFullyCreatedInstrs(); } MFChanged |= Changed; - } while (Changed); + + if (!Changed) { + LLVM_DEBUG(dbgs() << "\nCombiner reached fixed-point after iteration #" + << Iteration << '\n'); + break; + } + // Iterate until a fixed-point is reached if MaxIterations == 0, + // otherwise limit the number of iterations. 
+ if (CInfo.MaxIterations && Iteration >= CInfo.MaxIterations) { + LLVM_DEBUG( + dbgs() << "\nCombiner reached iteration limit after iteration #" + << Iteration << '\n'); + break; + } + } + + if (Iteration == 1) + ++NumOneIteration; + else if (Iteration == 2) + ++NumTwoIterations; + else + ++NumThreeOrMoreIterations; #ifndef NDEBUG if (CSEInfo) { diff --git a/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp index 17dd8f2314a2b3..0ba3a543d114ac 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp @@ -165,6 +165,10 @@ bool AArch64O0PreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) { CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false, /*LegalizerInfo*/ nullptr, /*EnableOpt*/ false, F.hasOptSize(), F.hasMinSize()); + // Disable fixed-point iteration in the Combiner. This improves compile-time + // at the cost of possibly missing optimizations. See PR#94291 for details. 
+ CInfo.MaxIterations = 1; + AArch64O0PreLegalizerCombinerImpl Impl(MF, CInfo, &TPC, *KB, /*CSEInfo*/ nullptr, RuleConfig, ST); return Impl.combineMachineInstrs(); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll b/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll index 5ab086ffd2c13a..c4e07de265eddf 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/localizer-arm64-tti.ll @@ -28,6 +28,7 @@ define i32 @foo() { ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s32) from @var1) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]] + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.3 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} @@ -35,19 +36,19 @@ define i32 @foo() { ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[GV3:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var2 - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: G_STORE [[C4]](s32), [[GV3]](p0) :: (store (s32) into @var2) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: G_STORE [[C5]](s32), [[GV3]](p0) :: (store (s32) into @var2) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CHECK-NEXT: [[GV4:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var1 - ; CHECK-NEXT: G_STORE [[C5]](s32), [[GV4]](p0) :: (store (s32) into @var1) + ; CHECK-NEXT: G_STORE [[C6]](s32), [[GV4]](p0) :: (store (s32) into @var1) ; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3 - ; CHECK-NEXT: G_STORE [[C4]](s32), [[GV5]](p0) :: (store (s32) into @var3) - ; CHECK-NEXT: G_STORE [[C5]](s32), [[GV4]](p0) :: (store (s32) into @var1) + ; CHECK-NEXT: G_STORE [[C5]](s32), [[GV5]](p0) :: (store (s32) into @var3) + ; CHECK-NEXT: G_STORE [[C6]](s32), [[GV4]](p0) :: 
(store (s32) into @var1) ; CHECK-NEXT: G_BR %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3.if.end: - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: $w0 = COPY [[C6]](s32) + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: $w0 = COPY [[C7]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 entry: %0 = load i32, ptr @var1, align 4 @@ -84,6 +85,7 @@ define i32 @darwin_tls() { ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s32) from @var1) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C1]] + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.3 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} @@ -96,8 +98,8 @@ define i32 @darwin_tls() { ; CHECK-NEXT: G_BR %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3.if.end: - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: $w0 = COPY [[C2]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: $w0 = COPY [[C3]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 entry: %0 = load i32, ptr @var1, align 4 @@ -127,6 +129,7 @@ define i32 @imm_cost_too_large_cost_of_2() { ; CHECK-NEXT: [[CONSTANT_FOLD_BARRIER:%[0-9]+]]:_(s32) = G_CONSTANT_FOLD_BARRIER [[C1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C2]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.4 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} @@ -147,8 +150,8 @@ define i32 @imm_cost_too_large_cost_of_2() { ; CHECK-NEXT: bb.4.if.end: ; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3 ; CHECK-NEXT: G_STORE [[CONSTANT_FOLD_BARRIER]](s32), [[GV5]](p0) :: (store (s32) into @var3) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: $w0 = COPY [[C3]](s32) + ; CHECK-NEXT: 
[[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: $w0 = COPY [[C4]](s32) ; CHECK-NEXT: RET_ReallyLR implicit $w0 entry: %0 = load i32, ptr @var1, align 4 @@ -183,6 +186,7 @@ define i64 @imm_cost_too_large_cost_of_4() { ; CHECK-NEXT: [[CONSTANT_FOLD_BARRIER:%[0-9]+]]:_(s64) = G_CONSTANT_FOLD_BARRIER [[C1]] ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s64), [[C2]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.4 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} @@ -203,8 +207,8 @@ define i64 @imm_cost_too_large_cost_of_4() { ; CHECK-NEXT: bb.4.if.end: ; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3_64 ; CHECK-NEXT: G_STORE [[CONSTANT_FOLD_BARRIER]](s64), [[GV5]](p0) :: (store (s64) into @var3_64) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: $x0 = COPY [[C3]](s64) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: $x0 = COPY [[C4]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 entry: %0 = load i64, ptr @var1_64, align 4 @@ -239,6 +243,7 @@ define i64 @f64_imm_cost_too_high(double %a) { ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s64) from @var1_64, align 4) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s64), [[C2]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.4 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} @@ -259,8 +264,8 @@ define i64 @f64_imm_cost_too_high(double %a) { ; CHECK-NEXT: bb.4.if.end: ; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3_64 ; CHECK-NEXT: G_STORE [[C]](s64), [[GV5]](p0) :: (store (s64) into @var3_64) - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: $x0 = COPY [[C3]](s64) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: $x0 = 
COPY [[C4]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 entry: %0 = load i64, ptr @var1_64, align 4 @@ -294,6 +299,7 @@ define i64 @f64_imm_cheap(double %a) { ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[GV2]](p0) :: (dereferenceable load (s64) from @var1_64, align 4) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s64), [[C2]] + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true ; CHECK-NEXT: G_BRCOND [[ICMP]](s1), %bb.4 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} @@ -301,24 +307,24 @@ define i64 @f64_imm_cheap(double %a) { ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[GV3:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var2_64 - ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 - ; CHECK-NEXT: G_STORE [[C3]](s64), [[GV3]](p0) :: (store (s64) into @var2_64) + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 + ; CHECK-NEXT: G_STORE [[C4]](s64), [[GV3]](p0) :: (store (s64) into @var2_64) ; CHECK-NEXT: G_BR %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3.if.then2: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 ; CHECK-NEXT: [[GV4:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var1_64 - ; CHECK-NEXT: G_STORE [[C4]](s64), [[GV4]](p0) :: (store (s64) into @var1_64) + ; CHECK-NEXT: G_STORE [[C5]](s64), [[GV4]](p0) :: (store (s64) into @var1_64) ; CHECK-NEXT: G_BR %bb.4 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4.if.end: ; CHECK-NEXT: [[GV5:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @var3_64 - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_FCONSTANT double 0.000000e+00 - ; CHECK-NEXT: G_STORE [[C5]](s64), [[GV5]](p0) :: (store (s64) into @var3_64) - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: $x0 = COPY [[C6]](s64) + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_FCONSTANT double 
0.000000e+00 + ; CHECK-NEXT: G_STORE [[C6]](s64), [[GV5]](p0) :: (store (s64) into @var3_64) + ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: $x0 = COPY [[C7]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 entry: %0 = load i64, ptr @var1_64, align 4 From 180a536665127bded6c7ef1755e9dd0edfa8802f Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 14 Jun 2024 09:18:31 -0700 Subject: [PATCH 112/155] [ProfileData] Fix the order of tests (#95549) Without this patch, we call getValueForSite before verifying that we have an expected number of value sites with getNumValueSites. This patch fixes the order by "sinking" the call to getValueForSite. While I am at it, this patch migrates the use of getValueForSite to getValueArrayForSite. --- llvm/unittests/ProfileData/InstrProfTest.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp index dae55422909342..54689e7669633c 100644 --- a/llvm/unittests/ProfileData/InstrProfTest.cpp +++ b/llvm/unittests/ProfileData/InstrProfTest.cpp @@ -1299,9 +1299,9 @@ TEST_P(ValueProfileMergeEdgeCaseTest, value_profile_data_merge_site_trunc) { Expected R = Reader->getInstrProfRecord("caller", 0x1234); ASSERT_THAT_ERROR(R.takeError(), Succeeded()); - std::unique_ptr VD(R->getValueForSite(ValueKind, 0)); ASSERT_EQ(2U, R->getNumValueSites(ValueKind)); - EXPECT_EQ(255U, R->getNumValueDataForSite(ValueKind, 0)); + auto VD = R->getValueArrayForSite(ValueKind, 0); + EXPECT_THAT(VD, SizeIs(255)); for (unsigned I = 0; I < 255; I++) { EXPECT_EQ(VD[I].Value, 509 - I); EXPECT_EQ(VD[I].Count, 1509 - I); From bbe9119d9cb37662faafe7fe273e792b1b70145e Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 14 Jun 2024 09:21:18 -0700 Subject: [PATCH 113/155] [ProfileData] Sink the length checks (#95559) The new API getValueArrayForSite returns ArrayRef, packaging the array length and contents together.
This patch sinks the array length checks just before we check the contents. This way, we check both the array length and contents immediately after calling getValueArrayForSite. --- llvm/unittests/ProfileData/InstrProfTest.cpp | 25 +++++++++++--------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp index 54689e7669633c..749d35e02c2866 100644 --- a/llvm/unittests/ProfileData/InstrProfTest.cpp +++ b/llvm/unittests/ProfileData/InstrProfTest.cpp @@ -1381,12 +1381,6 @@ TEST(ValueProfileReadWriteTest, value_prof_data_read_write) { // Now read data from Record and sanity check the data ASSERT_EQ(6U, Record.getNumValueSites(IPVK_IndirectCallTarget)); - ASSERT_EQ(5U, Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 0)); - ASSERT_EQ(4U, Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 1)); - ASSERT_EQ(3U, Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 2)); - ASSERT_EQ(2U, Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 3)); - ASSERT_EQ(0U, Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 4)); - ASSERT_EQ(2U, Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 5)); auto Cmp = [](const InstrProfValueData &VD1, const InstrProfValueData &VD2) { return VD1.Count > VD2.Count; @@ -1394,6 +1388,7 @@ TEST(ValueProfileReadWriteTest, value_prof_data_read_write) { SmallVector VD_0( Record.getValueArrayForSite(IPVK_IndirectCallTarget, 0)); + ASSERT_THAT(VD_0, SizeIs(5)); llvm::sort(VD_0, Cmp); EXPECT_STREQ((const char *)VD_0[0].Value, "callee2"); EXPECT_EQ(1000U, VD_0[0].Count); @@ -1408,6 +1403,7 @@ TEST(ValueProfileReadWriteTest, value_prof_data_read_write) { SmallVector VD_1( Record.getValueArrayForSite(IPVK_IndirectCallTarget, 1)); + ASSERT_THAT(VD_1, SizeIs(4)); llvm::sort(VD_1, Cmp); EXPECT_STREQ((const char *)VD_1[0].Value, "callee2"); EXPECT_EQ(VD_1[0].Count, 2500U); @@ -1420,6 +1416,7 @@ TEST(ValueProfileReadWriteTest, 
value_prof_data_read_write) { SmallVector VD_2( Record.getValueArrayForSite(IPVK_IndirectCallTarget, 2)); + ASSERT_THAT(VD_2, SizeIs(3)); llvm::sort(VD_2, Cmp); EXPECT_STREQ((const char *)VD_2[0].Value, "callee4"); EXPECT_EQ(VD_2[0].Count, 5500U); @@ -1430,20 +1427,23 @@ TEST(ValueProfileReadWriteTest, value_prof_data_read_write) { SmallVector VD_3( Record.getValueArrayForSite(IPVK_IndirectCallTarget, 3)); + ASSERT_THAT(VD_3, SizeIs(2)); llvm::sort(VD_3, Cmp); EXPECT_STREQ((const char *)VD_3[0].Value, "callee3"); EXPECT_EQ(VD_3[0].Count, 2000U); EXPECT_STREQ((const char *)VD_3[1].Value, "callee2"); EXPECT_EQ(VD_3[1].Count, 1800U); + ASSERT_THAT(Record.getValueArrayForSite(IPVK_IndirectCallTarget, 4), + SizeIs(0)); + ASSERT_THAT(Record.getValueArrayForSite(IPVK_IndirectCallTarget, 5), + SizeIs(2)); + ASSERT_EQ(Record.getNumValueSites(IPVK_VTableTarget), 4U); - ASSERT_EQ(Record.getNumValueDataForSite(IPVK_VTableTarget, 0), 5U); - ASSERT_EQ(Record.getNumValueDataForSite(IPVK_VTableTarget, 1), 4U); - ASSERT_EQ(Record.getNumValueDataForSite(IPVK_VTableTarget, 2), 3U); - ASSERT_EQ(Record.getNumValueDataForSite(IPVK_VTableTarget, 3), 2U); SmallVector VD0( Record.getValueArrayForSite(IPVK_VTableTarget, 0)); + ASSERT_THAT(VD0, SizeIs(5)); llvm::sort(VD0, Cmp); EXPECT_EQ(VD0[0].Value, getCalleeAddress(vtable2)); EXPECT_EQ(VD0[0].Count, 1000U); @@ -1458,6 +1458,7 @@ TEST(ValueProfileReadWriteTest, value_prof_data_read_write) { SmallVector VD1( Record.getValueArrayForSite(IPVK_VTableTarget, 1)); + ASSERT_THAT(VD1, SizeIs(4)); llvm::sort(VD1, Cmp); EXPECT_EQ(VD1[0].Value, getCalleeAddress(vtable2)); EXPECT_EQ(VD1[0].Count, 2500U); @@ -1470,6 +1471,7 @@ TEST(ValueProfileReadWriteTest, value_prof_data_read_write) { SmallVector VD2( Record.getValueArrayForSite(IPVK_VTableTarget, 2)); + ASSERT_THAT(VD2, SizeIs(3)); llvm::sort(VD2, Cmp); EXPECT_EQ(VD2[0].Value, getCalleeAddress(vtable4)); EXPECT_EQ(VD2[0].Count, 5500U); @@ -1480,6 +1482,7 @@ TEST(ValueProfileReadWriteTest, 
value_prof_data_read_write) { SmallVector VD3( Record.getValueArrayForSite(IPVK_VTableTarget, 3)); + ASSERT_THAT(VD3, SizeIs(2)); llvm::sort(VD3, Cmp); EXPECT_EQ(VD3[0].Value, getCalleeAddress(vtable3)); EXPECT_EQ(VD3[0].Count, 2000U); @@ -1525,7 +1528,6 @@ TEST(ValueProfileReadWriteTest, symtab_mapping) { // Now read data from Record and sanity check the data ASSERT_EQ(Record.getNumValueSites(IPVK_IndirectCallTarget), 6U); - ASSERT_EQ(Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 0), 5U); // Look up the value correpsonding to the middle of a vtable in symtab and // test that it's the hash of the name. @@ -1543,6 +1545,7 @@ TEST(ValueProfileReadWriteTest, symtab_mapping) { }; SmallVector VD_0( Record.getValueArrayForSite(IPVK_IndirectCallTarget, 0)); + ASSERT_THAT(VD_0, SizeIs(5)); llvm::sort(VD_0, Cmp); ASSERT_EQ(VD_0[0].Value, 0x2000ULL); ASSERT_EQ(VD_0[0].Count, 1000U); From f3aceeee8a8c5fef107657dc6c4d558f3de99773 Mon Sep 17 00:00:00 2001 From: OverMighty Date: Fri, 14 Jun 2024 18:31:32 +0200 Subject: [PATCH 114/155] [libc][math][c23] Add f16fmaf C23 math function (#95483) Part of #93566. 
--- libc/config/linux/aarch64/entrypoints.txt | 1 + libc/config/linux/x86_64/entrypoints.txt | 1 + libc/docs/math/index.rst | 2 + libc/spec/stdc.td | 3 +- libc/src/__support/FPUtil/FMA.h | 32 +-- .../__support/FPUtil/generic/CMakeLists.txt | 3 + libc/src/__support/FPUtil/generic/FMA.h | 188 ++++++++++-------- libc/src/__support/FPUtil/multiply_add.h | 4 +- libc/src/__support/big_int.h | 37 ++-- libc/src/math/CMakeLists.txt | 2 + libc/src/math/f16fmaf.h | 20 ++ libc/src/math/generic/CMakeLists.txt | 13 ++ libc/src/math/generic/expm1f.cpp | 2 +- libc/src/math/generic/f16fmaf.cpp | 19 ++ libc/src/math/generic/fma.cpp | 2 +- libc/src/math/generic/fmaf.cpp | 2 +- libc/src/math/generic/range_reduction_fma.h | 25 +-- libc/test/src/math/CMakeLists.txt | 21 +- libc/test/src/math/FmaTest.h | 112 +++++------ libc/test/src/math/f16fmaf_test.cpp | 21 ++ libc/test/src/math/fma_test.cpp | 4 - libc/test/src/math/fmaf_test.cpp | 4 - libc/test/src/math/smoke/CMakeLists.txt | 18 +- libc/test/src/math/smoke/FmaTest.h | 114 ++++++++--- libc/test/src/math/smoke/f16fmaf_test.cpp | 13 ++ libc/test/src/math/smoke/fma_test.cpp | 6 +- libc/test/src/math/smoke/fmaf_test.cpp | 6 +- libc/utils/MPFRWrapper/MPFRUtils.cpp | 78 +++++--- libc/utils/MPFRWrapper/MPFRUtils.h | 48 +++-- 29 files changed, 501 insertions(+), 300 deletions(-) create mode 100644 libc/src/math/f16fmaf.h create mode 100644 libc/src/math/generic/f16fmaf.cpp create mode 100644 libc/test/src/math/f16fmaf_test.cpp create mode 100644 libc/test/src/math/smoke/f16fmaf_test.cpp diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index ee865fdec05c1d..f2f9803523df29 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -503,6 +503,7 @@ if(LIBC_TYPES_HAS_FLOAT16) libc.src.math.canonicalizef16 libc.src.math.ceilf16 libc.src.math.copysignf16 + libc.src.math.f16fmaf libc.src.math.f16sqrtf libc.src.math.fabsf16 libc.src.math.fdimf16 diff 
--git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 2d36ca296c3a45..45914fe9f7ad2a 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -535,6 +535,7 @@ if(LIBC_TYPES_HAS_FLOAT16) libc.src.math.canonicalizef16 libc.src.math.ceilf16 libc.src.math.copysignf16 + libc.src.math.f16fmaf libc.src.math.f16sqrtf libc.src.math.fabsf16 libc.src.math.fdimf16 diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst index 790786147c1645..293edd1c15100b 100644 --- a/libc/docs/math/index.rst +++ b/libc/docs/math/index.rst @@ -124,6 +124,8 @@ Basic Operations +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | dsub | N/A | N/A | | N/A | | 7.12.14.2 | F.10.11 | +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| f16fma | |check| | | | N/A | | 7.12.14.5 | F.10.11 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | fabs | |check| | |check| | |check| | |check| | |check| | 7.12.7.3 | F.10.4.3 | +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | fadd | N/A | | | N/A | | 7.12.14.1 | F.10.11 | diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index 7c4135032a0b28..b7375fb4112204 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -474,10 +474,11 @@ def StdC : StandardSpec<"stdc"> { FunctionSpec<"fmul", RetValSpec, [ArgSpec, ArgSpec]>, - FunctionSpec<"fma", RetValSpec, [ArgSpec, ArgSpec, ArgSpec]>, FunctionSpec<"fmaf", RetValSpec, [ArgSpec, 
ArgSpec, ArgSpec]>, + GuardedFunctionSpec<"f16fmaf", RetValSpec, [ArgSpec, ArgSpec, ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, + FunctionSpec<"fmod", RetValSpec, [ArgSpec, ArgSpec]>, FunctionSpec<"fmodf", RetValSpec, [ArgSpec, ArgSpec]>, FunctionSpec<"fmodl", RetValSpec, [ArgSpec, ArgSpec]>, diff --git a/libc/src/__support/FPUtil/FMA.h b/libc/src/__support/FPUtil/FMA.h index c277da49538bf3..cf01a317d73595 100644 --- a/libc/src/__support/FPUtil/FMA.h +++ b/libc/src/__support/FPUtil/FMA.h @@ -10,41 +10,29 @@ #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_FMA_H #include "src/__support/CPP/type_traits.h" +#include "src/__support/FPUtil/generic/FMA.h" #include "src/__support/macros/properties/architectures.h" #include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA -#if defined(LIBC_TARGET_CPU_HAS_FMA) - namespace LIBC_NAMESPACE { namespace fputil { -template -LIBC_INLINE cpp::enable_if_t, T> fma(T x, T y, T z) { - return __builtin_fmaf(x, y, z); +template +LIBC_INLINE OutType fma(InType x, InType y, InType z) { + return generic::fma(x, y, z); } -template -LIBC_INLINE cpp::enable_if_t, T> fma(T x, T y, T z) { - return __builtin_fma(x, y, z); +#ifdef LIBC_TARGET_CPU_HAS_FMA +template <> LIBC_INLINE float fma(float x, float y, float z) { + return __builtin_fmaf(x, y, z); } -} // namespace fputil -} // namespace LIBC_NAMESPACE - -#else -// FMA instructions are not available -#include "generic/FMA.h" - -namespace LIBC_NAMESPACE { -namespace fputil { - -template LIBC_INLINE T fma(T x, T y, T z) { - return generic::fma(x, y, z); +template <> LIBC_INLINE double fma(double x, double y, double z) { + return __builtin_fma(x, y, z); } +#endif // LIBC_TARGET_CPU_HAS_FMA } // namespace fputil } // namespace LIBC_NAMESPACE -#endif - #endif // LLVM_LIBC_SRC___SUPPORT_FPUTIL_FMA_H diff --git a/libc/src/__support/FPUtil/generic/CMakeLists.txt b/libc/src/__support/FPUtil/generic/CMakeLists.txt index 595656e3e8d900..a8a95ba3f15ffa 100644 --- 
a/libc/src/__support/FPUtil/generic/CMakeLists.txt +++ b/libc/src/__support/FPUtil/generic/CMakeLists.txt @@ -19,12 +19,15 @@ add_header_library( HDRS FMA.h DEPENDS + libc.hdr.fenv_macros libc.src.__support.common libc.src.__support.CPP.bit + libc.src.__support.CPP.limits libc.src.__support.CPP.type_traits libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.rounding_mode + libc.src.__support.big_int libc.src.__support.macros.optimization libc.src.__support.uint128 ) diff --git a/libc/src/__support/FPUtil/generic/FMA.h b/libc/src/__support/FPUtil/generic/FMA.h index f403aa7333b394..71b150758d4191 100644 --- a/libc/src/__support/FPUtil/generic/FMA.h +++ b/libc/src/__support/FPUtil/generic/FMA.h @@ -10,19 +10,26 @@ #define LLVM_LIBC_SRC___SUPPORT_FPUTIL_GENERIC_FMA_H #include "src/__support/CPP/bit.h" +#include "src/__support/CPP/limits.h" #include "src/__support/CPP/type_traits.h" -#include "src/__support/FPUtil/FEnvImpl.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/rounding_mode.h" +#include "src/__support/big_int.h" #include "src/__support/macros/attributes.h" // LIBC_INLINE #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY -#include "src/__support/uint128.h" + +#include "hdr/fenv_macros.h" namespace LIBC_NAMESPACE { namespace fputil { namespace generic { -template LIBC_INLINE T fma(T x, T y, T z); +template +LIBC_INLINE cpp::enable_if_t && + cpp::is_floating_point_v && + sizeof(OutType) <= sizeof(InType), + OutType> +fma(InType x, InType y, InType z); // TODO(lntue): Implement fmaf that is correctly rounded to all rounding modes. // The implementation below only is only correct for the default rounding mode, @@ -64,11 +71,10 @@ template <> LIBC_INLINE float fma(float x, float y, float z) { // Update sticky bits if t != 0.0 and the least (52 - 23 - 1 = 28) bits are // zero. 
if (!t.is_zero() && ((bit_sum.get_mantissa() & 0xfff'ffffULL) == 0)) { - if (bit_sum.sign() != t.sign()) { + if (bit_sum.sign() != t.sign()) bit_sum.set_mantissa(bit_sum.get_mantissa() + 1); - } else if (bit_sum.get_mantissa()) { + else if (bit_sum.get_mantissa()) bit_sum.set_mantissa(bit_sum.get_mantissa() - 1); - } } } @@ -79,12 +85,14 @@ namespace internal { // Extract the sticky bits and shift the `mantissa` to the right by // `shift_length`. -LIBC_INLINE bool shift_mantissa(int shift_length, UInt128 &mant) { - if (shift_length >= 128) { +template +LIBC_INLINE cpp::enable_if_t, bool> +shift_mantissa(int shift_length, T &mant) { + if (shift_length >= cpp::numeric_limits::digits) { mant = 0; return true; // prod_mant is non-zero. } - UInt128 mask = (UInt128(1) << shift_length) - 1; + T mask = (T(1) << shift_length) - 1; bool sticky_bits = (mant & mask) != 0; mant >>= shift_length; return sticky_bits; @@ -92,47 +100,64 @@ LIBC_INLINE bool shift_mantissa(int shift_length, UInt128 &mant) { } // namespace internal -template <> LIBC_INLINE double fma(double x, double y, double z) { - using FPBits = fputil::FPBits; - - if (LIBC_UNLIKELY(x == 0 || y == 0 || z == 0)) { - return x * y + z; - } +template +LIBC_INLINE cpp::enable_if_t && + cpp::is_floating_point_v && + sizeof(OutType) <= sizeof(InType), + OutType> +fma(InType x, InType y, InType z) { + using OutFPBits = fputil::FPBits; + using OutStorageType = typename OutFPBits::StorageType; + using InFPBits = fputil::FPBits; + using InStorageType = typename InFPBits::StorageType; + + constexpr int IN_EXPLICIT_MANT_LEN = InFPBits::FRACTION_LEN + 1; + constexpr size_t PROD_LEN = 2 * IN_EXPLICIT_MANT_LEN; + constexpr size_t TMP_RESULT_LEN = cpp::bit_ceil(PROD_LEN + 1); + using TmpResultType = UInt; + + constexpr size_t EXTRA_FRACTION_LEN = + TMP_RESULT_LEN - 1 - OutFPBits::FRACTION_LEN; + constexpr TmpResultType EXTRA_FRACTION_STICKY_MASK = + (TmpResultType(1) << (EXTRA_FRACTION_LEN - 1)) - 1; + + if (LIBC_UNLIKELY(x == 0 || 
y == 0 || z == 0)) + return static_cast(x * y + z); int x_exp = 0; int y_exp = 0; int z_exp = 0; // Normalize denormal inputs. - if (LIBC_UNLIKELY(FPBits(x).is_subnormal())) { - x_exp -= 52; - x *= 0x1.0p+52; + if (LIBC_UNLIKELY(InFPBits(x).is_subnormal())) { + x_exp -= InFPBits::FRACTION_LEN; + x *= InType(InStorageType(1) << InFPBits::FRACTION_LEN); } - if (LIBC_UNLIKELY(FPBits(y).is_subnormal())) { - y_exp -= 52; - y *= 0x1.0p+52; + if (LIBC_UNLIKELY(InFPBits(y).is_subnormal())) { + y_exp -= InFPBits::FRACTION_LEN; + y *= InType(InStorageType(1) << InFPBits::FRACTION_LEN); } - if (LIBC_UNLIKELY(FPBits(z).is_subnormal())) { - z_exp -= 52; - z *= 0x1.0p+52; + if (LIBC_UNLIKELY(InFPBits(z).is_subnormal())) { + z_exp -= InFPBits::FRACTION_LEN; + z *= InType(InStorageType(1) << InFPBits::FRACTION_LEN); } - FPBits x_bits(x), y_bits(y), z_bits(z); + InFPBits x_bits(x), y_bits(y), z_bits(z); const Sign z_sign = z_bits.sign(); Sign prod_sign = (x_bits.sign() == y_bits.sign()) ? Sign::POS : Sign::NEG; x_exp += x_bits.get_biased_exponent(); y_exp += y_bits.get_biased_exponent(); z_exp += z_bits.get_biased_exponent(); - if (LIBC_UNLIKELY(x_exp == FPBits::MAX_BIASED_EXPONENT || - y_exp == FPBits::MAX_BIASED_EXPONENT || - z_exp == FPBits::MAX_BIASED_EXPONENT)) - return x * y + z; + if (LIBC_UNLIKELY(x_exp == InFPBits::MAX_BIASED_EXPONENT || + y_exp == InFPBits::MAX_BIASED_EXPONENT || + z_exp == InFPBits::MAX_BIASED_EXPONENT)) + return static_cast(x * y + z); // Extract mantissa and append hidden leading bits. - UInt128 x_mant = x_bits.get_explicit_mantissa(); - UInt128 y_mant = y_bits.get_explicit_mantissa(); - UInt128 z_mant = z_bits.get_explicit_mantissa(); + InStorageType x_mant = x_bits.get_explicit_mantissa(); + InStorageType y_mant = y_bits.get_explicit_mantissa(); + TmpResultType z_mant = z_bits.get_explicit_mantissa(); // If the exponent of the product x*y > the exponent of z, then no extra // precision beside the entire product x*y is needed. 
On the other hand, when @@ -143,22 +168,20 @@ template <> LIBC_INLINE double fma(double x, double y, double z) { // z : 10aa...a // - prod : 1bb...bb....b // In that case, in order to store the exact result, we need at least - // (Length of prod) - (MantissaLength of z) = 2*(52 + 1) - 52 = 54. + // (Length of prod) - (Fraction length of z) + // = 2*(Length of input explicit mantissa) - (Fraction length of z) bits. // Overall, before aligning the mantissas and exponents, we can simply left- - // shift the mantissa of z by at least 54, and left-shift the product of x*y - // by (that amount - 52). After that, it is enough to align the least - // significant bit, given that we keep track of the round and sticky bits - // after the least significant bit. - // We pick shifting z_mant by 64 bits so that technically we can simply use - // the original mantissa as high part when constructing 128-bit z_mant. So the - // mantissa of prod will be left-shifted by 64 - 54 = 10 initially. - - UInt128 prod_mant = x_mant * y_mant << 10; + // shift the mantissa of z by that amount. After that, it is enough to align + // the least significant bit, given that we keep track of the round and sticky + // bits after the least significant bit. 
+ + TmpResultType prod_mant = TmpResultType(x_mant) * y_mant; int prod_lsb_exp = - x_exp + y_exp - (FPBits::EXP_BIAS + 2 * FPBits::FRACTION_LEN + 10); + x_exp + y_exp - (InFPBits::EXP_BIAS + 2 * InFPBits::FRACTION_LEN); - z_mant <<= 64; - int z_lsb_exp = z_exp - (FPBits::FRACTION_LEN + 64); + constexpr int RESULT_MIN_LEN = PROD_LEN - InFPBits::FRACTION_LEN; + z_mant <<= RESULT_MIN_LEN; + int z_lsb_exp = z_exp - (InFPBits::FRACTION_LEN + RESULT_MIN_LEN); bool round_bit = false; bool sticky_bits = false; bool z_shifted = false; @@ -198,46 +221,42 @@ template <> LIBC_INLINE double fma(double x, double y, double z) { } } - uint64_t result = 0; + OutStorageType result = 0; int r_exp = 0; // Unbiased exponent of the result + int round_mode = fputil::quick_get_round(); + // Normalize the result. if (prod_mant != 0) { - uint64_t prod_hi = static_cast(prod_mant >> 64); - int lead_zeros = - prod_hi ? cpp::countl_zero(prod_hi) - : 64 + cpp::countl_zero(static_cast(prod_mant)); + int lead_zeros = cpp::countl_zero(prod_mant); // Move the leading 1 to the most significant bit. prod_mant <<= lead_zeros; - // The lower 64 bits are always sticky bits after moving the leading 1 to - // the most significant bit. - sticky_bits |= (static_cast(prod_mant) != 0); - result = static_cast(prod_mant >> 64); - // Change prod_lsb_exp the be the exponent of the least significant bit of - // the result. - prod_lsb_exp += 64 - lead_zeros; - r_exp = prod_lsb_exp + 63; + prod_lsb_exp -= lead_zeros; + r_exp = prod_lsb_exp + (cpp::numeric_limits::digits - 1) - + InFPBits::EXP_BIAS + OutFPBits::EXP_BIAS; if (r_exp > 0) { - // The result is normal. We will shift the mantissa to the right by - // 63 - 52 = 11 bits (from the locations of the most significant bit). - // Then the rounding bit will correspond the 11th bit, and the lowest - // 10 bits are merged into sticky bits. - round_bit = (result & 0x0400ULL) != 0; - sticky_bits |= (result & 0x03ffULL) != 0; - result >>= 11; + // The result is normal. 
We will shift the mantissa to the right by the + // amount of extra bits compared to the length of the explicit mantissa in + // the output type. The rounding bit then becomes the highest bit that is + // shifted out, and the following lower bits are merged into sticky bits. + round_bit = + (prod_mant & (TmpResultType(1) << (EXTRA_FRACTION_LEN - 1))) != 0; + sticky_bits |= (prod_mant & EXTRA_FRACTION_STICKY_MASK) != 0; + result = static_cast(prod_mant >> EXTRA_FRACTION_LEN); } else { - if (r_exp < -52) { + if (r_exp < -OutFPBits::FRACTION_LEN) { // The result is smaller than 1/2 of the smallest denormal number. sticky_bits = true; // since the result is non-zero. result = 0; } else { // The result is denormal. - uint64_t mask = 1ULL << (11 - r_exp); - round_bit = (result & mask) != 0; - sticky_bits |= (result & (mask - 1)) != 0; - if (r_exp > -52) - result >>= 12 - r_exp; + TmpResultType mask = TmpResultType(1) << (EXTRA_FRACTION_LEN - r_exp); + round_bit = (prod_mant & mask) != 0; + sticky_bits |= (prod_mant & (mask - 1)) != 0; + if (r_exp > -OutFPBits::FRACTION_LEN) + result = static_cast( + prod_mant >> (EXTRA_FRACTION_LEN + 1 - r_exp)); else result = 0; } @@ -245,27 +264,30 @@ template <> LIBC_INLINE double fma(double x, double y, double z) { r_exp = 0; } } else { - // Return +0.0 when there is exact cancellation, i.e., x*y == -z exactly. - prod_sign = Sign::POS; + // When there is exact cancellation, i.e., x*y == -z exactly, return -0.0 if + // rounding downward and +0.0 for other rounding modes. + if (round_mode == FE_DOWNWARD) + prod_sign = Sign::NEG; + else + prod_sign = Sign::POS; } // Finalize the result. 
- int round_mode = fputil::quick_get_round(); - if (LIBC_UNLIKELY(r_exp >= FPBits::MAX_BIASED_EXPONENT)) { + if (LIBC_UNLIKELY(r_exp >= OutFPBits::MAX_BIASED_EXPONENT)) { if ((round_mode == FE_TOWARDZERO) || (round_mode == FE_UPWARD && prod_sign.is_neg()) || (round_mode == FE_DOWNWARD && prod_sign.is_pos())) { - return FPBits::max_normal(prod_sign).get_val(); + return OutFPBits::max_normal(prod_sign).get_val(); } - return FPBits::inf(prod_sign).get_val(); + return OutFPBits::inf(prod_sign).get_val(); } // Remove hidden bit and append the exponent field and sign bit. - result = (result & FPBits::FRACTION_MASK) | - (static_cast(r_exp) << FPBits::FRACTION_LEN); - if (prod_sign.is_neg()) { - result |= FPBits::SIGN_MASK; - } + result = static_cast( + (result & OutFPBits::FRACTION_MASK) | + (static_cast(r_exp) << OutFPBits::FRACTION_LEN)); + if (prod_sign.is_neg()) + result |= OutFPBits::SIGN_MASK; // Rounding. if (round_mode == FE_TONEAREST) { @@ -277,7 +299,7 @@ template <> LIBC_INLINE double fma(double x, double y, double z) { ++result; } - return cpp::bit_cast(result); + return cpp::bit_cast(result); } } // namespace generic diff --git a/libc/src/__support/FPUtil/multiply_add.h b/libc/src/__support/FPUtil/multiply_add.h index 82932da5417c8d..622914e4265c91 100644 --- a/libc/src/__support/FPUtil/multiply_add.h +++ b/libc/src/__support/FPUtil/multiply_add.h @@ -45,11 +45,11 @@ namespace LIBC_NAMESPACE { namespace fputil { LIBC_INLINE float multiply_add(float x, float y, float z) { - return fma(x, y, z); + return fma(x, y, z); } LIBC_INLINE double multiply_add(double x, double y, double z) { - return fma(x, y, z); + return fma(x, y, z); } } // namespace fputil diff --git a/libc/src/__support/big_int.h b/libc/src/__support/big_int.h index 40ad6eeed7ac26..c30c3ece54a308 100644 --- a/libc/src/__support/big_int.h +++ b/libc/src/__support/big_int.h @@ -983,23 +983,18 @@ using UInt = BigInt>; template using Int = BigInt>; -// Provides limits of U/Int<128>. 
-template <> class cpp::numeric_limits> { -public: - LIBC_INLINE static constexpr UInt<128> max() { return UInt<128>::max(); } - LIBC_INLINE static constexpr UInt<128> min() { return UInt<128>::min(); } - // Meant to match std::numeric_limits interface. - // NOLINTNEXTLINE(readability-identifier-naming) - LIBC_INLINE_VAR static constexpr int digits = 128; -}; - -template <> class cpp::numeric_limits> { -public: - LIBC_INLINE static constexpr Int<128> max() { return Int<128>::max(); } - LIBC_INLINE static constexpr Int<128> min() { return Int<128>::min(); } +// Provides limits of BigInt. +template +struct cpp::numeric_limits> { + LIBC_INLINE static constexpr BigInt max() { + return BigInt::max(); + } + LIBC_INLINE static constexpr BigInt min() { + return BigInt::min(); + } // Meant to match std::numeric_limits interface. // NOLINTNEXTLINE(readability-identifier-naming) - LIBC_INLINE_VAR static constexpr int digits = 128; + LIBC_INLINE_VAR static constexpr int digits = Bits - Signed; }; // type traits to determine whether a T is a BigInt. @@ -1073,6 +1068,18 @@ template using make_integral_or_big_int_signed_t = typename make_integral_or_big_int_signed::type; +// is_unsigned_integral_or_big_int +template +struct is_unsigned_integral_or_big_int + : cpp::bool_constant< + cpp::is_same_v>> {}; + +template +// Meant to look like helper variable templates. +// NOLINTNEXTLINE(readability-identifier-naming) +LIBC_INLINE_VAR constexpr bool is_unsigned_integral_or_big_int_v = + is_unsigned_integral_or_big_int::value; + namespace cpp { // Specialization of cpp::bit_cast ('bit.h') from T to BigInt. 
diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt index df8e6c0b253da2..4472367d6c0738 100644 --- a/libc/src/math/CMakeLists.txt +++ b/libc/src/math/CMakeLists.txt @@ -99,6 +99,8 @@ add_math_entrypoint_object(exp10f) add_math_entrypoint_object(expm1) add_math_entrypoint_object(expm1f) +add_math_entrypoint_object(f16fmaf) + add_math_entrypoint_object(f16sqrtf) add_math_entrypoint_object(fabs) diff --git a/libc/src/math/f16fmaf.h b/libc/src/math/f16fmaf.h new file mode 100644 index 00000000000000..d92cb43c292eb6 --- /dev/null +++ b/libc/src/math/f16fmaf.h @@ -0,0 +1,20 @@ +//===-- Implementation header for f16fmaf -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_F16FMAF_H +#define LLVM_LIBC_SRC_MATH_F16FMAF_H + +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE { + +float16 f16fmaf(float x, float y, float z); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_MATH_F16FMAF_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index f1f7d6c367be2d..aa0069d821d0d0 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -3602,6 +3602,19 @@ add_entrypoint_object( -O3 ) +add_entrypoint_object( + f16fmaf + SRCS + f16fmaf.cpp + HDRS + ../f16fmaf.h + DEPENDS + libc.src.__support.macros.properties.types + libc.src.__support.FPUtil.fma + COMPILE_OPTIONS + -O3 +) + add_entrypoint_object( f16sqrtf SRCS diff --git a/libc/src/math/generic/expm1f.cpp b/libc/src/math/generic/expm1f.cpp index 037e60021b2961..6b9f07476a6506 100644 --- a/libc/src/math/generic/expm1f.cpp +++ b/libc/src/math/generic/expm1f.cpp @@ -104,7 +104,7 @@ 
LLVM_LIBC_FUNCTION(float, expm1f, (float x)) { // intermediate results as it is more efficient than using an emulated // version of FMA. #if defined(LIBC_TARGET_CPU_HAS_FMA) - return fputil::fma(x, x, x); + return fputil::fma(x, x, x); #else double xd = x; return static_cast(fputil::multiply_add(xd, xd, xd)); diff --git a/libc/src/math/generic/f16fmaf.cpp b/libc/src/math/generic/f16fmaf.cpp new file mode 100644 index 00000000000000..09f27126393353 --- /dev/null +++ b/libc/src/math/generic/f16fmaf.cpp @@ -0,0 +1,19 @@ +//===-- Implementation of f16fmaf function --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/f16fmaf.h" +#include "src/__support/FPUtil/FMA.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(float16, f16fmaf, (float x, float y, float z)) { + return fputil::fma(x, y, z); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/math/generic/fma.cpp b/libc/src/math/generic/fma.cpp index e27e5baeddf588..7937766dccd71e 100644 --- a/libc/src/math/generic/fma.cpp +++ b/libc/src/math/generic/fma.cpp @@ -14,7 +14,7 @@ namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(double, fma, (double x, double y, double z)) { - return fputil::fma(x, y, z); + return fputil::fma(x, y, z); } } // namespace LIBC_NAMESPACE diff --git a/libc/src/math/generic/fmaf.cpp b/libc/src/math/generic/fmaf.cpp index 7512b82005d0f5..d367a069ea7d88 100644 --- a/libc/src/math/generic/fmaf.cpp +++ b/libc/src/math/generic/fmaf.cpp @@ -14,7 +14,7 @@ namespace LIBC_NAMESPACE { LLVM_LIBC_FUNCTION(float, fmaf, (float x, float y, float z)) { - return fputil::fma(x, y, z); + return fputil::fma(x, y, z); } } // namespace LIBC_NAMESPACE diff --git 
a/libc/src/math/generic/range_reduction_fma.h b/libc/src/math/generic/range_reduction_fma.h index aee8cbb1332a6a..82b4ae1c705e10 100644 --- a/libc/src/math/generic/range_reduction_fma.h +++ b/libc/src/math/generic/range_reduction_fma.h @@ -33,8 +33,8 @@ static constexpr double THIRTYTWO_OVER_PI[5] = { // k = round(x * 32 / pi) and y = (x * 32 / pi) - k. LIBC_INLINE int64_t small_range_reduction(double x, double &y) { double kd = fputil::nearest_integer(x * THIRTYTWO_OVER_PI[0]); - y = fputil::fma(x, THIRTYTWO_OVER_PI[0], -kd); - y = fputil::fma(x, THIRTYTWO_OVER_PI[1], y); + y = fputil::fma(x, THIRTYTWO_OVER_PI[0], -kd); + y = fputil::fma(x, THIRTYTWO_OVER_PI[1], y); return static_cast(kd); } @@ -54,12 +54,13 @@ LIBC_INLINE int64_t large_range_reduction(double x, int x_exp, double &y) { prod_hi.set_uintval(prod_hi.uintval() & ((x_exp < 55) ? (~0xfffULL) : (~0ULL))); // |x| < 2^55 double k_hi = fputil::nearest_integer(prod_hi.get_val()); - double truncated_prod = fputil::fma(x, THIRTYTWO_OVER_PI[0], -k_hi); - double prod_lo = fputil::fma(x, THIRTYTWO_OVER_PI[1], truncated_prod); + double truncated_prod = fputil::fma(x, THIRTYTWO_OVER_PI[0], -k_hi); + double prod_lo = + fputil::fma(x, THIRTYTWO_OVER_PI[1], truncated_prod); double k_lo = fputil::nearest_integer(prod_lo); - y = fputil::fma(x, THIRTYTWO_OVER_PI[1], truncated_prod - k_lo); - y = fputil::fma(x, THIRTYTWO_OVER_PI[2], y); - y = fputil::fma(x, THIRTYTWO_OVER_PI[3], y); + y = fputil::fma(x, THIRTYTWO_OVER_PI[1], truncated_prod - k_lo); + y = fputil::fma(x, THIRTYTWO_OVER_PI[2], y); + y = fputil::fma(x, THIRTYTWO_OVER_PI[3], y); return static_cast(k_lo); } @@ -74,12 +75,12 @@ LIBC_INLINE int64_t large_range_reduction(double x, int x_exp, double &y) { prod_hi.set_uintval(prod_hi.uintval() & ((x_exp < 110) ? 
(~0xfffULL) : (~0ULL))); // |x| < 2^110 double k_hi = fputil::nearest_integer(prod_hi.get_val()); - double truncated_prod = fputil::fma(x, THIRTYTWO_OVER_PI[1], -k_hi); - double prod_lo = fputil::fma(x, THIRTYTWO_OVER_PI[2], truncated_prod); + double truncated_prod = fputil::fma(x, THIRTYTWO_OVER_PI[1], -k_hi); + double prod_lo = fputil::fma(x, THIRTYTWO_OVER_PI[2], truncated_prod); double k_lo = fputil::nearest_integer(prod_lo); - y = fputil::fma(x, THIRTYTWO_OVER_PI[2], truncated_prod - k_lo); - y = fputil::fma(x, THIRTYTWO_OVER_PI[3], y); - y = fputil::fma(x, THIRTYTWO_OVER_PI[4], y); + y = fputil::fma(x, THIRTYTWO_OVER_PI[2], truncated_prod - k_lo); + y = fputil::fma(x, THIRTYTWO_OVER_PI[3], y); + y = fputil::fma(x, THIRTYTWO_OVER_PI[4], y); return static_cast(k_lo); } diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt index 79e6e89a5324ed..bb364c3f0a1751 100644 --- a/libc/test/src/math/CMakeLists.txt +++ b/libc/test/src/math/CMakeLists.txt @@ -1455,11 +1455,12 @@ add_fp_unittest( libc-math-unittests SRCS fmaf_test.cpp + HDRS + FmaTest.h DEPENDS libc.src.math.fmaf libc.src.stdlib.rand libc.src.stdlib.srand - libc.src.__support.FPUtil.fp_bits FLAGS FMA_OPT__ONLY ) @@ -1471,11 +1472,12 @@ add_fp_unittest( libc-math-unittests SRCS fma_test.cpp + HDRS + FmaTest.h DEPENDS libc.src.math.fma libc.src.stdlib.rand libc.src.stdlib.srand - libc.src.__support.FPUtil.fp_bits ) add_fp_unittest( @@ -1888,6 +1890,21 @@ add_fp_unittest( libc.src.__support.FPUtil.fp_bits ) +add_fp_unittest( + f16fmaf_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + f16fmaf_test.cpp + HDRS + FmaTest.h + DEPENDS + libc.src.math.f16fmaf + libc.src.stdlib.rand + libc.src.stdlib.srand +) + add_subdirectory(generic) add_subdirectory(smoke) diff --git a/libc/test/src/math/FmaTest.h b/libc/test/src/math/FmaTest.h index 5a40f694ebd107..53895e7d633c29 100644 --- a/libc/test/src/math/FmaTest.h +++ b/libc/test/src/math/FmaTest.h @@ -9,7 +9,6 @@ #ifndef 
LLVM_LIBC_TEST_SRC_MATH_FMATEST_H #define LLVM_LIBC_TEST_SRC_MATH_FMATEST_H -#include "src/__support/FPUtil/FPBits.h" #include "src/stdlib/rand.h" #include "src/stdlib/srand.h" #include "test/UnitTest/FEnvSafeTest.h" @@ -19,85 +18,74 @@ namespace mpfr = LIBC_NAMESPACE::testing::mpfr; -template +template class FmaTestTemplate : public LIBC_NAMESPACE::testing::FEnvSafeTest { -private: - using Func = T (*)(T, T, T); - using FPBits = LIBC_NAMESPACE::fputil::FPBits; - using StorageType = typename FPBits::StorageType; - const T min_subnormal = FPBits::min_subnormal(Sign::POS).get_val(); - const T min_normal = FPBits::min_normal(Sign::POS).get_val(); - const T max_normal = FPBits::max_normal(Sign::POS).get_val(); - const T inf = FPBits::inf(Sign::POS).get_val(); - const T neg_inf = FPBits::inf(Sign::NEG).get_val(); - const T zero = FPBits::zero(Sign::POS).get_val(); - const T neg_zero = FPBits::zero(Sign::NEG).get_val(); - const T nan = FPBits::quiet_nan().get_val(); + struct OutConstants { + DECLARE_SPECIAL_CONSTANTS(OutType) + }; - static constexpr StorageType MAX_NORMAL = FPBits::max_normal().uintval(); - static constexpr StorageType MIN_NORMAL = FPBits::min_normal().uintval(); - static constexpr StorageType MAX_SUBNORMAL = - FPBits::max_subnormal().uintval(); - static constexpr StorageType MIN_SUBNORMAL = - FPBits::min_subnormal().uintval(); + struct InConstants { + DECLARE_SPECIAL_CONSTANTS(InType) + }; - StorageType get_random_bit_pattern() { - StorageType bits{0}; - for (StorageType i = 0; i < sizeof(StorageType) / 2; ++i) { + using OutFPBits = typename OutConstants::FPBits; + using OutStorageType = typename OutConstants::StorageType; + using InFPBits = typename InConstants::FPBits; + using InStorageType = typename InConstants::StorageType; + + static constexpr OutStorageType OUT_MIN_NORMAL_U = + OutFPBits::min_normal().uintval(); + static constexpr InStorageType IN_MAX_NORMAL_U = + InFPBits::max_normal().uintval(); + static constexpr InStorageType IN_MIN_NORMAL_U 
= + InFPBits::min_normal().uintval(); + static constexpr InStorageType IN_MAX_SUBNORMAL_U = + InFPBits::max_subnormal().uintval(); + static constexpr InStorageType IN_MIN_SUBNORMAL_U = + InFPBits::min_subnormal().uintval(); + + OutConstants out; + InConstants in; + + InStorageType get_random_bit_pattern() { + InStorageType bits{0}; + for (InStorageType i = 0; i < sizeof(InStorageType) / 2; ++i) { bits = (bits << 2) + static_cast(LIBC_NAMESPACE::rand()); } return bits; } public: - void test_special_numbers(Func func) { - EXPECT_FP_EQ(func(zero, zero, zero), zero); - EXPECT_FP_EQ(func(zero, neg_zero, neg_zero), neg_zero); - EXPECT_FP_EQ(func(inf, inf, zero), inf); - EXPECT_FP_EQ(func(neg_inf, inf, neg_inf), neg_inf); - EXPECT_FP_EQ(func(inf, zero, zero), nan); - EXPECT_FP_EQ(func(inf, neg_inf, inf), nan); - EXPECT_FP_EQ(func(nan, zero, inf), nan); - EXPECT_FP_EQ(func(inf, neg_inf, nan), nan); - - // Test underflow rounding up. - EXPECT_FP_EQ(func(T(0.5), min_subnormal, min_subnormal), - FPBits(StorageType(2)).get_val()); - // Test underflow rounding down. - T v = FPBits(MIN_NORMAL + StorageType(1)).get_val(); - EXPECT_FP_EQ(func(T(1) / T(MIN_NORMAL << 1), v, min_normal), v); - // Test overflow. - T z = max_normal; - EXPECT_FP_EQ(func(T(1.75), z, -z), T(0.75) * z); - // Exact cancellation. 
- EXPECT_FP_EQ(func(T(3.0), T(5.0), -T(15.0)), T(0.0)); - EXPECT_FP_EQ(func(T(-3.0), T(5.0), T(15.0)), T(0.0)); - } + using FmaFunc = OutType (*)(InType, InType, InType); - void test_subnormal_range(Func func) { - constexpr StorageType COUNT = 100'001; - constexpr StorageType STEP = (MAX_SUBNORMAL - MIN_SUBNORMAL) / COUNT; + void test_subnormal_range(FmaFunc func) { + constexpr InStorageType COUNT = 100'001; + constexpr InStorageType STEP = + (IN_MAX_SUBNORMAL_U - IN_MIN_SUBNORMAL_U) / COUNT; LIBC_NAMESPACE::srand(1); - for (StorageType v = MIN_SUBNORMAL, w = MAX_SUBNORMAL; - v <= MAX_SUBNORMAL && w >= MIN_SUBNORMAL; v += STEP, w -= STEP) { - T x = FPBits(get_random_bit_pattern()).get_val(), y = FPBits(v).get_val(), - z = FPBits(w).get_val(); - mpfr::TernaryInput input{x, y, z}; + for (InStorageType v = IN_MIN_SUBNORMAL_U, w = IN_MAX_SUBNORMAL_U; + v <= IN_MAX_SUBNORMAL_U && w >= IN_MIN_SUBNORMAL_U; + v += STEP, w -= STEP) { + InType x = InFPBits(get_random_bit_pattern()).get_val(); + InType y = InFPBits(v).get_val(); + InType z = InFPBits(w).get_val(); + mpfr::TernaryInput input{x, y, z}; ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Fma, input, func(x, y, z), 0.5); } } - void test_normal_range(Func func) { - constexpr StorageType COUNT = 100'001; - constexpr StorageType STEP = (MAX_NORMAL - MIN_NORMAL) / COUNT; + void test_normal_range(FmaFunc func) { + constexpr InStorageType COUNT = 100'001; + constexpr InStorageType STEP = (IN_MAX_NORMAL_U - IN_MIN_NORMAL_U) / COUNT; LIBC_NAMESPACE::srand(1); - for (StorageType v = MIN_NORMAL, w = MAX_NORMAL; - v <= MAX_NORMAL && w >= MIN_NORMAL; v += STEP, w -= STEP) { - T x = FPBits(v).get_val(), y = FPBits(w).get_val(), - z = FPBits(get_random_bit_pattern()).get_val(); - mpfr::TernaryInput input{x, y, z}; + for (InStorageType v = IN_MIN_NORMAL_U, w = IN_MAX_NORMAL_U; + v <= IN_MAX_NORMAL_U && w >= IN_MIN_NORMAL_U; v += STEP, w -= STEP) { + InType x = InFPBits(v).get_val(); + InType y = InFPBits(w).get_val(); + InType z = 
InFPBits(get_random_bit_pattern()).get_val(); + mpfr::TernaryInput input{x, y, z}; ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Fma, input, func(x, y, z), 0.5); } diff --git a/libc/test/src/math/f16fmaf_test.cpp b/libc/test/src/math/f16fmaf_test.cpp new file mode 100644 index 00000000000000..e4ca88b8810e16 --- /dev/null +++ b/libc/test/src/math/f16fmaf_test.cpp @@ -0,0 +1,21 @@ +//===-- Unittests for f16fmaf ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "FmaTest.h" + +#include "src/math/f16fmaf.h" + +using LlvmLibcF16fmafTest = FmaTestTemplate; + +TEST_F(LlvmLibcF16fmafTest, SubnormalRange) { + test_subnormal_range(&LIBC_NAMESPACE::f16fmaf); +} + +TEST_F(LlvmLibcF16fmafTest, NormalRange) { + test_normal_range(&LIBC_NAMESPACE::f16fmaf); +} diff --git a/libc/test/src/math/fma_test.cpp b/libc/test/src/math/fma_test.cpp index 20224d99894be6..dd761382631d5e 100644 --- a/libc/test/src/math/fma_test.cpp +++ b/libc/test/src/math/fma_test.cpp @@ -276,10 +276,6 @@ struct LlvmLibcFmaTest : public FmaTestTemplate { } }; -TEST_F(LlvmLibcFmaTest, SpecialNumbers) { - test_special_numbers(&LIBC_NAMESPACE::fma); -} - TEST_F(LlvmLibcFmaTest, SubnormalRange) { test_subnormal_range(&LIBC_NAMESPACE::fma); } diff --git a/libc/test/src/math/fmaf_test.cpp b/libc/test/src/math/fmaf_test.cpp index b607d4a66f8eb0..0e498d46ecfb0d 100644 --- a/libc/test/src/math/fmaf_test.cpp +++ b/libc/test/src/math/fmaf_test.cpp @@ -12,10 +12,6 @@ using LlvmLibcFmafTest = FmaTestTemplate; -TEST_F(LlvmLibcFmafTest, SpecialNumbers) { - test_special_numbers(&LIBC_NAMESPACE::fmaf); -} - TEST_F(LlvmLibcFmafTest, SubnormalRange) { test_subnormal_range(&LIBC_NAMESPACE::fmaf); } diff --git 
a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt index 3e9edc51b004f0..a67d0437592d50 100644 --- a/libc/test/src/math/smoke/CMakeLists.txt +++ b/libc/test/src/math/smoke/CMakeLists.txt @@ -3109,9 +3109,10 @@ add_fp_unittest( libc-math-smoke-tests SRCS fmaf_test.cpp + HDRS + FmaTest.h DEPENDS libc.src.math.fmaf - libc.src.__support.FPUtil.fp_bits FLAGS FMA_OPT__ONLY ) @@ -3122,9 +3123,10 @@ add_fp_unittest( libc-math-smoke-tests SRCS fma_test.cpp + HDRS + FmaTest.h DEPENDS libc.src.math.fma - libc.src.__support.FPUtil.fp_bits ) add_fp_unittest( @@ -3551,6 +3553,18 @@ add_fp_unittest( libc.src.math.totalordermagf16 ) +add_fp_unittest( + f16fmaf_test + SUITE + libc-math-smoke-tests + SRCS + f16fmaf_test.cpp + HDRS + FmaTest.h + DEPENDS + libc.src.math.f16fmaf +) + add_fp_unittest( f16sqrtf_test SUITE diff --git a/libc/test/src/math/smoke/FmaTest.h b/libc/test/src/math/smoke/FmaTest.h index 7063ecf199837b..f942de37654dd3 100644 --- a/libc/test/src/math/smoke/FmaTest.h +++ b/libc/test/src/math/smoke/FmaTest.h @@ -9,51 +9,103 @@ #ifndef LLVM_LIBC_TEST_SRC_MATH_FMATEST_H #define LLVM_LIBC_TEST_SRC_MATH_FMATEST_H -#include "src/__support/FPUtil/FPBits.h" #include "test/UnitTest/FEnvSafeTest.h" #include "test/UnitTest/FPMatcher.h" #include "test/UnitTest/Test.h" -template +template class FmaTestTemplate : public LIBC_NAMESPACE::testing::FEnvSafeTest { -private: - using Func = T (*)(T, T, T); - using FPBits = LIBC_NAMESPACE::fputil::FPBits; - using StorageType = typename FPBits::StorageType; - const T inf = FPBits::inf(Sign::POS).get_val(); - const T neg_inf = FPBits::inf(Sign::NEG).get_val(); - const T zero = FPBits::zero(Sign::POS).get_val(); - const T neg_zero = FPBits::zero(Sign::NEG).get_val(); - const T nan = FPBits::quiet_nan().get_val(); + struct OutConstants { + DECLARE_SPECIAL_CONSTANTS(OutType) + }; + + struct InConstants { + DECLARE_SPECIAL_CONSTANTS(InType) + }; + + using OutFPBits = typename OutConstants::FPBits; + using 
OutStorageType = typename OutConstants::StorageType; + using InFPBits = typename InConstants::FPBits; + using InStorageType = typename InConstants::StorageType; + + static constexpr OutStorageType OUT_MIN_NORMAL_U = + OutFPBits::min_normal().uintval(); + static constexpr InStorageType IN_MIN_NORMAL_U = + InFPBits::min_normal().uintval(); + + OutConstants out; + InConstants in; public: - void test_special_numbers(Func func) { - EXPECT_FP_EQ(func(zero, zero, zero), zero); - EXPECT_FP_EQ(func(zero, neg_zero, neg_zero), neg_zero); - EXPECT_FP_EQ(func(inf, inf, zero), inf); - EXPECT_FP_EQ(func(neg_inf, inf, neg_inf), neg_inf); - EXPECT_FP_EQ(func(inf, zero, zero), nan); - EXPECT_FP_EQ(func(inf, neg_inf, inf), nan); - EXPECT_FP_EQ(func(nan, zero, inf), nan); - EXPECT_FP_EQ(func(inf, neg_inf, nan), nan); + using FmaFunc = OutType (*)(InType, InType, InType); + + void test_special_numbers(FmaFunc func) { + EXPECT_FP_EQ(out.zero, func(in.zero, in.zero, in.zero)); + EXPECT_FP_EQ(out.neg_zero, func(in.zero, in.neg_zero, in.neg_zero)); + EXPECT_FP_EQ(out.inf, func(in.inf, in.inf, in.zero)); + EXPECT_FP_EQ(out.neg_inf, func(in.neg_inf, in.inf, in.neg_inf)); + EXPECT_FP_EQ(out.aNaN, func(in.inf, in.zero, in.zero)); + EXPECT_FP_EQ(out.aNaN, func(in.inf, in.neg_inf, in.inf)); + EXPECT_FP_EQ(out.aNaN, func(in.aNaN, in.zero, in.inf)); + EXPECT_FP_EQ(out.aNaN, func(in.inf, in.neg_inf, in.aNaN)); // Test underflow rounding up. - EXPECT_FP_EQ(func(T(0.5), FPBits::min_subnormal().get_val(), - FPBits::min_subnormal().get_val()), - FPBits(StorageType(2)).get_val()); + EXPECT_FP_EQ(OutFPBits(OutStorageType(2)).get_val(), + func(OutType(0.5), out.min_denormal, out.min_denormal)); + + if constexpr (sizeof(OutType) < sizeof(InType)) { + EXPECT_FP_EQ(out.zero, + func(InType(0.5), in.min_denormal, in.min_denormal)); + } + // Test underflow rounding down. 
- StorageType MIN_NORMAL = FPBits::min_normal().uintval(); - T v = FPBits(MIN_NORMAL + StorageType(1)).get_val(); - EXPECT_FP_EQ( - func(T(1) / T(MIN_NORMAL << 1), v, FPBits::min_normal().get_val()), v); + OutType v = OutFPBits(static_cast(OUT_MIN_NORMAL_U + + OutStorageType(1))) + .get_val(); + EXPECT_FP_EQ(v, func(OutType(1) / OutType(OUT_MIN_NORMAL_U << 1), v, + out.min_normal)); + + if constexpr (sizeof(OutType) < sizeof(InType)) { + InType v = InFPBits(static_cast(IN_MIN_NORMAL_U + + InStorageType(1))) + .get_val(); + EXPECT_FP_EQ( + out.min_normal, + func(InType(1) / InType(IN_MIN_NORMAL_U << 1), v, out.min_normal)); + } + // Test overflow. - T z = FPBits::max_normal().get_val(); - EXPECT_FP_EQ(func(T(1.75), z, -z), T(0.75) * z); + OutType z = out.max_normal; + EXPECT_FP_EQ_ALL_ROUNDING(OutType(0.75) * z, func(InType(1.75), z, -z)); + // Exact cancellation. - EXPECT_FP_EQ(func(T(3.0), T(5.0), -T(15.0)), T(0.0)); - EXPECT_FP_EQ(func(T(-3.0), T(5.0), T(15.0)), T(0.0)); + EXPECT_FP_EQ_ROUNDING_NEAREST( + out.zero, func(InType(3.0), InType(5.0), InType(-15.0))); + EXPECT_FP_EQ_ROUNDING_UPWARD(out.zero, + func(InType(3.0), InType(5.0), InType(-15.0))); + EXPECT_FP_EQ_ROUNDING_TOWARD_ZERO( + out.zero, func(InType(3.0), InType(5.0), InType(-15.0))); + EXPECT_FP_EQ_ROUNDING_DOWNWARD( + out.neg_zero, func(InType(3.0), InType(5.0), InType(-15.0))); + + EXPECT_FP_EQ_ROUNDING_NEAREST( + out.zero, func(InType(-3.0), InType(5.0), InType(15.0))); + EXPECT_FP_EQ_ROUNDING_UPWARD(out.zero, + func(InType(-3.0), InType(5.0), InType(15.0))); + EXPECT_FP_EQ_ROUNDING_TOWARD_ZERO( + out.zero, func(InType(-3.0), InType(5.0), InType(15.0))); + EXPECT_FP_EQ_ROUNDING_DOWNWARD( + out.neg_zero, func(InType(-3.0), InType(5.0), InType(15.0))); } }; +#define LIST_FMA_TESTS(T, func) \ + using LlvmLibcFmaTest = FmaTestTemplate; \ + TEST_F(LlvmLibcFmaTest, SpecialNumbers) { test_special_numbers(&func); } + +#define LIST_NARROWING_FMA_TESTS(OutType, InType, func) \ + using LlvmLibcFmaTest = 
FmaTestTemplate; \ + TEST_F(LlvmLibcFmaTest, SpecialNumbers) { test_special_numbers(&func); } + #endif // LLVM_LIBC_TEST_SRC_MATH_FMATEST_H diff --git a/libc/test/src/math/smoke/f16fmaf_test.cpp b/libc/test/src/math/smoke/f16fmaf_test.cpp new file mode 100644 index 00000000000000..5e3aec768c1910 --- /dev/null +++ b/libc/test/src/math/smoke/f16fmaf_test.cpp @@ -0,0 +1,13 @@ +//===-- Unittests for f16fmaf ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "FmaTest.h" + +#include "src/math/f16fmaf.h" + +LIST_NARROWING_FMA_TESTS(float16, float, LIBC_NAMESPACE::f16fmaf) diff --git a/libc/test/src/math/smoke/fma_test.cpp b/libc/test/src/math/smoke/fma_test.cpp index 4460b80d9ad650..c5d802a532eb0d 100644 --- a/libc/test/src/math/smoke/fma_test.cpp +++ b/libc/test/src/math/smoke/fma_test.cpp @@ -10,8 +10,4 @@ #include "src/math/fma.h" -using LlvmLibcFmaTest = FmaTestTemplate; - -TEST_F(LlvmLibcFmaTest, SpecialNumbers) { - test_special_numbers(&LIBC_NAMESPACE::fma); -} +LIST_FMA_TESTS(double, LIBC_NAMESPACE::fma) diff --git a/libc/test/src/math/smoke/fmaf_test.cpp b/libc/test/src/math/smoke/fmaf_test.cpp index a645efb8776d0f..09e9c504b942a0 100644 --- a/libc/test/src/math/smoke/fmaf_test.cpp +++ b/libc/test/src/math/smoke/fmaf_test.cpp @@ -10,8 +10,4 @@ #include "src/math/fmaf.h" -using LlvmLibcFmafTest = FmaTestTemplate; - -TEST_F(LlvmLibcFmafTest, SpecialNumbers) { - test_special_numbers(&LIBC_NAMESPACE::fmaf); -} +LIST_FMA_TESTS(float, LIBC_NAMESPACE::fmaf) diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp index 100c6b1644b16d..2eac4dd8e199de 100644 --- a/libc/utils/MPFRWrapper/MPFRUtils.cpp +++ 
b/libc/utils/MPFRWrapper/MPFRUtils.cpp @@ -922,46 +922,49 @@ template void explain_binary_operation_one_output_error( Operation, const BinaryInput &, long double, double, RoundingMode); -template -void explain_ternary_operation_one_output_error(Operation op, - const TernaryInput &input, - T libc_result, - double ulp_tolerance, - RoundingMode rounding) { - unsigned int precision = get_precision(ulp_tolerance); +template +void explain_ternary_operation_one_output_error( + Operation op, const TernaryInput &input, OutputType libc_result, + double ulp_tolerance, RoundingMode rounding) { + unsigned int precision = get_precision(ulp_tolerance); MPFRNumber mpfrX(input.x, precision); MPFRNumber mpfrY(input.y, precision); MPFRNumber mpfrZ(input.z, precision); - FPBits xbits(input.x); - FPBits ybits(input.y); - FPBits zbits(input.z); + FPBits xbits(input.x); + FPBits ybits(input.y); + FPBits zbits(input.z); MPFRNumber mpfr_result = ternary_operation_one_output( op, input.x, input.y, input.z, precision, rounding); MPFRNumber mpfrMatchValue(libc_result); tlog << "Input decimal: x: " << mpfrX.str() << " y: " << mpfrY.str() << " z: " << mpfrZ.str() << '\n'; - tlog << " First input bits: " << str(FPBits(input.x)) << '\n'; - tlog << "Second input bits: " << str(FPBits(input.y)) << '\n'; - tlog << " Third input bits: " << str(FPBits(input.z)) << '\n'; + tlog << " First input bits: " << str(FPBits(input.x)) << '\n'; + tlog << "Second input bits: " << str(FPBits(input.y)) << '\n'; + tlog << " Third input bits: " << str(FPBits(input.z)) << '\n'; tlog << "Libc result: " << mpfrMatchValue.str() << '\n' << "MPFR result: " << mpfr_result.str() << '\n'; - tlog << "Libc floating point result bits: " << str(FPBits(libc_result)) - << '\n'; + tlog << "Libc floating point result bits: " + << str(FPBits(libc_result)) << '\n'; tlog << " MPFR rounded bits: " - << str(FPBits(mpfr_result.as())) << '\n'; + << str(FPBits(mpfr_result.as())) << '\n'; tlog << "ULP error: " << 
mpfr_result.ulp_as_mpfr_number(libc_result).str() << '\n'; } -template void explain_ternary_operation_one_output_error( +template void explain_ternary_operation_one_output_error( Operation, const TernaryInput &, float, double, RoundingMode); -template void explain_ternary_operation_one_output_error( +template void explain_ternary_operation_one_output_error( Operation, const TernaryInput &, double, double, RoundingMode); -template void explain_ternary_operation_one_output_error( - Operation, const TernaryInput &, long double, double, - RoundingMode); +template void +explain_ternary_operation_one_output_error(Operation, + const TernaryInput &, + long double, double, RoundingMode); +#ifdef LIBC_TYPES_HAS_FLOAT16 +template void explain_ternary_operation_one_output_error( + Operation, const TernaryInput &, float16, double, RoundingMode); +#endif template bool compare_unary_operation_single_output(Operation op, InputType input, @@ -1069,12 +1072,13 @@ template bool compare_binary_operation_one_output( Operation, const BinaryInput &, long double, double, RoundingMode); -template +template bool compare_ternary_operation_one_output(Operation op, - const TernaryInput &input, - T libc_result, double ulp_tolerance, + const TernaryInput &input, + OutputType libc_result, + double ulp_tolerance, RoundingMode rounding) { - unsigned int precision = get_precision(ulp_tolerance); + unsigned int precision = get_precision(ulp_tolerance); MPFRNumber mpfr_result = ternary_operation_one_output( op, input.x, input.y, input.z, precision, rounding); double ulp = mpfr_result.ulp(libc_result); @@ -1082,13 +1086,23 @@ bool compare_ternary_operation_one_output(Operation op, return (ulp <= ulp_tolerance); } -template bool compare_ternary_operation_one_output( - Operation, const TernaryInput &, float, double, RoundingMode); -template bool compare_ternary_operation_one_output( - Operation, const TernaryInput &, double, double, RoundingMode); -template bool compare_ternary_operation_one_output( - 
Operation, const TernaryInput &, long double, double, - RoundingMode); +template bool compare_ternary_operation_one_output(Operation, + const TernaryInput &, + float, double, RoundingMode); +template bool compare_ternary_operation_one_output(Operation, + const TernaryInput &, + double, double, + RoundingMode); +template bool +compare_ternary_operation_one_output(Operation, + const TernaryInput &, + long double, double, RoundingMode); +#ifdef LIBC_TYPES_HAS_FLOAT16 +template bool compare_ternary_operation_one_output(Operation, + const TernaryInput &, + float16, double, + RoundingMode); +#endif } // namespace internal diff --git a/libc/utils/MPFRWrapper/MPFRUtils.h b/libc/utils/MPFRWrapper/MPFRUtils.h index 805678b96c2efd..0b4f42a72ec813 100644 --- a/libc/utils/MPFRWrapper/MPFRUtils.h +++ b/libc/utils/MPFRWrapper/MPFRUtils.h @@ -129,6 +129,19 @@ struct AreMatchingBinaryInputAndBinaryOutput, BinaryOutput> { static constexpr bool VALUE = cpp::is_floating_point_v; }; +template struct IsTernaryInput { + static constexpr bool VALUE = false; +}; + +template struct IsTernaryInput> { + static constexpr bool VALUE = true; +}; + +template struct MakeScalarInput : cpp::type_identity {}; + +template +struct MakeScalarInput> : cpp::type_identity {}; + template bool compare_unary_operation_single_output(Operation op, InputType input, OutputType libc_output, @@ -152,10 +165,11 @@ bool compare_binary_operation_one_output(Operation op, T libc_output, double ulp_tolerance, RoundingMode rounding); -template +template bool compare_ternary_operation_one_output(Operation op, - const TernaryInput &input, - T libc_output, double ulp_tolerance, + const TernaryInput &input, + OutputType libc_output, + double ulp_tolerance, RoundingMode rounding); template @@ -180,12 +194,10 @@ void explain_binary_operation_one_output_error(Operation op, double ulp_tolerance, RoundingMode rounding); -template -void explain_ternary_operation_one_output_error(Operation op, - const TernaryInput &input, - T 
match_value, - double ulp_tolerance, - RoundingMode rounding); +template +void explain_ternary_operation_one_output_error( + Operation op, const TernaryInput &input, OutputType match_value, + double ulp_tolerance, RoundingMode rounding); template class MPFRMatcher : public testing::Matcher { @@ -234,7 +246,8 @@ class MPFRMatcher : public testing::Matcher { rounding); } - template bool match(const TernaryInput &in, T out) { + template + bool match(const TernaryInput &in, U out) { return compare_ternary_operation_one_output(op, in, out, ulp_tolerance, rounding); } @@ -260,7 +273,8 @@ class MPFRMatcher : public testing::Matcher { rounding); } - template void explain_error(const TernaryInput &in, T out) { + template + void explain_error(const TernaryInput &in, U out) { explain_ternary_operation_one_output_error(op, in, out, ulp_tolerance, rounding); } @@ -272,10 +286,14 @@ class MPFRMatcher : public testing::Matcher { // types. template constexpr bool is_valid_operation() { - constexpr bool IS_NARROWING_OP = op == Operation::Sqrt && - cpp::is_floating_point_v && - cpp::is_floating_point_v && - sizeof(OutputType) <= sizeof(InputType); + constexpr bool IS_NARROWING_OP = + (op == Operation::Sqrt && cpp::is_floating_point_v && + cpp::is_floating_point_v && + sizeof(OutputType) <= sizeof(InputType)) || + (op == Operation::Fma && internal::IsTernaryInput::VALUE && + cpp::is_floating_point_v< + typename internal::MakeScalarInput::type> && + cpp::is_floating_point_v); if (IS_NARROWING_OP) return true; return (Operation::BeginUnaryOperationsSingleOutput < op && From 2ed2975e8bd9e9e1a0f376bded1dad627d5eab4e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 14 Jun 2024 09:33:12 -0700 Subject: [PATCH 115/155] [RISCV] Add isel patterns for bf16 riscv_vfmv_v_f_vl of FP constant. We try not let bf16 splats through to isel, but constant folding allows FP constants to get through. Thankfully we can handle those using vmv.v.i or vmv.v.x. 
--- .../Target/RISCV/RISCVInstrInfoVVLPatterns.td | 4 +++- llvm/test/CodeGen/RISCV/rvv/vsplats-bf16.ll | 24 +++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/RISCV/rvv/vsplats-bf16.ll diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index 372f2c5bbf9f18..a7945f2ee6c1b8 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -2654,7 +2654,7 @@ foreach fvti = AllFloatVectors in { } } -foreach fvti = AllFloatVectors in { +foreach fvti = !listconcat(AllFloatVectors, AllBFloatVectors) in { defvar ivti = GetIntVTypeInfo.Vti; let Predicates = GetVTypePredicates.Predicates in { // 13.16. Vector Floating-Point Move Instruction @@ -2668,7 +2668,9 @@ foreach fvti = AllFloatVectors in { (!cast("PseudoVMV_V_X_"#fvti.LMul.MX) $passthru, GPR:$imm, GPR:$vl, fvti.Log2SEW, TU_MU)>; } +} +foreach fvti = AllFloatVectors in { let Predicates = GetVTypePredicates.Predicates in { def : Pat<(fvti.Vector (riscv_vfmv_v_f_vl fvti.Vector:$passthru, (fvti.Scalar fvti.ScalarRegClass:$rs2), VLOpFrag)), diff --git a/llvm/test/CodeGen/RISCV/rvv/vsplats-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/vsplats-bf16.ll new file mode 100644 index 00000000000000..26eb55e5f93aa7 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vsplats-bf16.ll @@ -0,0 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+f,+d,+experimental-zfbfmin,+experimental-zvfbfmin,+v -target-abi ilp32d -verify-machineinstrs < %s \ +; RUN: | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+experimental-zfbfmin,+experimental-zvfbfmin,+v -target-abi lp64d -verify-machineinstrs < %s \ +; RUN: | FileCheck %s + +define @vsplat_zero_nxv8f16() { +; CHECK-LABEL: vsplat_zero_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; 
CHECK-NEXT: ret + ret splat (bfloat zeroinitializer) +} + +define @vsplat_negzero_nxv8f16() { +; CHECK-LABEL: vsplat_negzero_nxv8f16: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, 1048568 +; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: ret + ret splat (bfloat -0.0) +} From a4f6b7dfa42bb3e129073704a5d9544f6618d222 Mon Sep 17 00:00:00 2001 From: Vlad Serebrennikov Date: Fri, 14 Jun 2024 20:33:38 +0400 Subject: [PATCH 116/155] [lldb] Stop testing LLDB on Clang changes in pre-commit CI (#95537) This is a temporary measure to alleviate Linux pre-commit CI waiting times that started snowballing [recently](https://discourse.llvm.org/t/long-wait-for-linux-presubmit-testing/79547/5). My [initial estimate](https://github.com/llvm/llvm-project/pull/94208#issuecomment-2155972973) of 4 additional minutes spent per built seems to be in the right ballpark, but looks like that was the last straw to break camel's back. It seems that CI load got past the tipping point, and now it's not able to burn through the queue over the night on workdays. I don't intend to overthrow the consensus we reached in #94208, but it shouldn't come at the expense of the whole LLVM community. I'll enable this back as soon as we have news that we got more capacity for Linux pre-commit CI. 
--- .ci/generate-buildkite-pipeline-premerge | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.ci/generate-buildkite-pipeline-premerge b/.ci/generate-buildkite-pipeline-premerge index fd603de611e562..98a8b8fff3687a 100755 --- a/.ci/generate-buildkite-pipeline-premerge +++ b/.ci/generate-buildkite-pipeline-premerge @@ -74,7 +74,8 @@ function compute-projects-to-test() { fi ;; clang) - for p in clang-tools-extra compiler-rt lldb cross-project-tests; do + # lldb is temporarily removed to alleviate Linux pre-commit CI waiting times + for p in clang-tools-extra compiler-rt cross-project-tests; do echo $p done ;; From 2f5ec13761fa672cb39ff147d876c2604c08bcae Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 14 Jun 2024 09:35:32 -0700 Subject: [PATCH 117/155] [Transforms] Migrate to a new version of getValueProfDataFromInst (#95442) Note that the version of getValueProfDataFromInst that returns bool has been "deprecated" since: commit 1e15371dd8843dfc52b9435afaa133997c1773d8 Author: Mingming Liu Date: Mon Apr 1 15:14:49 2024 -0700 --------- Co-authored-by: Mingming Liu --- .../Instrumentation/PGOMemOPSizeOpt.cpp | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp index fa93f4bd63ce67..ec19942c1d5f4a 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp @@ -177,10 +177,7 @@ class MemOPSizeOpt : public InstVisitor { MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI, OptimizationRemarkEmitter &ORE, DominatorTree *DT, TargetLibraryInfo &TLI) - : Func(Func), BFI(BFI), ORE(ORE), DT(DT), TLI(TLI), Changed(false) { - ValueDataArray = - std::make_unique(INSTR_PROF_NUM_BUCKETS); - } + : Func(Func), BFI(BFI), ORE(ORE), DT(DT), TLI(TLI), Changed(false) {} bool isChanged() const { return Changed; } void perform() { WorkList.clear(); @@ -222,8 
+219,6 @@ class MemOPSizeOpt : public InstVisitor { TargetLibraryInfo &TLI; bool Changed; std::vector WorkList; - // The space to read the profile annotation. - std::unique_ptr ValueDataArray; bool perform(MemOp MO); }; @@ -252,10 +247,12 @@ bool MemOPSizeOpt::perform(MemOp MO) { if (!MemOPOptMemcmpBcmp && (MO.isMemcmp(TLI) || MO.isBcmp(TLI))) return false; - uint32_t NumVals, MaxNumVals = INSTR_PROF_NUM_BUCKETS; + uint32_t NumVals = INSTR_PROF_NUM_BUCKETS; + uint32_t MaxNumVals = INSTR_PROF_NUM_BUCKETS; uint64_t TotalCount; - if (!getValueProfDataFromInst(*MO.I, IPVK_MemOPSize, MaxNumVals, - ValueDataArray.get(), NumVals, TotalCount)) + auto ValueDataArray = getValueProfDataFromInst( + *MO.I, IPVK_MemOPSize, MaxNumVals, NumVals, TotalCount); + if (!ValueDataArray) return false; uint64_t ActualCount = TotalCount; From c0cba5198155dba246ddd5764f57595d9bbbddef Mon Sep 17 00:00:00 2001 From: Vijay Kandiah Date: Fri, 14 Jun 2024 11:36:05 -0500 Subject: [PATCH 118/155] [Flang] Hoisting constant-sized allocas at flang codegen. (#95310) This change modifies the `AllocaOpConversion` in flang codegen to insert constant-sized LLVM allocas at the entry block of `LLVMFuncOp` or OpenACC/OpenMP Op, rather than in-place at the `fir.alloca`. This effectively hoists constant-sized FIR allocas to the proper block. When compiling the example subroutine below with `flang-new`, we get a llvm.stacksave/stackrestore pair around a constant-sized `fir.alloca i32`. 
``` subroutine test(n) block integer :: n print *, n end block end subroutine test ``` Without the proposed change, downstream LLVM compilation cannot hoist this constant-sized alloca out of the stacksave/stackrestore region which may lead to missed downstream optimizations: ``` *** IR Dump After Safe Stack instrumentation pass (safe-stack) *** define void @test_(ptr %0) !dbg !3 { %2 = call ptr @llvm.stacksave.p0(), !dbg !7 %3 = alloca i32, i64 1, align 4, !dbg !8 %4 = call ptr @_FortranAioBeginExternalListOutput(i32 6, ptr @_QQclX62c91d05f046c7a656e7978eb13f2821, i32 4), !dbg !9 %5 = load i32, ptr %3, align 4, !dbg !10, !tbaa !11 %6 = call i1 @_FortranAioOutputInteger32(ptr %4, i32 %5), !dbg !10 %7 = call i32 @_FortranAioEndIoStatement(ptr %4), !dbg !9 call void @llvm.stackrestore.p0(ptr %2), !dbg !15 ret void, !dbg !16 } ``` With this change, the `llvm.alloca` is already hoisted out of the stacksave/stackrestore region during flang codegen: ``` // -----// IR Dump After FIRToLLVMLowering (fir-to-llvm-ir) //----- // llvm.func @test_(%arg0: !llvm.ptr {fir.bindc_name = "n"}) attributes {fir.internal_name = "_QPtest"} { %0 = llvm.mlir.constant(4 : i32) : i32 %1 = llvm.mlir.constant(1 : i64) : i64 %2 = llvm.alloca %1 x i32 {bindc_name = "n"} : (i64) -> !llvm.ptr %3 = llvm.mlir.constant(6 : i32) : i32 %4 = llvm.mlir.undef : i1 %5 = llvm.call @llvm.stacksave.p0() {fastmathFlags = #llvm.fastmath} : () -> !llvm.ptr %6 = llvm.mlir.addressof @_QQclX62c91d05f046c7a656e7978eb13f2821 : !llvm.ptr %7 = llvm.call @_FortranAioBeginExternalListOutput(%3, %6, %0) {fastmathFlags = #llvm.fastmath} : (i32, !llvm.ptr, i32) -> !llvm.ptr %8 = llvm.load %2 {tbaa = [#tbaa_tag]} : !llvm.ptr -> i32 %9 = llvm.call @_FortranAioOutputInteger32(%7, %8) {fastmathFlags = #llvm.fastmath} : (!llvm.ptr, i32) -> i1 %10 = llvm.call @_FortranAioEndIoStatement(%7) {fastmathFlags = #llvm.fastmath} : (!llvm.ptr) -> i32 llvm.call @llvm.stackrestore.p0(%5) {fastmathFlags = #llvm.fastmath} : (!llvm.ptr) -> () 
llvm.return } ``` --------- Co-authored-by: Vijay Kandiah --- .../flang/Optimizer/CodeGen/FIROpPatterns.h | 13 +- flang/lib/Optimizer/CodeGen/CodeGen.cpp | 20 ++- flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp | 48 +++---- flang/test/Fir/alloc.fir | 12 +- flang/test/Fir/boxproc.fir | 10 +- .../Fir/convert-to-llvm-openmp-and-fir.fir | 117 +++++++++--------- flang/test/Fir/convert-to-llvm.fir | 22 ++-- flang/test/Integration/OpenMP/copyprivate.f90 | 2 +- flang/test/Transforms/debug-local-var-2.f90 | 10 +- 9 files changed, 137 insertions(+), 117 deletions(-) diff --git a/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h b/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h index 211acdc8a38e69..ac095664f61884 100644 --- a/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h +++ b/flang/include/flang/Optimizer/CodeGen/FIROpPatterns.h @@ -51,7 +51,9 @@ class ConvertFIRToLLVMPattern : public mlir::ConvertToLLVMPattern { /// appropriate reified structures. mlir::Value integerCast(mlir::Location loc, mlir::ConversionPatternRewriter &rewriter, - mlir::Type ty, mlir::Value val) const; + mlir::Type ty, mlir::Value val, + bool fold = false) const; + struct TypePair { mlir::Type fir; mlir::Type llvm; @@ -144,9 +146,12 @@ class ConvertFIRToLLVMPattern : public mlir::ConvertToLLVMPattern { // Find the Block in which the alloca should be inserted. // The order to recursively find the proper block: // 1. An OpenMP Op that will be outlined. - // 2. A LLVMFuncOp - // 3. The first ancestor that is an OpenMP Op or a LLVMFuncOp - mlir::Block *getBlockForAllocaInsert(mlir::Operation *op) const; + // 2. An OpenMP or OpenACC Op with one or more regions holding executable + // code. + // 3. A LLVMFuncOp + // 4. The first ancestor that is one of the above. 
+ mlir::Block *getBlockForAllocaInsert(mlir::Operation *op, + mlir::Region *parentRegion) const; // Generate an alloca of size 1 for an object of type \p llvmObjectTy in the // allocation address space provided for the architecture in the DataLayout diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 9f21c6b0cf0972..4448224024f206 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -218,7 +218,7 @@ struct AllocaOpConversion : public fir::FIROpConversion { chrTy.getContext(), chrTy.getFKind()); llvmObjectType = convertType(rawCharTy); assert(end == 1); - size = integerCast(loc, rewriter, ity, lenParams[0]); + size = integerCast(loc, rewriter, ity, lenParams[0], /*fold=*/true); } else if (auto recTy = mlir::dyn_cast(scalarType)) { mlir::LLVM::LLVMFuncOp memSizeFn = getDependentTypeMemSizeFn(recTy, alloc, rewriter); @@ -236,17 +236,29 @@ struct AllocaOpConversion : public fir::FIROpConversion { } } if (auto scaleSize = genAllocationScaleSize(alloc, ity, rewriter)) - size = rewriter.create(loc, ity, size, scaleSize); + size = + rewriter.createOrFold(loc, ity, size, scaleSize); if (alloc.hasShapeOperands()) { unsigned end = operands.size(); for (; i < end; ++i) - size = rewriter.create( - loc, ity, size, integerCast(loc, rewriter, ity, operands[i])); + size = rewriter.createOrFold( + loc, ity, size, + integerCast(loc, rewriter, ity, operands[i], /*fold=*/true)); } unsigned allocaAs = getAllocaAddressSpace(rewriter); unsigned programAs = getProgramAddressSpace(rewriter); + if (mlir::isa(size.getDefiningOp())) { + // Set the Block in which the llvm alloca should be inserted. 
+ mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp(); + mlir::Region *parentRegion = rewriter.getInsertionBlock()->getParent(); + mlir::Block *insertBlock = + getBlockForAllocaInsert(parentOp, parentRegion); + size.getDefiningOp()->moveAfter(insertBlock, insertBlock->begin()); + rewriter.setInsertionPointAfter(size.getDefiningOp()); + } + // NOTE: we used to pass alloc->getAttrs() in the builder for non opaque // pointers! Only propagate pinned and bindc_name to help debugging, but // this should have no functional purpose (and passing the operand segment diff --git a/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp b/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp index 72e072db374328..b9a28b89d9a558 100644 --- a/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp +++ b/flang/lib/Optimizer/CodeGen/FIROpPatterns.cpp @@ -62,10 +62,9 @@ mlir::LLVM::ConstantOp ConvertFIRToLLVMPattern::genConstantOffset( /// to the specific target may involve some sign-extending or truncation of /// values, particularly to fit them from abstract box types to the /// appropriate reified structures. -mlir::Value -ConvertFIRToLLVMPattern::integerCast(mlir::Location loc, - mlir::ConversionPatternRewriter &rewriter, - mlir::Type ty, mlir::Value val) const { +mlir::Value ConvertFIRToLLVMPattern::integerCast( + mlir::Location loc, mlir::ConversionPatternRewriter &rewriter, + mlir::Type ty, mlir::Value val, bool fold) const { auto valTy = val.getType(); // If the value was not yet lowered, lower its type so that it can // be used in getPrimitiveTypeSizeInBits. 
@@ -73,10 +72,17 @@ ConvertFIRToLLVMPattern::integerCast(mlir::Location loc, valTy = convertType(valTy); auto toSize = mlir::LLVM::getPrimitiveTypeSizeInBits(ty); auto fromSize = mlir::LLVM::getPrimitiveTypeSizeInBits(valTy); - if (toSize < fromSize) - return rewriter.create(loc, ty, val); - if (toSize > fromSize) - return rewriter.create(loc, ty, val); + if (fold) { + if (toSize < fromSize) + return rewriter.createOrFold(loc, ty, val); + if (toSize > fromSize) + return rewriter.createOrFold(loc, ty, val); + } else { + if (toSize < fromSize) + return rewriter.create(loc, ty, val); + if (toSize > fromSize) + return rewriter.create(loc, ty, val); + } return val; } @@ -274,16 +280,19 @@ mlir::Value ConvertFIRToLLVMPattern::computeBoxSize( // Find the Block in which the alloca should be inserted. // The order to recursively find the proper block: // 1. An OpenMP Op that will be outlined. -// 2. A LLVMFuncOp -// 3. The first ancestor that is an OpenMP Op or a LLVMFuncOp -mlir::Block * -ConvertFIRToLLVMPattern::getBlockForAllocaInsert(mlir::Operation *op) const { +// 2. An OpenMP or OpenACC Op with one or more regions holding executable code. +// 3. A LLVMFuncOp +// 4. The first ancestor that is one of the above. 
+mlir::Block *ConvertFIRToLLVMPattern::getBlockForAllocaInsert( + mlir::Operation *op, mlir::Region *parentRegion) const { if (auto iface = mlir::dyn_cast(op)) return iface.getAllocaBlock(); + if (auto recipeIface = mlir::dyn_cast(op)) + return recipeIface.getAllocaBlock(*parentRegion); if (auto llvmFuncOp = mlir::dyn_cast(op)) return &llvmFuncOp.front(); - return getBlockForAllocaInsert(op->getParentOp()); + return getBlockForAllocaInsert(op->getParentOp(), parentRegion); } // Generate an alloca of size 1 for an object of type \p llvmObjectTy in the @@ -297,16 +306,9 @@ mlir::Value ConvertFIRToLLVMPattern::genAllocaAndAddrCastWithType( mlir::ConversionPatternRewriter &rewriter) const { auto thisPt = rewriter.saveInsertionPoint(); mlir::Operation *parentOp = rewriter.getInsertionBlock()->getParentOp(); - if (mlir::isa(parentOp) || - mlir::isa(parentOp)) { - // DeclareReductionOp & PrivateClauseOp have multiple child regions. We want - // to get the first block of whichever of those regions we are currently in - mlir::Region *parentRegion = rewriter.getInsertionBlock()->getParent(); - rewriter.setInsertionPointToStart(&parentRegion->front()); - } else { - mlir::Block *insertBlock = getBlockForAllocaInsert(parentOp); - rewriter.setInsertionPointToStart(insertBlock); - } + mlir::Region *parentRegion = rewriter.getInsertionBlock()->getParent(); + mlir::Block *insertBlock = getBlockForAllocaInsert(parentOp, parentRegion); + rewriter.setInsertionPointToStart(insertBlock); auto size = genI32Constant(loc, rewriter, 1); unsigned allocaAs = getAllocaAddressSpace(rewriter); unsigned programAs = getProgramAddressSpace(rewriter); diff --git a/flang/test/Fir/alloc.fir b/flang/test/Fir/alloc.fir index ca624c0d1f9d6b..e00fc9d6649c4c 100644 --- a/flang/test/Fir/alloc.fir +++ b/flang/test/Fir/alloc.fir @@ -156,7 +156,7 @@ func.func @allocmem_array_of_dynchar(%l: i32) -> !fir.heap !fir.ref> { %1 = fir.alloca !fir.array<3x?xi32>, %e @@ -165,7 +165,7 @@ func.func 
@alloca_dynarray_of_nonchar(%e: index) -> !fir.ref // CHECK-LABEL: define ptr @alloca_dynarray_of_nonchar2( // CHECK-SAME: i64 %[[extent:.*]]) -// CHECK: %[[prod1:.*]] = mul i64 1, %[[extent]] +// CHECK: %[[prod1:.*]] = mul i64 %[[extent]], 1 // CHECK: %[[prod2:.*]] = mul i64 %[[prod1]], %[[extent]] // CHECK: alloca i32, i64 %[[prod2]] func.func @alloca_dynarray_of_nonchar2(%e: index) -> !fir.ref> { @@ -194,7 +194,7 @@ func.func @allocmem_dynarray_of_nonchar2(%e: index) -> !fir.heap !fir.ref>> { %1 = fir.alloca !fir.array<3x?x!fir.char<2,10>>, %e @@ -203,7 +203,7 @@ func.func @alloca_dynarray_of_char(%e : index) -> !fir.ref !fir.ref>> { @@ -334,10 +334,10 @@ func.func @allocmem_array_with_holes_dynchar(%arg0: index, %arg1: index) -> !fir } // CHECK-LABEL: define void @alloca_unlimited_polymorphic_box -// CHECK: %[[VAL_0:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, i64 1 // CHECK: %[[VAL_1:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, i64 1 -// CHECK: %[[VAL_2:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, i64 1 +// CHECK: %[[VAL_0:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, i64 1 // CHECK: %[[VAL_3:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]], ptr, [1 x i64] }, i64 1 +// CHECK: %[[VAL_2:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, ptr, [1 x i64] }, i64 1 func.func @alloca_unlimited_polymorphic_box() { %0 = fir.alloca !fir.class diff --git a/flang/test/Fir/boxproc.fir b/flang/test/Fir/boxproc.fir index 1fed16a808af04..834017bff71aa3 100644 --- a/flang/test/Fir/boxproc.fir +++ b/flang/test/Fir/boxproc.fir @@ -1,12 +1,12 @@ // RUN: tco %s | FileCheck %s // CHECK-LABEL: define void @_QPtest_proc_dummy() -// CHECK: %[[VAL_0:.*]] = alloca i32, i64 1, align 4 +// CHECK: %[[VAL_3:.*]] = alloca [32 x i8], i64 1, align 1 // CHECK: %[[VAL_1:.*]] = alloca { ptr }, i64 1, align 8 +// CHECK: %[[VAL_0:.*]] = alloca i32, i64 1, align 4 // CHECK: %[[VAL_2:.*]] = 
getelementptr { ptr }, ptr %[[VAL_1]], i32 0, i32 0 // CHECK: store ptr %[[VAL_0]], ptr %[[VAL_2]], align 8 // CHECK: store i32 1, ptr %[[VAL_0]], align 4 -// CHECK: %[[VAL_3:.*]] = alloca [32 x i8], i64 1, align 1 // CHECK: call void @llvm.init.trampoline(ptr %[[VAL_3]], ptr @_QFtest_proc_dummyPtest_proc_dummy_a, ptr %[[VAL_1]]) // CHECK: %[[VAL_6:.*]] = call ptr @llvm.adjust.trampoline(ptr %[[VAL_3]]) // CHECK: call void @_QPtest_proc_dummy_other(ptr %[[VAL_6]]) @@ -61,9 +61,10 @@ func.func @_QPtest_proc_dummy_other(%arg0: !fir.boxproc<() -> ()>) { } // CHECK-LABEL: define void @_QPtest_proc_dummy_char() -// CHECK: %[[VAL_0:.*]] = alloca [40 x i8], i64 1, align 1 -// CHECK: %[[VAL_1:.*]] = alloca [10 x i8], i64 1, align 1 +// CHECK: %[[VAL_20:.*]] = alloca [32 x i8], i64 1, align 1 // CHECK: %[[VAL_2:.*]] = alloca { { ptr, i64 } }, i64 1, align 8 +// CHECK: %[[VAL_1:.*]] = alloca [10 x i8], i64 1, align 1 +// CHECK: %[[VAL_0:.*]] = alloca [40 x i8], i64 1, align 1 // CHECK: %[[VAL_3:.*]] = getelementptr { { ptr, i64 } }, ptr %[[VAL_2]], i32 0, i32 0 // CHECK: %[[VAL_5:.*]] = insertvalue { ptr, i64 } undef, ptr %[[VAL_1]], 0 // CHECK: %[[VAL_6:.*]] = insertvalue { ptr, i64 } %[[VAL_5]], i64 10, 1 @@ -75,7 +76,6 @@ func.func @_QPtest_proc_dummy_other(%arg0: !fir.boxproc<() -> ()>) { // CHECK: %[[VAL_15:.*]] = icmp sgt i64 %[[VAL_13]], 0 // CHECK: %[[VAL_18:.*]] = getelementptr [10 x [1 x i8]], ptr %[[VAL_1]], i32 0, i64 %[[VAL_11]] // CHECK: store [1 x i8] c" ", ptr %[[VAL_18]], align 1 -// CHECK: %[[VAL_20:.*]] = alloca [32 x i8], i64 1, align 1 // CHECK: call void @llvm.init.trampoline(ptr %[[VAL_20]], ptr @_QFtest_proc_dummy_charPgen_message, ptr %[[VAL_2]]) // CHECK: %[[VAL_23:.*]] = call ptr @llvm.adjust.trampoline(ptr %[[VAL_20]]) // CHECK: %[[VAL_25:.*]] = insertvalue { ptr, i64 } undef, ptr %[[VAL_23]], 0 diff --git a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir index 72cd0a763e71af..45ff89bc40943b 
100644 --- a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir +++ b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir @@ -280,58 +280,58 @@ func.func @_QPomp_target_data() { return } - // CHECK-LABEL: llvm.func @_QPomp_target_data() { - // CHECK: %0 = llvm.mlir.constant(1024 : index) : i64 - // CHECK: %[[VAL_0:.*]] = llvm.mlir.constant(1 : i64) : i64 - // CHECK: %[[VAL_1:.*]] = llvm.alloca %[[VAL_0]] x !llvm.array<1024 x i32> {bindc_name = "a"} : (i64) -> !llvm.ptr - // CHECK: %3 = llvm.mlir.constant(1024 : index) : i64 - // CHECK: %[[VAL_2:.*]] = llvm.mlir.constant(1 : i64) : i64 - // CHECK: %[[VAL_3:.*]] = llvm.alloca %[[VAL_2]] x !llvm.array<1024 x i32> {bindc_name = "b"} : (i64) -> !llvm.ptr - // CHECK: %6 = llvm.mlir.constant(1024 : index) : i64 - // CHECK: %[[VAL_4:.*]] = llvm.mlir.constant(1 : i64) : i64 - // CHECK: %[[VAL_5:.*]] = llvm.alloca %[[VAL_4]] x !llvm.array<1024 x i32> {bindc_name = "c"} : (i64) -> !llvm.ptr - // CHECK: %9 = llvm.mlir.constant(1024 : index) : i64 - // CHECK: %[[VAL_6:.*]] = llvm.mlir.constant(1 : i64) : i64 - // CHECK: %[[VAL_7:.*]] = llvm.alloca %[[VAL_6]] x !llvm.array<1024 x i32> {bindc_name = "d"} : (i64) -> !llvm.ptr - // CHECK: %12 = llvm.mlir.constant(1 : index) : i64 - // CHECK: %13 = llvm.mlir.constant(0 : index) : i64 - // CHECK: %14 = llvm.mlir.constant(1023 : index) : i64 - // CHECK: %15 = omp.map.bounds lower_bound(%13 : i64) upper_bound(%14 : i64) extent(%0 : i64) stride(%12 : i64) start_idx(%12 : i64) - // CHECK: %16 = omp.map.info var_ptr(%[[VAL_1]] : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(to) capture(ByRef) bounds(%15) -> !llvm.ptr {name = "a"} - // CHECK: %17 = llvm.mlir.constant(1 : index) : i64 - // CHECK: %18 = llvm.mlir.constant(0 : index) : i64 - // CHECK: %19 = llvm.mlir.constant(1023 : index) : i64 - // CHECK: %20 = omp.map.bounds lower_bound(%18 : i64) upper_bound(%19 : i64) extent(%3 : i64) stride(%17 : i64) start_idx(%17 : i64) - // CHECK: %21 = omp.map.info var_ptr(%[[VAL_3]] : !llvm.ptr, 
!llvm.array<1024 x i32>) map_clauses(to) capture(ByRef) bounds(%20) -> !llvm.ptr {name = "b"} - // CHECK: %22 = llvm.mlir.constant(1 : index) : i64 - // CHECK: %23 = llvm.mlir.constant(0 : index) : i64 - // CHECK: %24 = llvm.mlir.constant(1023 : index) : i64 - // CHECK: %25 = omp.map.bounds lower_bound(%23 : i64) upper_bound(%24 : i64) extent(%6 : i64) stride(%22 : i64) start_idx(%22 : i64) - // CHECK: %26 = omp.map.info var_ptr(%[[VAL_5]] : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(always, exit_release_or_enter_alloc) capture(ByRef) bounds(%25) -> !llvm.ptr {name = "c"} - // CHECK: omp.target_enter_data map_entries(%16, %21, %26 : !llvm.ptr, !llvm.ptr, !llvm.ptr) - // CHECK: %27 = llvm.mlir.constant(1 : index) : i64 - // CHECK: %28 = llvm.mlir.constant(0 : index) : i64 - // CHECK: %29 = llvm.mlir.constant(1023 : index) : i64 - // CHECK: %30 = omp.map.bounds lower_bound(%28 : i64) upper_bound(%29 : i64) extent(%0 : i64) stride(%27 : i64) start_idx(%27 : i64) - // CHECK: %31 = omp.map.info var_ptr(%[[VAL_1]] : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(from) capture(ByRef) bounds(%30) -> !llvm.ptr {name = "a"} - // CHECK: %32 = llvm.mlir.constant(1 : index) : i64 - // CHECK: %33 = llvm.mlir.constant(0 : index) : i64 - // CHECK: %34 = llvm.mlir.constant(1023 : index) : i64 - // CHECK: %35 = omp.map.bounds lower_bound(%33 : i64) upper_bound(%34 : i64) extent(%3 : i64) stride(%32 : i64) start_idx(%32 : i64) - // CHECK: %36 = omp.map.info var_ptr(%[[VAL_3]] : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(from) capture(ByRef) bounds(%35) -> !llvm.ptr {name = "b"} - // CHECK: %37 = llvm.mlir.constant(1 : index) : i64 - // CHECK: %38 = llvm.mlir.constant(0 : index) : i64 - // CHECK: %39 = llvm.mlir.constant(1023 : index) : i64 - // CHECK: %40 = omp.map.bounds lower_bound(%38 : i64) upper_bound(%39 : i64) extent(%6 : i64) stride(%37 : i64) start_idx(%37 : i64) - // CHECK: %41 = omp.map.info var_ptr(%[[VAL_5]] : !llvm.ptr, !llvm.array<1024 x i32>) 
map_clauses(exit_release_or_enter_alloc) capture(ByRef) bounds(%40) -> !llvm.ptr {name = "c"} - // CHECK: %42 = llvm.mlir.constant(1 : index) : i64 - // CHECK: %43 = llvm.mlir.constant(0 : index) : i64 - // CHECK: %44 = llvm.mlir.constant(1023 : index) : i64 - // CHECK: %45 = omp.map.bounds lower_bound(%43 : i64) upper_bound(%44 : i64) extent(%9 : i64) stride(%42 : i64) start_idx(%42 : i64) - // CHECK: %46 = omp.map.info var_ptr(%[[VAL_7]] : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(always, delete) capture(ByRef) bounds(%45) -> !llvm.ptr {name = "d"} - // CHECK: omp.target_exit_data map_entries(%31, %36, %41, %46 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) - // CHECK: llvm.return - // CHECK: } +// CHECK-LABEL: llvm.func @_QPomp_target_data() { +// CHECK: %[[VAL_0:.*]] = llvm.mlir.constant(1024 : index) : i64 +// CHECK: %[[VAL_1:.*]] = llvm.mlir.constant(1 : i64) : i64 +// CHECK: %[[VAL_2:.*]] = llvm.alloca %[[VAL_1]] x !llvm.array<1024 x i32> {bindc_name = "d"} : (i64) -> !llvm.ptr +// CHECK: %[[VAL_3:.*]] = llvm.mlir.constant(1 : i64) : i64 +// CHECK: %[[VAL_4:.*]] = llvm.alloca %[[VAL_3]] x !llvm.array<1024 x i32> {bindc_name = "c"} : (i64) -> !llvm.ptr +// CHECK: %[[VAL_5:.*]] = llvm.mlir.constant(1 : i64) : i64 +// CHECK: %[[VAL_6:.*]] = llvm.alloca %[[VAL_5]] x !llvm.array<1024 x i32> {bindc_name = "b"} : (i64) -> !llvm.ptr +// CHECK: %[[VAL_7:.*]] = llvm.mlir.constant(1 : i64) : i64 +// CHECK: %[[VAL_8:.*]] = llvm.alloca %[[VAL_7]] x !llvm.array<1024 x i32> {bindc_name = "a"} : (i64) -> !llvm.ptr +// CHECK: %[[VAL_9:.*]] = llvm.mlir.constant(1024 : index) : i64 +// CHECK: %[[VAL_10:.*]] = llvm.mlir.constant(1024 : index) : i64 +// CHECK: %[[VAL_11:.*]] = llvm.mlir.constant(1024 : index) : i64 +// CHECK: %[[VAL_12:.*]] = llvm.mlir.constant(1 : index) : i64 +// CHECK: %[[VAL_13:.*]] = llvm.mlir.constant(0 : index) : i64 +// CHECK: %[[VAL_14:.*]] = llvm.mlir.constant(1023 : index) : i64 +// CHECK: %[[VAL_15:.*]] = omp.map.bounds lower_bound(%[[VAL_13]] : 
i64) upper_bound(%[[VAL_14]] : i64) extent(%[[VAL_0]] : i64) stride(%[[VAL_12]] : i64) start_idx(%[[VAL_12]] : i64) +// CHECK: %[[VAL_16:.*]] = omp.map.info var_ptr(%[[VAL_8]] : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(to) capture(ByRef) bounds(%[[VAL_15]]) -> !llvm.ptr {name = "a"} +// CHECK: %[[VAL_17:.*]] = llvm.mlir.constant(1 : index) : i64 +// CHECK: %[[VAL_18:.*]] = llvm.mlir.constant(0 : index) : i64 +// CHECK: %[[VAL_19:.*]] = llvm.mlir.constant(1023 : index) : i64 +// CHECK: %[[VAL_20:.*]] = omp.map.bounds lower_bound(%[[VAL_18]] : i64) upper_bound(%[[VAL_19]] : i64) extent(%[[VAL_9]] : i64) stride(%[[VAL_17]] : i64) start_idx(%[[VAL_17]] : i64) +// CHECK: %[[VAL_21:.*]] = omp.map.info var_ptr(%[[VAL_6]] : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(to) capture(ByRef) bounds(%[[VAL_20]]) -> !llvm.ptr {name = "b"} +// CHECK: %[[VAL_22:.*]] = llvm.mlir.constant(1 : index) : i64 +// CHECK: %[[VAL_23:.*]] = llvm.mlir.constant(0 : index) : i64 +// CHECK: %[[VAL_24:.*]] = llvm.mlir.constant(1023 : index) : i64 +// CHECK: %[[VAL_25:.*]] = omp.map.bounds lower_bound(%[[VAL_23]] : i64) upper_bound(%[[VAL_24]] : i64) extent(%[[VAL_10]] : i64) stride(%[[VAL_22]] : i64) start_idx(%[[VAL_22]] : i64) +// CHECK: %[[VAL_26:.*]] = omp.map.info var_ptr(%[[VAL_4]] : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(always, exit_release_or_enter_alloc) capture(ByRef) bounds(%[[VAL_25]]) -> !llvm.ptr {name = "c"} +// CHECK: omp.target_enter_data map_entries(%[[VAL_16]], %[[VAL_21]], %[[VAL_26]] : !llvm.ptr, !llvm.ptr, !llvm.ptr) +// CHECK: %[[VAL_27:.*]] = llvm.mlir.constant(1 : index) : i64 +// CHECK: %[[VAL_28:.*]] = llvm.mlir.constant(0 : index) : i64 +// CHECK: %[[VAL_29:.*]] = llvm.mlir.constant(1023 : index) : i64 +// CHECK: %[[VAL_30:.*]] = omp.map.bounds lower_bound(%[[VAL_28]] : i64) upper_bound(%[[VAL_29]] : i64) extent(%[[VAL_0]] : i64) stride(%[[VAL_27]] : i64) start_idx(%[[VAL_27]] : i64) +// CHECK: %[[VAL_31:.*]] = omp.map.info var_ptr(%[[VAL_8]] : 
!llvm.ptr, !llvm.array<1024 x i32>) map_clauses(from) capture(ByRef) bounds(%[[VAL_30]]) -> !llvm.ptr {name = "a"} +// CHECK: %[[VAL_32:.*]] = llvm.mlir.constant(1 : index) : i64 +// CHECK: %[[VAL_33:.*]] = llvm.mlir.constant(0 : index) : i64 +// CHECK: %[[VAL_34:.*]] = llvm.mlir.constant(1023 : index) : i64 +// CHECK: %[[VAL_35:.*]] = omp.map.bounds lower_bound(%[[VAL_33]] : i64) upper_bound(%[[VAL_34]] : i64) extent(%[[VAL_9]] : i64) stride(%[[VAL_32]] : i64) start_idx(%[[VAL_32]] : i64) +// CHECK: %[[VAL_36:.*]] = omp.map.info var_ptr(%[[VAL_6]] : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(from) capture(ByRef) bounds(%[[VAL_35]]) -> !llvm.ptr {name = "b"} +// CHECK: %[[VAL_37:.*]] = llvm.mlir.constant(1 : index) : i64 +// CHECK: %[[VAL_38:.*]] = llvm.mlir.constant(0 : index) : i64 +// CHECK: %[[VAL_39:.*]] = llvm.mlir.constant(1023 : index) : i64 +// CHECK: %[[VAL_40:.*]] = omp.map.bounds lower_bound(%[[VAL_38]] : i64) upper_bound(%[[VAL_39]] : i64) extent(%[[VAL_10]] : i64) stride(%[[VAL_37]] : i64) start_idx(%[[VAL_37]] : i64) +// CHECK: %[[VAL_41:.*]] = omp.map.info var_ptr(%[[VAL_4]] : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(exit_release_or_enter_alloc) capture(ByRef) bounds(%[[VAL_40]]) -> !llvm.ptr {name = "c"} +// CHECK: %[[VAL_42:.*]] = llvm.mlir.constant(1 : index) : i64 +// CHECK: %[[VAL_43:.*]] = llvm.mlir.constant(0 : index) : i64 +// CHECK: %[[VAL_44:.*]] = llvm.mlir.constant(1023 : index) : i64 +// CHECK: %[[VAL_45:.*]] = omp.map.bounds lower_bound(%[[VAL_43]] : i64) upper_bound(%[[VAL_44]] : i64) extent(%[[VAL_11]] : i64) stride(%[[VAL_42]] : i64) start_idx(%[[VAL_42]] : i64) +// CHECK: %[[VAL_46:.*]] = omp.map.info var_ptr(%[[VAL_2]] : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(always, delete) capture(ByRef) bounds(%[[VAL_45]]) -> !llvm.ptr {name = "d"} +// CHECK: omp.target_exit_data map_entries(%[[VAL_31]], %[[VAL_36]], %[[VAL_41]], %[[VAL_46]] : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) +// CHECK: llvm.return +// CHECK: } 
// ----- @@ -374,9 +374,9 @@ func.func @_QPopenmp_target_data_region() { // CHECK-LABEL: llvm.func @_QPopenmp_target_data_region() { // CHECK: %[[VAL_0:.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: %[[VAL_1:.*]] = llvm.alloca %[[VAL_0]] x !llvm.array<1024 x i32> {bindc_name = "a"} : (i64) -> !llvm.ptr // CHECK: %[[VAL_2:.*]] = llvm.mlir.constant(1 : i64) : i64 // CHECK: %[[VAL_3:.*]] = llvm.alloca %[[VAL_2]] x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr +// CHECK: %[[VAL_1:.*]] = llvm.alloca %[[VAL_0]] x !llvm.array<1024 x i32> {bindc_name = "a"} : (i64) -> !llvm.ptr // CHECK: %[[VAL_MAX:.*]] = llvm.mlir.constant(1024 : index) : i64 // CHECK: %[[VAL_ONE:.*]] = llvm.mlir.constant(1 : index) : i64 // CHECK: %[[VAL_ZERO:.*]] = llvm.mlir.constant(0 : index) : i64 @@ -675,9 +675,9 @@ func.func @_QPsb() { } // CHECK: llvm.func @_QPsb() { -// CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i32) : i32 // CHECK: %[[SIZE:.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: %[[LI_REF:.*]] = llvm.alloca %6 x i32 {bindc_name = "li"} : (i64) -> !llvm.ptr +// CHECK: %[[LI_REF:.*]] = llvm.alloca %[[SIZE]] x i32 {bindc_name = "li"} : (i64) -> !llvm.ptr +// CHECK: %[[ONE:.*]] = llvm.mlir.constant(1 : i32) : i32 // CHECK: omp.sections { // CHECK: omp.section { // CHECK: llvm.br ^[[BB_ENTRY:.*]]({{.*}}) @@ -715,7 +715,7 @@ func.func @_QPsb() { // CHECK: } // CHECK-LABEL: @_QPsimple_reduction // CHECK-SAME: %[[ARRAY_REF:.*]]: !llvm.ptr -// CHECK: %[[RED_ACCUMULATOR:.*]] = llvm.alloca %2 x i32 {bindc_name = "x"} : (i64) -> !llvm.ptr +// CHECK: %[[RED_ACCUMULATOR:.*]] = llvm.alloca %1 x i32 {bindc_name = "x"} : (i64) -> !llvm.ptr // CHECK: omp.parallel { // CHECK: omp.wsloop reduction(@[[EQV_REDUCTION]] %[[RED_ACCUMULATOR]] -> %[[PRV:.+]] : !llvm.ptr) { // CHECK-NEXT: omp.loop_nest @@ -797,6 +797,7 @@ func.func @_QPs(%arg0: !fir.ref> {fir.bindc_name = "x"}) { // Test if llvm.alloca is properly inserted in the omp section +//CHECK: %[[CONST0:.*]] = llvm.mlir.constant(1 : i64) : i64 
//CHECK: %[[CONST:.*]] = llvm.mlir.constant(1 : i64) : i64 //CHECK: %[[ALLOCA:.*]] = llvm.alloca %[[CONST]] x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {bindc_name = "iattr"} : (i64) -> !llvm.ptr //CHECK: omp.parallel { @@ -907,9 +908,9 @@ omp.critical.declare @help hint(contended) // CHECK: llvm.func @omp_critical_() { func.func @omp_critical_() { -// CHECK: %[[X_REF:.*]] = llvm.alloca %{{.*}} x i32 {bindc_name = "x"} : (i64) -> !llvm.ptr - %0 = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFomp_criticalEx"} // CHECK: %[[Y_REF:.*]] = llvm.alloca %{{.*}} x i32 {bindc_name = "y"} : (i64) -> !llvm.ptr + %0 = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFomp_criticalEx"} +// CHECK: %[[X_REF:.*]] = llvm.alloca %{{.*}} x i32 {bindc_name = "x"} : (i64) -> !llvm.ptr %1 = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFomp_criticalEy"} // CHECK: omp.critical(@help) omp.critical(@help) { diff --git a/flang/test/Fir/convert-to-llvm.fir b/flang/test/Fir/convert-to-llvm.fir index ee116e998c22fa..d7059671d3a882 100644 --- a/flang/test/Fir/convert-to-llvm.fir +++ b/flang/test/Fir/convert-to-llvm.fir @@ -1178,7 +1178,7 @@ func.func @alloca_fixed_char_array(%e : index) -> !fir.ref !llvm.ptr // CHECK-DAG: [[ONE:%.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: [[PROD1:%.*]] = llvm.mul [[ONE]], [[E]] : i64 +// CHECK: [[PROD1:%.*]] = llvm.mul [[E]], [[ONE]] : i64 // CHECK: [[PROD2:%.*]] = llvm.mul [[PROD1]], [[E]] : i64 // GENERIC: [[A:%.*]] = llvm.alloca [[PROD2]] x !llvm.array<8 x i8> // AMDGPU: [[AA:%.*]] = llvm.alloca [[PROD2]] x !llvm.array<8 x i8> : (i64) -> !llvm.ptr<5> @@ -1225,7 +1225,7 @@ func.func @alloca_multidim_array(%0 : index) -> !fir.ref // CHECK-SAME: ([[OP1:%.*]]: i64) -> !llvm.ptr // CHECK: [[OP2:%.*]] = llvm.mlir.constant(24 : index) : i64 // CHECK: [[ONE:%.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: [[MUL1:%.*]] = llvm.mul [[ONE]], [[OP1]] : i64 +// CHECK: [[MUL1:%.*]] = llvm.mul [[OP1]], [[ONE]] : i64 // CHECK: [[TOTAL:%.*]] = llvm.mul 
[[MUL1]], [[OP2]] : i64 // GENERIC: [[A:%.*]] = llvm.alloca [[TOTAL]] x !llvm.array<32 x array<16 x array<8 x f32>>> // AMDGPU: [[AA:%.*]] = llvm.alloca [[TOTAL]] x !llvm.array<32 x array<16 x array<8 x f32>>> : (i64) -> !llvm.ptr<5> @@ -1246,7 +1246,7 @@ func.func @alloca_const_interior_array(%0 : index) -> !fir.ref !llvm.ptr // CHECK: [[OP2:%.*]] = llvm.mlir.constant(64 : index) : i64 // CHECK: [[ONE:%.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: [[MUL1:%.*]] = llvm.mul [[ONE]], [[OP1]] : i64 +// CHECK: [[MUL1:%.*]] = llvm.mul [[OP1]], [[ONE]] : i64 // CHECK: [[TOTAL:%.*]] = llvm.mul [[MUL1]], [[OP2]] : i64 // GENERIC: [[A:%.*]] = llvm.alloca [[TOTAL]] x !llvm.array<9 x array<8 x f32>> // AMDGPU: [[AA:%.*]] = llvm.alloca [[TOTAL]] x !llvm.array<9 x array<8 x f32>> : (i64) -> !llvm.ptr<5> @@ -1937,7 +1937,7 @@ func.func private @_QPxb(!fir.box>) // CHECK: %[[N2_TMP:.*]] = llvm.sub %[[N]], %[[SH2]] : i64 // CHECK: %[[N2:.*]] = llvm.add %[[N2_TMP]], %[[C1]] : i64 // CHECK: %[[C1_0:.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: %[[ARR_SIZE_TMP1:.*]] = llvm.mul %[[C1_0]], %[[N1]] : i64 +// CHECK: %[[ARR_SIZE_TMP1:.*]] = llvm.mul %[[N1]], %[[C1_0]] : i64 // CHECK: %[[ARR_SIZE:.*]] = llvm.mul %[[ARR_SIZE_TMP1]], %[[N2]] : i64 // GENERIC: %[[ARR:.*]] = llvm.alloca %[[ARR_SIZE]] x f64 {bindc_name = "arr"} : (i64) -> !llvm.ptr // AMDGPU: %[[AR:.*]] = llvm.alloca %[[ARR_SIZE]] x f64 {bindc_name = "arr"} : (i64) -> !llvm.ptr<5> @@ -2015,17 +2015,17 @@ func.func private @_QPtest_dt_callee(%arg0: !fir.box>) // AMDGPU: %[[AA:.*]] = llvm.alloca %[[ALLOCA_SIZE]] x !llvm.struct<(ptr, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, i{{.*}}, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<5> // AMDGPU: %[[ALLOCA:.*]] = llvm.addrspacecast %[[AA]] : !llvm.ptr<5> to !llvm.ptr // CHECK: %[[C20:.*]] = llvm.mlir.constant(20 : index) : i64 -// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i64) : i64 -// CHECK: %[[C10:.*]] = llvm.mlir.constant(10 : i64) : i64 -// 
CHECK: %[[C2:.*]] = llvm.mlir.constant(2 : i64) : i64 -// CHECK: %[[ALLOCA_SIZE_V:.*]] = llvm.mlir.constant(1 : i64) : i64 -// GENERIC: %[[V:.*]] = llvm.alloca %[[ALLOCA_SIZE_V]] x i32 {bindc_name = "v"} : (i64) -> !llvm.ptr -// AMDGPU: %[[AB:.*]] = llvm.alloca %[[ALLOCA_SIZE_V]] x i32 {bindc_name = "v"} : (i64) -> !llvm.ptr<5> -// AMDGPU: %[[V:.*]] = llvm.addrspacecast %[[AB]] : !llvm.ptr<5> to !llvm.ptr // CHECK: %[[ALLOCA_SIZE_X:.*]] = llvm.mlir.constant(1 : i64) : i64 // GENERIC: %[[X:.*]] = llvm.alloca %[[ALLOCA_SIZE_X]] x !llvm.array<20 x struct<"_QFtest_dt_sliceTt", (i32, i32)>> {bindc_name = "x"} : (i64) -> !llvm.ptr // AMDGPU: %[[AC:.*]] = llvm.alloca %[[ALLOCA_SIZE_X]] x !llvm.array<20 x struct<"_QFtest_dt_sliceTt", (i32, i32)>> {bindc_name = "x"} : (i64) -> !llvm.ptr<5> // AMDGPU: %[[X:.*]] = llvm.addrspacecast %[[AC]] : !llvm.ptr<5> to !llvm.ptr +// CHECK: %[[ALLOCA_SIZE_V:.*]] = llvm.mlir.constant(1 : i64) : i64 +// GENERIC: %[[V:.*]] = llvm.alloca %[[ALLOCA_SIZE_V]] x i32 {bindc_name = "v"} : (i64) -> !llvm.ptr +// AMDGPU: %[[AB:.*]] = llvm.alloca %[[ALLOCA_SIZE_V]] x i32 {bindc_name = "v"} : (i64) -> !llvm.ptr<5> +// AMDGPU: %[[V:.*]] = llvm.addrspacecast %[[AB]] : !llvm.ptr<5> to !llvm.ptr +// CHECK: %[[C1:.*]] = llvm.mlir.constant(1 : i64) : i64 +// CHECK: %[[C10:.*]] = llvm.mlir.constant(10 : i64) : i64 +// CHECK: %[[C2:.*]] = llvm.mlir.constant(2 : i64) : i64 // CHECK: %[[TYPE_CODE:.*]] = llvm.mlir.constant(9 : i32) : i32 // CHECK: %[[NULL:.*]] = llvm.mlir.zero : !llvm.ptr // CHECK: %[[GEP:.*]] = llvm.getelementptr %[[NULL]][1] diff --git a/flang/test/Integration/OpenMP/copyprivate.f90 b/flang/test/Integration/OpenMP/copyprivate.f90 index d32319a18c28bb..dd69ebdb881a17 100644 --- a/flang/test/Integration/OpenMP/copyprivate.f90 +++ b/flang/test/Integration/OpenMP/copyprivate.f90 @@ -33,8 +33,8 @@ !CHECK-NEXT: } !CHECK-LABEL: define internal void @test_scalar_..omp_par({{.*}}) -!CHECK: %[[I:.*]] = alloca i32, i64 1 !CHECK: %[[J:.*]] = alloca i32, 
i64 1 +!CHECK: %[[I:.*]] = alloca i32, i64 1 !CHECK: %[[DID_IT:.*]] = alloca i32 !CHECK: store i32 0, ptr %[[DID_IT]] !CHECK: %[[THREAD_NUM1:.*]] = call i32 @__kmpc_global_thread_num(ptr @[[LOC:.*]]) diff --git a/flang/test/Transforms/debug-local-var-2.f90 b/flang/test/Transforms/debug-local-var-2.f90 index ce78bfd0050569..79fe1bab6e35bc 100644 --- a/flang/test/Transforms/debug-local-var-2.f90 +++ b/flang/test/Transforms/debug-local-var-2.f90 @@ -6,12 +6,12 @@ ! This tests checks the debug information for local variables in llvm IR. ! BOTH-LABEL: define void @_QQmain -! BOTH-DAG: %[[AL11:.*]] = alloca i32 -! BOTH-DAG: %[[AL12:.*]] = alloca i64 -! BOTH-DAG: %[[AL13:.*]] = alloca i8 -! BOTH-DAG: %[[AL14:.*]] = alloca i32 -! BOTH-DAG: %[[AL15:.*]] = alloca float ! BOTH-DAG: %[[AL16:.*]] = alloca double +! BOTH-DAG: %[[AL15:.*]] = alloca float +! BOTH-DAG: %[[AL14:.*]] = alloca i32 +! BOTH-DAG: %[[AL13:.*]] = alloca i8 +! BOTH-DAG: %[[AL12:.*]] = alloca i64 +! BOTH-DAG: %[[AL11:.*]] = alloca i32 ! INTRINSICS-DAG: call void @llvm.dbg.declare(metadata ptr %[[AL11]], metadata ![[I4:.*]], metadata !DIExpression()) ! INTRINSICS-DAG: call void @llvm.dbg.declare(metadata ptr %[[AL12]], metadata ![[I8:.*]], metadata !DIExpression()) ! INTRINSICS-DAG: call void @llvm.dbg.declare(metadata ptr %[[AL13]], metadata ![[L1:.*]], metadata !DIExpression()) From 8b9dce333f71bc21b3534e89a41e1ea8672aa063 Mon Sep 17 00:00:00 2001 From: Haowei Date: Fri, 14 Jun 2024 09:37:26 -0700 Subject: [PATCH 119/155] [libc++] Add default copy ctor to "__chrono/exception.h" (#95338) After PR#90394, "__chrono/exception.h" will trigger "deprecated-copy-with-user-provided-dtor" warning on Windows x64 runtime testing with ToT Clang. This patch addresses this issue by explicitly adding those default copy ctors. It is a bit weird that the same warning will not happen when testing on Linux x64 under the same condition, despite the warning flag was enabled (with `-Wdeprecated-copy -Wdeprecated-copy-dtor`). 
It might be a bug. --- libcxx/include/__chrono/exception.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/libcxx/include/__chrono/exception.h b/libcxx/include/__chrono/exception.h index 75fd0615b7e081..266f8fac441760 100644 --- a/libcxx/include/__chrono/exception.h +++ b/libcxx/include/__chrono/exception.h @@ -48,6 +48,9 @@ class nonexistent_local_time : public runtime_error { "creating an nonexistent_local_time from a local_info that is not non-existent"); } + _LIBCPP_HIDE_FROM_ABI nonexistent_local_time(const nonexistent_local_time&) = default; + _LIBCPP_HIDE_FROM_ABI nonexistent_local_time& operator=(const nonexistent_local_time&) = default; + _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI ~nonexistent_local_time() override; // exported as key function private: @@ -89,6 +92,9 @@ class ambiguous_local_time : public runtime_error { "creating an ambiguous_local_time from a local_info that is not ambiguous"); } + _LIBCPP_HIDE_FROM_ABI ambiguous_local_time(const ambiguous_local_time&) = default; + _LIBCPP_HIDE_FROM_ABI ambiguous_local_time& operator=(const ambiguous_local_time&) = default; + _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI ~ambiguous_local_time() override; // exported as key function private: From f808abf508a6b890b40fc2594ea36ce896bb1f37 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 14 Jun 2024 09:39:32 -0700 Subject: [PATCH 120/155] [MC] Add MCFragment allocation helpers `allocFragment` might be changed to a placement new when the allocation strategy changes. 
`allocInitialFragment` is to deduplicate the following pattern ``` auto *F = new MCDataFragment(); Result->addFragment(*F); F->setParent(Result); ``` Pull Request: https://github.com/llvm/llvm-project/pull/95197 --- llvm/include/llvm/MC/MCCodeView.h | 4 ++- llvm/include/llvm/MC/MCContext.h | 7 ++++ llvm/lib/MC/MCAssembler.cpp | 7 ++-- llvm/lib/MC/MCCodeView.cpp | 16 ++++----- llvm/lib/MC/MCContext.cpp | 36 ++++++++----------- llvm/lib/MC/MCELFStreamer.cpp | 9 ++--- llvm/lib/MC/MCMachOStreamer.cpp | 10 ++++-- llvm/lib/MC/MCObjectStreamer.cpp | 29 +++++++++------ llvm/lib/MC/MCPseudoProbe.cpp | 3 +- llvm/lib/MC/MCWinCOFFStreamer.cpp | 5 +-- llvm/lib/MC/WinCOFFObjectWriter.cpp | 10 +++--- .../Target/X86/MCTargetDesc/X86AsmBackend.cpp | 6 ++-- 12 files changed, 83 insertions(+), 59 deletions(-) diff --git a/llvm/include/llvm/MC/MCCodeView.h b/llvm/include/llvm/MC/MCCodeView.h index d15f2e42c6cc96..b1d8fe37a3188e 100644 --- a/llvm/include/llvm/MC/MCCodeView.h +++ b/llvm/include/llvm/MC/MCCodeView.h @@ -143,7 +143,7 @@ struct MCCVFunctionInfo { /// Holds state from .cv_file and .cv_loc directives for later emission. class CodeViewContext { public: - CodeViewContext(); + CodeViewContext(MCContext *MCCtx) : MCCtx(MCCtx) {} ~CodeViewContext(); CodeViewContext &operator=(const CodeViewContext &other) = delete; @@ -223,6 +223,8 @@ class CodeViewContext { std::pair addToStringTable(StringRef S); private: + MCContext *MCCtx; + /// Map from string to string table offset. 
StringMap StringTable; diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h index ad412409b3e13d..7c70a29b243de6 100644 --- a/llvm/include/llvm/MC/MCContext.h +++ b/llvm/include/llvm/MC/MCContext.h @@ -44,6 +44,7 @@ namespace llvm { class CodeViewContext; class MCAsmInfo; +class MCDataFragment; class MCInst; class MCLabel; class MCObjectFileInfo; @@ -345,6 +346,8 @@ class MCContext { void reportCommon(SMLoc Loc, std::function); + MCDataFragment *allocInitialFragment(MCSection &Sec); + MCSymbol *createSymbolImpl(const StringMapEntry *Name, bool IsTemporary); MCSymbol *createSymbol(StringRef Name, bool AlwaysAddSuffix, @@ -437,6 +440,10 @@ class MCContext { /// Create and return a new MC instruction. MCInst *createMCInst(); + template F *allocFragment(Args &&...args) { + return new F(std::forward(args)...); + } + /// \name Symbol Management /// @{ diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp index 4ff606d3732388..08420ed2b3a39e 100644 --- a/llvm/lib/MC/MCAssembler.cpp +++ b/llvm/lib/MC/MCAssembler.cpp @@ -820,8 +820,11 @@ void MCAssembler::layout(MCAsmLayout &Layout) { for (MCSection &Sec : *this) { // Create dummy fragments to eliminate any empty sections, this simplifies // layout. - if (Sec.empty()) - new MCDataFragment(&Sec); + if (Sec.empty()) { + auto *F = getContext().allocFragment(); + F->setParent(&Sec); + Sec.addFragment(*F); + } Sec.setOrdinal(SectionIndex++); } diff --git a/llvm/lib/MC/MCCodeView.cpp b/llvm/lib/MC/MCCodeView.cpp index d234ce110918e8..713ae07c3a730c 100644 --- a/llvm/lib/MC/MCCodeView.cpp +++ b/llvm/lib/MC/MCCodeView.cpp @@ -26,8 +26,6 @@ using namespace llvm; using namespace llvm::codeview; -CodeViewContext::CodeViewContext() = default; - CodeViewContext::~CodeViewContext() { // If someone inserted strings into the string table but never actually // emitted them somewhere, clean up the fragment. 
@@ -138,7 +136,7 @@ void CodeViewContext::recordCVLoc(MCContext &Ctx, const MCSymbol *Label, MCDataFragment *CodeViewContext::getStringTableFragment() { if (!StrTabFragment) { - StrTabFragment = new MCDataFragment(); + StrTabFragment = MCCtx->allocFragment(); // Start a new string table out with a null byte. StrTabFragment->getContents().push_back('\0'); } @@ -450,9 +448,9 @@ void CodeViewContext::emitInlineLineTableForFunction(MCObjectStreamer &OS, const MCSymbol *FnEndSym) { // Create and insert a fragment into the current section that will be encoded // later. - new MCCVInlineLineTableFragment(PrimaryFunctionId, SourceFileId, - SourceLineNum, FnStartSym, FnEndSym, - OS.getCurrentSectionOnly()); + auto *F = MCCtx->allocFragment( + PrimaryFunctionId, SourceFileId, SourceLineNum, FnStartSym, FnEndSym); + OS.insert(F); } MCFragment *CodeViewContext::emitDefRange( @@ -461,8 +459,10 @@ MCFragment *CodeViewContext::emitDefRange( StringRef FixedSizePortion) { // Create and insert a fragment into the current section that will be encoded // later. - return new MCCVDefRangeFragment(Ranges, FixedSizePortion, - OS.getCurrentSectionOnly()); + auto *F = + MCCtx->allocFragment(Ranges, FixedSizePortion); + OS.insert(F); + return F; } static unsigned computeLabelDiff(MCAsmLayout &Layout, const MCSymbol *Begin, diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp index 0522f1bd7c7e4a..f12a3bc0e56f57 100644 --- a/llvm/lib/MC/MCContext.cpp +++ b/llvm/lib/MC/MCContext.cpp @@ -195,6 +195,15 @@ MCInst *MCContext::createMCInst() { return new (MCInstAllocator.Allocate()) MCInst; } +// Allocate the initial MCDataFragment for the begin symbol. 
+MCDataFragment *MCContext::allocInitialFragment(MCSection &Sec) { + assert(!Sec.curFragList()->Head); + auto *F = allocFragment(); + F->setParent(&Sec); + Sec.addFragment(*F); + return F; +} + //===----------------------------------------------------------------------===// // Symbol Manipulation //===----------------------------------------------------------------------===// @@ -497,11 +506,8 @@ MCSectionELF *MCContext::createELFSectionImpl(StringRef Section, unsigned Type, MCSectionELF(Section, Type, Flags, K, EntrySize, Group, Comdat, UniqueID, R, LinkedToSym); - auto *F = new MCDataFragment(); - Ret->addFragment(*F); - F->setParent(Ret); + auto *F = allocInitialFragment(*Ret); R->setFragment(F); - return Ret; } @@ -797,11 +803,8 @@ MCSectionWasm *MCContext::getWasmSection(const Twine &Section, SectionKind Kind, MCSectionWasm(CachedName, Kind, Flags, GroupSym, UniqueID, Begin); Entry.second = Result; - auto *F = new MCDataFragment(); - Result->addFragment(*F); - F->setParent(Result); + auto *F = allocInitialFragment(*Result); Begin->setFragment(F); - return Result; } @@ -863,10 +866,7 @@ MCSectionXCOFF *MCContext::getXCOFFSection( Entry.second = Result; - auto *F = new MCDataFragment(); - Result->addFragment(*F); - F->setParent(Result); - + auto *F = allocInitialFragment(*Result); if (Begin) Begin->setFragment(F); @@ -886,10 +886,7 @@ MCSectionSPIRV *MCContext::getSPIRVSection() { MCSectionSPIRV *Result = new (SPIRVAllocator.Allocate()) MCSectionSPIRV(SectionKind::getText(), Begin); - auto *F = new MCDataFragment(); - Result->addFragment(*F); - F->setParent(Result); - + allocInitialFragment(*Result); return Result; } @@ -909,10 +906,7 @@ MCSectionDXContainer *MCContext::getDXContainerSection(StringRef Section, new (DXCAllocator.Allocate()) MCSectionDXContainer(Name, K, nullptr); // The first fragment will store the header - auto *F = new MCDataFragment(); - MapIt->second->addFragment(*F); - F->setParent(MapIt->second); - + allocInitialFragment(*MapIt->second); 
return MapIt->second; } @@ -1043,7 +1037,7 @@ void MCContext::finalizeDwarfSections(MCStreamer &MCOS) { CodeViewContext &MCContext::getCVContext() { if (!CVContext) - CVContext.reset(new CodeViewContext); + CVContext.reset(new CodeViewContext(this)); return *CVContext; } diff --git a/llvm/lib/MC/MCELFStreamer.cpp b/llvm/lib/MC/MCELFStreamer.cpp index 23e926c3a9d14b..6bd6fe7edc87a0 100644 --- a/llvm/lib/MC/MCELFStreamer.cpp +++ b/llvm/lib/MC/MCELFStreamer.cpp @@ -585,7 +585,7 @@ void MCELFStreamer::emitInstToData(const MCInst &Inst, // When not in a bundle-locked group and the -mc-relax-all flag is used, // we create a new temporary fragment which will be later merged into // the current fragment. - DF = new MCDataFragment(); + DF = getContext().allocFragment(); else if (isBundleLocked() && !Sec.isBundleGroupBeforeFirstInst()) { // If we are bundle-locked, we re-use the current fragment. // The bundle-locking directive ensures this is a new data fragment. @@ -596,13 +596,14 @@ void MCELFStreamer::emitInstToData(const MCInst &Inst, // Optimize memory usage by emitting the instruction to a // MCCompactEncodedInstFragment when not in a bundle-locked group and // there are no fixups registered. 
- MCCompactEncodedInstFragment *CEIF = new MCCompactEncodedInstFragment(); + MCCompactEncodedInstFragment *CEIF = + getContext().allocFragment(); insert(CEIF); CEIF->getContents().append(Code.begin(), Code.end()); CEIF->setHasInstructions(STI); return; } else { - DF = new MCDataFragment(); + DF = getContext().allocFragment(); insert(DF); } if (Sec.getBundleLockState() == MCSection::BundleLockedAlignToEnd) { @@ -661,7 +662,7 @@ void MCELFStreamer::emitBundleLock(bool AlignToEnd) { if (getAssembler().getRelaxAll() && !isBundleLocked()) { // TODO: drop the lock state and set directly in the fragment - MCDataFragment *DF = new MCDataFragment(); + MCDataFragment *DF = getContext().allocFragment(); BundleGroups.push_back(DF); } diff --git a/llvm/lib/MC/MCMachOStreamer.cpp b/llvm/lib/MC/MCMachOStreamer.cpp index 466aa633f00871..6b2e411b61505a 100644 --- a/llvm/lib/MC/MCMachOStreamer.cpp +++ b/llvm/lib/MC/MCMachOStreamer.cpp @@ -199,7 +199,7 @@ void MCMachOStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) { // We have to create a new fragment if this is an atom defining symbol, // fragments cannot span atoms. if (getAssembler().isSymbolLinkerVisible(*Symbol)) - insert(new MCDataFragment()); + insert(getContext().allocFragment()); MCObjectStreamer::emitLabel(Symbol, Loc); @@ -555,7 +555,9 @@ void MCMachOStreamer::finalizeCGProfile() { MCSection *CGProfileSection = Asm.getContext().getMachOSection( "__LLVM", "__cg_profile", 0, SectionKind::getMetadata()); Asm.registerSection(*CGProfileSection); - auto *Frag = new MCDataFragment(CGProfileSection); + auto *Frag = getContext().allocFragment(); + Frag->setParent(CGProfileSection); + CGProfileSection->addFragment(*Frag); // For each entry, reserve space for 2 32-bit indices and a 64-bit count. 
size_t SectionBytes = Asm.CGProfile.size() * (2 * sizeof(uint32_t) + sizeof(uint64_t)); @@ -595,7 +597,9 @@ void MCMachOStreamer::createAddrSigSection() { MCSection *AddrSigSection = Asm.getContext().getObjectFileInfo()->getAddrSigSection(); Asm.registerSection(*AddrSigSection); - auto *Frag = new MCDataFragment(AddrSigSection); + auto *Frag = getContext().allocFragment(); + Frag->setParent(AddrSigSection); + AddrSigSection->addFragment(*Frag); // We will generate a series of pointer-sized symbol relocations at offset // 0x0. Set the section size to be large enough to contain a single pointer // (instead of emitting a zero-sized section) so these relocations are diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp index bf1ce76cdc14bd..24bed3119d6639 100644 --- a/llvm/lib/MC/MCObjectStreamer.cpp +++ b/llvm/lib/MC/MCObjectStreamer.cpp @@ -225,7 +225,7 @@ MCDataFragment * MCObjectStreamer::getOrCreateDataFragment(const MCSubtargetInfo *STI) { MCDataFragment *F = dyn_cast_or_null(getCurrentFragment()); if (!F || !canReuseDataFragment(*F, *Assembler, STI)) { - F = new MCDataFragment(); + F = getContext().allocFragment(); insert(F); } return F; @@ -343,7 +343,7 @@ void MCObjectStreamer::emitULEB128Value(const MCExpr *Value) { emitULEB128IntValue(IntValue); return; } - insert(new MCLEBFragment(*Value, false)); + insert(getContext().allocFragment(*Value, false)); } void MCObjectStreamer::emitSLEB128Value(const MCExpr *Value) { @@ -352,7 +352,7 @@ void MCObjectStreamer::emitSLEB128Value(const MCExpr *Value) { emitSLEB128IntValue(IntValue); return; } - insert(new MCLEBFragment(*Value, true)); + insert(getContext().allocFragment(*Value, true)); } void MCObjectStreamer::emitWeakReference(MCSymbol *Alias, @@ -470,7 +470,8 @@ void MCObjectStreamer::emitInstToFragment(const MCInst &Inst, // Always create a new, separate fragment here, because its size can change // during relaxation. 
- MCRelaxableFragment *IF = new MCRelaxableFragment(Inst, STI); + MCRelaxableFragment *IF = + getContext().allocFragment(Inst, STI); insert(IF); SmallString<128> Code; @@ -544,7 +545,8 @@ void MCObjectStreamer::emitDwarfAdvanceLineAddr(int64_t LineDelta, return; } const MCExpr *AddrDelta = buildSymbolDiff(*this, Label, LastLabel, SMLoc()); - insert(new MCDwarfLineAddrFragment(LineDelta, *AddrDelta)); + insert(getContext().allocFragment(LineDelta, + *AddrDelta)); } void MCObjectStreamer::emitDwarfLineEndEntry(MCSection *Section, @@ -569,7 +571,8 @@ void MCObjectStreamer::emitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel, const MCSymbol *Label, SMLoc Loc) { const MCExpr *AddrDelta = buildSymbolDiff(*this, Label, LastLabel, Loc); - insert(new MCDwarfCallFrameFragment(*AddrDelta, nullptr)); + insert(getContext().allocFragment(*AddrDelta, + nullptr)); } void MCObjectStreamer::emitCVLocDirective(unsigned FunctionId, unsigned FileNo, @@ -640,7 +643,8 @@ void MCObjectStreamer::emitValueToAlignment(Align Alignment, int64_t Value, unsigned MaxBytesToEmit) { if (MaxBytesToEmit == 0) MaxBytesToEmit = Alignment.value(); - insert(new MCAlignFragment(Alignment, Value, ValueSize, MaxBytesToEmit)); + insert(getContext().allocFragment( + Alignment, Value, ValueSize, MaxBytesToEmit)); // Update the maximum alignment on the current section if necessary. 
MCSection *CurSec = getCurrentSectionOnly(); @@ -657,7 +661,7 @@ void MCObjectStreamer::emitCodeAlignment(Align Alignment, void MCObjectStreamer::emitValueToOffset(const MCExpr *Offset, unsigned char Value, SMLoc Loc) { - insert(new MCOrgFragment(*Offset, Value, Loc)); + insert(getContext().allocFragment(*Offset, Value, Loc)); } // Associate DTPRel32 fixup with data and resize data area @@ -844,7 +848,8 @@ void MCObjectStreamer::emitFill(const MCExpr &NumBytes, uint64_t FillValue, flushPendingLabels(DF, DF->getContents().size()); assert(getCurrentSectionOnly() && "need a section"); - insert(new MCFillFragment(FillValue, 1, NumBytes, Loc)); + insert( + getContext().allocFragment(FillValue, 1, NumBytes, Loc)); } void MCObjectStreamer::emitFill(const MCExpr &NumValues, int64_t Size, @@ -874,7 +879,8 @@ void MCObjectStreamer::emitFill(const MCExpr &NumValues, int64_t Size, flushPendingLabels(DF, DF->getContents().size()); assert(getCurrentSectionOnly() && "need a section"); - insert(new MCFillFragment(Expr, Size, NumValues, Loc)); + insert( + getContext().allocFragment(Expr, Size, NumValues, Loc)); } void MCObjectStreamer::emitNops(int64_t NumBytes, int64_t ControlledNopLength, @@ -885,7 +891,8 @@ void MCObjectStreamer::emitNops(int64_t NumBytes, int64_t ControlledNopLength, assert(getCurrentSectionOnly() && "need a section"); - insert(new MCNopsFragment(NumBytes, ControlledNopLength, Loc, STI)); + insert(getContext().allocFragment( + NumBytes, ControlledNopLength, Loc, STI)); } void MCObjectStreamer::emitFileDirective(StringRef Filename) { diff --git a/llvm/lib/MC/MCPseudoProbe.cpp b/llvm/lib/MC/MCPseudoProbe.cpp index 2a75f46c57aa87..db0443dd54356b 100644 --- a/llvm/lib/MC/MCPseudoProbe.cpp +++ b/llvm/lib/MC/MCPseudoProbe.cpp @@ -80,7 +80,8 @@ void MCPseudoProbe::emit(MCObjectStreamer *MCOS, if (AddrDelta->evaluateAsAbsolute(Delta, MCOS->getAssemblerPtr())) { MCOS->emitSLEB128IntValue(Delta); } else { - MCOS->insert(new MCPseudoProbeAddrFragment(AddrDelta)); + 
MCOS->insert(MCOS->getContext().allocFragment( + AddrDelta)); } } else { // Emit the GUID of the split function that the sentinel probe represents. diff --git a/llvm/lib/MC/MCWinCOFFStreamer.cpp b/llvm/lib/MC/MCWinCOFFStreamer.cpp index e510e1e4031cdd..5732b29b85d941 100644 --- a/llvm/lib/MC/MCWinCOFFStreamer.cpp +++ b/llvm/lib/MC/MCWinCOFFStreamer.cpp @@ -196,7 +196,7 @@ void MCWinCOFFStreamer::emitCOFFSafeSEH(MCSymbol const *Symbol) { getAssembler().registerSection(*SXData); SXData->ensureMinAlignment(Align(4)); - new MCSymbolIdFragment(Symbol, SXData); + getContext().allocFragment(Symbol, SXData); getAssembler().registerSymbol(*Symbol); CSymbol->setIsSafeSEH(); @@ -212,7 +212,8 @@ void MCWinCOFFStreamer::emitCOFFSymbolIndex(MCSymbol const *Symbol) { getAssembler().registerSection(*Sec); Sec->ensureMinAlignment(Align(4)); - new MCSymbolIdFragment(Symbol, getCurrentSectionOnly()); + getContext().allocFragment(Symbol, + getCurrentSectionOnly()); getAssembler().registerSymbol(*Symbol); } diff --git a/llvm/lib/MC/WinCOFFObjectWriter.cpp b/llvm/lib/MC/WinCOFFObjectWriter.cpp index a2b6c4e5c3a5c5..e877bf88df81b6 100644 --- a/llvm/lib/MC/WinCOFFObjectWriter.cpp +++ b/llvm/lib/MC/WinCOFFObjectWriter.cpp @@ -1097,8 +1097,9 @@ uint64_t WinCOFFWriter::writeObject(MCAssembler &Asm, // Create the contents of the .llvm_addrsig section. if (Mode != DwoOnly && OWriter.EmitAddrsigSection) { - auto Frag = new MCDataFragment(AddrsigSection); - Frag->setLayoutOrder(0); + auto *Frag = Asm.getContext().allocFragment(); + Frag->setParent(AddrsigSection); + AddrsigSection->addFragment(*Frag); raw_svector_ostream OS(Frag->getContents()); for (const MCSymbol *S : OWriter.AddrsigSyms) { if (!S->isRegistered()) @@ -1118,8 +1119,9 @@ uint64_t WinCOFFWriter::writeObject(MCAssembler &Asm, // Create the contents of the .llvm.call-graph-profile section. 
if (Mode != DwoOnly && CGProfileSection) { - auto *Frag = new MCDataFragment(CGProfileSection); - Frag->setLayoutOrder(0); + auto *Frag = Asm.getContext().allocFragment(); + Frag->setParent(CGProfileSection); + CGProfileSection->addFragment(*Frag); raw_svector_ostream OS(Frag->getContents()); for (const MCAssembler::CGProfileEntry &CGPE : Asm.CGProfile) { uint32_t FromIndex = CGPE.From->getSymbol().getIndex(); diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 1b8462f2d258ca..02d0aac4fed411 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -556,7 +556,9 @@ void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS, isFirstMacroFusibleInst(Inst, *MCII))) { // If we meet a unfused branch or the first instuction in a fusiable pair, // insert a BoundaryAlign fragment. - OS.insert(PendingBA = new MCBoundaryAlignFragment(AlignBoundary, STI)); + PendingBA = OS.getContext().allocFragment( + AlignBoundary, STI); + OS.insert(PendingBA); } } @@ -589,7 +591,7 @@ void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS, // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty // DataFragment. if (isa_and_nonnull(CF)) - OS.insert(new MCDataFragment()); + OS.insert(OS.getContext().allocFragment()); // Update the maximum alignment on the current section if necessary. MCSection *Sec = OS.getCurrentSectionOnly(); From c7b32341e9f885cc7e6ba4b2ff017f748a6f76ee Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 14 Jun 2024 12:43:12 -0400 Subject: [PATCH 121/155] [libc++] Rename workflow that restarts preempted jobs All the libc++ workflows start with `libcxx-`, so use that prefix for this job as well. Also, remove trailing whitespace from the yaml file. 
--- ...aml => libcxx-restart-preempted-jobs.yaml} | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) rename .github/workflows/{restart-preempted-libcxx-jobs.yaml => libcxx-restart-preempted-jobs.yaml} (97%) diff --git a/.github/workflows/restart-preempted-libcxx-jobs.yaml b/.github/workflows/libcxx-restart-preempted-jobs.yaml similarity index 97% rename from .github/workflows/restart-preempted-libcxx-jobs.yaml rename to .github/workflows/libcxx-restart-preempted-jobs.yaml index f8faaf25045bf2..21879ce19c27c5 100644 --- a/.github/workflows/restart-preempted-libcxx-jobs.yaml +++ b/.github/workflows/libcxx-restart-preempted-jobs.yaml @@ -34,7 +34,7 @@ jobs: script: | const failure_regex = /Process completed with exit code 1./ const preemption_regex = /The runner has received a shutdown signal/ - + const wf_run = context.payload.workflow_run core.notice(`Running on "${wf_run.display_title}" by @${wf_run.actor.login} (event: ${wf_run.event})\nWorkflow run URL: ${wf_run.html_url}`) @@ -80,30 +80,30 @@ jobs: } check_run_ids.push(check_run.id); } - + has_preempted_job = false; for (check_run_id of check_run_ids) { console.log('Listing annotations for check run: ' + check_run_id); - + annotations = await github.rest.checks.listAnnotations({ owner: context.repo.owner, repo: context.repo.repo, check_run_id: check_run_id }) - + for (annotation of annotations.data) { if (annotation.annotation_level != 'failure') { continue; } - + const preemption_match = annotation.message.match(preemption_regex); - + if (preemption_match != null) { console.log('Found preemption message: ' + annotation.message); has_preempted_job = true; } - + const failure_match = annotation.message.match(failure_regex); if (failure_match != null) { // We only want to restart the workflow if all of the failures were due to preemption. @@ -115,14 +115,14 @@ jobs: return; } } - } - + } + if (!has_preempted_job) { core.notice('No preempted jobs found. 
Not restarting workflow.'); await create_check_run('neutral', 'No preempted jobs found. Not restarting workflow.') return; } - + core.notice("Restarted workflow: " + context.payload.workflow_run.id); await github.rest.actions.reRunWorkflowFailedJobs({ owner: context.repo.owner, @@ -130,5 +130,3 @@ jobs: run_id: context.payload.workflow_run.id }) await create_check_run('success', 'Restarted workflow run due to preempted job') - - From 9a92f2f742347d9b31470349f3b777ecab580ac1 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 14 Jun 2024 09:57:21 -0700 Subject: [PATCH 122/155] Make diagnostic pragma override -Werror=foo and DefaultError warnings In GCC, `#pragma GCC diagnostic warning "-Wfoo"` overrides command-line `-Werror=foo` and errors that can become warnings (pedwarn with -pedantic-errors and permerror). ``` #pragma GCC diagnostic warning "-Wnarrowing" int x = {4.2}; #pragma GCC diagnostic warning "-Wundef" #if FOO #endif // gcc -c -Werror=undef -Werror=narrowing => two warnings ``` These diagnostics are similar to our Warning/ExtWarn/Extension diagnostics with DefaultError. This patch ports the behavior to Clang. Fix #93474 Pull Request: https://github.com/llvm/llvm-project/pull/93647 --- clang/docs/ReleaseNotes.rst | 3 +++ clang/docs/UsersManual.rst | 11 ++++++++++- clang/lib/Basic/Diagnostic.cpp | 5 +++-- .../implicit-built-Werror-using-W/convert.h | 4 ++++ .../Modules/implicit-built-Werror-using-W.cpp | 15 +++++++++++---- clang/test/Preprocessor/pragma_diagnostic.c | 10 ++++++++-- clang/test/Sema/implicit-decl.c | 17 +++++++++++++++++ 7 files changed, 56 insertions(+), 9 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index bae9f5e1bd02a1..36efeb8c70cca8 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -351,6 +351,9 @@ Non-comprehensive list of changes in this release - Added ``__is_bitwise_cloneable`` which is used to check whether a type can be safely copied by memcpy/memmove. 
+- ``#pragma GCC diagnostic warning "-Wfoo"`` can now downgrade ``-Werror=foo`` + errors and certain default-to-error ``-W`` diagnostics to warnings. + New Compiler Flags ------------------ - ``-fsanitize=implicit-bitfield-conversion`` checks implicit truncation and diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst index 15bf5e30cf8e29..8e01ea15064ba7 100644 --- a/clang/docs/UsersManual.rst +++ b/clang/docs/UsersManual.rst @@ -1138,7 +1138,7 @@ and ``#pragma clang diagnostic`` are synonyms for Clang. GCC will ignore The pragma may control any warning that can be used from the command line. Warnings may be set to ignored, warning, error, or fatal. The -following example code will tell Clang or GCC to ignore the -Wall +following example code will tell Clang or GCC to ignore the ``-Wall`` warnings: .. code-block:: c @@ -1186,6 +1186,15 @@ severity levels. They can be used to change severity of a particular diagnostic for a region of source file. A notable difference from GCC is that diagnostic not enabled via command line arguments can't be enabled this way yet. +Some diagnostics associated with a ``-W`` flag have the error severity by +default. They can be ignored or downgraded to warnings: + +.. 
code-block:: cpp + + // C only + #pragma GCC diagnostic warning "-Wimplicit-function-declaration" + int main(void) { puts(""); } + In addition to controlling warnings and errors generated by the compiler, it is possible to generate custom warning and error messages through the following pragmas: diff --git a/clang/lib/Basic/Diagnostic.cpp b/clang/lib/Basic/Diagnostic.cpp index 10136b4cd94351..66776daa5e1493 100644 --- a/clang/lib/Basic/Diagnostic.cpp +++ b/clang/lib/Basic/Diagnostic.cpp @@ -360,9 +360,10 @@ void DiagnosticsEngine::setSeverity(diag::kind Diag, diag::Severity Map, "Cannot map errors into warnings!"); assert((L.isInvalid() || SourceMgr) && "No SourceMgr for valid location"); - // Don't allow a mapping to a warning override an error/fatal mapping. + // A command line -Wfoo has an invalid L and cannot override error/fatal + // mapping, while a warning pragma can. bool WasUpgradedFromWarning = false; - if (Map == diag::Severity::Warning) { + if (Map == diag::Severity::Warning && L.isInvalid()) { DiagnosticMapping &Info = GetCurDiagState()->getOrAddMapping(Diag); if (Info.getSeverity() == diag::Severity::Error || Info.getSeverity() == diag::Severity::Fatal) { diff --git a/clang/test/Modules/Inputs/implicit-built-Werror-using-W/convert.h b/clang/test/Modules/Inputs/implicit-built-Werror-using-W/convert.h index 0ed02bc793bd1e..532fd6e28ccc4b 100644 --- a/clang/test/Modules/Inputs/implicit-built-Werror-using-W/convert.h +++ b/clang/test/Modules/Inputs/implicit-built-Werror-using-W/convert.h @@ -1,6 +1,10 @@ #ifdef USE_PRAGMA #pragma clang diagnostic push +#if USE_PRAGMA == 1 #pragma clang diagnostic warning "-Wshorten-64-to-32" +#else +#pragma clang diagnostic error "-Wshorten-64-to-32" +#endif #endif template int convert(T V) { return V; } #ifdef USE_PRAGMA diff --git a/clang/test/Modules/implicit-built-Werror-using-W.cpp b/clang/test/Modules/implicit-built-Werror-using-W.cpp index 9fb7a6bf0b0352..973dbba130b7f6 100644 --- 
a/clang/test/Modules/implicit-built-Werror-using-W.cpp +++ b/clang/test/Modules/implicit-built-Werror-using-W.cpp @@ -22,16 +22,23 @@ // RUN: | FileCheck %s -allow-empty // // In the presence of a warning pragma, build with -Werror and then without. -// RUN: not %clang_cc1 -triple x86_64-apple-darwin16 -fsyntax-only -fmodules \ -// RUN: -DUSE_PRAGMA -Werror=shorten-64-to-32 \ +// RUN: %clang_cc1 -triple x86_64-apple-darwin16 -fsyntax-only -fmodules \ +// RUN: -DUSE_PRAGMA=1 -Werror=shorten-64-to-32 \ // RUN: -I%S/Inputs/implicit-built-Werror-using-W -fimplicit-module-maps \ // RUN: -fmodules-cache-path=%t-pragma.cache -x c++ %s 2>&1 \ -// RUN: | FileCheck %s -check-prefix=CHECK-ERROR +// RUN: | FileCheck %s -check-prefix=CHECK-WARN // RUN: %clang_cc1 -triple x86_64-apple-darwin16 -fsyntax-only -fmodules \ -// RUN: -DUSE_PRAGMA \ +// RUN: -DUSE_PRAGMA=1 \ // RUN: -I%S/Inputs/implicit-built-Werror-using-W -fimplicit-module-maps \ // RUN: -fmodules-cache-path=%t-pragma.cache -x c++ %s 2>&1 \ // RUN: | FileCheck %s -check-prefix=CHECK-WARN + +// Test an error pragma. 
+// RUN: not %clang_cc1 -triple x86_64-apple-darwin16 -fsyntax-only -fmodules \ +// RUN: -DUSE_PRAGMA=2 -Wshorten-64-to-32 \ +// RUN: -I%S/Inputs/implicit-built-Werror-using-W -fimplicit-module-maps \ +// RUN: -fmodules-cache-path=%t-pragma.cache -x c++ %s 2>&1 \ +// RUN: | FileCheck %s -check-prefix=CHECK-ERROR #include long long foo() { return convert(0); } diff --git a/clang/test/Preprocessor/pragma_diagnostic.c b/clang/test/Preprocessor/pragma_diagnostic.c index 8a5adcf6ab55bd..ff379079b7bafa 100644 --- a/clang/test/Preprocessor/pragma_diagnostic.c +++ b/clang/test/Preprocessor/pragma_diagnostic.c @@ -1,8 +1,14 @@ // RUN: %clang_cc1 -fsyntax-only -verify -Wno-undef %s // RUN: %clang_cc1 -fsyntax-only -verify -Wno-undef -Wno-unknown-warning-option -DAVOID_UNKNOWN_WARNING %s +// RUN: %clang_cc1 -fsyntax-only -verify -Werror=undef -DINITIAL_UNDEF %s +#ifdef INITIAL_UNDEF +#if FOO // expected-error {{'FOO' is not defined}} +#endif +#else #if FOO // ok. #endif +#endif #pragma GCC diagnostic warning "-Wundef" @@ -52,6 +58,6 @@ void ppq(void){} void ppr(void){} // expected-error {{no previous prototype for function 'ppr'}} // expected-note@-1{{declare 'static' if the function is not intended to be used outside of this translation unit}} -#pragma clang diagnostic warning "-Weverything" // This should not be effective -void pps(void){} // expected-error {{no previous prototype for function 'pps'}} +#pragma clang diagnostic warning "-Weverything" // Set to warning +void pps(void){} // expected-warning {{no previous prototype for function 'pps'}} // expected-note@-1{{declare 'static' if the function is not intended to be used outside of this translation unit}} diff --git a/clang/test/Sema/implicit-decl.c b/clang/test/Sema/implicit-decl.c index d7d3e108e80488..a3f35222d833cc 100644 --- a/clang/test/Sema/implicit-decl.c +++ b/clang/test/Sema/implicit-decl.c @@ -74,3 +74,20 @@ void GH48579_2(void) { int GH48579_3 = ({a();}); // both-error {{statement expression not allowed 
at file scope}} void GH48579_4(int array[({ a(); })]); // both-error {{statement expression not allowed at file scope}} + +void pragma_warning(void) { +#pragma clang diagnostic warning "-Wimplicit-function-declaration" + bark(); // expected-warning {{call to undeclared function 'bark'; ISO C99 and later do not support implicit function declarations}} \ + c2x-error {{use of undeclared identifier 'bark'}} +} + +void pragma_error(void) { +#pragma clang diagnostic error "-Wimplicit-function-declaration" + bark(); // expected-error {{call to undeclared function 'bark'; ISO C99 and later do not support implicit function declarations}} \ + c2x-error {{use of undeclared identifier 'bark'}} +} + +void pragma_ignored(void) { +#pragma clang diagnostic ignored "-Wimplicit-function-declaration" + bark(); // c2x-error {{use of undeclared identifier 'bark'}} +} From 72b841d016c7403ac1f7678d25b864dac80d06dc Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Fri, 14 Jun 2024 09:59:15 -0700 Subject: [PATCH 123/155] [Analysis] Migrate to a new version of getValueProfDataFromInst (#95561) Note that the version of getValueProfDataFromInst that returns bool has been "deprecated" since: commit 1e15371dd8843dfc52b9435afaa133997c1773d8 Author: Mingming Liu Date: Mon Apr 1 15:14:49 2024 -0700 --- llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp index ab53717eb889a0..84b0a1b2a5387c 100644 --- a/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp +++ b/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp @@ -91,13 +91,13 @@ ArrayRef ICallPromotionAnalysis::getPromotionCandidatesForInstruction( const Instruction *I, uint32_t &NumVals, uint64_t &TotalCount, uint32_t &NumCandidates) { - bool Res = - getValueProfDataFromInst(*I, IPVK_IndirectCallTarget, MaxNumPromotions, - ValueDataArray.get(), NumVals, TotalCount); + 
auto Res = getValueProfDataFromInst(*I, IPVK_IndirectCallTarget, + MaxNumPromotions, NumVals, TotalCount); if (!Res) { NumCandidates = 0; return ArrayRef(); } + ValueDataArray = std::move(Res); NumCandidates = getProfitablePromotionCandidates(I, NumVals, TotalCount); return ArrayRef(ValueDataArray.get(), NumVals); } From b1932b8483011c2bfebbea1ef56a565634570e6b Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 14 Jun 2024 10:01:36 -0700 Subject: [PATCH 124/155] [MC] Aligned bundling: remove special handling for RelaxAll When both aligned bundling and RelaxAll are enabled, bundle padding is directly written into fragments (https://reviews.llvm.org/D8072). (The original motivation was memory usage, which has been achieved from different angles with recent assembler improvement). The code presents challenges with the work to replace fragment representation (e.g. #94950 #95077). This patch removes the special handling. RelaxAll still works but the behavior seems slightly different as revealed by 2 changed tests. However, most `-mc-relax-all` tests are unchanged. RelaxAll used to be the default for clang -O0. This mode has significant code size drawbacks and newer Clang doesn't use it (#90013). --- flushPendingLabels: The FOffset parameter can be removed: pending labels will be assigned to the incoming fragment at offset 0. 
Pull Request: https://github.com/llvm/llvm-project/pull/95188 --- llvm/include/llvm/MC/MCELFStreamer.h | 8 -- llvm/include/llvm/MC/MCSection.h | 3 +- llvm/include/llvm/MC/MCWasmStreamer.h | 3 - llvm/lib/MC/MCAssembler.cpp | 8 +- llvm/lib/MC/MCELFStreamer.cpp | 89 +------------------ llvm/lib/MC/MCObjectStreamer.cpp | 10 +-- llvm/lib/MC/MCSection.cpp | 7 +- llvm/lib/MC/MCWasmStreamer.cpp | 13 --- .../AlignedBundling/misaligned-bundle-group.s | 6 +- .../X86/AlignedBundling/misaligned-bundle.s | 7 +- 10 files changed, 13 insertions(+), 141 deletions(-) diff --git a/llvm/include/llvm/MC/MCELFStreamer.h b/llvm/include/llvm/MC/MCELFStreamer.h index 1ff029d44d376c..5f8ae5ace56fd1 100644 --- a/llvm/include/llvm/MC/MCELFStreamer.h +++ b/llvm/include/llvm/MC/MCELFStreamer.h @@ -39,7 +39,6 @@ class MCELFStreamer : public MCObjectStreamer { /// state management void reset() override { SeenIdent = false; - BundleGroups.clear(); MCObjectStreamer::reset(); } @@ -142,14 +141,7 @@ class MCELFStreamer : public MCObjectStreamer { void finalizeCGProfileEntry(const MCSymbolRefExpr *&S, uint64_t Offset); void finalizeCGProfile(); - /// Merge the content of the fragment \p EF into the fragment \p DF. - void mergeFragment(MCDataFragment *, MCDataFragment *); - bool SeenIdent = false; - - /// BundleGroups - The stack of fragments holding the bundle-locked - /// instructions. - SmallVector BundleGroups; }; MCELFStreamer *createARMELFStreamer(MCContext &Context, diff --git a/llvm/include/llvm/MC/MCSection.h b/llvm/include/llvm/MC/MCSection.h index e5455292d5c625..c7c0de60a411e7 100644 --- a/llvm/include/llvm/MC/MCSection.h +++ b/llvm/include/llvm/MC/MCSection.h @@ -227,8 +227,7 @@ class MCSection { void addPendingLabel(MCSymbol* label, unsigned Subsection = 0); /// Associate all pending labels in a subsection with a fragment. 
- void flushPendingLabels(MCFragment *F, uint64_t FOffset = 0, - unsigned Subsection = 0); + void flushPendingLabels(MCFragment *F, unsigned Subsection); /// Associate all pending labels with empty data fragments. One fragment /// will be created for each subsection as necessary. diff --git a/llvm/include/llvm/MC/MCWasmStreamer.h b/llvm/include/llvm/MC/MCWasmStreamer.h index f58405214a80a3..f95628d5e0e5f7 100644 --- a/llvm/include/llvm/MC/MCWasmStreamer.h +++ b/llvm/include/llvm/MC/MCWasmStreamer.h @@ -73,9 +73,6 @@ class MCWasmStreamer : public MCObjectStreamer { void fixSymbolsInTLSFixups(const MCExpr *expr); - /// Merge the content of the fragment \p EF into the fragment \p DF. - void mergeFragment(MCDataFragment *, MCDataFragment *); - bool SeenIdent; }; diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp index 08420ed2b3a39e..17f09001b184aa 100644 --- a/llvm/lib/MC/MCAssembler.cpp +++ b/llvm/lib/MC/MCAssembler.cpp @@ -420,12 +420,6 @@ void MCAsmLayout::layoutBundle(MCFragment *F) { // The fragment's offset will point to after the padding, and its computed // size won't include the padding. // - // When the -mc-relax-all flag is used, we optimize bundling by writting the - // padding directly into fragments when the instructions are emitted inside - // the streamer. When the fragment is larger than the bundle size, we need to - // ensure that it's bundle aligned. This means that if we end up with - // multiple fragments, we must emit bundle padding between fragments. - // // ".align N" is an example of a directive that introduces multiple // fragments. 
We could add a special case to handle ".align N" by emitting // within-fragment padding (which would produce less padding when N is less @@ -436,7 +430,7 @@ void MCAsmLayout::layoutBundle(MCFragment *F) { MCEncodedFragment *EF = cast(F); uint64_t FSize = Assembler.computeFragmentSize(*this, *EF); - if (!Assembler.getRelaxAll() && FSize > Assembler.getBundleAlignSize()) + if (FSize > Assembler.getBundleAlignSize()) report_fatal_error("Fragment can't be larger than a bundle size"); uint64_t RequiredBundlePadding = diff --git a/llvm/lib/MC/MCELFStreamer.cpp b/llvm/lib/MC/MCELFStreamer.cpp index 6bd6fe7edc87a0..8be3c0e7211891 100644 --- a/llvm/lib/MC/MCELFStreamer.cpp +++ b/llvm/lib/MC/MCELFStreamer.cpp @@ -50,44 +50,6 @@ bool MCELFStreamer::isBundleLocked() const { return getCurrentSectionOnly()->isBundleLocked(); } -void MCELFStreamer::mergeFragment(MCDataFragment *DF, - MCDataFragment *EF) { - MCAssembler &Assembler = getAssembler(); - - if (Assembler.isBundlingEnabled() && Assembler.getRelaxAll()) { - uint64_t FSize = EF->getContents().size(); - - if (FSize > Assembler.getBundleAlignSize()) - report_fatal_error("Fragment can't be larger than a bundle size"); - - uint64_t RequiredBundlePadding = computeBundlePadding( - Assembler, EF, DF->getContents().size(), FSize); - - if (RequiredBundlePadding > UINT8_MAX) - report_fatal_error("Padding cannot exceed 255 bytes"); - - if (RequiredBundlePadding > 0) { - SmallString<256> Code; - raw_svector_ostream VecOS(Code); - EF->setBundlePadding(static_cast(RequiredBundlePadding)); - Assembler.writeFragmentPadding(VecOS, *EF, FSize); - - DF->getContents().append(Code.begin(), Code.end()); - } - } - - flushPendingLabels(DF, DF->getContents().size()); - - for (unsigned i = 0, e = EF->getFixups().size(); i != e; ++i) { - EF->getFixups()[i].setOffset(EF->getFixups()[i].getOffset() + - DF->getContents().size()); - DF->getFixups().push_back(EF->getFixups()[i]); - } - if (DF->getSubtargetInfo() == nullptr && EF->getSubtargetInfo()) - 
DF->setHasInstructions(*EF->getSubtargetInfo()); - DF->getContents().append(EF->getContents().begin(), EF->getContents().end()); -} - void MCELFStreamer::initSections(bool NoExecStack, const MCSubtargetInfo &STI) { MCContext &Ctx = getContext(); switchSection(Ctx.getObjectFileInfo()->getTextSection()); @@ -575,24 +537,12 @@ void MCELFStreamer::emitInstToData(const MCInst &Inst, if (Assembler.isBundlingEnabled()) { MCSection &Sec = *getCurrentSectionOnly(); - if (Assembler.getRelaxAll() && isBundleLocked()) { - // If the -mc-relax-all flag is used and we are bundle-locked, we re-use - // the current bundle group. - DF = BundleGroups.back(); - CheckBundleSubtargets(DF->getSubtargetInfo(), &STI); - } - else if (Assembler.getRelaxAll() && !isBundleLocked()) - // When not in a bundle-locked group and the -mc-relax-all flag is used, - // we create a new temporary fragment which will be later merged into - // the current fragment. - DF = getContext().allocFragment(); - else if (isBundleLocked() && !Sec.isBundleGroupBeforeFirstInst()) { + if (isBundleLocked() && !Sec.isBundleGroupBeforeFirstInst()) { // If we are bundle-locked, we re-use the current fragment. // The bundle-locking directive ensures this is a new data fragment. DF = cast(getCurrentFragment()); CheckBundleSubtargets(DF->getSubtargetInfo(), &STI); - } - else if (!isBundleLocked() && Fixups.size() == 0) { + } else if (!isBundleLocked() && Fixups.size() == 0) { // Optimize memory usage by emitting the instruction to a // MCCompactEncodedInstFragment when not in a bundle-locked group and // there are no fixups registered. 
@@ -632,13 +582,6 @@ void MCELFStreamer::emitInstToData(const MCInst &Inst, getAssembler().getBackend().RelaxFixupKind) DF->setLinkerRelaxable(); DF->getContents().append(Code.begin(), Code.end()); - - if (Assembler.isBundlingEnabled() && Assembler.getRelaxAll()) { - if (!isBundleLocked()) { - mergeFragment(getOrCreateDataFragment(&STI), DF); - delete DF; - } - } } void MCELFStreamer::emitBundleAlignMode(Align Alignment) { @@ -660,12 +603,6 @@ void MCELFStreamer::emitBundleLock(bool AlignToEnd) { if (!isBundleLocked()) Sec.setBundleGroupBeforeFirstInst(true); - if (getAssembler().getRelaxAll() && !isBundleLocked()) { - // TODO: drop the lock state and set directly in the fragment - MCDataFragment *DF = getContext().allocFragment(); - BundleGroups.push_back(DF); - } - Sec.setBundleLockState(AlignToEnd ? MCSection::BundleLockedAlignToEnd : MCSection::BundleLocked); } @@ -680,27 +617,7 @@ void MCELFStreamer::emitBundleUnlock() { else if (Sec.isBundleGroupBeforeFirstInst()) report_fatal_error("Empty bundle-locked group is forbidden"); - // When the -mc-relax-all flag is used, we emit instructions to fragments - // stored on a stack. When the bundle unlock is emitted, we pop a fragment - // from the stack a merge it to the one below. - if (getAssembler().getRelaxAll()) { - assert(!BundleGroups.empty() && "There are no bundle groups"); - MCDataFragment *DF = BundleGroups.back(); - - // FIXME: Use BundleGroups to track the lock state instead. - Sec.setBundleLockState(MCSection::NotBundleLocked); - - // FIXME: Use more separate fragments for nested groups. 
- if (!isBundleLocked()) { - mergeFragment(getOrCreateDataFragment(DF->getSubtargetInfo()), DF); - BundleGroups.pop_back(); - delete DF; - } - - if (Sec.getBundleLockState() != MCSection::BundleLockedAlignToEnd) - getOrCreateDataFragment()->setAlignToBundleEnd(false); - } else - Sec.setBundleLockState(MCSection::NotBundleLocked); + Sec.setBundleLockState(MCSection::NotBundleLocked); } void MCELFStreamer::finishImpl() { diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp index 24bed3119d6639..35521ddab47772 100644 --- a/llvm/lib/MC/MCObjectStreamer.cpp +++ b/llvm/lib/MC/MCObjectStreamer.cpp @@ -81,7 +81,7 @@ void MCObjectStreamer::flushPendingLabels(MCFragment *F, uint64_t FOffset) { } // Associate the labels with F. - CurSection->flushPendingLabels(F, FOffset, CurSubsectionIdx); + CurSection->flushPendingLabels(F, CurSubsectionIdx); } void MCObjectStreamer::flushPendingLabels() { @@ -215,7 +215,7 @@ static bool canReuseDataFragment(const MCDataFragment &F, // When bundling is enabled, we don't want to add data to a fragment that // already has instructions (see MCELFStreamer::emitInstToData for details) if (Assembler.isBundlingEnabled()) - return Assembler.getRelaxAll(); + return false; // If the subtarget is changed mid fragment we start a new fragment to record // the new STI. return !STI || F.getSubtargetInfo() == STI; @@ -292,8 +292,7 @@ void MCObjectStreamer::emitLabel(MCSymbol *Symbol, SMLoc Loc) { // Otherwise queue the label and set its fragment pointer when we emit the // next fragment. 
auto *F = dyn_cast_or_null(getCurrentFragment()); - if (F && !(getAssembler().isBundlingEnabled() && - getAssembler().getRelaxAll())) { + if (F) { Symbol->setFragment(F); Symbol->setOffset(F->getContents().size()); } else { @@ -465,9 +464,6 @@ void MCObjectStreamer::emitInstructionImpl(const MCInst &Inst, void MCObjectStreamer::emitInstToFragment(const MCInst &Inst, const MCSubtargetInfo &STI) { - if (getAssembler().getRelaxAll() && getAssembler().isBundlingEnabled()) - llvm_unreachable("All instructions should have already been relaxed"); - // Always create a new, separate fragment here, because its size can change // during relaxation. MCRelaxableFragment *IF = diff --git a/llvm/lib/MC/MCSection.cpp b/llvm/lib/MC/MCSection.cpp index 59fdfd76f444ae..3a7a8a0898c5bf 100644 --- a/llvm/lib/MC/MCSection.cpp +++ b/llvm/lib/MC/MCSection.cpp @@ -82,15 +82,14 @@ void MCSection::addPendingLabel(MCSymbol *label, unsigned Subsection) { PendingLabels.push_back(PendingLabel(label, Subsection)); } -void MCSection::flushPendingLabels(MCFragment *F, uint64_t FOffset, - unsigned Subsection) { +void MCSection::flushPendingLabels(MCFragment *F, unsigned Subsection) { // Set the fragment and fragment offset for all pending symbols in the // specified Subsection, and remove those symbols from the pending list. 
for (auto It = PendingLabels.begin(); It != PendingLabels.end(); ++It) { PendingLabel& Label = *It; if (Label.Subsection == Subsection) { Label.Sym->setFragment(F); - Label.Sym->setOffset(FOffset); + assert(Label.Sym->getOffset() == 0); PendingLabels.erase(It--); } } @@ -105,7 +104,7 @@ void MCSection::flushPendingLabels() { MCFragment *F = new MCDataFragment(); addFragment(*F); F->setParent(this); - flushPendingLabels(F, 0, Label.Subsection); + flushPendingLabels(F, Label.Subsection); } } diff --git a/llvm/lib/MC/MCWasmStreamer.cpp b/llvm/lib/MC/MCWasmStreamer.cpp index c553ede77555a5..8b59a6c3446f9d 100644 --- a/llvm/lib/MC/MCWasmStreamer.cpp +++ b/llvm/lib/MC/MCWasmStreamer.cpp @@ -39,19 +39,6 @@ using namespace llvm; MCWasmStreamer::~MCWasmStreamer() = default; // anchor. -void MCWasmStreamer::mergeFragment(MCDataFragment *DF, MCDataFragment *EF) { - flushPendingLabels(DF, DF->getContents().size()); - - for (unsigned I = 0, E = EF->getFixups().size(); I != E; ++I) { - EF->getFixups()[I].setOffset(EF->getFixups()[I].getOffset() + - DF->getContents().size()); - DF->getFixups().push_back(EF->getFixups()[I]); - } - if (DF->getSubtargetInfo() == nullptr && EF->getSubtargetInfo()) - DF->setHasInstructions(*EF->getSubtargetInfo()); - DF->getContents().append(EF->getContents().begin(), EF->getContents().end()); -} - void MCWasmStreamer::emitLabel(MCSymbol *S, SMLoc Loc) { auto *Symbol = cast(S); MCObjectStreamer::emitLabel(Symbol, Loc); diff --git a/llvm/test/MC/X86/AlignedBundling/misaligned-bundle-group.s b/llvm/test/MC/X86/AlignedBundling/misaligned-bundle-group.s index 6962a2a65960d6..92bd9ec016bd57 100644 --- a/llvm/test/MC/X86/AlignedBundling/misaligned-bundle-group.s +++ b/llvm/test/MC/X86/AlignedBundling/misaligned-bundle-group.s @@ -3,7 +3,7 @@ # RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-OPT %s # RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu -mcpu=pentiumpro -mc-relax-all %s -o - \ # RUN: | llvm-objdump -d --no-show-raw-insn - \ -# RUN: | 
FileCheck -check-prefix=CHECK -check-prefix=CHECK-RELAX %s +# RUN: | FileCheck --check-prefixes=CHECK,CHECK-OPT %s .text foo: @@ -13,11 +13,7 @@ foo: .bundle_lock align_to_end # CHECK: 1: nopw %cs:(%eax,%eax) # CHECK: 10: nopw %cs:(%eax,%eax) -# CHECK-RELAX: 1a: nop -# CHECK-RELAX: 20: nopw %cs:(%eax,%eax) -# CHECK-RELAX: 2a: nopw %cs:(%eax,%eax) # CHECK-OPT: 1b: calll 0x1c -# CHECK-RELAX: 3b: calll 0x3c calll bar # 5 bytes .bundle_unlock ret # 1 byte diff --git a/llvm/test/MC/X86/AlignedBundling/misaligned-bundle.s b/llvm/test/MC/X86/AlignedBundling/misaligned-bundle.s index 7a84bffc1821eb..0bf5cfd802be91 100644 --- a/llvm/test/MC/X86/AlignedBundling/misaligned-bundle.s +++ b/llvm/test/MC/X86/AlignedBundling/misaligned-bundle.s @@ -3,7 +3,7 @@ # RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-OPT %s # RUN: llvm-mc -filetype=obj -triple i686-pc-linux-gnu -mcpu=pentiumpro -mc-relax-all %s -o - \ # RUN: | llvm-objdump --no-print-imm-hex -d --no-show-raw-insn - \ -# RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-RELAX %s +# RUN: | FileCheck --check-prefixes=CHECK,CHECK-OPT %s .text foo: @@ -11,17 +11,12 @@ foo: push %ebp # 1 byte .align 16 # CHECK: 1: nopw %cs:(%eax,%eax) -# CHECK-RELAX: 10: nopw %cs:(%eax,%eax) -# CHECK-RELAX: 1a: nop # CHECK-OPT: 10: movl $1, (%esp) -# CHECK-RELAX: 20: movl $1, (%esp) movl $0x1, (%esp) # 7 bytes movl $0x1, (%esp) # 7 bytes # CHECK-OPT: 1e: nop movl $0x2, 0x1(%esp) # 8 bytes movl $0x2, 0x1(%esp) # 8 bytes -# CHECK-RELAX: 3e: nop -# CHECK-RELAX: 40: movl $2, 1(%esp) movl $0x2, 0x1(%esp) # 8 bytes movl $0x2, (%esp) # 7 bytes # CHECK-OPT: 3f: nop From e7e90dd1c1014b4a7ef77f74af3682168d23ddbf Mon Sep 17 00:00:00 2001 From: Brian Favela Date: Fri, 14 Jun 2024 13:14:19 -0400 Subject: [PATCH 125/155] [AMDGPU] Adding multiple use analysis to SIPeepholeSDWA (#94800) Allow for multiple uses of an operand where each instruction can be promoted to SDWA. 
For instance: ; v_and_b32 v2, lit(0x0000ffff), v2 ; v_and_b32 v3, 6, v2 ; v_and_b32 v2, 1, v2 Can be folded to: ; v_and_b32 v3, 6, sel_lo(v2) ; v_and_b32 v2, 1, sel_lo(v2) --- llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp | 68 ++- .../AMDGPU/GlobalISel/cvt_f32_ubyte.ll | 27 +- llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll | 308 +++++----- llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll | 359 ++++++------ .../CodeGen/AMDGPU/GlobalISel/llvm.abs.ll | 55 +- .../test/CodeGen/AMDGPU/GlobalISel/saddsat.ll | 316 +++++----- .../test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll | 314 +++++----- llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll | 36 +- llvm/test/CodeGen/AMDGPU/fract-match.ll | 25 +- llvm/test/CodeGen/AMDGPU/fshr.ll | 92 ++- llvm/test/CodeGen/AMDGPU/idiv-licm.ll | 86 ++- llvm/test/CodeGen/AMDGPU/idot4u.ll | 31 +- llvm/test/CodeGen/AMDGPU/llvm.frexp.ll | 42 +- ...ne-sink-temporal-divergence-swdev407790.ll | 25 +- llvm/test/CodeGen/AMDGPU/permute_i8.ll | 545 ++++++++---------- .../AMDGPU/reassoc-mul-add-1-to-mad.ll | 5 +- llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll | 102 +++- llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll | 6 +- 18 files changed, 1250 insertions(+), 1192 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp index 1fadd8ce45b1f5..f47731bf6aac3f 100644 --- a/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp +++ b/llvm/lib/Target/AMDGPU/SIPeepholeSDWA.cpp @@ -37,20 +37,22 @@ STATISTIC(NumSDWAInstructionsPeepholed, namespace { +bool isConvertibleToSDWA(MachineInstr &MI, const GCNSubtarget &ST, + const SIInstrInfo *TII); class SDWAOperand; class SDWADstOperand; -class SIPeepholeSDWA : public MachineFunctionPass { -public: - using SDWAOperandsVector = SmallVector; +using SDWAOperandsVector = SmallVector; +using SDWAOperandsMap = MapVector; +class SIPeepholeSDWA : public MachineFunctionPass { private: MachineRegisterInfo *MRI; const SIRegisterInfo *TRI; const SIInstrInfo *TII; MapVector> SDWAOperands; - MapVector PotentialMatches; + 
SDWAOperandsMap PotentialMatches; SmallVector ConvertedInstructions; std::optional foldToImm(const MachineOperand &Op) const; @@ -65,7 +67,6 @@ class SIPeepholeSDWA : public MachineFunctionPass { bool runOnMachineFunction(MachineFunction &MF) override; void matchSDWAOperands(MachineBasicBlock &MBB); std::unique_ptr matchSDWAOperand(MachineInstr &MI); - bool isConvertibleToSDWA(MachineInstr &MI, const GCNSubtarget &ST) const; void pseudoOpConvertToVOP2(MachineInstr &MI, const GCNSubtarget &ST) const; bool convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands); @@ -93,7 +94,9 @@ class SDWAOperand { virtual ~SDWAOperand() = default; - virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII) = 0; + virtual MachineInstr *potentialToConvert(const SIInstrInfo *TII, + const GCNSubtarget &ST, + SDWAOperandsMap *PotentialMatches = nullptr) = 0; virtual bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) = 0; MachineOperand *getTargetOperand() const { return Target; } @@ -126,7 +129,9 @@ class SDWASrcOperand : public SDWAOperand { : SDWAOperand(TargetOp, ReplacedOp), SrcSel(SrcSel_), Abs(Abs_), Neg(Neg_), Sext(Sext_) {} - MachineInstr *potentialToConvert(const SIInstrInfo *TII) override; + MachineInstr *potentialToConvert(const SIInstrInfo *TII, + const GCNSubtarget &ST, + SDWAOperandsMap *PotentialMatches = nullptr) override; bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override; SdwaSel getSrcSel() const { return SrcSel; } @@ -153,7 +158,9 @@ class SDWADstOperand : public SDWAOperand { SdwaSel DstSel_ = DWORD, DstUnused DstUn_ = UNUSED_PAD) : SDWAOperand(TargetOp, ReplacedOp), DstSel(DstSel_), DstUn(DstUn_) {} - MachineInstr *potentialToConvert(const SIInstrInfo *TII) override; + MachineInstr *potentialToConvert(const SIInstrInfo *TII, + const GCNSubtarget &ST, + SDWAOperandsMap *PotentialMatches = nullptr) override; bool convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) override; SdwaSel getDstSel() const { return 
DstSel; } @@ -327,7 +334,33 @@ uint64_t SDWASrcOperand::getSrcMods(const SIInstrInfo *TII, return Mods; } -MachineInstr *SDWASrcOperand::potentialToConvert(const SIInstrInfo *TII) { +MachineInstr *SDWASrcOperand::potentialToConvert(const SIInstrInfo *TII, + const GCNSubtarget &ST, + SDWAOperandsMap *PotentialMatches) { + if (PotentialMatches != nullptr) { + // Fill out the map for all uses if all can be converted + MachineOperand *Reg = getReplacedOperand(); + if (!Reg->isReg() || !Reg->isDef()) + return nullptr; + + for (MachineInstr &UseMI : getMRI()->use_nodbg_instructions(Reg->getReg())) + // Check that all instructions that use Reg can be converted + if (!isConvertibleToSDWA(UseMI, ST, TII)) + return nullptr; + + // Now that it's guaranteed all uses are legal, iterate over the uses again + // to add them for later conversion. + for (MachineOperand &UseMO : getMRI()->use_nodbg_operands(Reg->getReg())) { + // Should not get a subregister here + assert(isSameReg(UseMO, *Reg)); + + SDWAOperandsMap &potentialMatchesMap = *PotentialMatches; + MachineInstr *UseMI = UseMO.getParent(); + potentialMatchesMap[UseMI].push_back(this); + } + return nullptr; + } + // For SDWA src operand potential instruction is one that use register // defined by parent instruction MachineOperand *PotentialMO = findSingleRegUse(getReplacedOperand(), getMRI()); @@ -420,7 +453,9 @@ bool SDWASrcOperand::convertToSDWA(MachineInstr &MI, const SIInstrInfo *TII) { return true; } -MachineInstr *SDWADstOperand::potentialToConvert(const SIInstrInfo *TII) { +MachineInstr *SDWADstOperand::potentialToConvert(const SIInstrInfo *TII, + const GCNSubtarget &ST, + SDWAOperandsMap *PotentialMatches) { // For SDWA dst operand potential instruction is one that defines register // that this operand uses MachineRegisterInfo *MRI = getMRI(); @@ -919,8 +954,10 @@ void SIPeepholeSDWA::pseudoOpConvertToVOP2(MachineInstr &MI, MISucc.substituteRegister(CarryIn->getReg(), TRI->getVCC(), 0, *TRI); } -bool 
SIPeepholeSDWA::isConvertibleToSDWA(MachineInstr &MI, - const GCNSubtarget &ST) const { +namespace { +bool isConvertibleToSDWA(MachineInstr &MI, + const GCNSubtarget &ST, + const SIInstrInfo* TII) { // Check if this is already an SDWA instruction unsigned Opc = MI.getOpcode(); if (TII->isSDWA(Opc)) @@ -980,6 +1017,7 @@ bool SIPeepholeSDWA::isConvertibleToSDWA(MachineInstr &MI, return true; } +} // namespace bool SIPeepholeSDWA::convertToSDWA(MachineInstr &MI, const SDWAOperandsVector &SDWAOperands) { @@ -1215,7 +1253,7 @@ bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) { matchSDWAOperands(MBB); for (const auto &OperandPair : SDWAOperands) { const auto &Operand = OperandPair.second; - MachineInstr *PotentialMI = Operand->potentialToConvert(TII); + MachineInstr *PotentialMI = Operand->potentialToConvert(TII, ST); if (PotentialMI && (PotentialMI->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 || PotentialMI->getOpcode() == AMDGPU::V_SUB_CO_U32_e64)) @@ -1228,8 +1266,8 @@ bool SIPeepholeSDWA::runOnMachineFunction(MachineFunction &MF) { for (const auto &OperandPair : SDWAOperands) { const auto &Operand = OperandPair.second; - MachineInstr *PotentialMI = Operand->potentialToConvert(TII); - if (PotentialMI && isConvertibleToSDWA(*PotentialMI, ST)) { + MachineInstr *PotentialMI = Operand->potentialToConvert(TII, ST, &PotentialMatches); + if (PotentialMI && isConvertibleToSDWA(*PotentialMI, ST, TII)) { PotentialMatches[PotentialMI].push_back(Operand.get()); } } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll index 02781e763f44a1..eb20178f9f4d88 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll @@ -771,7 +771,8 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(ptr addrspace(1) noalias %o ; VI: ; %bb.0: ; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 ; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0 -; VI-NEXT: v_mov_b32_e32 v6, 
8 +; VI-NEXT: v_mov_b32_e32 v6, 9 +; VI-NEXT: v_mov_b32_e32 v7, 8 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s2 ; VI-NEXT: v_mov_b32_e32 v1, s3 @@ -779,28 +780,28 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(ptr addrspace(1) noalias %o ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: flat_load_dword v1, v[0:1] ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 -; VI-NEXT: v_mov_b32_e32 v2, 9 +; VI-NEXT: v_mov_b32_e32 v2, 0xff ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v5, s1 ; VI-NEXT: v_mov_b32_e32 v4, s0 ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_lshrrev_b32_e32 v7, 8, v1 -; VI-NEXT: v_lshrrev_b32_e32 v8, 16, v1 +; VI-NEXT: v_lshrrev_b32_e32 v8, 8, v1 +; VI-NEXT: v_and_b32_sdwa v2, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; VI-NEXT: v_cvt_f32_ubyte0_sdwa v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 ; VI-NEXT: v_cvt_f32_ubyte3_e32 v3, v1 ; VI-NEXT: v_add_u16_e32 v9, 9, v1 -; VI-NEXT: v_add_u16_sdwa v10, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD -; VI-NEXT: v_cvt_f32_ubyte0_sdwa v1, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 -; VI-NEXT: v_cvt_f32_ubyte0_sdwa v2, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 -; VI-NEXT: v_add_u16_e32 v7, 9, v7 +; VI-NEXT: v_add_u16_sdwa v10, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-NEXT: v_add_u16_sdwa v6, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; VI-NEXT: v_cvt_f32_ubyte0_sdwa v1, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 +; VI-NEXT: v_cvt_f32_ubyte0_e32 v2, v2 ; VI-NEXT: v_add_u16_e32 v8, 9, v8 -; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; VI-NEXT: v_and_b32_e32 v10, 0xff, v10 -; VI-NEXT: v_lshlrev_b32_sdwa v0, v6, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; VI-NEXT: v_and_b32_e32 v1, 0xff, v8 +; VI-NEXT: flat_store_dwordx4 v[4:5], v[0:3] +; VI-NEXT: v_and_b32_e32 v6, 
0xff, v6 +; VI-NEXT: v_lshlrev_b32_sdwa v0, v7, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; VI-NEXT: v_lshlrev_b32_e32 v1, 16, v10 ; VI-NEXT: v_or_b32_sdwa v0, v9, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; VI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; VI-NEXT: v_lshlrev_b32_e32 v2, 24, v10 +; VI-NEXT: v_lshlrev_b32_e32 v2, 24, v6 ; VI-NEXT: v_or_b32_e32 v0, v0, v1 ; VI-NEXT: v_or_b32_e32 v2, v0, v2 ; VI-NEXT: v_mov_b32_e32 v0, s2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll index 06930388901b0f..4df5fa18e2942d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshl.ll @@ -1271,46 +1271,45 @@ define i32 @v_fshl_v4i8(i32 %lhs.arg, i32 %rhs.arg, i32 %amt.arg) { ; GFX8-LABEL: v_fshl_v4i8: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_not_b32_e32 v7, v2 +; GFX8-NEXT: v_mov_b32_e32 v9, 1 +; GFX8-NEXT: v_and_b32_e32 v6, 7, v2 +; GFX8-NEXT: v_and_b32_e32 v7, 7, v7 +; GFX8-NEXT: v_lshrrev_b16_sdwa v10, v9, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 8, v2 -; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v2 -; GFX8-NEXT: v_lshrrev_b32_e32 v7, 24, v2 -; GFX8-NEXT: v_and_b32_e32 v8, 7, v2 -; GFX8-NEXT: v_not_b32_e32 v2, v2 -; GFX8-NEXT: v_mov_b32_e32 v10, 1 -; GFX8-NEXT: v_and_b32_e32 v2, 7, v2 -; GFX8-NEXT: v_lshrrev_b16_sdwa v11, v10, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_lshlrev_b16_e32 v8, v8, v0 -; GFX8-NEXT: v_lshrrev_b16_e32 v2, v2, v11 +; GFX8-NEXT: v_lshlrev_b16_e32 v6, v6, v0 +; GFX8-NEXT: v_lshrrev_b16_e32 v7, v7, v10 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v1 -; GFX8-NEXT: v_or_b32_e32 v2, v8, v2 -; GFX8-NEXT: v_and_b32_e32 v8, 7, v5 +; GFX8-NEXT: v_or_b32_e32 v6, v6, v7 +; GFX8-NEXT: v_and_b32_e32 v7, 7, v5 ; GFX8-NEXT: v_not_b32_e32 v5, v5 ; GFX8-NEXT: v_lshrrev_b32_e32 
v3, 8, v0 ; GFX8-NEXT: v_and_b32_e32 v5, 7, v5 -; GFX8-NEXT: v_lshrrev_b16_sdwa v4, v10, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_mov_b32_e32 v9, 0xff -; GFX8-NEXT: v_lshlrev_b16_e32 v3, v8, v3 +; GFX8-NEXT: v_lshrrev_b16_sdwa v4, v9, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_lshlrev_b16_e32 v3, v7, v3 ; GFX8-NEXT: v_lshrrev_b16_e32 v4, v5, v4 +; GFX8-NEXT: v_mov_b32_e32 v8, 0xff ; GFX8-NEXT: v_or_b32_e32 v3, v3, v4 -; GFX8-NEXT: v_and_b32_e32 v4, 7, v6 -; GFX8-NEXT: v_not_b32_e32 v5, v6 -; GFX8-NEXT: v_and_b32_sdwa v6, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_e32 v5, 7, v5 -; GFX8-NEXT: v_lshrrev_b16_e32 v6, 1, v6 -; GFX8-NEXT: v_lshlrev_b16_sdwa v4, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_lshrrev_b16_e32 v5, v5, v6 -; GFX8-NEXT: v_not_b32_e32 v6, v7 -; GFX8-NEXT: v_or_b32_e32 v4, v4, v5 -; GFX8-NEXT: v_and_b32_e32 v5, 7, v7 -; GFX8-NEXT: v_and_b32_e32 v6, 7, v6 -; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v10, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 -; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 -; GFX8-NEXT: v_lshrrev_b16_e32 v1, v6, v1 +; GFX8-NEXT: v_mov_b32_e32 v4, 7 +; GFX8-NEXT: v_and_b32_sdwa v5, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_not_b32_sdwa v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX8-NEXT: v_and_b32_sdwa v8, v1, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_and_b32_sdwa v4, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_not_b32_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 +; GFX8-NEXT: v_and_b32_e32 v7, 7, v7 +; GFX8-NEXT: v_lshrrev_b16_e32 v8, 1, v8 +; GFX8-NEXT: v_and_b32_e32 v2, 7, v2 +; 
GFX8-NEXT: v_lshrrev_b16_sdwa v1, v9, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 +; GFX8-NEXT: v_lshlrev_b16_sdwa v5, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_lshrrev_b16_e32 v7, v7, v8 +; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 +; GFX8-NEXT: v_lshrrev_b16_e32 v1, v2, v1 +; GFX8-NEXT: v_or_b32_e32 v5, v5, v7 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: v_mov_b32_e32 v1, 8 ; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_e32 v2, 0xff, v4 +; GFX8-NEXT: v_and_b32_e32 v2, 0xff, v5 +; GFX8-NEXT: v_or_b32_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2 ; GFX8-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 @@ -1321,47 +1320,46 @@ define i32 @v_fshl_v4i8(i32 %lhs.arg, i32 %rhs.arg, i32 %amt.arg) { ; GFX9-LABEL: v_fshl_v4i8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_not_b32_e32 v7, v2 +; GFX9-NEXT: v_mov_b32_e32 v9, 1 +; GFX9-NEXT: v_and_b32_e32 v6, 7, v2 +; GFX9-NEXT: v_and_b32_e32 v7, 7, v7 +; GFX9-NEXT: v_lshrrev_b16_sdwa v10, v9, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v2 -; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v2 -; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v2 -; GFX9-NEXT: v_and_b32_e32 v8, 7, v2 -; GFX9-NEXT: v_not_b32_e32 v2, v2 -; GFX9-NEXT: v_mov_b32_e32 v10, 1 -; GFX9-NEXT: v_and_b32_e32 v2, 7, v2 -; GFX9-NEXT: v_lshrrev_b16_sdwa v11, v10, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_lshlrev_b16_e32 v8, v8, v0 -; GFX9-NEXT: v_lshrrev_b16_e32 v2, v2, v11 +; GFX9-NEXT: v_lshlrev_b16_e32 
v6, v6, v0 +; GFX9-NEXT: v_lshrrev_b16_e32 v7, v7, v10 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1 -; GFX9-NEXT: v_or_b32_e32 v2, v8, v2 -; GFX9-NEXT: v_and_b32_e32 v8, 7, v5 +; GFX9-NEXT: v_or_b32_e32 v6, v6, v7 +; GFX9-NEXT: v_and_b32_e32 v7, 7, v5 ; GFX9-NEXT: v_not_b32_e32 v5, v5 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v0 ; GFX9-NEXT: v_and_b32_e32 v5, 7, v5 -; GFX9-NEXT: v_lshrrev_b16_sdwa v4, v10, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_mov_b32_e32 v9, 0xff -; GFX9-NEXT: v_lshlrev_b16_e32 v3, v8, v3 +; GFX9-NEXT: v_lshrrev_b16_sdwa v4, v9, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_lshlrev_b16_e32 v3, v7, v3 ; GFX9-NEXT: v_lshrrev_b16_e32 v4, v5, v4 +; GFX9-NEXT: v_mov_b32_e32 v8, 0xff ; GFX9-NEXT: v_or_b32_e32 v3, v3, v4 -; GFX9-NEXT: v_and_b32_e32 v4, 7, v6 -; GFX9-NEXT: v_not_b32_e32 v5, v6 -; GFX9-NEXT: v_and_b32_sdwa v6, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_and_b32_e32 v5, 7, v5 -; GFX9-NEXT: v_lshrrev_b16_e32 v6, 1, v6 -; GFX9-NEXT: v_lshlrev_b16_sdwa v4, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX9-NEXT: v_lshrrev_b16_e32 v5, v5, v6 -; GFX9-NEXT: v_not_b32_e32 v6, v7 -; GFX9-NEXT: v_or_b32_e32 v4, v4, v5 -; GFX9-NEXT: v_and_b32_e32 v5, 7, v7 -; GFX9-NEXT: v_and_b32_e32 v6, 7, v6 -; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v10, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 -; GFX9-NEXT: v_lshlrev_b16_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 -; GFX9-NEXT: v_lshrrev_b16_e32 v1, v6, v1 +; GFX9-NEXT: v_mov_b32_e32 v4, 7 +; GFX9-NEXT: v_and_b32_sdwa v5, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_not_b32_sdwa v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX9-NEXT: v_and_b32_sdwa v10, v1, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; 
GFX9-NEXT: v_and_b32_sdwa v4, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_not_b32_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 +; GFX9-NEXT: v_and_b32_e32 v7, 7, v7 +; GFX9-NEXT: v_lshrrev_b16_e32 v10, 1, v10 +; GFX9-NEXT: v_and_b32_e32 v2, 7, v2 +; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v9, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 +; GFX9-NEXT: v_lshlrev_b16_sdwa v5, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NEXT: v_lshrrev_b16_e32 v7, v7, v10 +; GFX9-NEXT: v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 +; GFX9-NEXT: v_lshrrev_b16_e32 v1, v2, v1 +; GFX9-NEXT: v_or_b32_e32 v5, v5, v7 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX9-NEXT: v_mov_b32_e32 v1, 8 ; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_or_b32 v1, v2, v9, v1 -; GFX9-NEXT: v_and_b32_e32 v2, 0xff, v4 +; GFX9-NEXT: v_and_b32_e32 v2, 0xff, v5 ; GFX9-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX9-NEXT: v_and_or_b32 v1, v6, v8, v1 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 24, v0 ; GFX9-NEXT: v_or3_b32 v0, v1, v2, v0 @@ -1370,42 +1368,41 @@ define i32 @v_fshl_v4i8(i32 %lhs.arg, i32 %rhs.arg, i32 %amt.arg) { ; GFX10-LABEL: v_fshl_v4i8: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_lshrrev_b32_e32 v8, 8, v2 -; GFX10-NEXT: v_and_b32_e32 v10, 7, v2 +; GFX10-NEXT: v_lshrrev_b32_e32 v7, 8, v2 +; GFX10-NEXT: v_and_b32_e32 v9, 7, v2 +; GFX10-NEXT: v_and_b32_e32 v11, 0xff, v1 ; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v0 +; GFX10-NEXT: v_not_b32_e32 v12, v7 ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v0 ; GFX10-NEXT: v_lshrrev_b32_e32 v6, 8, v1 -; GFX10-NEXT: v_not_b32_e32 v9, v2 -; GFX10-NEXT: v_lshrrev_b32_e32 v11, 16, v2 -; GFX10-NEXT: v_lshrrev_b32_e32 
v2, 24, v2 -; GFX10-NEXT: v_lshlrev_b16 v0, v10, v0 -; GFX10-NEXT: v_not_b32_e32 v10, v8 -; GFX10-NEXT: v_and_b32_e32 v8, 7, v8 -; GFX10-NEXT: v_mov_b32_e32 v13, 0xff -; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GFX10-NEXT: v_and_b32_e32 v12, 0xff, v1 +; GFX10-NEXT: v_lshlrev_b16 v0, v9, v0 +; GFX10-NEXT: v_and_b32_e32 v7, 7, v7 +; GFX10-NEXT: v_lshrrev_b16 v9, 1, v11 +; GFX10-NEXT: v_and_b32_e32 v11, 7, v12 +; GFX10-NEXT: v_mov_b32_e32 v12, 0xff +; GFX10-NEXT: v_lshrrev_b32_e32 v10, 24, v1 ; GFX10-NEXT: v_and_b32_e32 v6, 0xff, v6 -; GFX10-NEXT: v_lshlrev_b16 v3, v8, v3 -; GFX10-NEXT: v_not_b32_e32 v8, v11 -; GFX10-NEXT: v_and_b32_sdwa v1, v1, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_not_b32_e32 v13, v2 -; GFX10-NEXT: v_and_b32_e32 v10, 7, v10 +; GFX10-NEXT: v_lshlrev_b16 v3, v7, v3 +; GFX10-NEXT: v_mov_b32_e32 v7, 7 +; GFX10-NEXT: v_not_b32_sdwa v13, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX10-NEXT: v_and_b32_sdwa v1, v1, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX10-NEXT: v_not_b32_sdwa v12, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 +; GFX10-NEXT: v_not_b32_e32 v8, v2 ; GFX10-NEXT: v_lshrrev_b16 v6, 1, v6 -; GFX10-NEXT: v_and_b32_e32 v11, 7, v11 -; GFX10-NEXT: v_and_b32_e32 v8, 7, v8 -; GFX10-NEXT: v_lshrrev_b16 v1, 1, v1 -; GFX10-NEXT: v_and_b32_e32 v2, 7, v2 +; GFX10-NEXT: v_and_b32_sdwa v14, v2, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX10-NEXT: v_and_b32_e32 v13, 7, v13 -; GFX10-NEXT: v_lshrrev_b16 v7, 1, v7 -; GFX10-NEXT: v_and_b32_e32 v9, 7, v9 -; GFX10-NEXT: v_lshrrev_b16 v12, 1, v12 -; GFX10-NEXT: v_lshrrev_b16 v6, v10, v6 -; GFX10-NEXT: v_lshlrev_b16 v4, v11, v4 -; GFX10-NEXT: v_lshrrev_b16 v1, v8, v1 +; GFX10-NEXT: v_lshrrev_b16 v1, 1, v1 +; GFX10-NEXT: v_and_b32_sdwa v2, v2, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX10-NEXT: v_and_b32_e32 v7, 7, v12 +; GFX10-NEXT: 
v_lshrrev_b16 v10, 1, v10 +; GFX10-NEXT: v_and_b32_e32 v8, 7, v8 +; GFX10-NEXT: v_lshrrev_b16 v6, v11, v6 +; GFX10-NEXT: v_lshlrev_b16 v4, v14, v4 +; GFX10-NEXT: v_lshrrev_b16 v1, v13, v1 ; GFX10-NEXT: v_lshlrev_b16 v2, v2, v5 -; GFX10-NEXT: v_lshrrev_b16 v5, v13, v7 -; GFX10-NEXT: v_lshrrev_b16 v7, v9, v12 +; GFX10-NEXT: v_lshrrev_b16 v5, v7, v10 +; GFX10-NEXT: v_lshrrev_b16 v7, v8, v9 ; GFX10-NEXT: v_or_b32_e32 v3, v3, v6 ; GFX10-NEXT: v_mov_b32_e32 v6, 8 ; GFX10-NEXT: v_or_b32_e32 v1, v4, v1 @@ -3932,25 +3929,26 @@ define <2 x i16> @v_fshl_v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) { ; GFX8-LABEL: v_fshl_v2i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; GFX8-NEXT: v_and_b32_e32 v4, 15, v2 -; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2 -; GFX8-NEXT: v_and_b32_e32 v2, 15, v2 +; GFX8-NEXT: v_xor_b32_e32 v4, -1, v2 +; GFX8-NEXT: v_and_b32_e32 v3, 15, v2 +; GFX8-NEXT: v_and_b32_e32 v4, 15, v4 ; GFX8-NEXT: v_lshrrev_b16_e32 v5, 1, v1 -; GFX8-NEXT: v_lshlrev_b16_e32 v4, v4, v0 -; GFX8-NEXT: v_lshrrev_b16_e32 v2, v2, v5 -; GFX8-NEXT: v_or_b32_e32 v2, v4, v2 -; GFX8-NEXT: v_and_b32_e32 v4, 15, v3 -; GFX8-NEXT: v_xor_b32_e32 v3, -1, v3 +; GFX8-NEXT: v_lshlrev_b16_e32 v3, v3, v0 +; GFX8-NEXT: v_lshrrev_b16_e32 v4, v4, v5 +; GFX8-NEXT: v_or_b32_e32 v3, v3, v4 +; GFX8-NEXT: v_mov_b32_e32 v4, 15 +; GFX8-NEXT: v_and_b32_sdwa v4, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v5, -1 +; GFX8-NEXT: v_xor_b32_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX8-NEXT: v_mov_b32_e32 v4, 1 -; GFX8-NEXT: v_and_b32_e32 v3, 15, v3 +; GFX8-NEXT: v_and_b32_e32 v2, 15, v2 ; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_lshrrev_b16_e32 v1, 
v3, v1 +; GFX8-NEXT: v_lshrrev_b16_e32 v1, v2, v1 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_fshl_v2i16: @@ -4083,27 +4081,28 @@ define amdgpu_ps float @v_fshl_v2i16_ssv(<2 x i16> inreg %lhs, <2 x i16> inreg % ; ; GFX8-LABEL: v_fshl_v2i16_ssv: ; GFX8: ; %bb.0: -; GFX8-NEXT: v_and_b32_e32 v2, 15, v0 +; GFX8-NEXT: v_and_b32_e32 v1, 15, v0 ; GFX8-NEXT: s_lshr_b32 s2, s0, 16 -; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0 -; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0 -; GFX8-NEXT: v_lshlrev_b16_e64 v2, v2, s0 +; GFX8-NEXT: v_xor_b32_e32 v2, -1, v0 +; GFX8-NEXT: v_lshlrev_b16_e64 v1, v1, s0 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s1 -; GFX8-NEXT: v_and_b32_e32 v0, 15, v0 +; GFX8-NEXT: v_and_b32_e32 v2, 15, v2 ; GFX8-NEXT: s_lshr_b32 s0, s0, 1 -; GFX8-NEXT: v_lshrrev_b16_e64 v0, v0, s0 +; GFX8-NEXT: v_lshrrev_b16_e64 v2, v2, s0 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX8-NEXT: v_mov_b32_e32 v2, 15 +; GFX8-NEXT: v_mov_b32_e32 v3, -1 ; GFX8-NEXT: s_lshr_b32 s3, s1, 16 -; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 -; GFX8-NEXT: v_and_b32_e32 v2, 15, v1 -; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1 -; GFX8-NEXT: v_and_b32_e32 v1, 15, v1 +; GFX8-NEXT: v_and_b32_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_xor_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_and_b32_e32 v0, 15, v0 ; GFX8-NEXT: s_lshr_b32 s0, s3, 1 ; GFX8-NEXT: v_lshlrev_b16_e64 v2, v2, s2 -; GFX8-NEXT: v_lshrrev_b16_e64 v1, v1, s0 -; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 -; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX8-NEXT: v_lshrrev_b16_e64 v0, v0, s0 +; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: v_fshl_v2i16_ssv: @@ -4620,32 +4619,33 @@ define <3 x half> @v_fshl_v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) ; GFX8-LABEL: v_fshl_v3i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v4 -; GFX8-NEXT: v_and_b32_e32 v7, 15, v4 -; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4 -; GFX8-NEXT: v_and_b32_e32 v4, 15, v4 +; GFX8-NEXT: v_xor_b32_e32 v7, -1, v4 +; GFX8-NEXT: v_and_b32_e32 v6, 15, v4 +; GFX8-NEXT: v_and_b32_e32 v7, 15, v7 ; GFX8-NEXT: v_lshrrev_b16_e32 v8, 1, v2 -; GFX8-NEXT: v_lshlrev_b16_e32 v7, v7, v0 -; GFX8-NEXT: v_lshrrev_b16_e32 v4, v4, v8 -; GFX8-NEXT: v_or_b32_e32 v4, v7, v4 -; GFX8-NEXT: v_and_b32_e32 v7, 15, v6 -; GFX8-NEXT: v_xor_b32_e32 v6, -1, v6 +; GFX8-NEXT: v_lshlrev_b16_e32 v6, v6, v0 +; GFX8-NEXT: v_lshrrev_b16_e32 v7, v7, v8 +; GFX8-NEXT: v_or_b32_e32 v6, v6, v7 +; GFX8-NEXT: v_mov_b32_e32 v7, 15 +; GFX8-NEXT: v_and_b32_sdwa v7, v4, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v8, -1 +; GFX8-NEXT: v_xor_b32_sdwa v4, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX8-NEXT: v_mov_b32_e32 v7, 1 -; GFX8-NEXT: v_and_b32_e32 v6, 15, v6 +; GFX8-NEXT: v_and_b32_e32 v4, 15, v4 ; GFX8-NEXT: v_lshrrev_b16_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_lshrrev_b16_e32 v2, v6, v2 +; GFX8-NEXT: v_lshrrev_b16_e32 v2, v4, v2 ; GFX8-NEXT: v_or_b32_e32 v0, v0, 
v2 ; GFX8-NEXT: v_and_b32_e32 v2, 15, v5 -; GFX8-NEXT: v_xor_b32_e32 v5, -1, v5 -; GFX8-NEXT: v_and_b32_e32 v5, 15, v5 +; GFX8-NEXT: v_xor_b32_e32 v4, -1, v5 +; GFX8-NEXT: v_and_b32_e32 v4, 15, v4 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, v2, v1 ; GFX8-NEXT: v_lshrrev_b16_e32 v2, 1, v3 -; GFX8-NEXT: v_lshrrev_b16_e32 v2, v5, v2 +; GFX8-NEXT: v_lshrrev_b16_e32 v2, v4, v2 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; @@ -4984,42 +4984,42 @@ define <4 x half> @v_fshl_v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt) ; GFX8-LABEL: v_fshl_v4i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v4 -; GFX8-NEXT: v_and_b32_e32 v8, 15, v4 -; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4 -; GFX8-NEXT: v_and_b32_e32 v4, 15, v4 -; GFX8-NEXT: v_lshrrev_b16_e32 v9, 1, v2 -; GFX8-NEXT: v_lshlrev_b16_e32 v8, v8, v0 -; GFX8-NEXT: v_lshrrev_b16_e32 v4, v4, v9 -; GFX8-NEXT: v_or_b32_e32 v4, v8, v4 -; GFX8-NEXT: v_and_b32_e32 v8, 15, v6 -; GFX8-NEXT: v_xor_b32_e32 v6, -1, v6 +; GFX8-NEXT: v_xor_b32_e32 v7, -1, v4 +; GFX8-NEXT: v_and_b32_e32 v6, 15, v4 +; GFX8-NEXT: v_and_b32_e32 v7, 15, v7 +; GFX8-NEXT: v_lshrrev_b16_e32 v8, 1, v2 +; GFX8-NEXT: v_lshlrev_b16_e32 v6, v6, v0 +; GFX8-NEXT: v_lshrrev_b16_e32 v7, v7, v8 +; GFX8-NEXT: v_or_b32_e32 v6, v6, v7 +; GFX8-NEXT: v_mov_b32_e32 v7, 15 +; GFX8-NEXT: v_and_b32_sdwa v8, v4, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v9, -1 +; GFX8-NEXT: v_xor_b32_sdwa v4, v4, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v8, v0 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX8-NEXT: v_mov_b32_e32 v8, 1 -; GFX8-NEXT: v_and_b32_e32 v6, 15, v6 +; GFX8-NEXT: v_and_b32_e32 v4, 15, v4 ; GFX8-NEXT: v_lshrrev_b16_sdwa v2, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_lshrrev_b16_e32 v2, v6, v2 -; GFX8-NEXT: v_lshrrev_b32_e32 v7, 16, v5 +; GFX8-NEXT: v_lshrrev_b16_e32 v2, v4, v2 +; GFX8-NEXT: v_xor_b32_e32 v4, -1, v5 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 ; GFX8-NEXT: v_and_b32_e32 v2, 15, v5 -; GFX8-NEXT: v_xor_b32_e32 v5, -1, v5 -; GFX8-NEXT: v_and_b32_e32 v5, 15, v5 -; GFX8-NEXT: v_lshrrev_b16_e32 v6, 1, v3 +; GFX8-NEXT: v_and_b32_e32 v4, 15, v4 +; GFX8-NEXT: v_lshrrev_b16_e32 v10, 1, v3 ; GFX8-NEXT: v_lshlrev_b16_e32 v2, v2, v1 -; GFX8-NEXT: v_lshrrev_b16_e32 v5, v5, v6 -; GFX8-NEXT: v_xor_b32_e32 v6, -1, v7 -; GFX8-NEXT: v_or_b32_e32 v2, v2, v5 -; GFX8-NEXT: v_and_b32_e32 v5, 15, v7 -; GFX8-NEXT: v_and_b32_e32 v6, 15, v6 +; GFX8-NEXT: v_lshrrev_b16_e32 v4, v4, v10 +; GFX8-NEXT: v_or_b32_e32 v2, v2, v4 +; GFX8-NEXT: v_and_b32_sdwa v4, v5, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_xor_b32_sdwa v5, v5, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_and_b32_e32 v5, 15, v5 ; GFX8-NEXT: v_lshrrev_b16_sdwa v3, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_lshrrev_b16_e32 v3, v6, v3 +; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_lshrrev_b16_e32 v3, v5, v3 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll index ff93cddafc8728..61588e640be185 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fshr.ll @@ -1272,46 +1272,45 @@ define i32 @v_fshr_v4i8(i32 %lhs.arg, i32 %rhs.arg, i32 %amt.arg) { ; GFX8-LABEL: v_fshr_v4i8: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_not_b32_e32 v7, v2 +; GFX8-NEXT: v_and_b32_e32 v6, 7, v2 +; GFX8-NEXT: v_and_b32_e32 v7, 7, v7 +; GFX8-NEXT: v_lshlrev_b16_e32 v8, 1, v0 ; GFX8-NEXT: v_lshrrev_b32_e32 v5, 8, v2 -; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v2 -; GFX8-NEXT: v_lshrrev_b32_e32 v7, 24, v2 -; GFX8-NEXT: v_and_b32_e32 v8, 7, v2 -; GFX8-NEXT: v_not_b32_e32 v2, v2 -; GFX8-NEXT: v_and_b32_e32 v2, 7, v2 -; GFX8-NEXT: v_lshlrev_b16_e32 v9, 1, v0 -; GFX8-NEXT: v_lshlrev_b16_e32 v2, v2, v9 -; GFX8-NEXT: v_lshrrev_b16_sdwa v8, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: v_lshlrev_b16_e32 v7, v7, v8 +; GFX8-NEXT: v_lshrrev_b16_sdwa v6, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 -; GFX8-NEXT: v_or_b32_e32 v2, v2, v8 -; GFX8-NEXT: v_and_b32_e32 v8, 7, v5 +; GFX8-NEXT: v_or_b32_e32 v6, v7, v6 +; GFX8-NEXT: v_and_b32_e32 v7, 7, v5 ; GFX8-NEXT: v_not_b32_e32 v5, v5 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v1 ; GFX8-NEXT: v_and_b32_e32 v5, 7, v5 ; GFX8-NEXT: v_lshlrev_b16_e32 v3, 1, v3 ; GFX8-NEXT: v_lshlrev_b16_e32 v3, v5, v3 -; GFX8-NEXT: v_lshrrev_b16_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX8-NEXT: 
v_lshrrev_b16_sdwa v4, v7, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX8-NEXT: v_or_b32_e32 v3, v3, v4 -; GFX8-NEXT: v_and_b32_e32 v4, 7, v6 -; GFX8-NEXT: v_not_b32_e32 v5, v6 -; GFX8-NEXT: v_mov_b32_e32 v6, 1 -; GFX8-NEXT: v_mov_b32_e32 v9, 0xff -; GFX8-NEXT: v_and_b32_e32 v5, 7, v5 -; GFX8-NEXT: v_lshlrev_b16_sdwa v8, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_lshlrev_b16_e32 v5, v5, v8 -; GFX8-NEXT: v_and_b32_sdwa v8, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX8-NEXT: v_lshrrev_b16_e32 v4, v4, v8 -; GFX8-NEXT: v_or_b32_e32 v4, v5, v4 -; GFX8-NEXT: v_and_b32_e32 v5, 7, v7 -; GFX8-NEXT: v_not_b32_e32 v7, v7 +; GFX8-NEXT: v_mov_b32_e32 v4, 7 +; GFX8-NEXT: v_mov_b32_e32 v8, 0xff +; GFX8-NEXT: v_and_b32_sdwa v5, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_not_b32_sdwa v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX8-NEXT: v_mov_b32_e32 v9, 1 +; GFX8-NEXT: v_and_b32_sdwa v4, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX8-NEXT: v_not_b32_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 ; GFX8-NEXT: v_and_b32_e32 v7, 7, v7 -; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 -; GFX8-NEXT: v_lshlrev_b16_e32 v0, v7, v0 -; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 +; GFX8-NEXT: v_lshlrev_b16_sdwa v10, v9, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_and_b32_sdwa v8, v1, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_and_b32_e32 v2, 7, v2 +; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v9, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 +; GFX8-NEXT: v_lshlrev_b16_e32 v7, v7, v10 +; GFX8-NEXT: v_lshrrev_b16_e32 v5, v5, v8 +; GFX8-NEXT: 
v_lshlrev_b16_e32 v0, v2, v0 +; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 +; GFX8-NEXT: v_or_b32_e32 v5, v7, v5 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: v_mov_b32_e32 v1, 8 ; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX8-NEXT: v_and_b32_e32 v2, 0xff, v4 +; GFX8-NEXT: v_and_b32_e32 v2, 0xff, v5 +; GFX8-NEXT: v_or_b32_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2 ; GFX8-NEXT: v_and_b32_e32 v0, 0xff, v0 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 @@ -1322,47 +1321,46 @@ define i32 @v_fshr_v4i8(i32 %lhs.arg, i32 %rhs.arg, i32 %amt.arg) { ; GFX9-LABEL: v_fshr_v4i8: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_not_b32_e32 v7, v2 +; GFX9-NEXT: v_and_b32_e32 v6, 7, v2 +; GFX9-NEXT: v_and_b32_e32 v7, 7, v7 +; GFX9-NEXT: v_lshlrev_b16_e32 v8, 1, v0 ; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v2 -; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v2 -; GFX9-NEXT: v_lshrrev_b32_e32 v7, 24, v2 -; GFX9-NEXT: v_and_b32_e32 v8, 7, v2 -; GFX9-NEXT: v_not_b32_e32 v2, v2 -; GFX9-NEXT: v_and_b32_e32 v2, 7, v2 -; GFX9-NEXT: v_lshlrev_b16_e32 v9, 1, v0 -; GFX9-NEXT: v_lshlrev_b16_e32 v2, v2, v9 -; GFX9-NEXT: v_lshrrev_b16_sdwa v8, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_lshlrev_b16_e32 v7, v7, v8 +; GFX9-NEXT: v_lshrrev_b16_sdwa v6, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v0 -; GFX9-NEXT: v_or_b32_e32 v2, v2, v8 -; GFX9-NEXT: v_and_b32_e32 v8, 7, v5 +; GFX9-NEXT: v_or_b32_e32 v6, v7, v6 +; GFX9-NEXT: v_and_b32_e32 v7, 7, v5 ; GFX9-NEXT: v_not_b32_e32 v5, v5 ; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1 ; GFX9-NEXT: v_and_b32_e32 v5, 
7, v5 ; GFX9-NEXT: v_lshlrev_b16_e32 v3, 1, v3 ; GFX9-NEXT: v_lshlrev_b16_e32 v3, v5, v3 -; GFX9-NEXT: v_lshrrev_b16_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_lshrrev_b16_sdwa v4, v7, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX9-NEXT: v_or_b32_e32 v3, v3, v4 -; GFX9-NEXT: v_and_b32_e32 v4, 7, v6 -; GFX9-NEXT: v_not_b32_e32 v5, v6 -; GFX9-NEXT: v_mov_b32_e32 v6, 1 -; GFX9-NEXT: v_mov_b32_e32 v9, 0xff -; GFX9-NEXT: v_and_b32_e32 v5, 7, v5 -; GFX9-NEXT: v_lshlrev_b16_sdwa v8, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX9-NEXT: v_lshlrev_b16_e32 v5, v5, v8 -; GFX9-NEXT: v_and_b32_sdwa v8, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_lshrrev_b16_e32 v4, v4, v8 -; GFX9-NEXT: v_or_b32_e32 v4, v5, v4 -; GFX9-NEXT: v_and_b32_e32 v5, 7, v7 -; GFX9-NEXT: v_not_b32_e32 v7, v7 +; GFX9-NEXT: v_mov_b32_e32 v4, 7 +; GFX9-NEXT: v_not_b32_sdwa v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX9-NEXT: v_mov_b32_e32 v9, 1 +; GFX9-NEXT: v_mov_b32_e32 v8, 0xff +; GFX9-NEXT: v_and_b32_sdwa v5, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX9-NEXT: v_and_b32_e32 v7, 7, v7 -; GFX9-NEXT: v_lshlrev_b16_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 -; GFX9-NEXT: v_lshlrev_b16_e32 v0, v7, v0 -; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 +; GFX9-NEXT: v_lshlrev_b16_sdwa v10, v9, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NEXT: v_and_b32_sdwa v4, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_not_b32_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 +; GFX9-NEXT: v_lshlrev_b16_e32 v7, v7, v10 +; GFX9-NEXT: v_and_b32_sdwa v10, v1, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 
src1_sel:DWORD +; GFX9-NEXT: v_and_b32_e32 v2, 7, v2 +; GFX9-NEXT: v_lshlrev_b16_sdwa v0, v9, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 +; GFX9-NEXT: v_lshrrev_b16_e32 v5, v5, v10 +; GFX9-NEXT: v_lshlrev_b16_e32 v0, v2, v0 +; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 +; GFX9-NEXT: v_or_b32_e32 v5, v7, v5 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX9-NEXT: v_mov_b32_e32 v1, 8 ; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_and_or_b32 v1, v2, v9, v1 -; GFX9-NEXT: v_and_b32_e32 v2, 0xff, v4 +; GFX9-NEXT: v_and_b32_e32 v2, 0xff, v5 ; GFX9-NEXT: v_and_b32_e32 v0, 0xff, v0 +; GFX9-NEXT: v_and_or_b32 v1, v6, v8, v1 ; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 24, v0 ; GFX9-NEXT: v_or3_b32 v0, v1, v2, v0 @@ -1372,52 +1370,51 @@ define i32 @v_fshr_v4i8(i32 %lhs.arg, i32 %rhs.arg, i32 %amt.arg) { ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v2 -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v0 +; GFX10-NEXT: v_lshrrev_b32_e32 v4, 8, v0 ; GFX10-NEXT: v_not_b32_e32 v8, v2 -; GFX10-NEXT: v_lshrrev_b32_e32 v11, 16, v2 -; GFX10-NEXT: v_lshrrev_b32_e32 v12, 24, v2 +; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v0 +; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v0 ; GFX10-NEXT: v_not_b32_e32 v10, v5 -; GFX10-NEXT: v_lshlrev_b16 v3, 1, v3 -; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v6, 24, v0 -; GFX10-NEXT: v_lshrrev_b32_e32 v7, 8, v1 -; GFX10-NEXT: v_and_b32_e32 v10, 7, v10 ; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 ; GFX10-NEXT: v_and_b32_e32 v8, 7, v8 -; GFX10-NEXT: v_mov_b32_e32 v13, 0xff -; GFX10-NEXT: v_not_b32_e32 v14, v12 -; GFX10-NEXT: v_lshlrev_b16 v3, v10, v3 -; GFX10-NEXT: v_not_b32_e32 v10, v11 -; GFX10-NEXT: v_lshrrev_b32_e32 v9, 24, v1 +; GFX10-NEXT: v_lshlrev_b16 v4, 1, v4 +; GFX10-NEXT: 
v_mov_b32_e32 v3, 7 +; GFX10-NEXT: v_and_b32_e32 v10, 7, v10 +; GFX10-NEXT: v_lshrrev_b32_e32 v9, 8, v1 ; GFX10-NEXT: v_lshlrev_b16 v0, v8, v0 -; GFX10-NEXT: v_and_b32_e32 v8, 0xff, v1 +; GFX10-NEXT: v_not_b32_sdwa v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX10-NEXT: v_not_b32_sdwa v14, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 +; GFX10-NEXT: v_lshlrev_b16 v4, v10, v4 +; GFX10-NEXT: v_mov_b32_e32 v10, 0xff +; GFX10-NEXT: v_lshrrev_b32_e32 v11, 24, v1 +; GFX10-NEXT: v_and_b32_e32 v12, 7, v2 +; GFX10-NEXT: v_and_b32_e32 v13, 0xff, v1 ; GFX10-NEXT: v_and_b32_e32 v5, 7, v5 -; GFX10-NEXT: v_and_b32_e32 v7, 0xff, v7 -; GFX10-NEXT: v_and_b32_e32 v11, 7, v11 -; GFX10-NEXT: v_and_b32_e32 v10, 7, v10 -; GFX10-NEXT: v_lshlrev_b16 v4, 1, v4 -; GFX10-NEXT: v_and_b32_sdwa v1, v1, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX10-NEXT: v_and_b32_e32 v13, 7, v14 +; GFX10-NEXT: v_and_b32_e32 v9, 0xff, v9 +; GFX10-NEXT: v_and_b32_sdwa v15, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX10-NEXT: v_and_b32_e32 v8, 7, v8 ; GFX10-NEXT: v_lshlrev_b16 v6, 1, v6 -; GFX10-NEXT: v_and_b32_e32 v12, 7, v12 -; GFX10-NEXT: v_and_b32_e32 v2, 7, v2 -; GFX10-NEXT: v_lshrrev_b16 v5, v5, v7 -; GFX10-NEXT: v_lshlrev_b16 v4, v10, v4 -; GFX10-NEXT: v_lshrrev_b16 v1, v11, v1 -; GFX10-NEXT: v_lshlrev_b16 v6, v13, v6 -; GFX10-NEXT: v_lshrrev_b16 v7, v12, v9 -; GFX10-NEXT: v_lshrrev_b16 v2, v2, v8 -; GFX10-NEXT: v_or_b32_e32 v3, v3, v5 -; GFX10-NEXT: v_mov_b32_e32 v5, 8 -; GFX10-NEXT: v_or_b32_e32 v1, v4, v1 -; GFX10-NEXT: v_or_b32_e32 v4, v6, v7 -; GFX10-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX10-NEXT: v_and_b32_sdwa v1, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX10-NEXT: v_and_b32_e32 v10, 7, v14 +; GFX10-NEXT: v_lshlrev_b16 v7, 1, v7 +; GFX10-NEXT: v_and_b32_sdwa 
v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX10-NEXT: v_lshrrev_b16 v3, v5, v9 +; GFX10-NEXT: v_lshlrev_b16 v5, v8, v6 +; GFX10-NEXT: v_lshrrev_b16 v1, v15, v1 +; GFX10-NEXT: v_lshlrev_b16 v6, v10, v7 +; GFX10-NEXT: v_lshrrev_b16 v2, v2, v11 +; GFX10-NEXT: v_lshrrev_b16 v7, v12, v13 +; GFX10-NEXT: v_or_b32_e32 v3, v4, v3 +; GFX10-NEXT: v_mov_b32_e32 v4, 8 +; GFX10-NEXT: v_or_b32_e32 v1, v5, v1 +; GFX10-NEXT: v_or_b32_e32 v2, v6, v2 +; GFX10-NEXT: v_or_b32_e32 v0, v0, v7 +; GFX10-NEXT: v_lshlrev_b32_sdwa v3, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GFX10-NEXT: v_and_b32_e32 v3, 0xff, v4 -; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v2 +; GFX10-NEXT: v_and_b32_e32 v2, 0xff, v2 +; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v3 ; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v3 +; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v2 ; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -3718,29 +3715,29 @@ define <2 x i16> @v_fshr_v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) { ; GFX8-NEXT: v_mov_b32_e32 v4, 1 ; GFX8-NEXT: v_mov_b32_e32 v5, 15 ; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_lshrrev_b16_sdwa v5, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_lshrrev_b16_sdwa v6, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v5 -; GFX8-NEXT: v_lshlrev_b16_e32 v5, 1, v1 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v6 +; GFX8-NEXT: v_lshlrev_b16_e32 v6, 1, v1 ; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v2 -; GFX8-NEXT: v_and_b32_e32 v6, 15, v2 -; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2 +; GFX8-NEXT: 
v_and_b32_e32 v4, 15, v2 +; GFX8-NEXT: v_xor_b32_e32 v7, -1, v2 +; GFX8-NEXT: v_and_b32_e32 v7, 15, v7 +; GFX8-NEXT: v_lshlrev_b16_e32 v3, v4, v3 +; GFX8-NEXT: v_lshrrev_b16_e32 v4, 1, v6 +; GFX8-NEXT: v_lshrrev_b16_e32 v4, v7, v4 +; GFX8-NEXT: v_or_b32_e32 v3, v3, v4 +; GFX8-NEXT: v_and_b32_sdwa v4, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v5, -1 +; GFX8-NEXT: v_xor_b32_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX8-NEXT: v_and_b32_e32 v2, 15, v2 -; GFX8-NEXT: v_lshrrev_b16_e32 v5, 1, v5 -; GFX8-NEXT: v_lshlrev_b16_e32 v3, v6, v3 -; GFX8-NEXT: v_lshrrev_b16_e32 v2, v2, v5 -; GFX8-NEXT: v_or_b32_e32 v2, v3, v2 -; GFX8-NEXT: v_and_b32_e32 v3, 15, v4 -; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4 -; GFX8-NEXT: v_and_b32_e32 v4, 15, v4 ; GFX8-NEXT: v_lshrrev_b16_e32 v1, 1, v1 -; GFX8-NEXT: v_lshlrev_b16_e32 v0, v3, v0 -; GFX8-NEXT: v_lshrrev_b16_e32 v1, v4, v1 +; GFX8-NEXT: v_lshlrev_b16_e32 v0, v4, v0 +; GFX8-NEXT: v_lshrrev_b16_e32 v1, v2, v1 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_fshr_v2i16: @@ -3896,30 +3893,31 @@ define amdgpu_ps float @v_fshr_v2i16_ssv(<2 x i16> inreg %lhs, <2 x i16> inreg % ; GFX8-NEXT: s_lshr_b32 s3, s1, 16 ; GFX8-NEXT: s_or_b32 s0, s0, s4 ; GFX8-NEXT: s_lshl_b32 s1, s1, 1 -; GFX8-NEXT: v_and_b32_e32 v2, 15, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0 -; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0 -; GFX8-NEXT: v_lshlrev_b16_e64 v2, v2, s0 +; GFX8-NEXT: v_and_b32_e32 v1, 15, v0 +; GFX8-NEXT: v_xor_b32_e32 v2, -1, v0 +; GFX8-NEXT: v_lshlrev_b16_e64 v1, v1, s0 ; GFX8-NEXT: s_and_b32 s0, 0xffff, s1 -; GFX8-NEXT: 
v_and_b32_e32 v0, 15, v0 +; GFX8-NEXT: v_and_b32_e32 v2, 15, v2 ; GFX8-NEXT: s_lshr_b32 s0, s0, 1 +; GFX8-NEXT: v_lshrrev_b16_e64 v2, v2, s0 ; GFX8-NEXT: s_lshr_b32 s4, s3, 15 ; GFX8-NEXT: s_lshl_b32 s3, s3, 1 -; GFX8-NEXT: v_lshrrev_b16_e64 v0, v0, s0 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX8-NEXT: v_mov_b32_e32 v2, 15 +; GFX8-NEXT: v_mov_b32_e32 v3, -1 ; GFX8-NEXT: s_lshl_b32 s2, s2, 1 -; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 -; GFX8-NEXT: v_and_b32_e32 v2, 15, v1 -; GFX8-NEXT: v_xor_b32_e32 v1, -1, v1 +; GFX8-NEXT: v_and_b32_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_xor_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX8-NEXT: s_and_b32 s0, 0xffff, s3 ; GFX8-NEXT: s_or_b32 s2, s2, s4 -; GFX8-NEXT: v_and_b32_e32 v1, 15, v1 +; GFX8-NEXT: v_and_b32_e32 v0, 15, v0 ; GFX8-NEXT: s_lshr_b32 s0, s0, 1 ; GFX8-NEXT: v_lshlrev_b16_e64 v2, v2, s2 -; GFX8-NEXT: v_lshrrev_b16_e64 v1, v1, s0 -; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 -; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX8-NEXT: v_lshrrev_b16_e64 v0, v0, s0 +; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: v_fshr_v2i16_ssv: @@ -4536,47 +4534,47 @@ define <3 x half> @v_fshr_v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) ; GFX8-LABEL: v_fshr_v3i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_lshlrev_b16_e32 v7, 1, v0 -; GFX8-NEXT: v_lshrrev_b16_e32 v8, 15, v2 -; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v2 -; GFX8-NEXT: v_or_b32_e32 v7, v7, v8 -; GFX8-NEXT: v_mov_b32_e32 v8, 1 -; GFX8-NEXT: 
v_lshlrev_b16_sdwa v0, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_lshrrev_b16_e32 v8, 15, v6 -; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v8 -; GFX8-NEXT: v_lshlrev_b16_e32 v2, 1, v2 -; GFX8-NEXT: v_lshrrev_b32_e32 v8, 16, v4 -; GFX8-NEXT: v_and_b32_e32 v9, 15, v4 +; GFX8-NEXT: v_lshlrev_b16_e32 v6, 1, v0 +; GFX8-NEXT: v_lshrrev_b16_e32 v7, 15, v2 +; GFX8-NEXT: v_or_b32_e32 v6, v6, v7 +; GFX8-NEXT: v_mov_b32_e32 v7, 1 +; GFX8-NEXT: v_mov_b32_e32 v8, 15 +; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_lshrrev_b16_sdwa v9, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v9 +; GFX8-NEXT: v_lshlrev_b16_e32 v9, 1, v2 +; GFX8-NEXT: v_lshlrev_b16_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_and_b32_e32 v7, 15, v4 +; GFX8-NEXT: v_xor_b32_e32 v10, -1, v4 +; GFX8-NEXT: v_and_b32_e32 v10, 15, v10 +; GFX8-NEXT: v_lshlrev_b16_e32 v6, v7, v6 +; GFX8-NEXT: v_lshrrev_b16_e32 v7, 1, v9 +; GFX8-NEXT: v_lshrrev_b16_e32 v7, v10, v7 +; GFX8-NEXT: v_or_b32_e32 v6, v6, v7 +; GFX8-NEXT: v_and_b32_sdwa v7, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_mov_b32_e32 v8, -1 +; GFX8-NEXT: v_xor_b32_sdwa v4, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX8-NEXT: v_and_b32_e32 v4, 15, v4 ; GFX8-NEXT: v_lshrrev_b16_e32 v2, 1, v2 -; GFX8-NEXT: v_lshlrev_b16_e32 v7, v9, v7 +; GFX8-NEXT: v_lshlrev_b16_e32 v0, v7, v0 ; GFX8-NEXT: v_lshrrev_b16_e32 v2, v4, v2 -; GFX8-NEXT: v_lshlrev_b16_e32 v6, 1, v6 -; GFX8-NEXT: v_or_b32_e32 v2, v7, v2 -; GFX8-NEXT: v_and_b32_e32 v4, 15, v8 -; GFX8-NEXT: v_xor_b32_e32 v7, -1, v8 -; GFX8-NEXT: v_and_b32_e32 v7, 15, v7 -; GFX8-NEXT: v_lshlrev_b16_e32 v0, v4, v0 -; GFX8-NEXT: v_lshrrev_b16_e32 
v4, 1, v6 -; GFX8-NEXT: v_lshrrev_b16_e32 v4, v7, v4 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 +; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, 1, v1 -; GFX8-NEXT: v_lshrrev_b16_e32 v4, 15, v3 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 -; GFX8-NEXT: v_xor_b32_e32 v4, -1, v5 -; GFX8-NEXT: v_lshlrev_b16_e32 v3, 1, v3 -; GFX8-NEXT: v_and_b32_e32 v5, 15, v4 -; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4 -; GFX8-NEXT: v_and_b32_e32 v4, 15, v4 -; GFX8-NEXT: v_lshrrev_b16_e32 v3, 1, v3 -; GFX8-NEXT: v_lshlrev_b16_e32 v1, v5, v1 -; GFX8-NEXT: v_lshrrev_b16_e32 v3, v4, v3 +; GFX8-NEXT: v_lshrrev_b16_e32 v2, 15, v3 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX8-NEXT: v_lshlrev_b16_e32 v2, 1, v3 +; GFX8-NEXT: v_xor_b32_e32 v3, -1, v5 +; GFX8-NEXT: v_and_b32_e32 v4, 15, v3 +; GFX8-NEXT: v_xor_b32_e32 v3, -1, v3 +; GFX8-NEXT: v_and_b32_e32 v3, 15, v3 +; GFX8-NEXT: v_lshrrev_b16_e32 v2, 1, v2 +; GFX8-NEXT: v_lshlrev_b16_e32 v1, v4, v1 +; GFX8-NEXT: v_lshrrev_b16_e32 v2, v3, v2 ; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 +; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; @@ -5001,44 +4999,43 @@ define <4 x half> @v_fshr_v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt) ; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v9 ; GFX8-NEXT: v_lshlrev_b16_e32 v9, 1, v2 -; GFX8-NEXT: v_lshrrev_b32_e32 v10, 16, v4 -; GFX8-NEXT: v_and_b32_e32 v11, 15, v4 -; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4 -; GFX8-NEXT: v_and_b32_e32 v4, 15, v4 +; GFX8-NEXT: v_xor_b32_e32 v11, -1, v4 +; GFX8-NEXT: v_and_b32_e32 v10, 15, v4 +; GFX8-NEXT: v_and_b32_e32 v11, 15, v11 ; GFX8-NEXT: v_lshrrev_b16_e32 v9, 1, v9 +; GFX8-NEXT: 
v_lshlrev_b16_e32 v6, v10, v6 +; GFX8-NEXT: v_lshrrev_b16_e32 v9, v11, v9 +; GFX8-NEXT: v_mov_b32_e32 v10, -1 ; GFX8-NEXT: v_lshlrev_b16_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_lshlrev_b16_e32 v6, v11, v6 -; GFX8-NEXT: v_lshrrev_b16_e32 v4, v4, v9 -; GFX8-NEXT: v_xor_b32_e32 v9, -1, v10 -; GFX8-NEXT: v_or_b32_e32 v4, v6, v4 -; GFX8-NEXT: v_and_b32_e32 v6, 15, v10 -; GFX8-NEXT: v_and_b32_e32 v9, 15, v9 +; GFX8-NEXT: v_or_b32_e32 v6, v6, v9 +; GFX8-NEXT: v_and_b32_sdwa v9, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_xor_b32_sdwa v4, v4, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_and_b32_e32 v4, 15, v4 ; GFX8-NEXT: v_lshrrev_b16_e32 v2, 1, v2 -; GFX8-NEXT: v_lshlrev_b16_e32 v0, v6, v0 -; GFX8-NEXT: v_lshrrev_b16_e32 v2, v9, v2 +; GFX8-NEXT: v_lshlrev_b16_e32 v0, v9, v0 +; GFX8-NEXT: v_lshrrev_b16_e32 v2, v4, v2 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 -; GFX8-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX8-NEXT: v_lshlrev_b16_e32 v2, 1, v1 ; GFX8-NEXT: v_lshrrev_b16_e32 v4, 15, v3 +; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX8-NEXT: v_or_b32_e32 v2, v2, v4 ; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX8-NEXT: v_lshrrev_b16_sdwa v4, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX8-NEXT: v_xor_b32_e32 v5, -1, v5 +; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 ; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 ; GFX8-NEXT: v_lshlrev_b16_e32 v4, 1, v3 ; GFX8-NEXT: v_lshlrev_b16_sdwa v3, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v5 -; GFX8-NEXT: v_and_b32_e32 v7, 15, v5 -; GFX8-NEXT: v_xor_b32_e32 v5, -1, v5 -; GFX8-NEXT: v_and_b32_e32 
v5, 15, v5 +; GFX8-NEXT: v_xor_b32_e32 v7, -1, v5 +; GFX8-NEXT: v_or_b32_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX8-NEXT: v_and_b32_e32 v6, 15, v5 +; GFX8-NEXT: v_and_b32_e32 v7, 15, v7 ; GFX8-NEXT: v_lshrrev_b16_e32 v4, 1, v4 -; GFX8-NEXT: v_lshlrev_b16_e32 v2, v7, v2 -; GFX8-NEXT: v_lshrrev_b16_e32 v4, v5, v4 -; GFX8-NEXT: v_xor_b32_e32 v5, -1, v6 +; GFX8-NEXT: v_lshlrev_b16_e32 v2, v6, v2 +; GFX8-NEXT: v_lshrrev_b16_e32 v4, v7, v4 ; GFX8-NEXT: v_or_b32_e32 v2, v2, v4 -; GFX8-NEXT: v_and_b32_e32 v4, 15, v6 +; GFX8-NEXT: v_and_b32_sdwa v4, v5, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_xor_b32_sdwa v5, v5, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX8-NEXT: v_and_b32_e32 v5, 15, v5 ; GFX8-NEXT: v_lshrrev_b16_e32 v3, 1, v3 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, v4, v1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll index 3ef059057ac8e3..41e915a4c1011b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll @@ -248,13 +248,12 @@ define amdgpu_cs <2 x i8> @abs_vgpr_v2i8(<2 x i8> %arg) { ; GFX8-LABEL: abs_vgpr_v2i8: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 8, v0 -; GFX8-NEXT: v_ashrrev_i16_e32 v0, 8, v0 +; GFX8-NEXT: v_mov_b32_e32 v2, 0 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GFX8-NEXT: v_sub_u16_e32 v2, 0, v0 -; GFX8-NEXT: v_ashrrev_i16_e32 v1, 8, v1 -; GFX8-NEXT: v_max_i16_e32 v0, v0, v2 -; GFX8-NEXT: v_sub_u16_e32 v2, 0, v1 -; GFX8-NEXT: v_max_i16_e32 v1, v1, v2 +; GFX8-NEXT: v_sub_u16_sdwa v3, v2, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_sub_u16_sdwa v2, v2, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_max_i16_sdwa v0, sext(v0), v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD +; GFX8-NEXT: 
v_max_i16_sdwa v1, sext(v1), v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD ; GFX8-NEXT: v_readfirstlane_b32 s0, v0 ; GFX8-NEXT: v_readfirstlane_b32 s1, v1 ; GFX8-NEXT: ; return to shader part epilog @@ -340,17 +339,15 @@ define amdgpu_cs <3 x i8> @abs_vgpr_v3i8(<3 x i8> %arg) { ; GFX8-LABEL: abs_vgpr_v3i8: ; GFX8: ; %bb.0: ; GFX8-NEXT: v_lshlrev_b16_e32 v0, 8, v0 -; GFX8-NEXT: v_ashrrev_i16_e32 v0, 8, v0 +; GFX8-NEXT: v_mov_b32_e32 v3, 0 +; GFX8-NEXT: v_sub_u16_sdwa v4, v3, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 ; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GFX8-NEXT: v_sub_u16_e32 v3, 0, v0 -; GFX8-NEXT: v_ashrrev_i16_e32 v1, 8, v1 ; GFX8-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; GFX8-NEXT: v_max_i16_e32 v0, v0, v3 -; GFX8-NEXT: v_sub_u16_e32 v3, 0, v1 -; GFX8-NEXT: v_ashrrev_i16_e32 v2, 8, v2 -; GFX8-NEXT: v_max_i16_e32 v1, v1, v3 -; GFX8-NEXT: v_sub_u16_e32 v3, 0, v2 -; GFX8-NEXT: v_max_i16_e32 v2, v2, v3 +; GFX8-NEXT: v_max_i16_sdwa v0, sext(v0), v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD +; GFX8-NEXT: v_sub_u16_sdwa v4, v3, sext(v1) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_sub_u16_sdwa v3, v3, sext(v2) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX8-NEXT: v_max_i16_sdwa v1, sext(v1), v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD +; GFX8-NEXT: v_max_i16_sdwa v2, sext(v2), v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD ; GFX8-NEXT: v_readfirstlane_b32 s0, v0 ; GFX8-NEXT: v_readfirstlane_b32 s1, v1 ; GFX8-NEXT: v_readfirstlane_b32 s2, v2 @@ -424,12 +421,12 @@ define amdgpu_cs <2 x i16> @abs_vgpr_v2i16(<2 x i16> %arg) { ; ; GFX8-LABEL: abs_vgpr_v2i16: ; GFX8: ; %bb.0: -; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0 -; GFX8-NEXT: v_sub_u16_e32 v2, 0, v0 -; GFX8-NEXT: v_sub_u16_e32 v3, 0, v1 -; GFX8-NEXT: v_max_i16_e32 v0, v0, v2 -; GFX8-NEXT: v_max_i16_sdwa v1, v1, v3 
dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: v_mov_b32_e32 v2, 0 +; GFX8-NEXT: v_sub_u16_e32 v1, 0, v0 +; GFX8-NEXT: v_sub_u16_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_max_i16_e32 v1, v0, v1 +; GFX8-NEXT: v_max_i16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0 ; GFX8-NEXT: ; return to shader part epilog ; @@ -503,14 +500,14 @@ define amdgpu_cs <3 x i16> @abs_vgpr_v3i16(<3 x i16> %arg) { ; ; GFX8-LABEL: abs_vgpr_v3i16: ; GFX8: ; %bb.0: -; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GFX8-NEXT: v_sub_u16_e32 v3, 0, v0 -; GFX8-NEXT: v_sub_u16_e32 v4, 0, v2 -; GFX8-NEXT: v_sub_u16_e32 v5, 0, v1 -; GFX8-NEXT: v_max_i16_e32 v0, v0, v3 -; GFX8-NEXT: v_max_i16_sdwa v2, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_max_i16_e32 v1, v1, v5 +; GFX8-NEXT: v_mov_b32_e32 v3, 0 +; GFX8-NEXT: v_sub_u16_e32 v2, 0, v0 +; GFX8-NEXT: v_sub_u16_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_sub_u16_e32 v4, 0, v1 +; GFX8-NEXT: v_max_i16_e32 v2, v0, v2 +; GFX8-NEXT: v_max_i16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX8-NEXT: v_max_i16_e32 v1, v1, v4 ; GFX8-NEXT: v_readfirstlane_b32 s0, v0 ; GFX8-NEXT: v_readfirstlane_b32 s1, v1 ; GFX8-NEXT: ; return to shader part epilog diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll index a6f9bb7ee055d4..168e6dfa5f147d 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll @@ -2774,22 +2774,22 @@ define <2 x i16> @v_saddsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) { ; GFX8-LABEL: 
v_saddsat_v2i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_min_i16_e32 v4, 0, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GFX8-NEXT: v_max_i16_e32 v3, 0, v0 -; GFX8-NEXT: v_sub_u16_e32 v4, 0x8000, v4 -; GFX8-NEXT: v_sub_u16_e32 v3, 0x7fff, v3 -; GFX8-NEXT: v_max_i16_e32 v4, v4, v1 -; GFX8-NEXT: v_min_i16_e32 v5, 0, v2 -; GFX8-NEXT: v_min_i16_e32 v3, v4, v3 -; GFX8-NEXT: v_max_i16_e32 v4, 0, v2 -; GFX8-NEXT: v_sub_u16_e32 v5, 0x8000, v5 +; GFX8-NEXT: v_min_i16_e32 v3, 0, v0 +; GFX8-NEXT: v_max_i16_e32 v2, 0, v0 +; GFX8-NEXT: v_sub_u16_e32 v3, 0x8000, v3 +; GFX8-NEXT: v_sub_u16_e32 v2, 0x7fff, v2 +; GFX8-NEXT: v_max_i16_e32 v3, v3, v1 +; GFX8-NEXT: v_min_i16_e32 v2, v3, v2 +; GFX8-NEXT: v_mov_b32_e32 v3, 0 +; GFX8-NEXT: v_max_i16_sdwa v4, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_min_i16_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_sub_u16_e32 v3, 0x8000, v3 ; GFX8-NEXT: v_sub_u16_e32 v4, 0x7fff, v4 -; GFX8-NEXT: v_max_i16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_max_i16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX8-NEXT: v_min_i16_e32 v1, v1, v4 -; GFX8-NEXT: v_add_u16_e32 v0, v0, v3 -; GFX8-NEXT: v_add_u16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: v_add_u16_e32 v2, v0, v2 +; GFX8-NEXT: v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_saddsat_v2i16: @@ -2987,23 +2987,23 @@ define amdgpu_ps float @saddsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) { ; ; GFX8-LABEL: saddsat_v2i16_vs: ; GFX8: ; %bb.0: -; GFX8-NEXT: v_min_i16_e32 v3, 0, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0 -; 
GFX8-NEXT: v_max_i16_e32 v2, 0, v0 -; GFX8-NEXT: v_sub_u16_e32 v3, 0x8000, v3 -; GFX8-NEXT: v_sub_u16_e32 v2, 0x7fff, v2 -; GFX8-NEXT: v_max_i16_e32 v3, s0, v3 -; GFX8-NEXT: v_min_i16_e32 v4, 0, v1 +; GFX8-NEXT: v_min_i16_e32 v2, 0, v0 +; GFX8-NEXT: v_max_i16_e32 v1, 0, v0 +; GFX8-NEXT: v_sub_u16_e32 v2, 0x8000, v2 +; GFX8-NEXT: v_sub_u16_e32 v1, 0x7fff, v1 +; GFX8-NEXT: v_max_i16_e32 v2, s0, v2 +; GFX8-NEXT: v_min_i16_e32 v1, v2, v1 +; GFX8-NEXT: v_mov_b32_e32 v2, 0 +; GFX8-NEXT: v_max_i16_sdwa v3, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_min_i16_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX8-NEXT: s_lshr_b32 s1, s0, 16 -; GFX8-NEXT: v_min_i16_e32 v2, v3, v2 -; GFX8-NEXT: v_max_i16_e32 v3, 0, v1 -; GFX8-NEXT: v_sub_u16_e32 v4, 0x8000, v4 +; GFX8-NEXT: v_sub_u16_e32 v2, 0x8000, v2 ; GFX8-NEXT: v_sub_u16_e32 v3, 0x7fff, v3 -; GFX8-NEXT: v_max_i16_e32 v4, s1, v4 -; GFX8-NEXT: v_min_i16_e32 v3, v4, v3 -; GFX8-NEXT: v_add_u16_e32 v0, v0, v2 -; GFX8-NEXT: v_add_u16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: v_max_i16_e32 v2, s1, v2 +; GFX8-NEXT: v_min_i16_e32 v2, v2, v3 +; GFX8-NEXT: v_add_u16_e32 v1, v0, v1 +; GFX8-NEXT: v_add_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: saddsat_v2i16_vs: @@ -3090,38 +3090,37 @@ define <2 x float> @v_saddsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; GFX8-LABEL: v_saddsat_v4i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_min_i16_e32 v7, 0, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v0 -; GFX8-NEXT: v_max_i16_e32 v6, 0, v0 +; GFX8-NEXT: v_min_i16_e32 v5, 0, v0 +; GFX8-NEXT: v_max_i16_e32 v4, 0, v0 +; GFX8-NEXT: v_sub_u16_e32 v5, 0x8000, v5 +; GFX8-NEXT: v_sub_u16_e32 v4, 
0x7fff, v4 +; GFX8-NEXT: v_max_i16_e32 v5, v5, v2 +; GFX8-NEXT: v_min_i16_e32 v4, v5, v4 +; GFX8-NEXT: v_mov_b32_e32 v5, 0 +; GFX8-NEXT: v_min_i16_sdwa v7, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_max_i16_sdwa v6, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX8-NEXT: v_sub_u16_e32 v7, 0x8000, v7 ; GFX8-NEXT: v_sub_u16_e32 v6, 0x7fff, v6 -; GFX8-NEXT: v_max_i16_e32 v7, v7, v2 -; GFX8-NEXT: v_min_i16_e32 v8, 0, v4 +; GFX8-NEXT: v_max_i16_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_min_i16_e32 v7, 0, v1 +; GFX8-NEXT: v_min_i16_e32 v2, v2, v6 +; GFX8-NEXT: v_max_i16_e32 v6, 0, v1 +; GFX8-NEXT: v_sub_u16_e32 v7, 0x8000, v7 +; GFX8-NEXT: v_sub_u16_e32 v6, 0x7fff, v6 +; GFX8-NEXT: v_max_i16_e32 v7, v7, v3 ; GFX8-NEXT: v_min_i16_e32 v6, v7, v6 -; GFX8-NEXT: v_max_i16_e32 v7, 0, v4 -; GFX8-NEXT: v_sub_u16_e32 v8, 0x8000, v8 -; GFX8-NEXT: v_sub_u16_e32 v7, 0x7fff, v7 -; GFX8-NEXT: v_max_i16_sdwa v2, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_min_i16_e32 v8, 0, v1 -; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v1 -; GFX8-NEXT: v_min_i16_e32 v2, v2, v7 -; GFX8-NEXT: v_max_i16_e32 v7, 0, v1 -; GFX8-NEXT: v_sub_u16_e32 v8, 0x8000, v8 +; GFX8-NEXT: v_max_i16_sdwa v7, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_min_i16_sdwa v5, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_sub_u16_e32 v5, 0x8000, v5 ; GFX8-NEXT: v_sub_u16_e32 v7, 0x7fff, v7 -; GFX8-NEXT: v_max_i16_e32 v8, v8, v3 -; GFX8-NEXT: v_min_i16_e32 v9, 0, v5 -; GFX8-NEXT: v_min_i16_e32 v7, v8, v7 -; GFX8-NEXT: v_max_i16_e32 v8, 0, v5 -; GFX8-NEXT: v_sub_u16_e32 v9, 0x8000, v9 -; GFX8-NEXT: v_sub_u16_e32 v8, 0x7fff, v8 -; GFX8-NEXT: v_max_i16_sdwa v3, v9, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_min_i16_e32 v3, v3, v8 
-; GFX8-NEXT: v_add_u16_e32 v0, v0, v6 -; GFX8-NEXT: v_add_u16_sdwa v2, v4, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_add_u16_e32 v1, v1, v7 -; GFX8-NEXT: v_add_u16_sdwa v2, v5, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX8-NEXT: v_max_i16_sdwa v3, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_min_i16_e32 v3, v3, v7 +; GFX8-NEXT: v_add_u16_e32 v4, v0, v4 +; GFX8-NEXT: v_add_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_add_u16_e32 v2, v1, v6 +; GFX8-NEXT: v_add_u16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_saddsat_v4i16: @@ -3376,54 +3375,52 @@ define <3 x float> @v_saddsat_v6i16(<6 x i16> %lhs, <6 x i16> %rhs) { ; GFX8-LABEL: v_saddsat_v6i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_min_i16_e32 v10, 0, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v0 -; GFX8-NEXT: v_max_i16_e32 v9, 0, v0 +; GFX8-NEXT: v_min_i16_e32 v7, 0, v0 +; GFX8-NEXT: v_max_i16_e32 v6, 0, v0 +; GFX8-NEXT: v_sub_u16_e32 v7, 0x8000, v7 +; GFX8-NEXT: v_sub_u16_e32 v6, 0x7fff, v6 +; GFX8-NEXT: v_max_i16_e32 v7, v7, v3 +; GFX8-NEXT: v_min_i16_e32 v6, v7, v6 +; GFX8-NEXT: v_mov_b32_e32 v7, 0 +; GFX8-NEXT: v_min_i16_sdwa v9, v0, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_max_i16_sdwa v8, v0, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_sub_u16_e32 v9, 0x8000, v9 +; GFX8-NEXT: v_sub_u16_e32 v8, 0x7fff, v8 +; GFX8-NEXT: v_max_i16_sdwa v3, v9, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_min_i16_e32 v9, 0, v1 +; GFX8-NEXT: 
v_min_i16_e32 v3, v3, v8 +; GFX8-NEXT: v_max_i16_e32 v8, 0, v1 +; GFX8-NEXT: v_sub_u16_e32 v9, 0x8000, v9 +; GFX8-NEXT: v_sub_u16_e32 v8, 0x7fff, v8 +; GFX8-NEXT: v_max_i16_e32 v9, v9, v4 +; GFX8-NEXT: v_min_i16_sdwa v10, v1, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_min_i16_e32 v8, v9, v8 +; GFX8-NEXT: v_max_i16_sdwa v9, v1, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_sub_u16_e32 v10, 0x8000, v10 +; GFX8-NEXT: v_sub_u16_e32 v9, 0x7fff, v9 +; GFX8-NEXT: v_max_i16_sdwa v4, v10, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_min_i16_e32 v10, 0, v2 +; GFX8-NEXT: v_min_i16_e32 v4, v4, v9 +; GFX8-NEXT: v_max_i16_e32 v9, 0, v2 ; GFX8-NEXT: v_sub_u16_e32 v10, 0x8000, v10 ; GFX8-NEXT: v_sub_u16_e32 v9, 0x7fff, v9 -; GFX8-NEXT: v_max_i16_e32 v10, v10, v3 -; GFX8-NEXT: v_min_i16_e32 v11, 0, v6 +; GFX8-NEXT: v_max_i16_e32 v10, v10, v5 ; GFX8-NEXT: v_min_i16_e32 v9, v10, v9 -; GFX8-NEXT: v_max_i16_e32 v10, 0, v6 -; GFX8-NEXT: v_sub_u16_e32 v11, 0x8000, v11 -; GFX8-NEXT: v_sub_u16_e32 v10, 0x7fff, v10 -; GFX8-NEXT: v_max_i16_sdwa v3, v11, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_min_i16_e32 v11, 0, v1 -; GFX8-NEXT: v_lshrrev_b32_e32 v7, 16, v1 -; GFX8-NEXT: v_min_i16_e32 v3, v3, v10 -; GFX8-NEXT: v_max_i16_e32 v10, 0, v1 -; GFX8-NEXT: v_sub_u16_e32 v11, 0x8000, v11 +; GFX8-NEXT: v_max_i16_sdwa v10, v2, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_min_i16_sdwa v7, v2, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_sub_u16_e32 v7, 0x8000, v7 ; GFX8-NEXT: v_sub_u16_e32 v10, 0x7fff, v10 -; GFX8-NEXT: v_max_i16_e32 v11, v11, v4 -; GFX8-NEXT: v_min_i16_e32 v12, 0, v7 -; GFX8-NEXT: v_min_i16_e32 v10, v11, v10 -; GFX8-NEXT: v_max_i16_e32 v11, 0, v7 -; GFX8-NEXT: v_sub_u16_e32 v12, 0x8000, v12 -; GFX8-NEXT: v_sub_u16_e32 v11, 0x7fff, v11 -; 
GFX8-NEXT: v_max_i16_sdwa v4, v12, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_min_i16_e32 v12, 0, v2 -; GFX8-NEXT: v_lshrrev_b32_e32 v8, 16, v2 -; GFX8-NEXT: v_min_i16_e32 v4, v4, v11 -; GFX8-NEXT: v_max_i16_e32 v11, 0, v2 -; GFX8-NEXT: v_sub_u16_e32 v12, 0x8000, v12 -; GFX8-NEXT: v_sub_u16_e32 v11, 0x7fff, v11 -; GFX8-NEXT: v_max_i16_e32 v12, v12, v5 -; GFX8-NEXT: v_min_i16_e32 v13, 0, v8 -; GFX8-NEXT: v_min_i16_e32 v11, v12, v11 -; GFX8-NEXT: v_max_i16_e32 v12, 0, v8 -; GFX8-NEXT: v_sub_u16_e32 v13, 0x8000, v13 -; GFX8-NEXT: v_sub_u16_e32 v12, 0x7fff, v12 -; GFX8-NEXT: v_max_i16_sdwa v5, v13, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_add_u16_e32 v0, v0, v9 -; GFX8-NEXT: v_add_u16_sdwa v3, v6, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_min_i16_e32 v5, v5, v12 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v3 -; GFX8-NEXT: v_add_u16_e32 v1, v1, v10 -; GFX8-NEXT: v_add_u16_sdwa v3, v7, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 -; GFX8-NEXT: v_add_u16_e32 v2, v2, v11 -; GFX8-NEXT: v_add_u16_sdwa v3, v8, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX8-NEXT: v_max_i16_sdwa v5, v7, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_min_i16_e32 v5, v5, v10 +; GFX8-NEXT: v_add_u16_e32 v6, v0, v6 +; GFX8-NEXT: v_add_u16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_add_u16_e32 v3, v1, v8 +; GFX8-NEXT: v_add_u16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v1, v3, v1 +; GFX8-NEXT: v_add_u16_e32 v3, v2, v9 +; GFX8-NEXT: v_add_u16_sdwa v2, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v6, v0 +; GFX8-NEXT: v_or_b32_e32 
v2, v3, v2 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_saddsat_v6i16: @@ -3752,70 +3749,67 @@ define <4 x float> @v_saddsat_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; GFX8-LABEL: v_saddsat_v8i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_min_i16_e32 v13, 0, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v8, 16, v0 -; GFX8-NEXT: v_max_i16_e32 v12, 0, v0 +; GFX8-NEXT: v_min_i16_e32 v9, 0, v0 +; GFX8-NEXT: v_max_i16_e32 v8, 0, v0 +; GFX8-NEXT: v_sub_u16_e32 v9, 0x8000, v9 +; GFX8-NEXT: v_sub_u16_e32 v8, 0x7fff, v8 +; GFX8-NEXT: v_max_i16_e32 v9, v9, v4 +; GFX8-NEXT: v_min_i16_e32 v8, v9, v8 +; GFX8-NEXT: v_mov_b32_e32 v9, 0 +; GFX8-NEXT: v_min_i16_sdwa v11, v0, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_max_i16_sdwa v10, v0, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_sub_u16_e32 v11, 0x8000, v11 +; GFX8-NEXT: v_sub_u16_e32 v10, 0x7fff, v10 +; GFX8-NEXT: v_max_i16_sdwa v4, v11, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_min_i16_e32 v11, 0, v1 +; GFX8-NEXT: v_min_i16_e32 v4, v4, v10 +; GFX8-NEXT: v_max_i16_e32 v10, 0, v1 +; GFX8-NEXT: v_sub_u16_e32 v11, 0x8000, v11 +; GFX8-NEXT: v_sub_u16_e32 v10, 0x7fff, v10 +; GFX8-NEXT: v_max_i16_e32 v11, v11, v5 +; GFX8-NEXT: v_min_i16_sdwa v12, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_min_i16_e32 v10, v11, v10 +; GFX8-NEXT: v_max_i16_sdwa v11, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_sub_u16_e32 v12, 0x8000, v12 +; GFX8-NEXT: v_sub_u16_e32 v11, 0x7fff, v11 +; GFX8-NEXT: v_max_i16_sdwa v5, v12, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_min_i16_e32 v12, 0, v2 +; GFX8-NEXT: v_min_i16_e32 v5, v5, v11 +; GFX8-NEXT: v_max_i16_e32 v11, 0, v2 +; GFX8-NEXT: v_sub_u16_e32 v12, 0x8000, v12 +; GFX8-NEXT: v_sub_u16_e32 v11, 0x7fff, v11 +; 
GFX8-NEXT: v_max_i16_e32 v12, v12, v6 +; GFX8-NEXT: v_min_i16_sdwa v13, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_min_i16_e32 v11, v12, v11 +; GFX8-NEXT: v_max_i16_sdwa v12, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_sub_u16_e32 v13, 0x8000, v13 +; GFX8-NEXT: v_sub_u16_e32 v12, 0x7fff, v12 +; GFX8-NEXT: v_max_i16_sdwa v6, v13, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_min_i16_e32 v13, 0, v3 +; GFX8-NEXT: v_min_i16_e32 v6, v6, v12 +; GFX8-NEXT: v_max_i16_e32 v12, 0, v3 ; GFX8-NEXT: v_sub_u16_e32 v13, 0x8000, v13 ; GFX8-NEXT: v_sub_u16_e32 v12, 0x7fff, v12 -; GFX8-NEXT: v_max_i16_e32 v13, v13, v4 -; GFX8-NEXT: v_min_i16_e32 v14, 0, v8 +; GFX8-NEXT: v_max_i16_e32 v13, v13, v7 ; GFX8-NEXT: v_min_i16_e32 v12, v13, v12 -; GFX8-NEXT: v_max_i16_e32 v13, 0, v8 -; GFX8-NEXT: v_sub_u16_e32 v14, 0x8000, v14 -; GFX8-NEXT: v_sub_u16_e32 v13, 0x7fff, v13 -; GFX8-NEXT: v_max_i16_sdwa v4, v14, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_min_i16_e32 v14, 0, v1 -; GFX8-NEXT: v_lshrrev_b32_e32 v9, 16, v1 -; GFX8-NEXT: v_min_i16_e32 v4, v4, v13 -; GFX8-NEXT: v_max_i16_e32 v13, 0, v1 -; GFX8-NEXT: v_sub_u16_e32 v14, 0x8000, v14 +; GFX8-NEXT: v_max_i16_sdwa v13, v3, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_min_i16_sdwa v9, v3, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_sub_u16_e32 v9, 0x8000, v9 ; GFX8-NEXT: v_sub_u16_e32 v13, 0x7fff, v13 -; GFX8-NEXT: v_max_i16_e32 v14, v14, v5 -; GFX8-NEXT: v_min_i16_e32 v15, 0, v9 -; GFX8-NEXT: v_min_i16_e32 v13, v14, v13 -; GFX8-NEXT: v_max_i16_e32 v14, 0, v9 -; GFX8-NEXT: v_sub_u16_e32 v15, 0x8000, v15 -; GFX8-NEXT: v_sub_u16_e32 v14, 0x7fff, v14 -; GFX8-NEXT: v_max_i16_sdwa v5, v15, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_min_i16_e32 v15, 
0, v2 -; GFX8-NEXT: v_lshrrev_b32_e32 v10, 16, v2 -; GFX8-NEXT: v_min_i16_e32 v5, v5, v14 -; GFX8-NEXT: v_max_i16_e32 v14, 0, v2 -; GFX8-NEXT: v_sub_u16_e32 v15, 0x8000, v15 -; GFX8-NEXT: v_sub_u16_e32 v14, 0x7fff, v14 -; GFX8-NEXT: v_max_i16_e32 v15, v15, v6 -; GFX8-NEXT: v_min_i16_e32 v16, 0, v10 -; GFX8-NEXT: v_min_i16_e32 v14, v15, v14 -; GFX8-NEXT: v_max_i16_e32 v15, 0, v10 -; GFX8-NEXT: v_sub_u16_e32 v16, 0x8000, v16 -; GFX8-NEXT: v_sub_u16_e32 v15, 0x7fff, v15 -; GFX8-NEXT: v_max_i16_sdwa v6, v16, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_min_i16_e32 v16, 0, v3 -; GFX8-NEXT: v_lshrrev_b32_e32 v11, 16, v3 -; GFX8-NEXT: v_min_i16_e32 v6, v6, v15 -; GFX8-NEXT: v_max_i16_e32 v15, 0, v3 -; GFX8-NEXT: v_sub_u16_e32 v16, 0x8000, v16 -; GFX8-NEXT: v_sub_u16_e32 v15, 0x7fff, v15 -; GFX8-NEXT: v_max_i16_e32 v16, v16, v7 -; GFX8-NEXT: v_min_i16_e32 v17, 0, v11 -; GFX8-NEXT: v_min_i16_e32 v15, v16, v15 -; GFX8-NEXT: v_max_i16_e32 v16, 0, v11 -; GFX8-NEXT: v_sub_u16_e32 v17, 0x8000, v17 -; GFX8-NEXT: v_add_u16_e32 v0, v0, v12 -; GFX8-NEXT: v_add_u16_sdwa v4, v8, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_sub_u16_e32 v16, 0x7fff, v16 -; GFX8-NEXT: v_max_i16_sdwa v7, v17, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 -; GFX8-NEXT: v_add_u16_e32 v1, v1, v13 -; GFX8-NEXT: v_add_u16_sdwa v4, v9, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_min_i16_e32 v7, v7, v16 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 -; GFX8-NEXT: v_add_u16_e32 v2, v2, v14 -; GFX8-NEXT: v_add_u16_sdwa v4, v10, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v2, v2, v4 -; GFX8-NEXT: v_add_u16_e32 v3, v3, v15 -; GFX8-NEXT: v_add_u16_sdwa v4, v11, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v3, v3, v4 +; GFX8-NEXT: 
v_max_i16_sdwa v7, v9, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_add_u16_e32 v8, v0, v8 +; GFX8-NEXT: v_add_u16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_add_u16_e32 v4, v1, v10 +; GFX8-NEXT: v_add_u16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_min_i16_e32 v7, v7, v13 +; GFX8-NEXT: v_or_b32_e32 v1, v4, v1 +; GFX8-NEXT: v_add_u16_e32 v4, v2, v11 +; GFX8-NEXT: v_add_u16_sdwa v2, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v2, v4, v2 +; GFX8-NEXT: v_add_u16_e32 v4, v3, v12 +; GFX8-NEXT: v_add_u16_sdwa v3, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v8, v0 +; GFX8-NEXT: v_or_b32_e32 v3, v4, v3 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_saddsat_v8i16: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll index 320dfbb4980e4c..2572f8581f0edf 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll @@ -2774,22 +2774,22 @@ define <2 x i16> @v_ssubsat_v2i16(<2 x i16> %lhs, <2 x i16> %rhs) { ; GFX8-LABEL: v_ssubsat_v2i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_i16_e32 v3, -1, v0 -; GFX8-NEXT: v_subrev_u16_e32 v3, 0x7fff, v3 -; GFX8-NEXT: v_min_i16_e32 v4, -1, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GFX8-NEXT: v_subrev_u16_e32 v4, 0x8000, v4 -; GFX8-NEXT: v_max_i16_e32 v3, v3, v1 -; GFX8-NEXT: v_min_i16_e32 v3, v3, v4 -; GFX8-NEXT: v_max_i16_e32 v4, -1, v2 +; GFX8-NEXT: v_max_i16_e32 v2, -1, v0 +; GFX8-NEXT: v_subrev_u16_e32 v2, 0x7fff, v2 +; GFX8-NEXT: v_min_i16_e32 v3, -1, v0 +; GFX8-NEXT: v_subrev_u16_e32 v3, 0x8000, v3 +; GFX8-NEXT: v_max_i16_e32 v2, v2, v1 +; GFX8-NEXT: v_min_i16_e32 v2, v2, v3 +; GFX8-NEXT: 
v_mov_b32_e32 v3, -1 +; GFX8-NEXT: v_max_i16_sdwa v4, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX8-NEXT: v_subrev_u16_e32 v4, 0x7fff, v4 -; GFX8-NEXT: v_min_i16_e32 v5, -1, v2 -; GFX8-NEXT: v_subrev_u16_e32 v5, 0x8000, v5 +; GFX8-NEXT: v_min_i16_sdwa v3, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_subrev_u16_e32 v3, 0x8000, v3 ; GFX8-NEXT: v_max_i16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_min_i16_e32 v1, v1, v5 -; GFX8-NEXT: v_sub_u16_e32 v0, v0, v3 -; GFX8-NEXT: v_sub_u16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: v_min_i16_e32 v1, v1, v3 +; GFX8-NEXT: v_sub_u16_e32 v2, v0, v2 +; GFX8-NEXT: v_sub_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_ssubsat_v2i16: @@ -2987,23 +2987,23 @@ define amdgpu_ps float @ssubsat_v2i16_vs(<2 x i16> %lhs, <2 x i16> inreg %rhs) { ; ; GFX8-LABEL: ssubsat_v2i16_vs: ; GFX8: ; %bb.0: -; GFX8-NEXT: v_max_i16_e32 v2, -1, v0 -; GFX8-NEXT: v_subrev_u16_e32 v2, 0x7fff, v2 -; GFX8-NEXT: v_min_i16_e32 v3, -1, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0 -; GFX8-NEXT: v_subrev_u16_e32 v3, 0x8000, v3 -; GFX8-NEXT: v_max_i16_e32 v2, s0, v2 -; GFX8-NEXT: v_min_i16_e32 v2, v2, v3 -; GFX8-NEXT: v_max_i16_e32 v3, -1, v1 +; GFX8-NEXT: v_max_i16_e32 v1, -1, v0 +; GFX8-NEXT: v_subrev_u16_e32 v1, 0x7fff, v1 +; GFX8-NEXT: v_min_i16_e32 v2, -1, v0 +; GFX8-NEXT: v_subrev_u16_e32 v2, 0x8000, v2 +; GFX8-NEXT: v_max_i16_e32 v1, s0, v1 +; GFX8-NEXT: v_min_i16_e32 v1, v1, v2 +; GFX8-NEXT: v_mov_b32_e32 v2, -1 +; GFX8-NEXT: v_max_i16_sdwa v3, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX8-NEXT: s_lshr_b32 s1, s0, 16 ; GFX8-NEXT: v_subrev_u16_e32 v3, 0x7fff, v3 -; 
GFX8-NEXT: v_min_i16_e32 v4, -1, v1 -; GFX8-NEXT: v_subrev_u16_e32 v4, 0x8000, v4 +; GFX8-NEXT: v_min_i16_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_subrev_u16_e32 v2, 0x8000, v2 ; GFX8-NEXT: v_max_i16_e32 v3, s1, v3 -; GFX8-NEXT: v_min_i16_e32 v3, v3, v4 -; GFX8-NEXT: v_sub_u16_e32 v0, v0, v2 -; GFX8-NEXT: v_sub_u16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 +; GFX8-NEXT: v_min_i16_e32 v2, v3, v2 +; GFX8-NEXT: v_sub_u16_e32 v1, v0, v1 +; GFX8-NEXT: v_sub_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX8-NEXT: ; return to shader part epilog ; ; GFX9-LABEL: ssubsat_v2i16_vs: @@ -3090,38 +3090,37 @@ define <2 x float> @v_ssubsat_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; GFX8-LABEL: v_ssubsat_v4i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_i16_e32 v6, -1, v0 +; GFX8-NEXT: v_max_i16_e32 v4, -1, v0 +; GFX8-NEXT: v_subrev_u16_e32 v4, 0x7fff, v4 +; GFX8-NEXT: v_min_i16_e32 v5, -1, v0 +; GFX8-NEXT: v_subrev_u16_e32 v5, 0x8000, v5 +; GFX8-NEXT: v_max_i16_e32 v4, v4, v2 +; GFX8-NEXT: v_min_i16_e32 v4, v4, v5 +; GFX8-NEXT: v_mov_b32_e32 v5, -1 +; GFX8-NEXT: v_max_i16_sdwa v6, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX8-NEXT: v_subrev_u16_e32 v6, 0x7fff, v6 -; GFX8-NEXT: v_min_i16_e32 v7, -1, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v0 +; GFX8-NEXT: v_min_i16_sdwa v7, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_subrev_u16_e32 v7, 0x8000, v7 +; GFX8-NEXT: v_max_i16_sdwa v2, v6, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_max_i16_e32 v6, -1, v1 +; GFX8-NEXT: v_min_i16_e32 v2, v2, v7 +; GFX8-NEXT: v_subrev_u16_e32 v6, 0x7fff, v6 +; GFX8-NEXT: v_min_i16_e32 v7, -1, v1 ; GFX8-NEXT: 
v_subrev_u16_e32 v7, 0x8000, v7 -; GFX8-NEXT: v_max_i16_e32 v6, v6, v2 +; GFX8-NEXT: v_max_i16_e32 v6, v6, v3 ; GFX8-NEXT: v_min_i16_e32 v6, v6, v7 -; GFX8-NEXT: v_max_i16_e32 v7, -1, v4 -; GFX8-NEXT: v_subrev_u16_e32 v7, 0x7fff, v7 -; GFX8-NEXT: v_min_i16_e32 v8, -1, v4 -; GFX8-NEXT: v_subrev_u16_e32 v8, 0x8000, v8 -; GFX8-NEXT: v_max_i16_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_max_i16_e32 v7, -1, v1 -; GFX8-NEXT: v_min_i16_e32 v2, v2, v8 +; GFX8-NEXT: v_max_i16_sdwa v7, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX8-NEXT: v_subrev_u16_e32 v7, 0x7fff, v7 -; GFX8-NEXT: v_min_i16_e32 v8, -1, v1 -; GFX8-NEXT: v_lshrrev_b32_e32 v5, 16, v1 -; GFX8-NEXT: v_subrev_u16_e32 v8, 0x8000, v8 -; GFX8-NEXT: v_max_i16_e32 v7, v7, v3 -; GFX8-NEXT: v_min_i16_e32 v7, v7, v8 -; GFX8-NEXT: v_max_i16_e32 v8, -1, v5 -; GFX8-NEXT: v_subrev_u16_e32 v8, 0x7fff, v8 -; GFX8-NEXT: v_min_i16_e32 v9, -1, v5 -; GFX8-NEXT: v_subrev_u16_e32 v9, 0x8000, v9 -; GFX8-NEXT: v_max_i16_sdwa v3, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_min_i16_e32 v3, v3, v9 -; GFX8-NEXT: v_sub_u16_e32 v0, v0, v6 -; GFX8-NEXT: v_sub_u16_sdwa v2, v4, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 -; GFX8-NEXT: v_sub_u16_e32 v1, v1, v7 -; GFX8-NEXT: v_sub_u16_sdwa v2, v5, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 +; GFX8-NEXT: v_min_i16_sdwa v5, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_subrev_u16_e32 v5, 0x8000, v5 +; GFX8-NEXT: v_max_i16_sdwa v3, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_min_i16_e32 v3, v3, v5 +; GFX8-NEXT: v_sub_u16_e32 v4, v0, v4 +; GFX8-NEXT: v_sub_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; 
GFX8-NEXT: v_sub_u16_e32 v2, v1, v6 +; GFX8-NEXT: v_sub_u16_sdwa v1, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v4, v0 +; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_ssubsat_v4i16: @@ -3376,54 +3375,52 @@ define <3 x float> @v_ssubsat_v6i16(<6 x i16> %lhs, <6 x i16> %rhs) { ; GFX8-LABEL: v_ssubsat_v6i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_i16_e32 v9, -1, v0 +; GFX8-NEXT: v_max_i16_e32 v6, -1, v0 +; GFX8-NEXT: v_subrev_u16_e32 v6, 0x7fff, v6 +; GFX8-NEXT: v_min_i16_e32 v7, -1, v0 +; GFX8-NEXT: v_subrev_u16_e32 v7, 0x8000, v7 +; GFX8-NEXT: v_max_i16_e32 v6, v6, v3 +; GFX8-NEXT: v_min_i16_e32 v6, v6, v7 +; GFX8-NEXT: v_mov_b32_e32 v7, -1 +; GFX8-NEXT: v_max_i16_sdwa v8, v0, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_subrev_u16_e32 v8, 0x7fff, v8 +; GFX8-NEXT: v_min_i16_sdwa v9, v0, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_subrev_u16_e32 v9, 0x8000, v9 +; GFX8-NEXT: v_max_i16_sdwa v3, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_max_i16_e32 v8, -1, v1 +; GFX8-NEXT: v_min_i16_e32 v3, v3, v9 +; GFX8-NEXT: v_subrev_u16_e32 v8, 0x7fff, v8 +; GFX8-NEXT: v_min_i16_e32 v9, -1, v1 +; GFX8-NEXT: v_subrev_u16_e32 v9, 0x8000, v9 +; GFX8-NEXT: v_max_i16_e32 v8, v8, v4 +; GFX8-NEXT: v_min_i16_e32 v8, v8, v9 +; GFX8-NEXT: v_max_i16_sdwa v9, v1, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_subrev_u16_e32 v9, 0x7fff, v9 +; GFX8-NEXT: v_min_i16_sdwa v10, v1, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_subrev_u16_e32 v10, 0x8000, v10 +; GFX8-NEXT: v_max_i16_sdwa v4, v9, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_max_i16_e32 v9, -1, v2 +; GFX8-NEXT: v_min_i16_e32 v4, v4, v10 
; GFX8-NEXT: v_subrev_u16_e32 v9, 0x7fff, v9 -; GFX8-NEXT: v_min_i16_e32 v10, -1, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v0 +; GFX8-NEXT: v_min_i16_e32 v10, -1, v2 ; GFX8-NEXT: v_subrev_u16_e32 v10, 0x8000, v10 -; GFX8-NEXT: v_max_i16_e32 v9, v9, v3 +; GFX8-NEXT: v_max_i16_e32 v9, v9, v5 ; GFX8-NEXT: v_min_i16_e32 v9, v9, v10 -; GFX8-NEXT: v_max_i16_e32 v10, -1, v6 -; GFX8-NEXT: v_subrev_u16_e32 v10, 0x7fff, v10 -; GFX8-NEXT: v_min_i16_e32 v11, -1, v6 -; GFX8-NEXT: v_subrev_u16_e32 v11, 0x8000, v11 -; GFX8-NEXT: v_max_i16_sdwa v3, v10, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_max_i16_e32 v10, -1, v1 -; GFX8-NEXT: v_min_i16_e32 v3, v3, v11 +; GFX8-NEXT: v_max_i16_sdwa v10, v2, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX8-NEXT: v_subrev_u16_e32 v10, 0x7fff, v10 -; GFX8-NEXT: v_min_i16_e32 v11, -1, v1 -; GFX8-NEXT: v_lshrrev_b32_e32 v7, 16, v1 -; GFX8-NEXT: v_subrev_u16_e32 v11, 0x8000, v11 -; GFX8-NEXT: v_max_i16_e32 v10, v10, v4 -; GFX8-NEXT: v_min_i16_e32 v10, v10, v11 -; GFX8-NEXT: v_max_i16_e32 v11, -1, v7 -; GFX8-NEXT: v_subrev_u16_e32 v11, 0x7fff, v11 -; GFX8-NEXT: v_min_i16_e32 v12, -1, v7 -; GFX8-NEXT: v_subrev_u16_e32 v12, 0x8000, v12 -; GFX8-NEXT: v_max_i16_sdwa v4, v11, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_max_i16_e32 v11, -1, v2 -; GFX8-NEXT: v_min_i16_e32 v4, v4, v12 -; GFX8-NEXT: v_subrev_u16_e32 v11, 0x7fff, v11 -; GFX8-NEXT: v_min_i16_e32 v12, -1, v2 -; GFX8-NEXT: v_lshrrev_b32_e32 v8, 16, v2 -; GFX8-NEXT: v_subrev_u16_e32 v12, 0x8000, v12 -; GFX8-NEXT: v_max_i16_e32 v11, v11, v5 -; GFX8-NEXT: v_min_i16_e32 v11, v11, v12 -; GFX8-NEXT: v_max_i16_e32 v12, -1, v8 -; GFX8-NEXT: v_subrev_u16_e32 v12, 0x7fff, v12 -; GFX8-NEXT: v_min_i16_e32 v13, -1, v8 -; GFX8-NEXT: v_subrev_u16_e32 v13, 0x8000, v13 -; GFX8-NEXT: v_max_i16_sdwa v5, v12, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: 
v_sub_u16_e32 v0, v0, v9 -; GFX8-NEXT: v_sub_u16_sdwa v3, v6, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_min_i16_e32 v5, v5, v13 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v3 -; GFX8-NEXT: v_sub_u16_e32 v1, v1, v10 -; GFX8-NEXT: v_sub_u16_sdwa v3, v7, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 -; GFX8-NEXT: v_sub_u16_e32 v2, v2, v11 -; GFX8-NEXT: v_sub_u16_sdwa v3, v8, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX8-NEXT: v_min_i16_sdwa v7, v2, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_subrev_u16_e32 v7, 0x8000, v7 +; GFX8-NEXT: v_max_i16_sdwa v5, v10, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_min_i16_e32 v5, v5, v7 +; GFX8-NEXT: v_sub_u16_e32 v6, v0, v6 +; GFX8-NEXT: v_sub_u16_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_sub_u16_e32 v3, v1, v8 +; GFX8-NEXT: v_sub_u16_sdwa v1, v1, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v1, v3, v1 +; GFX8-NEXT: v_sub_u16_e32 v3, v2, v9 +; GFX8-NEXT: v_sub_u16_sdwa v2, v2, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v6, v0 +; GFX8-NEXT: v_or_b32_e32 v2, v3, v2 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_ssubsat_v6i16: @@ -3752,70 +3749,67 @@ define <4 x float> @v_ssubsat_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; GFX8-LABEL: v_ssubsat_v8i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_max_i16_e32 v12, -1, v0 +; GFX8-NEXT: v_max_i16_e32 v8, -1, v0 +; GFX8-NEXT: v_subrev_u16_e32 v8, 0x7fff, v8 +; GFX8-NEXT: v_min_i16_e32 v9, -1, v0 +; GFX8-NEXT: v_subrev_u16_e32 v9, 0x8000, v9 +; GFX8-NEXT: v_max_i16_e32 v8, v8, v4 +; GFX8-NEXT: v_min_i16_e32 v8, v8, v9 +; GFX8-NEXT: 
v_mov_b32_e32 v9, -1 +; GFX8-NEXT: v_max_i16_sdwa v10, v0, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_subrev_u16_e32 v10, 0x7fff, v10 +; GFX8-NEXT: v_min_i16_sdwa v11, v0, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_subrev_u16_e32 v11, 0x8000, v11 +; GFX8-NEXT: v_max_i16_sdwa v4, v10, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_max_i16_e32 v10, -1, v1 +; GFX8-NEXT: v_min_i16_e32 v4, v4, v11 +; GFX8-NEXT: v_subrev_u16_e32 v10, 0x7fff, v10 +; GFX8-NEXT: v_min_i16_e32 v11, -1, v1 +; GFX8-NEXT: v_subrev_u16_e32 v11, 0x8000, v11 +; GFX8-NEXT: v_max_i16_e32 v10, v10, v5 +; GFX8-NEXT: v_min_i16_e32 v10, v10, v11 +; GFX8-NEXT: v_max_i16_sdwa v11, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_subrev_u16_e32 v11, 0x7fff, v11 +; GFX8-NEXT: v_min_i16_sdwa v12, v1, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_subrev_u16_e32 v12, 0x8000, v12 +; GFX8-NEXT: v_max_i16_sdwa v5, v11, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_max_i16_e32 v11, -1, v2 +; GFX8-NEXT: v_min_i16_e32 v5, v5, v12 +; GFX8-NEXT: v_subrev_u16_e32 v11, 0x7fff, v11 +; GFX8-NEXT: v_min_i16_e32 v12, -1, v2 +; GFX8-NEXT: v_subrev_u16_e32 v12, 0x8000, v12 +; GFX8-NEXT: v_max_i16_e32 v11, v11, v6 +; GFX8-NEXT: v_min_i16_e32 v11, v11, v12 +; GFX8-NEXT: v_max_i16_sdwa v12, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_subrev_u16_e32 v12, 0x7fff, v12 +; GFX8-NEXT: v_min_i16_sdwa v13, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_subrev_u16_e32 v13, 0x8000, v13 +; GFX8-NEXT: v_max_i16_sdwa v6, v12, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_max_i16_e32 v12, -1, v3 +; GFX8-NEXT: v_min_i16_e32 v6, v6, v13 ; GFX8-NEXT: v_subrev_u16_e32 v12, 
0x7fff, v12 -; GFX8-NEXT: v_min_i16_e32 v13, -1, v0 -; GFX8-NEXT: v_lshrrev_b32_e32 v8, 16, v0 +; GFX8-NEXT: v_min_i16_e32 v13, -1, v3 ; GFX8-NEXT: v_subrev_u16_e32 v13, 0x8000, v13 -; GFX8-NEXT: v_max_i16_e32 v12, v12, v4 +; GFX8-NEXT: v_max_i16_e32 v12, v12, v7 ; GFX8-NEXT: v_min_i16_e32 v12, v12, v13 -; GFX8-NEXT: v_max_i16_e32 v13, -1, v8 -; GFX8-NEXT: v_subrev_u16_e32 v13, 0x7fff, v13 -; GFX8-NEXT: v_min_i16_e32 v14, -1, v8 -; GFX8-NEXT: v_subrev_u16_e32 v14, 0x8000, v14 -; GFX8-NEXT: v_max_i16_sdwa v4, v13, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_max_i16_e32 v13, -1, v1 -; GFX8-NEXT: v_min_i16_e32 v4, v4, v14 +; GFX8-NEXT: v_max_i16_sdwa v13, v3, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX8-NEXT: v_subrev_u16_e32 v13, 0x7fff, v13 -; GFX8-NEXT: v_min_i16_e32 v14, -1, v1 -; GFX8-NEXT: v_lshrrev_b32_e32 v9, 16, v1 -; GFX8-NEXT: v_subrev_u16_e32 v14, 0x8000, v14 -; GFX8-NEXT: v_max_i16_e32 v13, v13, v5 -; GFX8-NEXT: v_min_i16_e32 v13, v13, v14 -; GFX8-NEXT: v_max_i16_e32 v14, -1, v9 -; GFX8-NEXT: v_subrev_u16_e32 v14, 0x7fff, v14 -; GFX8-NEXT: v_min_i16_e32 v15, -1, v9 -; GFX8-NEXT: v_subrev_u16_e32 v15, 0x8000, v15 -; GFX8-NEXT: v_max_i16_sdwa v5, v14, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_max_i16_e32 v14, -1, v2 -; GFX8-NEXT: v_min_i16_e32 v5, v5, v15 -; GFX8-NEXT: v_subrev_u16_e32 v14, 0x7fff, v14 -; GFX8-NEXT: v_min_i16_e32 v15, -1, v2 -; GFX8-NEXT: v_lshrrev_b32_e32 v10, 16, v2 -; GFX8-NEXT: v_subrev_u16_e32 v15, 0x8000, v15 -; GFX8-NEXT: v_max_i16_e32 v14, v14, v6 -; GFX8-NEXT: v_min_i16_e32 v14, v14, v15 -; GFX8-NEXT: v_max_i16_e32 v15, -1, v10 -; GFX8-NEXT: v_subrev_u16_e32 v15, 0x7fff, v15 -; GFX8-NEXT: v_min_i16_e32 v16, -1, v10 -; GFX8-NEXT: v_subrev_u16_e32 v16, 0x8000, v16 -; GFX8-NEXT: v_max_i16_sdwa v6, v15, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_max_i16_e32 v15, -1, v3 -; 
GFX8-NEXT: v_min_i16_e32 v6, v6, v16 -; GFX8-NEXT: v_subrev_u16_e32 v15, 0x7fff, v15 -; GFX8-NEXT: v_min_i16_e32 v16, -1, v3 -; GFX8-NEXT: v_lshrrev_b32_e32 v11, 16, v3 -; GFX8-NEXT: v_subrev_u16_e32 v16, 0x8000, v16 -; GFX8-NEXT: v_max_i16_e32 v15, v15, v7 -; GFX8-NEXT: v_min_i16_e32 v15, v15, v16 -; GFX8-NEXT: v_max_i16_e32 v16, -1, v11 -; GFX8-NEXT: v_subrev_u16_e32 v16, 0x7fff, v16 -; GFX8-NEXT: v_min_i16_e32 v17, -1, v11 -; GFX8-NEXT: v_sub_u16_e32 v0, v0, v12 -; GFX8-NEXT: v_sub_u16_sdwa v4, v8, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_subrev_u16_e32 v17, 0x8000, v17 -; GFX8-NEXT: v_max_i16_sdwa v7, v16, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 -; GFX8-NEXT: v_sub_u16_e32 v1, v1, v13 -; GFX8-NEXT: v_sub_u16_sdwa v4, v9, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_min_i16_e32 v7, v7, v17 -; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 -; GFX8-NEXT: v_sub_u16_e32 v2, v2, v14 -; GFX8-NEXT: v_sub_u16_sdwa v4, v10, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v2, v2, v4 -; GFX8-NEXT: v_sub_u16_e32 v3, v3, v15 -; GFX8-NEXT: v_sub_u16_sdwa v4, v11, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_or_b32_e32 v3, v3, v4 +; GFX8-NEXT: v_min_i16_sdwa v9, v3, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_subrev_u16_e32 v9, 0x8000, v9 +; GFX8-NEXT: v_max_i16_sdwa v7, v13, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_sub_u16_e32 v8, v0, v8 +; GFX8-NEXT: v_sub_u16_sdwa v0, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_sub_u16_e32 v4, v1, v10 +; GFX8-NEXT: v_sub_u16_sdwa v1, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_min_i16_e32 v7, v7, v9 +; GFX8-NEXT: v_or_b32_e32 v1, v4, 
v1 +; GFX8-NEXT: v_sub_u16_e32 v4, v2, v11 +; GFX8-NEXT: v_sub_u16_sdwa v2, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v2, v4, v2 +; GFX8-NEXT: v_sub_u16_e32 v4, v3, v12 +; GFX8-NEXT: v_sub_u16_sdwa v3, v3, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_or_b32_e32 v0, v8, v0 +; GFX8-NEXT: v_or_b32_e32 v3, v4, v3 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: v_ssubsat_v8i16: diff --git a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll index 028a28ed9a23b7..3f513e120e141b 100644 --- a/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll +++ b/llvm/test/CodeGen/AMDGPU/cvt_f32_ubyte.ll @@ -1608,34 +1608,35 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(ptr addrspace(1) noalias %o ; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; VI-NEXT: s_mov_b32 s7, 0xf000 ; VI-NEXT: s_mov_b32 s6, -1 +; VI-NEXT: v_mov_b32_e32 v5, 0xffffff00 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v1, s3 ; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc ; VI-NEXT: flat_load_dword v4, v[0:1] ; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24 +; VI-NEXT: v_mov_b32_e32 v6, 9 +; VI-NEXT: v_mov_b32_e32 v7, 0x900 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: s_mov_b32 s4, s2 ; VI-NEXT: s_mov_b32 s5, s3 ; VI-NEXT: s_mov_b32 s2, s6 ; VI-NEXT: s_mov_b32 s3, s7 ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_lshrrev_b32_e32 v5, 16, v4 ; VI-NEXT: v_cvt_f32_ubyte3_e32 v3, v4 ; VI-NEXT: v_cvt_f32_ubyte2_e32 v2, v4 ; VI-NEXT: v_cvt_f32_ubyte1_e32 v1, v4 ; VI-NEXT: v_cvt_f32_ubyte0_e32 v0, v4 -; VI-NEXT: v_and_b32_e32 v6, 0xffffff00, v4 -; VI-NEXT: v_add_u16_e32 v4, 9, v4 +; VI-NEXT: v_and_b32_e32 v8, 0xffffff00, v4 +; VI-NEXT: v_add_u16_e32 v9, 9, v4 +; VI-NEXT: v_and_b32_sdwa v5, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-NEXT: v_add_u16_sdwa v4, v4, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 
src1_sel:DWORD ; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 ; VI-NEXT: s_nop 0 -; VI-NEXT: v_and_b32_e32 v1, 0xffffff00, v5 -; VI-NEXT: v_add_u16_e32 v2, 9, v5 -; VI-NEXT: v_or_b32_sdwa v0, v6, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; VI-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; VI-NEXT: v_mov_b32_e32 v2, 0x900 +; VI-NEXT: v_or_b32_sdwa v0, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; VI-NEXT: v_or_b32_sdwa v1, v5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; VI-NEXT: v_add_u16_e32 v0, 0x900, v0 -; VI-NEXT: v_add_u16_sdwa v1, v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-NEXT: v_add_u16_sdwa v1, v1, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-NEXT: v_or_b32_e32 v0, v0, v1 ; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; VI-NEXT: s_endpgm @@ -1674,28 +1675,29 @@ define amdgpu_kernel void @load_v4i8_to_v4f32_2_uses(ptr addrspace(1) noalias %o ; GFX9: ; %bb.0: ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34 ; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX9-NEXT: s_movk_i32 s4, 0xff00 +; GFX9-NEXT: v_mov_b32_e32 v6, 9 ; GFX9-NEXT: v_mov_b32_e32 v5, 0 -; GFX9-NEXT: s_movk_i32 s4, 0x900 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_load_dword v4, v0, s[0:1] ; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24 +; GFX9-NEXT: s_movk_i32 s5, 0x900 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v4 ; GFX9-NEXT: v_cvt_f32_ubyte3_e32 v3, v4 ; GFX9-NEXT: v_cvt_f32_ubyte2_e32 v2, v4 ; GFX9-NEXT: v_cvt_f32_ubyte1_e32 v1, v4 ; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, v4 ; GFX9-NEXT: v_and_b32_e32 v7, 0xffffff00, v4 -; GFX9-NEXT: v_add_u16_e32 v4, 9, v4 +; GFX9-NEXT: v_add_u16_e32 v8, 9, v4 +; GFX9-NEXT: v_and_b32_sdwa v9, v4, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_add_u16_sdwa v4, v4, v6 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_dwordx4 v5, v[0:3], s[0:1] ; GFX9-NEXT: s_nop 0 -; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff00, v6 -; GFX9-NEXT: v_add_u16_e32 v2, 9, v6 -; GFX9-NEXT: v_or_b32_sdwa v0, v7, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GFX9-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_or_b32_sdwa v0, v7, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 +; GFX9-NEXT: v_or_b32_sdwa v1, v9, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 ; GFX9-NEXT: v_add_u16_e32 v0, 0x900, v0 -; GFX9-NEXT: v_add_u16_sdwa v1, v1, s4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX9-NEXT: v_add_u16_sdwa v1, v1, s5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX9-NEXT: global_store_dword v5, v0, s[2:3] ; GFX9-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/fract-match.ll b/llvm/test/CodeGen/AMDGPU/fract-match.ll index e361aa4db2aa94..1b28ddb2c58620 100644 --- a/llvm/test/CodeGen/AMDGPU/fract-match.ll +++ b/llvm/test/CodeGen/AMDGPU/fract-match.ll @@ -2135,19 +2135,18 @@ define <2 x half> @safe_math_fract_v2f16(<2 x half> %x, ptr addrspace(1) nocaptu ; GFX8-LABEL: safe_math_fract_v2f16: ; GFX8: ; %bb.0: ; %entry ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v3, 16, v0 -; GFX8-NEXT: v_mov_b32_e32 v7, 0x204 -; GFX8-NEXT: v_floor_f16_e32 v4, v3 -; GFX8-NEXT: v_floor_f16_e32 v5, v0 -; GFX8-NEXT: v_fract_f16_e32 v6, v3 -; GFX8-NEXT: v_cmp_class_f16_e32 vcc, v3, v7 -; GFX8-NEXT: v_pack_b32_f16 v4, v5, v4 -; GFX8-NEXT: v_fract_f16_e32 v5, v0 -; GFX8-NEXT: v_cndmask_b32_e64 v3, v6, 0, vcc -; GFX8-NEXT: v_cmp_class_f16_e32 vcc, v0, v7 -; GFX8-NEXT: v_cndmask_b32_e64 v0, v5, 0, vcc -; GFX8-NEXT: v_pack_b32_f16 v0, v0, v3 -; 
GFX8-NEXT: global_store_dword v[1:2], v4, off +; GFX8-NEXT: v_mov_b32_e32 v6, 0x204 +; GFX8-NEXT: v_floor_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX8-NEXT: v_floor_f16_e32 v4, v0 +; GFX8-NEXT: v_cmp_class_f16_sdwa s[4:5], v0, v6 src0_sel:WORD_1 src1_sel:DWORD +; GFX8-NEXT: v_pack_b32_f16 v3, v4, v3 +; GFX8-NEXT: v_fract_f16_e32 v4, v0 +; GFX8-NEXT: v_fract_f16_sdwa v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX8-NEXT: v_cmp_class_f16_e32 vcc, v0, v6 +; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, 0, s[4:5] +; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, 0, vcc +; GFX8-NEXT: v_pack_b32_f16 v0, v0, v5 +; GFX8-NEXT: global_store_dword v[1:2], v3, off ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/fshr.ll b/llvm/test/CodeGen/AMDGPU/fshr.ll index 3118d637880425..e8310e73f9a475 100644 --- a/llvm/test/CodeGen/AMDGPU/fshr.ll +++ b/llvm/test/CodeGen/AMDGPU/fshr.ll @@ -803,13 +803,13 @@ define <2 x i16> @v_fshr_v2i16(<2 x i16> %src0, <2 x i16> %src1, <2 x i16> %src2 ; VI-LABEL: v_fshr_v2i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_lshrrev_b32_e32 v3, 16, v2 -; VI-NEXT: v_mov_b32_e32 v5, 1 -; VI-NEXT: v_lshrrev_b16_sdwa v4, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; VI-NEXT: v_lshlrev_b16_sdwa v5, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; VI-NEXT: v_xor_b32_e32 v3, -1, v3 -; VI-NEXT: v_lshlrev_b16_e32 v3, v3, v5 -; VI-NEXT: v_or_b32_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-NEXT: v_mov_b32_e32 v4, 1 +; VI-NEXT: v_mov_b32_e32 v5, -1 +; VI-NEXT: v_lshlrev_b16_sdwa v4, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-NEXT: v_xor_b32_sdwa v5, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-NEXT: v_lshrrev_b16_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_lshlrev_b16_e32 v4, v5, v4 +; VI-NEXT: v_or_b32_sdwa v3, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-NEXT: v_lshlrev_b16_e32 v0, 1, v0 ; VI-NEXT: v_xor_b32_e32 v4, -1, v2 ; VI-NEXT: v_lshlrev_b16_e32 v0, v4, v0 @@ -887,13 +887,13 @@ define <3 x i16> @v_fshr_v3i16(<3 x i16> %src0, <3 x i16> %src1, <3 x i16> %src2 ; VI-LABEL: v_fshr_v3i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_lshrrev_b32_e32 v6, 16, v4 -; VI-NEXT: v_mov_b32_e32 v8, 1 -; VI-NEXT: v_lshrrev_b16_sdwa v7, v6, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; VI-NEXT: v_lshlrev_b16_sdwa v8, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; VI-NEXT: v_xor_b32_e32 v6, -1, v6 -; VI-NEXT: v_lshlrev_b16_e32 v6, v6, v8 -; VI-NEXT: v_or_b32_sdwa v6, v6, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-NEXT: v_mov_b32_e32 v7, 1 +; VI-NEXT: v_mov_b32_e32 v8, -1 +; VI-NEXT: v_lshlrev_b16_sdwa v7, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-NEXT: v_xor_b32_sdwa v8, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-NEXT: v_lshrrev_b16_sdwa v6, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_lshlrev_b16_e32 v7, v8, v7 +; VI-NEXT: v_or_b32_sdwa v6, v7, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-NEXT: v_lshlrev_b16_e32 v1, 1, v1 ; VI-NEXT: v_xor_b32_e32 v7, -1, v5 ; VI-NEXT: v_lshlrev_b16_e32 v1, v7, v1 @@ -910,13 +910,13 @@ define <3 x i16> @v_fshr_v3i16(<3 x i16> %src0, <3 x i16> %src1, <3 x i16> %src2 ; GFX9-LABEL: v_fshr_v3i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v4 -; GFX9-NEXT: v_mov_b32_e32 v8, 1 -; GFX9-NEXT: v_lshrrev_b16_sdwa v7, v6, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 
-; GFX9-NEXT: v_lshlrev_b16_sdwa v8, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX9-NEXT: v_xor_b32_e32 v6, -1, v6 -; GFX9-NEXT: v_lshlrev_b16_e32 v6, v6, v8 -; GFX9-NEXT: v_or_b32_e32 v6, v6, v7 +; GFX9-NEXT: v_mov_b32_e32 v7, 1 +; GFX9-NEXT: v_mov_b32_e32 v8, -1 +; GFX9-NEXT: v_lshlrev_b16_sdwa v7, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NEXT: v_xor_b32_sdwa v8, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_lshrrev_b16_sdwa v6, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX9-NEXT: v_lshlrev_b16_e32 v7, v8, v7 +; GFX9-NEXT: v_or_b32_e32 v6, v7, v6 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, 1, v1 ; GFX9-NEXT: v_xor_b32_e32 v7, -1, v5 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, v7, v1 @@ -1019,18 +1019,18 @@ define <4 x i16> @v_fshr_v4i16(<4 x i16> %src0, <4 x i16> %src1, <4 x i16> %src2 ; VI-LABEL: v_fshr_v4i16: ; VI: ; %bb.0: ; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: v_lshrrev_b32_e32 v6, 16, v5 -; VI-NEXT: v_mov_b32_e32 v8, 1 -; VI-NEXT: v_lshrrev_b16_sdwa v7, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; VI-NEXT: v_lshlrev_b16_sdwa v9, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; VI-NEXT: v_xor_b32_e32 v6, -1, v6 -; VI-NEXT: v_lshlrev_b16_e32 v6, v6, v9 -; VI-NEXT: v_or_b32_sdwa v6, v6, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; VI-NEXT: v_lshrrev_b32_e32 v7, 16, v4 -; VI-NEXT: v_lshrrev_b16_sdwa v9, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; VI-NEXT: v_lshlrev_b16_sdwa v8, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; VI-NEXT: v_xor_b32_e32 v7, -1, v7 -; VI-NEXT: v_lshlrev_b16_e32 v7, v7, v8 +; VI-NEXT: v_mov_b32_e32 v7, 1 +; VI-NEXT: v_mov_b32_e32 v9, -1 +; VI-NEXT: v_lshlrev_b16_sdwa v8, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD 
src1_sel:WORD_1 +; VI-NEXT: v_xor_b32_sdwa v10, v5, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-NEXT: v_lshrrev_b16_sdwa v6, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_lshlrev_b16_e32 v8, v10, v8 +; VI-NEXT: v_lshlrev_b16_sdwa v7, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; VI-NEXT: v_xor_b32_sdwa v9, v4, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; VI-NEXT: v_or_b32_sdwa v6, v8, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; VI-NEXT: v_lshrrev_b16_sdwa v8, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; VI-NEXT: v_lshlrev_b16_e32 v7, v9, v7 +; VI-NEXT: v_or_b32_sdwa v7, v7, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-NEXT: v_lshlrev_b16_e32 v1, 1, v1 ; VI-NEXT: v_xor_b32_e32 v8, -1, v5 ; VI-NEXT: v_lshlrev_b16_e32 v1, v8, v1 @@ -1040,7 +1040,6 @@ define <4 x i16> @v_fshr_v4i16(<4 x i16> %src0, <4 x i16> %src1, <4 x i16> %src2 ; VI-NEXT: v_xor_b32_e32 v3, -1, v4 ; VI-NEXT: v_lshlrev_b16_e32 v0, v3, v0 ; VI-NEXT: v_lshrrev_b16_e32 v2, v4, v2 -; VI-NEXT: v_or_b32_sdwa v7, v7, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-NEXT: v_or_b32_e32 v0, v0, v2 ; VI-NEXT: v_or_b32_sdwa v0, v0, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; VI-NEXT: v_or_b32_sdwa v1, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD @@ -1049,18 +1048,18 @@ define <4 x i16> @v_fshr_v4i16(<4 x i16> %src0, <4 x i16> %src1, <4 x i16> %src2 ; GFX9-LABEL: v_fshr_v4i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v5 -; GFX9-NEXT: v_mov_b32_e32 v8, 1 -; GFX9-NEXT: v_lshrrev_b16_sdwa v7, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX9-NEXT: v_lshlrev_b16_sdwa v9, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:DWORD src1_sel:WORD_1 -; GFX9-NEXT: v_xor_b32_e32 v6, -1, v6 -; GFX9-NEXT: v_lshlrev_b16_e32 v6, v6, v9 -; GFX9-NEXT: v_or_b32_e32 v6, v6, v7 -; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v4 -; GFX9-NEXT: v_lshrrev_b16_sdwa v9, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX9-NEXT: v_lshlrev_b16_sdwa v8, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX9-NEXT: v_xor_b32_e32 v7, -1, v7 -; GFX9-NEXT: v_lshlrev_b16_e32 v7, v7, v8 +; GFX9-NEXT: v_mov_b32_e32 v7, 1 +; GFX9-NEXT: v_mov_b32_e32 v9, -1 +; GFX9-NEXT: v_lshlrev_b16_sdwa v8, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NEXT: v_xor_b32_sdwa v10, v5, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_lshrrev_b16_sdwa v6, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX9-NEXT: v_lshlrev_b16_e32 v8, v10, v8 +; GFX9-NEXT: v_lshlrev_b16_sdwa v7, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NEXT: v_xor_b32_sdwa v9, v4, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_or_b32_e32 v6, v8, v6 +; GFX9-NEXT: v_lshrrev_b16_sdwa v8, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX9-NEXT: v_lshlrev_b16_e32 v7, v9, v7 +; GFX9-NEXT: v_or_b32_e32 v7, v7, v8 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, 1, v1 ; GFX9-NEXT: v_xor_b32_e32 v8, -1, v5 ; GFX9-NEXT: v_lshlrev_b16_e32 v1, v8, v1 @@ -1070,7 +1069,6 @@ define <4 x i16> @v_fshr_v4i16(<4 x i16> %src0, <4 x i16> %src1, <4 x i16> %src2 ; GFX9-NEXT: v_xor_b32_e32 v3, -1, v4 ; GFX9-NEXT: v_lshlrev_b16_e32 v0, v3, v0 ; GFX9-NEXT: v_lshrrev_b16_e32 v2, v4, v2 -; GFX9-NEXT: v_or_b32_e32 v7, v7, v9 ; GFX9-NEXT: v_or_b32_e32 v0, v0, v2 ; GFX9-NEXT: s_mov_b32 s4, 0x5040100 ; GFX9-NEXT: v_perm_b32 v0, v7, v0, s4 diff --git a/llvm/test/CodeGen/AMDGPU/idiv-licm.ll b/llvm/test/CodeGen/AMDGPU/idiv-licm.ll index 
d4ff845e1edf3a..7ee31bf4dce7cd 100644 --- a/llvm/test/CodeGen/AMDGPU/idiv-licm.ll +++ b/llvm/test/CodeGen/AMDGPU/idiv-licm.ll @@ -637,6 +637,7 @@ define amdgpu_kernel void @udiv16_invariant_denom(ptr addrspace(1) nocapture %ar ; GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c ; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: s_movk_i32 s4, 0x400 +; GFX9-NEXT: v_mov_b32_e32 v3, 1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: s_and_b32 s2, s2, 0xffff ; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s2 @@ -644,19 +645,18 @@ define amdgpu_kernel void @udiv16_invariant_denom(ptr addrspace(1) nocapture %ar ; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v0 ; GFX9-NEXT: .LBB4_1: ; %bb3 ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX9-NEXT: v_and_b32_e32 v3, 0xffff, v2 -; GFX9-NEXT: v_cvt_f32_u32_e32 v4, v3 +; GFX9-NEXT: v_cvt_f32_u32_sdwa v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v5, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; GFX9-NEXT: v_add_u16_e32 v2, 1, v2 ; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, s4, v2 -; GFX9-NEXT: v_lshlrev_b32_e32 v3, 1, v3 -; GFX9-NEXT: v_mul_f32_e32 v5, v4, v1 -; GFX9-NEXT: v_trunc_f32_e32 v5, v5 -; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v5 -; GFX9-NEXT: v_mad_f32 v4, -v5, v0, v4 +; GFX9-NEXT: v_mul_f32_e32 v6, v4, v1 +; GFX9-NEXT: v_trunc_f32_e32 v6, v6 +; GFX9-NEXT: v_cvt_u32_f32_e32 v7, v6 +; GFX9-NEXT: v_mad_f32 v4, -v6, v0, v4 ; GFX9-NEXT: v_cmp_ge_f32_e64 s[0:1], |v4|, v0 -; GFX9-NEXT: v_addc_co_u32_e64 v4, s[0:1], 0, v6, s[0:1] +; GFX9-NEXT: v_addc_co_u32_e64 v4, s[0:1], 0, v7, s[0:1] ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_store_short v3, v4, s[2:3] +; GFX9-NEXT: global_store_short v5, v4, s[2:3] ; GFX9-NEXT: s_cbranch_vccz .LBB4_1 ; GFX9-NEXT: ; %bb.2: ; %bb2 ; GFX9-NEXT: s_endpgm @@ -667,25 +667,25 @@ define amdgpu_kernel void @udiv16_invariant_denom(ptr addrspace(1) nocapture %ar ; GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c ; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; 
GFX10-NEXT: v_mov_b32_e32 v2, 0 +; GFX10-NEXT: v_mov_b32_e32 v3, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_and_b32 s0, s4, 0xffff ; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s0 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v1, v0 ; GFX10-NEXT: .LBB4_1: ; %bb3 ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_and_b32_e32 v3, 0xffff, v2 +; GFX10-NEXT: v_cvt_f32_u32_sdwa v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 +; GFX10-NEXT: v_lshlrev_b32_sdwa v5, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; GFX10-NEXT: v_add_nc_u16 v2, v2, 1 -; GFX10-NEXT: v_cvt_f32_u32_e32 v4, v3 +; GFX10-NEXT: v_mul_f32_e32 v6, v4, v1 ; GFX10-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x400, v2 -; GFX10-NEXT: v_lshlrev_b32_e32 v3, 1, v3 -; GFX10-NEXT: v_mul_f32_e32 v5, v4, v1 +; GFX10-NEXT: v_trunc_f32_e32 v6, v6 ; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, vcc_lo -; GFX10-NEXT: v_trunc_f32_e32 v5, v5 -; GFX10-NEXT: v_mad_f32 v4, -v5, v0, v4 -; GFX10-NEXT: v_cvt_u32_f32_e32 v5, v5 +; GFX10-NEXT: v_mad_f32 v4, -v6, v0, v4 +; GFX10-NEXT: v_cvt_u32_f32_e32 v6, v6 ; GFX10-NEXT: v_cmp_ge_f32_e64 s0, |v4|, v0 -; GFX10-NEXT: v_add_co_ci_u32_e64 v4, s0, 0, v5, s0 -; GFX10-NEXT: global_store_short v3, v4, s[2:3] +; GFX10-NEXT: v_add_co_ci_u32_e64 v4, s0, 0, v6, s0 +; GFX10-NEXT: global_store_short v5, v4, s[2:3] ; GFX10-NEXT: s_cbranch_vccz .LBB4_1 ; GFX10-NEXT: ; %bb.2: ; %bb2 ; GFX10-NEXT: s_endpgm @@ -748,30 +748,28 @@ define amdgpu_kernel void @urem16_invariant_denom(ptr addrspace(1) nocapture %ar ; GFX9: ; %bb.0: ; %bb ; GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c ; GFX9-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NEXT: s_movk_i32 s5, 0x400 +; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 +; GFX9-NEXT: s_movk_i32 s3, 0x400 +; GFX9-NEXT: v_mov_b32_e32 v3, 1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: s_and_b32 s4, s2, 0xffff -; GFX9-NEXT: v_cvt_f32_u32_e32 v0, s4 -; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 +; GFX9-NEXT: s_and_b32 s2, s2, 0xffff +; GFX9-NEXT: 
v_cvt_f32_u32_e32 v0, s2 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v0 ; GFX9-NEXT: .LBB5_1: ; %bb3 ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX9-NEXT: v_and_b32_e32 v3, 0xffff, v2 -; GFX9-NEXT: v_cvt_f32_u32_e32 v4, v3 +; GFX9-NEXT: v_cvt_f32_u32_sdwa v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 +; GFX9-NEXT: v_mul_f32_e32 v5, v4, v1 +; GFX9-NEXT: v_trunc_f32_e32 v5, v5 +; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v5 +; GFX9-NEXT: v_mad_f32 v4, -v5, v0, v4 +; GFX9-NEXT: v_cmp_ge_f32_e64 vcc, |v4|, v0 +; GFX9-NEXT: v_lshlrev_b32_sdwa v5, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 +; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v6, vcc +; GFX9-NEXT: v_mul_lo_u32 v4, v4, s2 +; GFX9-NEXT: v_sub_u32_sdwa v4, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX9-NEXT: v_add_u16_e32 v2, 1, v2 -; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, s5, v2 -; GFX9-NEXT: v_lshlrev_b32_e32 v5, 1, v3 -; GFX9-NEXT: v_mul_f32_e32 v6, v4, v1 -; GFX9-NEXT: v_trunc_f32_e32 v6, v6 -; GFX9-NEXT: v_cvt_u32_f32_e32 v7, v6 -; GFX9-NEXT: v_mad_f32 v4, -v6, v0, v4 -; GFX9-NEXT: v_cmp_ge_f32_e64 s[0:1], |v4|, v0 -; GFX9-NEXT: s_and_b64 vcc, exec, vcc -; GFX9-NEXT: v_addc_co_u32_e64 v4, s[0:1], 0, v7, s[0:1] -; GFX9-NEXT: v_mul_lo_u32 v4, v4, s4 -; GFX9-NEXT: v_sub_u32_e32 v3, v3, v4 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NEXT: global_store_short v5, v3, s[2:3] +; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, s3, v2 +; GFX9-NEXT: global_store_short v5, v4, s[0:1] ; GFX9-NEXT: s_cbranch_vccz .LBB5_1 ; GFX9-NEXT: ; %bb.2: ; %bb2 ; GFX9-NEXT: s_endpgm @@ -782,26 +780,26 @@ define amdgpu_kernel void @urem16_invariant_denom(ptr addrspace(1) nocapture %ar ; GFX10-NEXT: s_load_dword s4, s[0:1], 0x2c ; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24 ; GFX10-NEXT: v_mov_b32_e32 v2, 0 +; GFX10-NEXT: v_mov_b32_e32 v3, 1 ; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; GFX10-NEXT: s_and_b32 s0, s4, 0xffff ; GFX10-NEXT: v_cvt_f32_u32_e32 v0, s0 ; GFX10-NEXT: v_rcp_iflag_f32_e32 
v1, v0 ; GFX10-NEXT: .LBB5_1: ; %bb3 ; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_and_b32_e32 v3, 0xffff, v2 -; GFX10-NEXT: v_add_nc_u16 v2, v2, 1 -; GFX10-NEXT: v_cvt_f32_u32_e32 v4, v3 +; GFX10-NEXT: v_cvt_f32_u32_sdwa v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 ; GFX10-NEXT: v_mul_f32_e32 v5, v4, v1 ; GFX10-NEXT: v_trunc_f32_e32 v5, v5 ; GFX10-NEXT: v_mad_f32 v4, -v5, v0, v4 ; GFX10-NEXT: v_cvt_u32_f32_e32 v5, v5 ; GFX10-NEXT: v_cmp_ge_f32_e64 vcc_lo, |v4|, v0 ; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, 0, v5, vcc_lo -; GFX10-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x400, v2 -; GFX10-NEXT: v_lshlrev_b32_e32 v5, 1, v3 +; GFX10-NEXT: v_lshlrev_b32_sdwa v5, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; GFX10-NEXT: v_mul_lo_u32 v4, v4, s0 -; GFX10-NEXT: v_sub_nc_u32_e32 v3, v3, v4 -; GFX10-NEXT: global_store_short v5, v3, s[2:3] +; GFX10-NEXT: v_sub_nc_u32_sdwa v4, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX10-NEXT: v_add_nc_u16 v2, v2, 1 +; GFX10-NEXT: global_store_short v5, v4, s[2:3] +; GFX10-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x400, v2 ; GFX10-NEXT: s_cbranch_vccz .LBB5_1 ; GFX10-NEXT: ; %bb.2: ; %bb2 ; GFX10-NEXT: s_endpgm diff --git a/llvm/test/CodeGen/AMDGPU/idot4u.ll b/llvm/test/CodeGen/AMDGPU/idot4u.ll index 9a1de74034cd83..0b131ea74f1abb 100644 --- a/llvm/test/CodeGen/AMDGPU/idot4u.ll +++ b/llvm/test/CodeGen/AMDGPU/idot4u.ll @@ -4713,29 +4713,24 @@ define amdgpu_kernel void @udot4_acc32_multi(ptr addrspace(1) %src1, ; GFX9-NODL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 ; GFX9-NODL-NEXT: v_lshlrev_b32_e32 v2, 3, v0 ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: global_load_dword v3, v2, s[6:7] ; GFX9-NODL-NEXT: global_load_dwordx2 v[0:1], v2, s[4:5] +; GFX9-NODL-NEXT: global_load_dword v3, v2, s[6:7] ; GFX9-NODL-NEXT: s_load_dword s0, s[2:3], 0x0 ; GFX9-NODL-NEXT: v_mov_b32_e32 v2, 0 -; GFX9-NODL-NEXT: s_waitcnt vmcnt(1) -; GFX9-NODL-NEXT: v_and_b32_e32 
v4, 0xff, v3 -; GFX9-NODL-NEXT: v_bfe_u32 v6, v3, 16, 8 -; GFX9-NODL-NEXT: v_bfe_u32 v5, v3, 8, 8 -; GFX9-NODL-NEXT: v_lshrrev_b32_e32 v3, 24, v3 ; GFX9-NODL-NEXT: s_waitcnt vmcnt(0) -; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v7, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v9, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:DWORD -; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v8, v0, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD -; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD -; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v4, v1, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v5, v1, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD -; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v6, v1, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:DWORD -; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v4, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 +; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v6, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:BYTE_2 +; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v5, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1 +; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3 +; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v7, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 +; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v8, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1 +; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v9, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:BYTE_2 +; GFX9-NODL-NEXT: v_mul_u32_u24_sdwa v1, v1, v3 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_3 ; GFX9-NODL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-NODL-NEXT: v_add3_u32 v3, v7, s0, v9 -; GFX9-NODL-NEXT: v_add3_u32 v3, v3, v4, v6 -; GFX9-NODL-NEXT: v_add3_u32 v0, v8, v3, v0 -; GFX9-NODL-NEXT: v_add3_u32 v0, v0, v5, v1 +; GFX9-NODL-NEXT: v_add3_u32 v3, v4, s0, v6 +; GFX9-NODL-NEXT: v_add3_u32 v3, v3, v7, v9 +; GFX9-NODL-NEXT: v_add3_u32 v0, v5, v3, v0 +; GFX9-NODL-NEXT: v_add3_u32 v0, v0, v8, v1 ; GFX9-NODL-NEXT: global_store_dword v2, v0, s[2:3] ; GFX9-NODL-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/AMDGPU/llvm.frexp.ll b/llvm/test/CodeGen/AMDGPU/llvm.frexp.ll index 14742c5827c1e4..b9fef0834cb245 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.frexp.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.frexp.ll @@ -183,11 +183,10 @@ define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) { ; GFX8-SDAG-LABEL: test_frexp_v2f16_v2i32: ; GFX8-SDAG: ; %bb.0: ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX8-SDAG-NEXT: v_frexp_mant_f16_e32 v1, v0 -; GFX8-SDAG-NEXT: v_frexp_mant_f16_sdwa v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; GFX8-SDAG-NEXT: v_or_b32_e32 v3, v1, v3 -; GFX8-SDAG-NEXT: v_frexp_exp_i16_f16_e32 v1, v2 +; GFX8-SDAG-NEXT: v_frexp_mant_f16_sdwa v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX8-SDAG-NEXT: v_or_b32_e32 v3, v1, v2 +; GFX8-SDAG-NEXT: v_frexp_exp_i16_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX8-SDAG-NEXT: v_frexp_exp_i16_f16_e32 v0, v0 ; GFX8-SDAG-NEXT: v_bfe_i32 v2, v1, 0, 16 ; GFX8-SDAG-NEXT: v_bfe_i32 v1, v0, 0, 16 @@ -197,11 +196,10 @@ define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) { ; GFX9-SDAG-LABEL: test_frexp_v2f16_v2i32: ; GFX9-SDAG: ; %bb.0: ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 -; GFX9-SDAG-NEXT: v_frexp_mant_f16_e32 v2, v1 -; GFX9-SDAG-NEXT: 
v_frexp_mant_f16_e32 v3, v0 -; GFX9-SDAG-NEXT: v_pack_b32_f16 v3, v3, v2 -; GFX9-SDAG-NEXT: v_frexp_exp_i16_f16_e32 v1, v1 +; GFX9-SDAG-NEXT: v_frexp_mant_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX9-SDAG-NEXT: v_frexp_mant_f16_e32 v2, v0 +; GFX9-SDAG-NEXT: v_pack_b32_f16 v3, v2, v1 +; GFX9-SDAG-NEXT: v_frexp_exp_i16_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 ; GFX9-SDAG-NEXT: v_frexp_exp_i16_f16_e32 v0, v0 ; GFX9-SDAG-NEXT: v_bfe_i32 v2, v1, 0, 16 ; GFX9-SDAG-NEXT: v_bfe_i32 v1, v0, 0, 16 @@ -246,27 +244,25 @@ define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) { ; GFX8-GISEL-LABEL: test_frexp_v2f16_v2i32: ; GFX8-GISEL: ; %bb.0: ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX8-GISEL-NEXT: v_frexp_mant_f16_e32 v3, v0 -; GFX8-GISEL-NEXT: v_frexp_exp_i16_f16_e32 v0, v0 -; GFX8-GISEL-NEXT: v_bfe_i32 v1, v0, 0, 16 -; GFX8-GISEL-NEXT: v_frexp_mant_f16_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; GFX8-GISEL-NEXT: v_frexp_exp_i16_f16_e32 v2, v2 -; GFX8-GISEL-NEXT: v_bfe_i32 v2, v2, 0, 16 -; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v3, v0 +; GFX8-GISEL-NEXT: v_frexp_exp_i16_f16_e32 v1, v0 +; GFX8-GISEL-NEXT: v_frexp_mant_f16_sdwa v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX8-GISEL-NEXT: v_frexp_exp_i16_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX8-GISEL-NEXT: v_bfe_i32 v1, v1, 0, 16 +; GFX8-GISEL-NEXT: v_bfe_i32 v2, v0, 0, 16 +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v3, v4 ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-GISEL-LABEL: test_frexp_v2f16_v2i32: ; GFX9-GISEL: ; %bb.0: ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0 ; GFX9-GISEL-NEXT: v_frexp_mant_f16_e32 v3, v0 -; GFX9-GISEL-NEXT: v_frexp_exp_i16_f16_e32 v0, v0 -; GFX9-GISEL-NEXT: v_bfe_i32 v1, v0, 0, 16 -; GFX9-GISEL-NEXT: v_frexp_mant_f16_e32 v0, v2 
-; GFX9-GISEL-NEXT: v_frexp_exp_i16_f16_e32 v2, v2 -; GFX9-GISEL-NEXT: v_bfe_i32 v2, v2, 0, 16 -; GFX9-GISEL-NEXT: v_pack_b32_f16 v0, v3, v0 +; GFX9-GISEL-NEXT: v_frexp_exp_i16_f16_e32 v1, v0 +; GFX9-GISEL-NEXT: v_frexp_mant_f16_sdwa v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX9-GISEL-NEXT: v_frexp_exp_i16_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX9-GISEL-NEXT: v_bfe_i32 v1, v1, 0, 16 +; GFX9-GISEL-NEXT: v_bfe_i32 v2, v0, 0, 16 +; GFX9-GISEL-NEXT: v_pack_b32_f16 v0, v3, v4 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] %result = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %a) ret { <2 x half>, <2 x i32> } %result diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll index 6672568b98a203..8861ee380be031 100644 --- a/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll +++ b/llvm/test/CodeGen/AMDGPU/machine-sink-temporal-divergence-swdev407790.ll @@ -147,11 +147,10 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: s_add_i32 s48, s49, 1 ; CHECK-NEXT: s_add_i32 s5, s49, 5 ; CHECK-NEXT: v_or3_b32 v57, s4, v43, s48 -; CHECK-NEXT: ds_read_u8 v0, v0 +; CHECK-NEXT: s_waitcnt lgkmcnt(0) +; CHECK-NEXT: ds_read_u8 v56, v0 ; CHECK-NEXT: v_mov_b32_e32 v58, s48 ; CHECK-NEXT: s_mov_b32 s52, exec_lo -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: v_and_b32_e32 v56, 0xff, v0 ; CHECK-NEXT: v_cmpx_lt_u32_e64 s5, v42 ; CHECK-NEXT: s_cbranch_execz .LBB0_17 ; CHECK-NEXT: ; %bb.6: ; %.preheader2 @@ -175,10 +174,10 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: ; => This Inner Loop Header: Depth=2 ; CHECK-NEXT: v_add_nc_u32_e32 v59, s54, v46 ; CHECK-NEXT: v_add_nc_u32_e32 v58, s54, v57 -; CHECK-NEXT: s_mov_b32 s55, exec_lo ; CHECK-NEXT: ds_read_u8 v0, v59 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; 
CHECK-NEXT: v_cmpx_eq_u16_e64 v56, v0 +; CHECK-NEXT: v_cmp_eq_u16_sdwa s4, v56, v0 src0_sel:BYTE_0 src1_sel:DWORD +; CHECK-NEXT: s_and_saveexec_b32 s55, s4 ; CHECK-NEXT: s_cbranch_execz .LBB0_10 ; CHECK-NEXT: ; %bb.9: ; in Loop: Header=BB0_8 Depth=2 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 @@ -200,9 +199,9 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: .LBB0_10: ; in Loop: Header=BB0_8 Depth=2 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s55 ; CHECK-NEXT: ds_read_u8 v0, v59 offset:1 -; CHECK-NEXT: s_mov_b32 s55, exec_lo ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: v_cmpx_eq_u16_e64 v56, v0 +; CHECK-NEXT: v_cmp_eq_u16_sdwa s4, v56, v0 src0_sel:BYTE_0 src1_sel:DWORD +; CHECK-NEXT: s_and_saveexec_b32 s55, s4 ; CHECK-NEXT: s_cbranch_execz .LBB0_12 ; CHECK-NEXT: ; %bb.11: ; in Loop: Header=BB0_8 Depth=2 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 @@ -225,9 +224,9 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: .LBB0_12: ; in Loop: Header=BB0_8 Depth=2 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s55 ; CHECK-NEXT: ds_read_u8 v0, v59 offset:2 -; CHECK-NEXT: s_mov_b32 s55, exec_lo ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: v_cmpx_eq_u16_e64 v56, v0 +; CHECK-NEXT: v_cmp_eq_u16_sdwa s4, v56, v0 src0_sel:BYTE_0 src1_sel:DWORD +; CHECK-NEXT: s_and_saveexec_b32 s55, s4 ; CHECK-NEXT: s_cbranch_execz .LBB0_14 ; CHECK-NEXT: ; %bb.13: ; in Loop: Header=BB0_8 Depth=2 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 @@ -250,9 +249,9 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: .LBB0_14: ; in Loop: Header=BB0_8 Depth=2 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s55 ; CHECK-NEXT: ds_read_u8 v0, v59 offset:3 -; CHECK-NEXT: s_mov_b32 s55, exec_lo ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: v_cmpx_eq_u16_e64 v56, v0 +; CHECK-NEXT: v_cmp_eq_u16_sdwa s4, v56, v0 src0_sel:BYTE_0 src1_sel:DWORD +; CHECK-NEXT: s_and_saveexec_b32 s55, s4 ; CHECK-NEXT: 
s_cbranch_execz .LBB0_7 ; CHECK-NEXT: ; %bb.15: ; in Loop: Header=BB0_8 Depth=2 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 @@ -300,10 +299,10 @@ define protected amdgpu_kernel void @kernel_round1(ptr addrspace(1) nocapture no ; CHECK-NEXT: .LBB0_20: ; Parent Loop BB0_5 Depth=1 ; CHECK-NEXT: ; => This Inner Loop Header: Depth=2 ; CHECK-NEXT: v_add_nc_u32_e32 v0, v44, v58 -; CHECK-NEXT: s_mov_b32 s53, exec_lo ; CHECK-NEXT: ds_read_u8 v0, v0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: v_cmpx_eq_u16_e64 v56, v0 +; CHECK-NEXT: v_cmp_eq_u16_sdwa s4, v56, v0 src0_sel:BYTE_0 src1_sel:DWORD +; CHECK-NEXT: s_and_saveexec_b32 s53, s4 ; CHECK-NEXT: s_cbranch_execz .LBB0_19 ; CHECK-NEXT: ; %bb.21: ; in Loop: Header=BB0_20 Depth=2 ; CHECK-NEXT: v_mov_b32_e32 v31, v40 diff --git a/llvm/test/CodeGen/AMDGPU/permute_i8.ll b/llvm/test/CodeGen/AMDGPU/permute_i8.ll index 7ca9ae359a4992..352c1ecf8ece4a 100644 --- a/llvm/test/CodeGen/AMDGPU/permute_i8.ll +++ b/llvm/test/CodeGen/AMDGPU/permute_i8.ll @@ -697,18 +697,16 @@ define hidden void @add(ptr addrspace(1) %in0, ptr addrspace(1) %in1, i8 %elt, p ; GFX9-LABEL: add: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: global_load_dword v4, v[2:3], off -; GFX9-NEXT: global_load_dword v7, v[0:1], off -; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_lshrrev_b16_e32 v0, 8, v4 +; GFX9-NEXT: global_load_dword v4, v[0:1], off +; GFX9-NEXT: global_load_dword v7, v[2:3], off ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_add_u16_sdwa v1, v7, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD -; GFX9-NEXT: v_add_u16_sdwa v2, v4, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 -; GFX9-NEXT: v_add_u16_sdwa v3, v7, v0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD -; GFX9-NEXT: v_add_u16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX9-NEXT: v_or_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX9-NEXT: v_add_u16_sdwa v0, v4, v7 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_1 +; GFX9-NEXT: v_add_u16_sdwa v1, v4, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD +; GFX9-NEXT: v_add_u16_sdwa v2, v7, v7 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 +; GFX9-NEXT: v_add_u16_sdwa v3, v7, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:WORD_1 +; GFX9-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX9-NEXT: v_or_b32_sdwa v1, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX9-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX9-NEXT: global_store_dword v[5:6], v0, off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] @@ -808,17 +806,16 @@ define hidden void @add_store(ptr addrspace(1) %in0, ptr addrspace(1) %in1, i8 % ; GFX9-NEXT: global_load_dword v9, v[2:3], off ; GFX9-NEXT: s_movk_i32 s4, 0xff00 ; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_lshrrev_b16_e32 v0, 8, v4 -; GFX9-NEXT: v_and_b32_sdwa v1, v4, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD +; GFX9-NEXT: v_and_b32_sdwa v0, v4, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_add_u16_sdwa v2, v4, v9 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_1 -; GFX9-NEXT: v_or_b32_e32 v1, v0, v1 -; GFX9-NEXT: v_add_u16_e32 v0, v0, v9 -; GFX9-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX9-NEXT: v_add_u16_sdwa v1, v4, v9 
dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_1 +; GFX9-NEXT: v_add_u16_sdwa v2, v4, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD +; GFX9-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX9-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD ; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1 -; GFX9-NEXT: global_store_dword v[5:6], v0, off -; GFX9-NEXT: global_store_dword v[7:8], v1, off +; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX9-NEXT: global_store_dword v[5:6], v1, off +; GFX9-NEXT: global_store_dword v[7:8], v0, off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] %vec0 = load <4 x i8>, ptr addrspace(1) %in0, align 4 @@ -868,23 +865,22 @@ define hidden void @add_store_div_16(ptr addrspace(1) %in0, ptr addrspace(1) %in ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 2, v4 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v4 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc -; GFX9-NEXT: global_load_dword v9, v[0:1], off -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v2, v4 -; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc -; GFX9-NEXT: global_load_dword v0, v[0:1], off +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc +; GFX9-NEXT: global_load_dword v4, v[0:1], off +; GFX9-NEXT: global_load_dword v9, v[2:3], off ; GFX9-NEXT: s_movk_i32 s4, 0xff00 ; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_lshrrev_b16_e32 v1, 8, v9 -; GFX9-NEXT: v_and_b32_sdwa v2, v9, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_e32 v2, v1, v2 +; GFX9-NEXT: v_and_b32_sdwa v0, v4, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_add_u16_sdwa v3, v9, v0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_1 -; GFX9-NEXT: v_add_u16_e32 v0, v1, v0 -; GFX9-NEXT: v_or_b32_sdwa v0, v0, v3 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX9-NEXT: v_add_u16_sdwa v1, v4, v9 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_1 +; GFX9-NEXT: v_add_u16_sdwa v2, v4, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD +; GFX9-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX9-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD +; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v1 ; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v2 -; GFX9-NEXT: global_store_dword v[5:6], v0, off -; GFX9-NEXT: global_store_dword v[7:8], v1, off +; GFX9-NEXT: global_store_dword v[5:6], v1, off +; GFX9-NEXT: global_store_dword v[7:8], v0, off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -942,22 +938,20 @@ define hidden void @add_store_div(ptr addrspace(1) %in0, ptr addrspace(1) %in1, ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc -; GFX9-NEXT: global_load_dword v4, v[2:3], off -; GFX9-NEXT: global_load_dword v9, v[0:1], off +; GFX9-NEXT: global_load_dword v4, v[0:1], off +; GFX9-NEXT: global_load_dword v9, v[2:3], off ; GFX9-NEXT: s_mov_b32 s4, 0x10705 -; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_lshrrev_b16_e32 v0, 8, v4 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_perm_b32 v1, v9, v4, s4 -; GFX9-NEXT: v_add_u16_sdwa v2, v9, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD -; GFX9-NEXT: v_add_u16_sdwa v3, v4, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 -; GFX9-NEXT: v_add_u16_sdwa v9, v9, v0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD -; GFX9-NEXT: v_add_u16_sdwa v0, v0, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 -; GFX9-NEXT: 
v_or_b32_sdwa v2, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-NEXT: global_store_dword v[5:6], v0, off -; GFX9-NEXT: global_store_dword v[7:8], v1, off +; GFX9-NEXT: v_perm_b32 v0, v4, v9, s4 +; GFX9-NEXT: v_add_u16_sdwa v1, v4, v9 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_1 +; GFX9-NEXT: v_add_u16_sdwa v2, v4, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD +; GFX9-NEXT: v_add_u16_sdwa v3, v9, v9 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 +; GFX9-NEXT: v_add_u16_sdwa v4, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:WORD_1 +; GFX9-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX9-NEXT: v_or_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX9-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX9-NEXT: global_store_dword v[5:6], v1, off +; GFX9-NEXT: global_store_dword v[7:8], v0, off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -1391,22 +1385,20 @@ define hidden void @mul_store_div(ptr addrspace(1) %in0, ptr addrspace(1) %in1, ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc -; GFX9-NEXT: global_load_dword v4, v[2:3], off -; GFX9-NEXT: global_load_dword v9, v[0:1], off +; GFX9-NEXT: global_load_dword v4, v[0:1], off +; GFX9-NEXT: global_load_dword v9, v[2:3], off ; GFX9-NEXT: s_mov_b32 s4, 0x2000504 -; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v4 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; 
GFX9-NEXT: v_mul_lo_u16_e32 v2, v9, v4 -; GFX9-NEXT: v_mul_lo_u16_sdwa v3, v9, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1 -; GFX9-NEXT: v_or_b32_sdwa v2, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_mul_lo_u16_sdwa v3, v0, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 -; GFX9-NEXT: v_mul_lo_u16_e32 v0, v4, v0 -; GFX9-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-NEXT: v_perm_b32 v1, v9, v4, s4 -; GFX9-NEXT: global_store_dword v[5:6], v0, off -; GFX9-NEXT: global_store_dword v[7:8], v1, off +; GFX9-NEXT: v_perm_b32 v0, v4, v9, s4 +; GFX9-NEXT: v_mul_lo_u16_e32 v1, v4, v9 +; GFX9-NEXT: v_mul_lo_u16_sdwa v2, v4, v9 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:BYTE_1 +; GFX9-NEXT: v_mul_lo_u16_sdwa v3, v9, v9 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_3 +; GFX9-NEXT: v_mul_lo_u16_sdwa v4, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX9-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX9-NEXT: v_or_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX9-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX9-NEXT: global_store_dword v[5:6], v1, off +; GFX9-NEXT: global_store_dword v[7:8], v0, off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] %tid = call i32 @llvm.amdgcn.workitem.id.x() @@ -1503,67 +1495,61 @@ define hidden void @sdiv_store_div(ptr addrspace(1) %in0, ptr addrspace(1) %in1, ; GFX10-NEXT: global_load_dword v4, v[2:3], off ; GFX10-NEXT: global_load_dword v9, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(1) -; GFX10-NEXT: v_bfe_i32 v0, v4, 0, 8 -; GFX10-NEXT: 
s_waitcnt vmcnt(0) -; GFX10-NEXT: v_ashrrev_i32_e32 v2, 24, v9 -; GFX10-NEXT: v_bfe_i32 v3, v4, 8, 8 -; GFX10-NEXT: v_bfe_i32 v1, v9, 16, 8 -; GFX10-NEXT: v_bfe_i32 v10, v4, 16, 8 -; GFX10-NEXT: v_cvt_f32_i32_e32 v13, v0 -; GFX10-NEXT: v_ashrrev_i32_e32 v11, 24, v4 -; GFX10-NEXT: v_xor_b32_e32 v15, v2, v3 -; GFX10-NEXT: v_cvt_f32_i32_e32 v3, v3 -; GFX10-NEXT: v_xor_b32_e32 v12, v1, v0 -; GFX10-NEXT: v_rcp_iflag_f32_e32 v16, v13 -; GFX10-NEXT: v_cvt_f32_i32_e32 v14, v1 -; GFX10-NEXT: v_xor_b32_e32 v1, v1, v10 -; GFX10-NEXT: v_cvt_f32_i32_e32 v10, v10 -; GFX10-NEXT: v_rcp_iflag_f32_e32 v17, v3 -; GFX10-NEXT: v_xor_b32_e32 v0, v0, v11 -; GFX10-NEXT: v_cvt_f32_i32_e32 v11, v11 -; GFX10-NEXT: v_cvt_f32_i32_e32 v2, v2 -; GFX10-NEXT: v_rcp_iflag_f32_e32 v18, v10 -; GFX10-NEXT: v_ashrrev_i32_e32 v12, 30, v12 -; GFX10-NEXT: v_mul_f32_e32 v16, v14, v16 -; GFX10-NEXT: v_rcp_iflag_f32_e32 v19, v11 -; GFX10-NEXT: v_ashrrev_i32_e32 v15, 30, v15 -; GFX10-NEXT: v_ashrrev_i32_e32 v1, 30, v1 +; GFX10-NEXT: v_cvt_f32_i32_sdwa v1, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 +; GFX10-NEXT: v_cvt_f32_i32_sdwa v10, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_cvt_f32_i32_sdwa v2, sext(v9) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 +; GFX10-NEXT: v_cvt_f32_i32_sdwa v12, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 +; GFX10-NEXT: v_cvt_f32_i32_sdwa v14, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 +; GFX10-NEXT: v_rcp_iflag_f32_e32 v15, v1 +; GFX10-NEXT: v_rcp_iflag_f32_e32 v16, v10 +; GFX10-NEXT: v_cvt_f32_i32_sdwa v19, sext(v9) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 +; GFX10-NEXT: v_rcp_iflag_f32_e32 v17, v12 +; GFX10-NEXT: v_xor_b32_sdwa v0, sext(v9), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:BYTE_0 +; GFX10-NEXT: v_rcp_iflag_f32_e32 v18, v14 +; GFX10-NEXT: v_xor_b32_sdwa v3, sext(v9), sext(v4) dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_1 +; GFX10-NEXT: v_xor_b32_sdwa v11, sext(v9), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:BYTE_2 +; GFX10-NEXT: v_xor_b32_sdwa v13, sext(v4), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_3 +; GFX10-NEXT: v_ashrrev_i32_e32 v0, 30, v0 +; GFX10-NEXT: v_mul_f32_e32 v15, v2, v15 +; GFX10-NEXT: v_mul_f32_e32 v16, v19, v16 +; GFX10-NEXT: v_ashrrev_i32_e32 v3, 30, v3 ; GFX10-NEXT: v_mul_f32_e32 v17, v2, v17 +; GFX10-NEXT: v_or_b32_e32 v0, 1, v0 +; GFX10-NEXT: v_trunc_f32_e32 v15, v15 ; GFX10-NEXT: v_trunc_f32_e32 v16, v16 -; GFX10-NEXT: v_or_b32_e32 v12, 1, v12 -; GFX10-NEXT: v_or_b32_e32 v15, 1, v15 -; GFX10-NEXT: v_mul_f32_e32 v18, v14, v18 +; GFX10-NEXT: v_mul_f32_e32 v18, v1, v18 ; GFX10-NEXT: v_trunc_f32_e32 v17, v17 -; GFX10-NEXT: v_mad_f32 v20, -v16, v13, v14 -; GFX10-NEXT: v_mul_f32_e32 v19, v13, v19 -; GFX10-NEXT: v_ashrrev_i32_e32 v0, 30, v0 +; GFX10-NEXT: v_ashrrev_i32_e32 v11, 30, v11 +; GFX10-NEXT: v_mad_f32 v20, -v15, v1, v2 +; GFX10-NEXT: v_mad_f32 v19, -v16, v10, v19 +; GFX10-NEXT: v_or_b32_e32 v3, 1, v3 ; GFX10-NEXT: v_trunc_f32_e32 v18, v18 -; GFX10-NEXT: v_mad_f32 v2, -v17, v3, v2 -; GFX10-NEXT: v_cmp_ge_f32_e64 vcc_lo, |v20|, |v13| -; GFX10-NEXT: v_trunc_f32_e32 v19, v19 -; GFX10-NEXT: v_or_b32_e32 v1, 1, v1 -; GFX10-NEXT: v_mad_f32 v14, -v18, v10, v14 -; GFX10-NEXT: v_or_b32_e32 v0, 1, v0 -; GFX10-NEXT: v_cndmask_b32_e32 v12, 0, v12, vcc_lo -; GFX10-NEXT: v_cmp_ge_f32_e64 vcc_lo, |v2|, |v3| -; GFX10-NEXT: v_mad_f32 v21, -v19, v11, v13 +; GFX10-NEXT: v_mad_f32 v2, -v17, v12, v2 +; GFX10-NEXT: v_cmp_ge_f32_e64 vcc_lo, |v20|, |v1| +; GFX10-NEXT: v_ashrrev_i32_e32 v13, 30, v13 +; GFX10-NEXT: v_or_b32_e32 v11, 1, v11 +; GFX10-NEXT: v_mad_f32 v21, -v18, v14, v1 +; GFX10-NEXT: v_cvt_i32_f32_e32 v15, v15 +; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc_lo +; GFX10-NEXT: v_cmp_ge_f32_e64 vcc_lo, |v19|, |v10| +; GFX10-NEXT: v_or_b32_e32 v13, 1, v13 ; 
GFX10-NEXT: v_cvt_i32_f32_e32 v16, v16 ; GFX10-NEXT: v_cvt_i32_f32_e32 v17, v17 ; GFX10-NEXT: v_cvt_i32_f32_e32 v18, v18 -; GFX10-NEXT: v_cndmask_b32_e32 v2, 0, v15, vcc_lo -; GFX10-NEXT: v_cmp_ge_f32_e64 vcc_lo, |v14|, |v10| -; GFX10-NEXT: v_cvt_i32_f32_e32 v19, v19 -; GFX10-NEXT: v_add_nc_u32_e32 v3, v16, v12 -; GFX10-NEXT: v_add_nc_u32_sdwa v2, v17, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc_lo -; GFX10-NEXT: v_cmp_ge_f32_e64 vcc_lo, |v21|, |v11| -; GFX10-NEXT: v_or_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX10-NEXT: v_add_nc_u32_e32 v1, v18, v1 -; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc_lo -; GFX10-NEXT: v_add_nc_u32_sdwa v0, v19, v0 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc_lo +; GFX10-NEXT: v_cmp_ge_f32_e64 vcc_lo, |v2|, |v12| +; GFX10-NEXT: v_add_nc_u32_e32 v0, v15, v0 +; GFX10-NEXT: v_add_nc_u32_sdwa v1, v16, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX10-NEXT: v_cndmask_b32_e32 v2, 0, v11, vcc_lo +; GFX10-NEXT: v_cmp_ge_f32_e64 vcc_lo, |v21|, |v14| +; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX10-NEXT: v_add_nc_u32_e32 v2, v17, v2 +; GFX10-NEXT: v_cndmask_b32_e32 v3, 0, v13, vcc_lo +; GFX10-NEXT: v_add_nc_u32_sdwa v3, v18, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX10-NEXT: v_or_b32_sdwa v1, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; GFX10-NEXT: v_perm_b32 v1, v9, v4, 0x60706 -; GFX10-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD ; 
GFX10-NEXT: global_store_dword v[5:6], v0, off ; GFX10-NEXT: global_store_dword v[7:8], v1, off ; GFX10-NEXT: s_setpc_b64 s[30:31] @@ -1581,67 +1567,61 @@ define hidden void @sdiv_store_div(ptr addrspace(1) %in0, ptr addrspace(1) %in1, ; GFX9-NEXT: global_load_dword v9, v[0:1], off ; GFX9-NEXT: s_mov_b32 s4, 0x60706 ; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_bfe_i32 v1, v4, 0, 8 +; GFX9-NEXT: v_cvt_f32_i32_sdwa v2, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 +; GFX9-NEXT: v_cvt_f32_i32_sdwa v12, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_perm_b32 v0, v9, v4, s4 -; GFX9-NEXT: v_bfe_i32 v2, v9, 16, 8 -; GFX9-NEXT: v_ashrrev_i32_e32 v3, 24, v9 -; GFX9-NEXT: v_bfe_i32 v9, v4, 8, 8 -; GFX9-NEXT: v_cvt_f32_i32_e32 v12, v1 -; GFX9-NEXT: v_bfe_i32 v10, v4, 16, 8 -; GFX9-NEXT: v_ashrrev_i32_e32 v4, 24, v4 -; GFX9-NEXT: v_xor_b32_e32 v14, v3, v9 -; GFX9-NEXT: v_cvt_f32_i32_e32 v9, v9 -; GFX9-NEXT: v_xor_b32_e32 v11, v2, v1 -; GFX9-NEXT: v_cvt_f32_i32_e32 v13, v2 -; GFX9-NEXT: v_xor_b32_e32 v2, v2, v10 -; GFX9-NEXT: v_cvt_f32_i32_e32 v10, v10 -; GFX9-NEXT: v_xor_b32_e32 v1, v1, v4 -; GFX9-NEXT: v_cvt_f32_i32_e32 v4, v4 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v15, v12 -; GFX9-NEXT: v_cvt_f32_i32_e32 v3, v3 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v16, v9 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v17, v10 +; GFX9-NEXT: v_xor_b32_sdwa v1, sext(v9), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:BYTE_0 +; GFX9-NEXT: v_cvt_f32_i32_sdwa v3, sext(v9) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 +; GFX9-NEXT: v_xor_b32_sdwa v10, sext(v9), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_1 +; GFX9-NEXT: v_cvt_f32_i32_sdwa v11, sext(v9) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 +; GFX9-NEXT: v_xor_b32_sdwa v9, sext(v9), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:BYTE_2 +; GFX9-NEXT: v_cvt_f32_i32_sdwa v13, sext(v4) dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:BYTE_2 +; GFX9-NEXT: v_xor_b32_sdwa v14, sext(v4), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_3 +; GFX9-NEXT: v_cvt_f32_i32_sdwa v4, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v15, v2 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v16, v12 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v17, v13 ; GFX9-NEXT: v_rcp_iflag_f32_e32 v18, v4 -; GFX9-NEXT: v_mul_f32_e32 v15, v13, v15 -; GFX9-NEXT: v_mul_f32_e32 v16, v3, v16 +; GFX9-NEXT: v_mul_f32_e32 v15, v3, v15 +; GFX9-NEXT: v_mul_f32_e32 v16, v11, v16 ; GFX9-NEXT: v_trunc_f32_e32 v15, v15 -; GFX9-NEXT: v_ashrrev_i32_e32 v11, 30, v11 -; GFX9-NEXT: v_mul_f32_e32 v17, v13, v17 -; GFX9-NEXT: v_mul_f32_e32 v18, v12, v18 +; GFX9-NEXT: v_ashrrev_i32_e32 v1, 30, v1 +; GFX9-NEXT: v_mul_f32_e32 v17, v3, v17 +; GFX9-NEXT: v_mul_f32_e32 v18, v2, v18 ; GFX9-NEXT: v_trunc_f32_e32 v16, v16 -; GFX9-NEXT: v_mad_f32 v19, -v15, v12, v13 -; GFX9-NEXT: v_ashrrev_i32_e32 v14, 30, v14 -; GFX9-NEXT: v_or_b32_e32 v11, 1, v11 +; GFX9-NEXT: v_mad_f32 v19, -v15, v2, v3 +; GFX9-NEXT: v_ashrrev_i32_e32 v10, 30, v10 +; GFX9-NEXT: v_or_b32_e32 v1, 1, v1 ; GFX9-NEXT: v_trunc_f32_e32 v17, v17 ; GFX9-NEXT: v_trunc_f32_e32 v18, v18 -; GFX9-NEXT: v_mad_f32 v3, -v16, v9, v3 -; GFX9-NEXT: v_cmp_ge_f32_e64 vcc, |v19|, |v12| -; GFX9-NEXT: v_ashrrev_i32_e32 v2, 30, v2 -; GFX9-NEXT: v_or_b32_e32 v14, 1, v14 +; GFX9-NEXT: v_mad_f32 v11, -v16, v12, v11 +; GFX9-NEXT: v_cmp_ge_f32_e64 vcc, |v19|, |v2| +; GFX9-NEXT: v_ashrrev_i32_e32 v9, 30, v9 +; GFX9-NEXT: v_or_b32_e32 v10, 1, v10 ; GFX9-NEXT: v_cvt_i32_f32_e32 v15, v15 ; GFX9-NEXT: v_cvt_i32_f32_e32 v16, v16 -; GFX9-NEXT: v_mad_f32 v13, -v17, v10, v13 +; GFX9-NEXT: v_mad_f32 v3, -v17, v13, v3 ; GFX9-NEXT: v_cvt_i32_f32_e32 v17, v17 -; GFX9-NEXT: v_mad_f32 v20, -v18, v4, v12 +; GFX9-NEXT: v_mad_f32 v20, -v18, v4, v2 ; GFX9-NEXT: v_cvt_i32_f32_e32 v18, v18 -; GFX9-NEXT: v_cndmask_b32_e32 v11, 0, v11, vcc -; GFX9-NEXT: 
v_cmp_ge_f32_e64 vcc, |v3|, |v9| -; GFX9-NEXT: v_ashrrev_i32_e32 v1, 30, v1 -; GFX9-NEXT: v_or_b32_e32 v2, 1, v2 -; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v14, vcc -; GFX9-NEXT: v_cmp_ge_f32_e64 vcc, |v13|, |v10| -; GFX9-NEXT: v_or_b32_e32 v1, 1, v1 -; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc -; GFX9-NEXT: v_cmp_ge_f32_e64 vcc, |v20|, |v4| ; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc -; GFX9-NEXT: v_add_u32_e32 v4, v15, v11 -; GFX9-NEXT: v_add_u32_sdwa v3, v16, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX9-NEXT: v_add_u32_e32 v2, v17, v2 -; GFX9-NEXT: v_add_u32_sdwa v1, v18, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v3, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX9-NEXT: v_cmp_ge_f32_e64 vcc, |v11|, |v12| +; GFX9-NEXT: v_ashrrev_i32_e32 v14, 30, v14 +; GFX9-NEXT: v_or_b32_e32 v9, 1, v9 +; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v10, vcc +; GFX9-NEXT: v_cmp_ge_f32_e64 vcc, |v3|, |v13| +; GFX9-NEXT: v_or_b32_e32 v14, 1, v14 +; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc +; GFX9-NEXT: v_cmp_ge_f32_e64 vcc, |v20|, |v4| +; GFX9-NEXT: v_cndmask_b32_e32 v4, 0, v14, vcc +; GFX9-NEXT: v_add_u32_e32 v1, v15, v1 +; GFX9-NEXT: v_add_u32_sdwa v2, v16, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX9-NEXT: v_add_u32_e32 v3, v17, v3 +; GFX9-NEXT: v_add_u32_sdwa v4, v18, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX9-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX9-NEXT: v_or_b32_sdwa v2, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX9-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:WORD_0 src1_sel:DWORD ; GFX9-NEXT: global_store_dword v[5:6], v1, off ; GFX9-NEXT: global_store_dword v[7:8], v0, off ; GFX9-NEXT: s_waitcnt vmcnt(0) @@ -1876,73 +1856,67 @@ define hidden void @srem_store_div(ptr addrspace(1) %in0, ptr addrspace(1) %in1, ; GFX10-NEXT: global_load_dword v4, v[2:3], off ; GFX10-NEXT: global_load_dword v9, v[0:1], off ; GFX10-NEXT: s_waitcnt vmcnt(1) -; GFX10-NEXT: v_bfe_i32 v1, v4, 0, 8 -; GFX10-NEXT: v_bfe_i32 v2, v4, 16, 8 -; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: v_ashrrev_i32_e32 v10, 24, v9 -; GFX10-NEXT: v_bfe_i32 v11, v4, 8, 8 -; GFX10-NEXT: v_ashrrev_i32_e32 v12, 24, v4 -; GFX10-NEXT: v_bfe_i32 v13, v9, 16, 8 -; GFX10-NEXT: v_xor_b32_e32 v14, v2, v1 -; GFX10-NEXT: v_cvt_f32_i32_e32 v1, v1 -; GFX10-NEXT: v_xor_b32_e32 v16, v10, v11 -; GFX10-NEXT: v_cvt_f32_i32_e32 v11, v11 -; GFX10-NEXT: v_cvt_f32_i32_e32 v15, v2 -; GFX10-NEXT: v_cvt_f32_i32_e32 v10, v10 -; GFX10-NEXT: v_rcp_iflag_f32_e32 v18, v1 -; GFX10-NEXT: v_cvt_f32_i32_e32 v17, v12 -; GFX10-NEXT: v_rcp_iflag_f32_e32 v19, v11 +; GFX10-NEXT: v_cvt_f32_i32_sdwa v2, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 +; GFX10-NEXT: v_cvt_f32_i32_sdwa v13, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 +; GFX10-NEXT: v_cvt_f32_i32_sdwa v3, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_cvt_f32_i32_sdwa v12, sext(v9) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 +; GFX10-NEXT: v_cvt_f32_i32_sdwa v15, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 +; GFX10-NEXT: v_rcp_iflag_f32_e32 v17, v2 +; GFX10-NEXT: v_rcp_iflag_f32_e32 v18, v13 +; GFX10-NEXT: v_rcp_iflag_f32_e32 v19, v3 +; GFX10-NEXT: v_xor_b32_sdwa v1, sext(v4), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:BYTE_0 ; GFX10-NEXT: v_rcp_iflag_f32_e32 v20, v15 -; GFX10-NEXT: v_xor_b32_e32 v2, v12, v2 -; GFX10-NEXT: v_xor_b32_e32 v12, v13, v12 -; GFX10-NEXT: v_rcp_iflag_f32_e32 
v21, v17 -; GFX10-NEXT: v_ashrrev_i32_e32 v14, 30, v14 -; GFX10-NEXT: v_cvt_f32_i32_e32 v13, v13 -; GFX10-NEXT: v_ashrrev_i32_e32 v16, 30, v16 -; GFX10-NEXT: v_mul_f32_e32 v18, v15, v18 -; GFX10-NEXT: v_ashrrev_i32_e32 v2, 30, v2 -; GFX10-NEXT: v_mul_f32_e32 v19, v10, v19 -; GFX10-NEXT: v_mul_f32_e32 v20, v17, v20 -; GFX10-NEXT: v_or_b32_e32 v14, 1, v14 +; GFX10-NEXT: v_xor_b32_sdwa v11, sext(v9), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_1 +; GFX10-NEXT: v_cvt_f32_i32_sdwa v21, sext(v9) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 +; GFX10-NEXT: v_xor_b32_sdwa v14, sext(v4), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_2 +; GFX10-NEXT: v_ashrrev_i32_e32 v1, 30, v1 +; GFX10-NEXT: v_xor_b32_sdwa v16, sext(v9), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:BYTE_3 +; GFX10-NEXT: v_mul_f32_e32 v17, v3, v17 +; GFX10-NEXT: v_mul_f32_e32 v18, v12, v18 +; GFX10-NEXT: v_mul_f32_e32 v19, v15, v19 +; GFX10-NEXT: v_ashrrev_i32_e32 v11, 30, v11 +; GFX10-NEXT: v_or_b32_e32 v1, 1, v1 +; GFX10-NEXT: v_trunc_f32_e32 v17, v17 ; GFX10-NEXT: v_trunc_f32_e32 v18, v18 -; GFX10-NEXT: v_mul_f32_e32 v21, v13, v21 +; GFX10-NEXT: v_mul_f32_e32 v20, v21, v20 ; GFX10-NEXT: v_trunc_f32_e32 v19, v19 +; GFX10-NEXT: v_ashrrev_i32_e32 v14, 30, v14 +; GFX10-NEXT: v_mad_f32 v22, -v17, v2, v3 +; GFX10-NEXT: v_mad_f32 v12, -v18, v13, v12 +; GFX10-NEXT: v_or_b32_e32 v11, 1, v11 ; GFX10-NEXT: v_trunc_f32_e32 v20, v20 +; GFX10-NEXT: v_mad_f32 v23, -v19, v3, v15 +; GFX10-NEXT: v_cmp_ge_f32_e64 vcc_lo, |v22|, |v2| +; GFX10-NEXT: v_ashrrev_i32_e32 v16, 30, v16 +; GFX10-NEXT: v_or_b32_e32 v14, 1, v14 +; GFX10-NEXT: v_mad_f32 v21, -v20, v15, v21 +; GFX10-NEXT: v_cvt_i32_f32_e32 v17, v17 +; GFX10-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc_lo +; GFX10-NEXT: v_cmp_ge_f32_e64 vcc_lo, |v12|, |v13| ; GFX10-NEXT: v_or_b32_e32 v16, 1, v16 -; GFX10-NEXT: v_mad_f32 v22, -v18, v1, v15 -; GFX10-NEXT: v_trunc_f32_e32 v21, v21 -; 
GFX10-NEXT: v_mad_f32 v10, -v19, v11, v10 -; GFX10-NEXT: v_mad_f32 v23, -v20, v15, v17 -; GFX10-NEXT: v_ashrrev_i32_e32 v12, 30, v12 -; GFX10-NEXT: v_cmp_ge_f32_e64 vcc_lo, |v22|, |v1| -; GFX10-NEXT: v_or_b32_e32 v2, 1, v2 -; GFX10-NEXT: v_mad_f32 v13, -v21, v17, v13 ; GFX10-NEXT: v_cvt_i32_f32_e32 v18, v18 -; GFX10-NEXT: v_or_b32_e32 v12, 1, v12 -; GFX10-NEXT: v_cndmask_b32_e32 v1, 0, v14, vcc_lo -; GFX10-NEXT: v_cmp_ge_f32_e64 vcc_lo, |v10|, |v11| ; GFX10-NEXT: v_cvt_i32_f32_e32 v19, v19 ; GFX10-NEXT: v_cvt_i32_f32_e32 v20, v20 -; GFX10-NEXT: v_cvt_i32_f32_e32 v21, v21 +; GFX10-NEXT: v_cndmask_b32_e32 v2, 0, v11, vcc_lo +; GFX10-NEXT: v_cmp_ge_f32_e64 vcc_lo, |v23|, |v3| ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v4 -; GFX10-NEXT: v_cndmask_b32_e32 v10, 0, v16, vcc_lo -; GFX10-NEXT: v_cmp_ge_f32_e64 vcc_lo, |v23|, |v15| -; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v4 -; GFX10-NEXT: v_add_nc_u32_e32 v1, v18, v1 -; GFX10-NEXT: v_add_nc_u32_e32 v10, v19, v10 -; GFX10-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc_lo -; GFX10-NEXT: v_cmp_ge_f32_e64 vcc_lo, |v13|, |v17| -; GFX10-NEXT: v_mul_lo_u32 v1, v1, v4 -; GFX10-NEXT: v_mul_lo_u32 v3, v10, v3 -; GFX10-NEXT: v_add_nc_u32_e32 v2, v20, v2 -; GFX10-NEXT: v_cndmask_b32_e32 v11, 0, v12, vcc_lo +; GFX10-NEXT: v_lshrrev_b32_e32 v10, 8, v4 ; GFX10-NEXT: v_lshrrev_b32_e32 v12, 24, v4 -; GFX10-NEXT: v_mul_lo_u32 v2, v2, v0 -; GFX10-NEXT: v_add_nc_u32_e32 v11, v21, v11 +; GFX10-NEXT: v_add_nc_u32_e32 v1, v17, v1 +; GFX10-NEXT: v_cndmask_b32_e32 v3, 0, v14, vcc_lo +; GFX10-NEXT: v_cmp_ge_f32_e64 vcc_lo, |v21|, |v15| +; GFX10-NEXT: v_add_nc_u32_e32 v2, v18, v2 +; GFX10-NEXT: v_mul_lo_u32 v1, v1, v4 +; GFX10-NEXT: v_add_nc_u32_e32 v3, v19, v3 +; GFX10-NEXT: v_cndmask_b32_e32 v11, 0, v16, vcc_lo +; GFX10-NEXT: v_mul_lo_u32 v2, v2, v10 +; GFX10-NEXT: v_mul_lo_u32 v3, v3, v0 +; GFX10-NEXT: v_add_nc_u32_e32 v11, v20, v11 ; GFX10-NEXT: v_sub_nc_u32_e32 v0, v0, v1 -; GFX10-NEXT: v_sub_nc_u32_sdwa v1, v9, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD 
src0_sel:BYTE_3 src1_sel:DWORD +; GFX10-NEXT: v_sub_nc_u32_sdwa v1, v9, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD ; GFX10-NEXT: v_mul_lo_u32 v10, v11, v12 -; GFX10-NEXT: v_sub_nc_u32_e32 v2, v12, v2 +; GFX10-NEXT: v_sub_nc_u32_e32 v2, v12, v3 ; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX10-NEXT: v_sub_nc_u32_sdwa v3, v9, v10 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX10-NEXT: v_or_b32_sdwa v1, v2, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -1965,74 +1939,68 @@ define hidden void @srem_store_div(ptr addrspace(1) %in0, ptr addrspace(1) %in1, ; GFX9-NEXT: global_load_dword v9, v[0:1], off ; GFX9-NEXT: s_mov_b32 s4, 0x2070306 ; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_bfe_i32 v2, v4, 0, 8 -; GFX9-NEXT: v_bfe_i32 v3, v4, 16, 8 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_ashrrev_i32_e32 v11, 24, v9 -; GFX9-NEXT: v_bfe_i32 v12, v4, 8, 8 -; GFX9-NEXT: v_xor_b32_e32 v16, v3, v2 -; GFX9-NEXT: v_cvt_f32_i32_e32 v2, v2 -; GFX9-NEXT: v_ashrrev_i32_e32 v13, 24, v4 -; GFX9-NEXT: v_xor_b32_e32 v18, v11, v12 -; GFX9-NEXT: v_cvt_f32_i32_e32 v12, v12 -; GFX9-NEXT: v_cvt_f32_i32_e32 v17, v3 -; GFX9-NEXT: v_cvt_f32_i32_e32 v19, v13 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v20, v2 -; GFX9-NEXT: v_bfe_i32 v15, v9, 16, 8 -; GFX9-NEXT: v_cvt_f32_i32_e32 v11, v11 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v21, v12 -; GFX9-NEXT: v_xor_b32_e32 v3, v13, v3 -; GFX9-NEXT: v_xor_b32_e32 v13, v15, v13 -; GFX9-NEXT: v_cvt_f32_i32_e32 v15, v15 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v22, v17 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v23, v19 -; GFX9-NEXT: v_mul_f32_e32 v20, v17, v20 -; GFX9-NEXT: v_mul_f32_e32 v21, v11, v21 +; GFX9-NEXT: v_cvt_f32_i32_sdwa v3, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 +; GFX9-NEXT: v_cvt_f32_i32_sdwa v14, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 +; GFX9-NEXT: v_cvt_f32_i32_sdwa v10, sext(v4) 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 +; GFX9-NEXT: v_cvt_f32_i32_sdwa v16, sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v20, v3 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_cvt_f32_i32_sdwa v13, sext(v9) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v21, v14 +; GFX9-NEXT: v_cvt_f32_i32_sdwa v19, sext(v9) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v22, v10 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v23, v16 +; GFX9-NEXT: v_mul_f32_e32 v20, v10, v20 +; GFX9-NEXT: v_xor_b32_sdwa v2, sext(v4), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:BYTE_0 +; GFX9-NEXT: v_mul_f32_e32 v21, v13, v21 ; GFX9-NEXT: v_trunc_f32_e32 v20, v20 -; GFX9-NEXT: v_ashrrev_i32_e32 v16, 30, v16 -; GFX9-NEXT: v_mul_f32_e32 v22, v19, v22 -; GFX9-NEXT: v_mul_f32_e32 v23, v15, v23 +; GFX9-NEXT: v_xor_b32_sdwa v12, sext(v9), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_1 +; GFX9-NEXT: v_ashrrev_i32_e32 v2, 30, v2 +; GFX9-NEXT: v_mul_f32_e32 v22, v16, v22 +; GFX9-NEXT: v_mul_f32_e32 v23, v19, v23 ; GFX9-NEXT: v_trunc_f32_e32 v21, v21 -; GFX9-NEXT: v_mad_f32 v24, -v20, v2, v17 -; GFX9-NEXT: v_ashrrev_i32_e32 v18, 30, v18 -; GFX9-NEXT: v_or_b32_e32 v16, 1, v16 +; GFX9-NEXT: v_mad_f32 v24, -v20, v3, v10 +; GFX9-NEXT: v_xor_b32_sdwa v15, sext(v4), sext(v4) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:BYTE_2 +; GFX9-NEXT: v_ashrrev_i32_e32 v12, 30, v12 +; GFX9-NEXT: v_or_b32_e32 v2, 1, v2 ; GFX9-NEXT: v_trunc_f32_e32 v22, v22 ; GFX9-NEXT: v_trunc_f32_e32 v23, v23 -; GFX9-NEXT: v_mad_f32 v11, -v21, v12, v11 -; GFX9-NEXT: v_cmp_ge_f32_e64 vcc, |v24|, |v2| -; GFX9-NEXT: v_ashrrev_i32_e32 v3, 30, v3 -; GFX9-NEXT: v_or_b32_e32 v18, 1, v18 +; GFX9-NEXT: v_mad_f32 v13, -v21, v14, v13 +; GFX9-NEXT: v_cmp_ge_f32_e64 vcc, |v24|, |v3| +; GFX9-NEXT: v_xor_b32_sdwa v18, sext(v9), sext(v4) dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:BYTE_2 src1_sel:BYTE_3 +; GFX9-NEXT: v_ashrrev_i32_e32 v15, 30, v15 +; GFX9-NEXT: v_or_b32_e32 v12, 1, v12 ; GFX9-NEXT: v_cvt_i32_f32_e32 v20, v20 ; GFX9-NEXT: v_cvt_i32_f32_e32 v21, v21 -; GFX9-NEXT: v_mad_f32 v25, -v22, v17, v19 +; GFX9-NEXT: v_mad_f32 v25, -v22, v10, v16 ; GFX9-NEXT: v_cvt_i32_f32_e32 v22, v22 -; GFX9-NEXT: v_mad_f32 v15, -v23, v19, v15 +; GFX9-NEXT: v_mad_f32 v19, -v23, v16, v19 ; GFX9-NEXT: v_cvt_i32_f32_e32 v23, v23 -; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v16, vcc -; GFX9-NEXT: v_cmp_ge_f32_e64 vcc, |v11|, |v12| -; GFX9-NEXT: v_ashrrev_i32_e32 v13, 30, v13 -; GFX9-NEXT: v_or_b32_e32 v3, 1, v3 -; GFX9-NEXT: v_cndmask_b32_e32 v11, 0, v18, vcc -; GFX9-NEXT: v_cmp_ge_f32_e64 vcc, |v25|, |v17| -; GFX9-NEXT: v_or_b32_e32 v13, 1, v13 -; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc -; GFX9-NEXT: v_cmp_ge_f32_e64 vcc, |v15|, |v19| -; GFX9-NEXT: v_cndmask_b32_e32 v12, 0, v13, vcc +; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX9-NEXT: v_cmp_ge_f32_e64 vcc, |v13|, |v14| +; GFX9-NEXT: v_ashrrev_i32_e32 v18, 30, v18 +; GFX9-NEXT: v_or_b32_e32 v15, 1, v15 +; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v12, vcc +; GFX9-NEXT: v_cmp_ge_f32_e64 vcc, |v25|, |v10| +; GFX9-NEXT: v_or_b32_e32 v18, 1, v18 +; GFX9-NEXT: v_cndmask_b32_e32 v10, 0, v15, vcc +; GFX9-NEXT: v_cmp_ge_f32_e64 vcc, |v19|, |v16| +; GFX9-NEXT: v_cndmask_b32_e32 v12, 0, v18, vcc ; GFX9-NEXT: v_lshrrev_b32_e32 v0, 16, v4 -; GFX9-NEXT: v_lshrrev_b32_e32 v10, 8, v4 -; GFX9-NEXT: v_lshrrev_b32_e32 v14, 24, v4 +; GFX9-NEXT: v_lshrrev_b32_e32 v11, 8, v4 +; GFX9-NEXT: v_lshrrev_b32_e32 v17, 24, v4 ; GFX9-NEXT: v_add_u32_e32 v2, v20, v2 -; GFX9-NEXT: v_add_u32_e32 v11, v21, v11 -; GFX9-NEXT: v_add_u32_e32 v3, v22, v3 +; GFX9-NEXT: v_add_u32_e32 v3, v21, v3 +; GFX9-NEXT: v_add_u32_e32 v10, v22, v10 ; GFX9-NEXT: v_add_u32_e32 v12, v23, v12 ; GFX9-NEXT: v_perm_b32 v1, v4, v9, s4 ; GFX9-NEXT: v_mul_lo_u32 v2, v2, v4 -; GFX9-NEXT: v_mul_lo_u32 v4, v11, v10 -; GFX9-NEXT: 
v_mul_lo_u32 v3, v3, v0 -; GFX9-NEXT: v_mul_lo_u32 v10, v12, v14 +; GFX9-NEXT: v_mul_lo_u32 v3, v3, v11 +; GFX9-NEXT: v_mul_lo_u32 v4, v10, v0 +; GFX9-NEXT: v_mul_lo_u32 v10, v12, v17 ; GFX9-NEXT: v_sub_u32_e32 v0, v0, v2 -; GFX9-NEXT: v_sub_u32_sdwa v2, v9, v4 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD -; GFX9-NEXT: v_sub_u32_e32 v3, v14, v3 +; GFX9-NEXT: v_sub_u32_sdwa v2, v9, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_sub_u32_e32 v3, v17, v4 ; GFX9-NEXT: v_sub_u32_sdwa v4, v9, v10 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD ; GFX9-NEXT: v_or_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD ; GFX9-NEXT: v_or_b32_sdwa v2, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD @@ -2090,27 +2058,24 @@ define hidden void @sub_store_div(ptr addrspace(1) %in0, ptr addrspace(1) %in1, ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: v_and_b32_e32 v4, 0x3ff, v31 ; GFX9-NEXT: v_lshlrev_b32_e32 v4, 2, v4 -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v4 -; GFX9-NEXT: global_load_dword v2, v[2:3], off ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc -; GFX9-NEXT: global_load_dword v0, v[0:1], off +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc +; GFX9-NEXT: global_load_dword v4, v[0:1], off +; GFX9-NEXT: global_load_dword v9, v[2:3], off ; GFX9-NEXT: s_mov_b32 s4, 0x6070007 -; GFX9-NEXT: s_waitcnt vmcnt(1) -; GFX9-NEXT: v_lshrrev_b32_e32 v1, 16, v2 -; GFX9-NEXT: v_lshrrev_b32_e32 v3, 24, v2 -; GFX9-NEXT: v_sub_u16_sdwa v9, v1, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_perm_b32 v4, v2, v0, s4 -; GFX9-NEXT: v_sub_u16_sdwa v0, v0, v2 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD 
src1_sel:BYTE_1 -; GFX9-NEXT: v_sub_u16_e32 v2, v3, v2 -; GFX9-NEXT: v_sub_u16_e32 v1, v3, v1 -; GFX9-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v1, v1, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GFX9-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; GFX9-NEXT: global_store_dword v[5:6], v0, off -; GFX9-NEXT: global_store_dword v[7:8], v4, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_perm_b32 v0, v9, v4, s4 +; GFX9-NEXT: v_sub_u16_sdwa v1, v4, v9 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 +; GFX9-NEXT: v_sub_u16_sdwa v2, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD +; GFX9-NEXT: v_sub_u16_sdwa v3, v9, v9 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:BYTE_3 +; GFX9-NEXT: v_sub_u16_sdwa v4, v9, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:WORD_1 +; GFX9-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX9-NEXT: v_or_b32_sdwa v2, v4, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD +; GFX9-NEXT: v_or_b32_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD +; GFX9-NEXT: global_store_dword v[5:6], v1, off +; GFX9-NEXT: global_store_dword v[7:8], v0, off ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: s_setpc_b64 s[30:31] %tid = call i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll b/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll index b62d6ee59a8545..24e420b7d657bf 100644 --- a/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll +++ b/llvm/test/CodeGen/AMDGPU/reassoc-mul-add-1-to-mad.ll @@ -1838,10 +1838,9 @@ define <2 x i16> @v_mul_sub_x_v2i16(<2 x i16> %x, <2 x i16> %y) { ; GFX8-LABEL: v_mul_sub_x_v2i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GFX8-NEXT: v_mul_lo_u16_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX8-NEXT: v_mul_lo_u16_sdwa v2, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 ; GFX8-NEXT: v_mul_lo_u16_e32 v1, v0, v1 -; GFX8-NEXT: v_sub_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD +; GFX8-NEXT: v_sub_u16_sdwa v2, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 ; GFX8-NEXT: v_sub_u16_e32 v0, v1, v0 ; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 ; GFX8-NEXT: s_setpc_b64 s[30:31] diff --git a/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll b/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll index c9dbadcbd23157..0f2eedb1923d63 100644 --- a/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll +++ b/llvm/test/CodeGen/AMDGPU/sdwa-peephole.ll @@ -1718,18 +1718,16 @@ define amdgpu_kernel void @mulmul_v2i16(ptr addrspace(1) %out, ptr addrspace(1) ; GFX89-NEXT: v_mov_b32_e32 v3, s1 ; GFX89-NEXT: v_add_u32_e32 v2, vcc, s0, v2 ; GFX89-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; GFX89-NEXT: flat_load_dword v4, v[0:1] ; GFX89-NEXT: flat_load_dword v2, v[2:3] -; GFX89-NEXT: flat_load_dword v3, v[0:1] ; GFX89-NEXT: v_mov_b32_e32 v0, s4 ; GFX89-NEXT: v_mov_b32_e32 v1, s5 -; GFX89-NEXT: s_waitcnt vmcnt(1) -; GFX89-NEXT: v_lshrrev_b32_e32 v4, 16, v2 ; GFX89-NEXT: s_waitcnt vmcnt(0) -; GFX89-NEXT: v_mul_lo_u16_e32 v5, v3, v2 -; GFX89-NEXT: v_mul_lo_u16_sdwa v3, v3, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GFX89-NEXT: v_mul_lo_u16_e32 v2, v5, v2 -; GFX89-NEXT: v_mul_lo_u16_sdwa v3, v3, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX89-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX89-NEXT: v_mul_lo_u16_sdwa v3, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX89-NEXT: v_mul_lo_u16_e32 v4, v4, v2 +; GFX89-NEXT: v_mul_lo_u16_e32 v4, v4, v2 +; GFX89-NEXT: 
v_mul_lo_u16_sdwa v2, v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 +; GFX89-NEXT: v_or_b32_e32 v2, v4, v2 ; GFX89-NEXT: flat_store_dword v[0:1], v2 ; GFX89-NEXT: s_endpgm ; @@ -2205,6 +2203,94 @@ bb2: br label %bb0 } +define amdgpu_kernel void @mac_v2half_same_srcop(ptr addrspace(1) %out, ptr addrspace(1) %ina, ptr addrspace(1) %inb) #0 { +; NOSDWA-LABEL: mac_v2half_same_srcop: +; NOSDWA: ; %bb.0: ; %entry +; NOSDWA-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; NOSDWA-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 +; NOSDWA-NEXT: s_waitcnt lgkmcnt(0) +; NOSDWA-NEXT: v_mov_b32_e32 v0, s6 +; NOSDWA-NEXT: v_mov_b32_e32 v2, s0 +; NOSDWA-NEXT: v_mov_b32_e32 v3, s1 +; NOSDWA-NEXT: v_mov_b32_e32 v1, s7 +; NOSDWA-NEXT: flat_load_dword v2, v[2:3] +; NOSDWA-NEXT: flat_load_dword v3, v[0:1] +; NOSDWA-NEXT: v_mov_b32_e32 v0, s4 +; NOSDWA-NEXT: v_mov_b32_e32 v1, s5 +; NOSDWA-NEXT: s_waitcnt vmcnt(1) +; NOSDWA-NEXT: v_lshrrev_b32_e32 v4, 16, v2 +; NOSDWA-NEXT: s_waitcnt vmcnt(0) +; NOSDWA-NEXT: v_lshrrev_b32_e32 v5, 16, v3 +; NOSDWA-NEXT: v_mac_f16_e32 v5, v4, v4 +; NOSDWA-NEXT: v_lshlrev_b32_e32 v4, 16, v5 +; NOSDWA-NEXT: v_mac_f16_e32 v3, v2, v2 +; NOSDWA-NEXT: v_or_b32_e32 v2, v3, v4 +; NOSDWA-NEXT: flat_store_dword v[0:1], v2 +; NOSDWA-NEXT: s_endpgm +; +; GFX89-LABEL: mac_v2half_same_srcop: +; GFX89: ; %bb.0: ; %entry +; GFX89-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; GFX89-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 +; GFX89-NEXT: s_waitcnt lgkmcnt(0) +; GFX89-NEXT: v_mov_b32_e32 v0, s6 +; GFX89-NEXT: v_mov_b32_e32 v1, s7 +; GFX89-NEXT: v_mov_b32_e32 v2, s0 +; GFX89-NEXT: v_mov_b32_e32 v3, s1 +; GFX89-NEXT: flat_load_dword v4, v[0:1] +; GFX89-NEXT: flat_load_dword v2, v[2:3] +; GFX89-NEXT: v_mov_b32_e32 v0, s4 +; GFX89-NEXT: v_mov_b32_e32 v1, s5 +; GFX89-NEXT: s_waitcnt vmcnt(1) +; GFX89-NEXT: v_lshrrev_b32_e32 v3, 16, v4 +; GFX89-NEXT: s_waitcnt vmcnt(0) +; GFX89-NEXT: v_mac_f16_sdwa v3, v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 
src1_sel:WORD_1 +; GFX89-NEXT: v_lshlrev_b32_e32 v3, 16, v3 +; GFX89-NEXT: v_mac_f16_e32 v4, v2, v2 +; GFX89-NEXT: v_or_b32_e32 v2, v4, v3 +; GFX89-NEXT: flat_store_dword v[0:1], v2 +; GFX89-NEXT: s_endpgm +; +; GFX9-LABEL: mac_v2half_same_srcop: +; GFX9: ; %bb.0: ; %entry +; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 +; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: global_load_dword v1, v0, s[2:3] +; GFX9-NEXT: global_load_dword v2, v0, s[6:7] +; GFX9-NEXT: s_waitcnt vmcnt(1) +; GFX9-NEXT: v_pk_mul_f16 v1, v1, v1 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_pk_add_f16 v1, v1, v2 +; GFX9-NEXT: global_store_dword v0, v1, s[4:5] +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: mac_v2half_same_srcop: +; GFX10: ; %bb.0: ; %entry +; GFX10-NEXT: s_clause 0x1 +; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34 +; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24 +; GFX10-NEXT: v_mov_b32_e32 v0, 0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_clause 0x1 +; GFX10-NEXT: global_load_dword v1, v0, s[2:3] +; GFX10-NEXT: global_load_dword v2, v0, s[6:7] +; GFX10-NEXT: s_waitcnt vmcnt(1) +; GFX10-NEXT: v_pk_mul_f16 v1, v1, v1 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_pk_add_f16 v1, v1, v2 +; GFX10-NEXT: global_store_dword v0, v1, s[4:5] +; GFX10-NEXT: s_endpgm +entry: + %a = load <2 x half>, ptr addrspace(1) %ina, align 4 + %b = load <2 x half>, ptr addrspace(1) %inb, align 4 + %mul = fmul <2 x half> %b, %b + %mac = fadd <2 x half> %mul, %a + store <2 x half> %mac, ptr addrspace(1) %out, align 4 + ret void +} + declare i32 @llvm.amdgcn.workitem.id.x() attributes #0 = { "denormal-fp-math"="preserve-sign,preserve-sign" } diff --git a/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll b/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll index d7a6be51106917..f8c9827ecf7a99 100644 --- a/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/sminmax.v2i16.ll @@ -32,12 +32,12 @@ define 
amdgpu_kernel void @s_abs_v2i16(ptr addrspace(1) %out, <2 x i16> %val) #0 ; GFX9: v_pk_max_i16 [[MAX:v[0-9]+]], [[VAL]], [[SUB]] ; GFX9: v_pk_add_u16 [[ADD:v[0-9]+]], [[MAX]], 2 op_sel_hi:[1,0] +; VI-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0 ; VI-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2 -; VI-DAG: v_lshrrev_b32_e32 v{{[0-9]+}}, 16, ; VI-DAG: v_sub_u16_e32 v{{[0-9]+}}, 0, v{{[0-9]+}} -; VI-DAG: v_sub_u16_e32 v{{[0-9]+}}, 0, v{{[0-9]+}} -; VI-DAG: v_max_i16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; VI-DAG: v_sub_u16_sdwa v{{[0-9]+}}, [[ZERO]], v{{[0-9]+}} ; VI-DAG: v_max_i16_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} +; VI-DAG: v_max_i16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} ; VI: v_add_u16_e32 v{{[0-9]+}}, 2, v{{[0-9]+}} ; VI: v_add_u16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[TWO]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD ; VI-NOT: v_and_b32 From b7599da44983e3921116991bc6de1f10d2c1c3d9 Mon Sep 17 00:00:00 2001 From: RoseZhang03 Date: Fri, 14 Jun 2024 17:14:39 +0000 Subject: [PATCH 126/155] [libc] Fixed NamedType usage in Fenv HeaderSpec Types (#95487) Issue: NamedType<"fenv_t"> and NamedType<"fexcept_t"> were used in Fenv HeaderSpec Types section instead of the actual NamedType Fixed: Changed to FEnvT and FExceptT --- libc/spec/stdc.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index b7375fb4112204..f9c79ee106bbb3 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -134,8 +134,8 @@ def StdC : StandardSpec<"stdc"> { Macro<"FE_DFL_ENV"> ], [ - NamedType<"fenv_t">, - NamedType<"fexcept_t">, + FEnvT, + FExceptT, ], // Types [], // Enumerations [ From 38fd0181a065784afdf9b170fe3988aff01aab66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Fri, 14 Jun 2024 10:21:37 -0700 Subject: [PATCH 127/155] [flang] Lower REDUCE intrinsic for 
reduction op with args by value (#95353) #95297 Updates the runtime entry points to distinguish between reduction operation with arguments passed by value or by reference. Add lowering to support the arguments passed by value. --- .../Optimizer/Builder/Runtime/RTBuilder.h | 22 + .../Optimizer/Builder/Runtime/Reduction.h | 8 +- flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 16 +- .../Optimizer/Builder/Runtime/Reduction.cpp | 468 ++++++++++++++++-- flang/test/Lower/Intrinsics/reduce.f90 | 235 ++++++++- 5 files changed, 674 insertions(+), 75 deletions(-) diff --git a/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h b/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h index 809d5b8d569dc9..845ba385918d0d 100644 --- a/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h +++ b/flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h @@ -64,6 +64,18 @@ using FuncTypeBuilderFunc = mlir::FunctionType (*)(mlir::MLIRContext *); }; \ } +#define REDUCTION_VALUE_OPERATION_MODEL(T) \ + template <> \ + constexpr TypeBuilderFunc \ + getModel>() { \ + return [](mlir::MLIRContext *context) -> mlir::Type { \ + TypeBuilderFunc f{getModel()}; \ + auto refTy = fir::ReferenceType::get(f(context)); \ + return mlir::FunctionType::get(context, {f(context), f(context)}, \ + refTy); \ + }; \ + } + #define REDUCTION_CHAR_OPERATION_MODEL(T) \ template <> \ constexpr TypeBuilderFunc \ @@ -481,17 +493,27 @@ constexpr TypeBuilderFunc getModel() { } REDUCTION_REF_OPERATION_MODEL(std::int8_t) +REDUCTION_VALUE_OPERATION_MODEL(std::int8_t) REDUCTION_REF_OPERATION_MODEL(std::int16_t) +REDUCTION_VALUE_OPERATION_MODEL(std::int16_t) REDUCTION_REF_OPERATION_MODEL(std::int32_t) +REDUCTION_VALUE_OPERATION_MODEL(std::int32_t) REDUCTION_REF_OPERATION_MODEL(std::int64_t) +REDUCTION_VALUE_OPERATION_MODEL(std::int64_t) REDUCTION_REF_OPERATION_MODEL(Fortran::common::int128_t) +REDUCTION_VALUE_OPERATION_MODEL(Fortran::common::int128_t) REDUCTION_REF_OPERATION_MODEL(float) 
+REDUCTION_VALUE_OPERATION_MODEL(float) REDUCTION_REF_OPERATION_MODEL(double) +REDUCTION_VALUE_OPERATION_MODEL(double) REDUCTION_REF_OPERATION_MODEL(long double) +REDUCTION_VALUE_OPERATION_MODEL(long double) REDUCTION_REF_OPERATION_MODEL(std::complex) +REDUCTION_VALUE_OPERATION_MODEL(std::complex) REDUCTION_REF_OPERATION_MODEL(std::complex) +REDUCTION_VALUE_OPERATION_MODEL(std::complex) REDUCTION_CHAR_OPERATION_MODEL(char) REDUCTION_CHAR_OPERATION_MODEL(char16_t) diff --git a/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h b/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h index fedf453a6dc8de..2a40cddc0cc2c8 100644 --- a/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h +++ b/flang/include/flang/Optimizer/Builder/Runtime/Reduction.h @@ -229,8 +229,8 @@ void genIParityDim(fir::FirOpBuilder &builder, mlir::Location loc, /// result value. This is used for COMPLEX, CHARACTER and DERIVED TYPES. void genReduce(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value arrayBox, mlir::Value operation, mlir::Value maskBox, - mlir::Value identity, mlir::Value ordered, - mlir::Value resultBox); + mlir::Value identity, mlir::Value ordered, mlir::Value resultBox, + bool argByRef); /// Generate call to `Reduce` intrinsic runtime routine. This is the version /// that does not take a dim argument and return a scalare result. This is used @@ -238,14 +238,14 @@ void genReduce(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value genReduce(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value arrayBox, mlir::Value operation, mlir::Value maskBox, mlir::Value identity, - mlir::Value ordered); + mlir::Value ordered, bool argByRef); /// Generate call to `Reduce` intrinsic runtime routine. This is the version /// that takes arrays of any rank with a dim argument specified. 
void genReduceDim(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value arrayBox, mlir::Value operation, mlir::Value dim, mlir::Value maskBox, mlir::Value identity, - mlir::Value ordered, mlir::Value resultBox); + mlir::Value ordered, mlir::Value resultBox, bool argByRef); } // namespace fir::runtime diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index 3204369b9328a2..ab106f62aecfbf 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -5745,6 +5745,14 @@ IntrinsicLibrary::genReduce(mlir::Type resultType, int rank = arrayTmp.rank(); assert(rank >= 1); + // Arguements to the reduction operation are passed by reference or value? + bool argByRef = true; + if (auto embox = + mlir::dyn_cast_or_null(operation.getDefiningOp())) { + auto fctTy = mlir::dyn_cast(embox.getFunc().getType()); + argByRef = mlir::isa(fctTy.getInput(0)); + } + mlir::Type ty = array.getType(); mlir::Type arrTy = fir::dyn_cast_ptrOrBoxEleTy(ty); mlir::Type eleTy = mlir::cast(arrTy).getEleTy(); @@ -5772,7 +5780,7 @@ IntrinsicLibrary::genReduce(mlir::Type resultType, if (fir::isa_complex(eleTy) || fir::isa_derived(eleTy)) { mlir::Value result = builder.createTemporary(loc, eleTy); fir::runtime::genReduce(builder, loc, array, operation, mask, identity, - ordered, result); + ordered, result, argByRef); if (fir::isa_derived(eleTy)) return result; return builder.create(loc, result); @@ -5789,11 +5797,11 @@ IntrinsicLibrary::genReduce(mlir::Type resultType, charTy.getLen()); fir::CharBoxValue temp = charHelper.createCharacterTemp(eleTy, len); fir::runtime::genReduce(builder, loc, array, operation, mask, identity, - ordered, temp.getBuffer()); + ordered, temp.getBuffer(), argByRef); return temp; } return fir::runtime::genReduce(builder, loc, array, operation, mask, - identity, ordered); + identity, ordered, argByRef); } // Handle cases that have an array result. 
// Create mutable fir.box to be passed to the runtime for the result. @@ -5804,7 +5812,7 @@ IntrinsicLibrary::genReduce(mlir::Type resultType, fir::factory::getMutableIRBox(builder, loc, resultMutableBox); mlir::Value dim = fir::getBase(args[2]); fir::runtime::genReduceDim(builder, loc, array, operation, dim, mask, - identity, ordered, resultIrBox); + identity, ordered, resultIrBox, argByRef); return readAndAddCleanUp(resultMutableBox, resultType, "REDUCE"); } diff --git a/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp b/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp index c306b50eb56983..18eff937278562 100644 --- a/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp +++ b/flang/lib/Optimizer/Builder/Runtime/Reduction.cpp @@ -476,10 +476,30 @@ struct ForcedReduceReal10 { auto ty = mlir::FloatType::getF80(ctx); auto boxTy = fir::runtime::getModel()(ctx); - auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty); + auto refTy = fir::ReferenceType::get(ty); + auto opTy = mlir::FunctionType::get(ctx, {refTy, refTy}, refTy); auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8)); auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int)); + auto i1Ty = mlir::IntegerType::get(ctx, 1); + return mlir::FunctionType::get( + ctx, {boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty}, {ty}); + }; + } +}; + +/// Placeholder for real*10 version of Reduce Intrinsic +struct ForcedReduceReal10Value { + static constexpr const char *name = + ExpandAndQuoteKey(RTNAME(ReduceReal10Value)); + static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() { + return [](mlir::MLIRContext *ctx) { + auto ty = mlir::FloatType::getF80(ctx); + auto boxTy = + fir::runtime::getModel()(ctx); auto refTy = fir::ReferenceType::get(ty); + auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, refTy); + auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8)); + auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int)); auto i1Ty = mlir::IntegerType::get(ctx, 1); return 
mlir::FunctionType::get( ctx, {boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty}, {ty}); @@ -496,10 +516,30 @@ struct ForcedReduceReal16 { auto ty = mlir::FloatType::getF128(ctx); auto boxTy = fir::runtime::getModel()(ctx); - auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty); + auto refTy = fir::ReferenceType::get(ty); + auto opTy = mlir::FunctionType::get(ctx, {refTy, refTy}, refTy); auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8)); auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int)); + auto i1Ty = mlir::IntegerType::get(ctx, 1); + return mlir::FunctionType::get( + ctx, {boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty}, {ty}); + }; + } +}; + +/// Placeholder for real*16 version of Reduce Intrinsic +struct ForcedReduceReal16Value { + static constexpr const char *name = + ExpandAndQuoteKey(RTNAME(ReduceReal16Value)); + static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() { + return [](mlir::MLIRContext *ctx) { + auto ty = mlir::FloatType::getF128(ctx); + auto boxTy = + fir::runtime::getModel()(ctx); auto refTy = fir::ReferenceType::get(ty); + auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, refTy); + auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8)); + auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int)); auto i1Ty = mlir::IntegerType::get(ctx, 1); return mlir::FunctionType::get( ctx, {boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty}, {ty}); @@ -516,10 +556,32 @@ struct ForcedReduceReal10Dim { auto ty = mlir::FloatType::getF80(ctx); auto boxTy = fir::runtime::getModel()(ctx); - auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty); + auto refTy = fir::ReferenceType::get(ty); + auto opTy = mlir::FunctionType::get(ctx, {refTy, refTy}, refTy); auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8)); auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int)); + auto refBoxTy = fir::ReferenceType::get(boxTy); + auto i1Ty = mlir::IntegerType::get(ctx, 1); + return 
mlir::FunctionType::get( + ctx, {refBoxTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty}, + {}); + }; + } +}; + +/// Placeholder for DIM real*10 with value version of Reduce Intrinsic +struct ForcedReduceReal10DimValue { + static constexpr const char *name = + ExpandAndQuoteKey(RTNAME(ReduceReal10DimValue)); + static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() { + return [](mlir::MLIRContext *ctx) { + auto ty = mlir::FloatType::getF80(ctx); + auto boxTy = + fir::runtime::getModel()(ctx); auto refTy = fir::ReferenceType::get(ty); + auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, refTy); + auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8)); + auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int)); auto refBoxTy = fir::ReferenceType::get(boxTy); auto i1Ty = mlir::IntegerType::get(ctx, 1); return mlir::FunctionType::get( @@ -538,10 +600,32 @@ struct ForcedReduceReal16Dim { auto ty = mlir::FloatType::getF128(ctx); auto boxTy = fir::runtime::getModel()(ctx); - auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty); + auto refTy = fir::ReferenceType::get(ty); + auto opTy = mlir::FunctionType::get(ctx, {refTy, refTy}, refTy); auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8)); auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int)); + auto refBoxTy = fir::ReferenceType::get(boxTy); + auto i1Ty = mlir::IntegerType::get(ctx, 1); + return mlir::FunctionType::get( + ctx, {refBoxTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty}, + {}); + }; + } +}; + +/// Placeholder for DIM real*16 with value version of Reduce Intrinsic +struct ForcedReduceReal16DimValue { + static constexpr const char *name = + ExpandAndQuoteKey(RTNAME(ReduceReal16DimValue)); + static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() { + return [](mlir::MLIRContext *ctx) { + auto ty = mlir::FloatType::getF128(ctx); + auto boxTy = + fir::runtime::getModel()(ctx); auto refTy = fir::ReferenceType::get(ty); + auto opTy = 
mlir::FunctionType::get(ctx, {ty, ty}, refTy); + auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8)); + auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int)); auto refBoxTy = fir::ReferenceType::get(boxTy); auto i1Ty = mlir::IntegerType::get(ctx, 1); return mlir::FunctionType::get( @@ -560,10 +644,30 @@ struct ForcedReduceInteger16 { auto ty = mlir::IntegerType::get(ctx, 128); auto boxTy = fir::runtime::getModel()(ctx); - auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty); + auto refTy = fir::ReferenceType::get(ty); + auto opTy = mlir::FunctionType::get(ctx, {refTy, refTy}, refTy); auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8)); auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int)); + auto i1Ty = mlir::IntegerType::get(ctx, 1); + return mlir::FunctionType::get( + ctx, {boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty}, {ty}); + }; + } +}; + +/// Placeholder for integer*16 with value version of Reduce Intrinsic +struct ForcedReduceInteger16Value { + static constexpr const char *name = + ExpandAndQuoteKey(RTNAME(ReduceInteger16Value)); + static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() { + return [](mlir::MLIRContext *ctx) { + auto ty = mlir::IntegerType::get(ctx, 128); + auto boxTy = + fir::runtime::getModel()(ctx); auto refTy = fir::ReferenceType::get(ty); + auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, refTy); + auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8)); + auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int)); auto i1Ty = mlir::IntegerType::get(ctx, 1); return mlir::FunctionType::get( ctx, {boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty}, {ty}); @@ -580,10 +684,32 @@ struct ForcedReduceInteger16Dim { auto ty = mlir::IntegerType::get(ctx, 128); auto boxTy = fir::runtime::getModel()(ctx); - auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty); + auto refTy = fir::ReferenceType::get(ty); + auto opTy = mlir::FunctionType::get(ctx, {refTy, refTy}, refTy); 
auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8)); auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int)); + auto refBoxTy = fir::ReferenceType::get(boxTy); + auto i1Ty = mlir::IntegerType::get(ctx, 1); + return mlir::FunctionType::get( + ctx, {refBoxTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty}, + {}); + }; + } +}; + +/// Placeholder for DIM integer*16 with value version of Reduce Intrinsic +struct ForcedReduceInteger16DimValue { + static constexpr const char *name = + ExpandAndQuoteKey(RTNAME(ReduceInteger16DimValue)); + static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() { + return [](mlir::MLIRContext *ctx) { + auto ty = mlir::IntegerType::get(ctx, 128); + auto boxTy = + fir::runtime::getModel()(ctx); auto refTy = fir::ReferenceType::get(ty); + auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, refTy); + auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8)); + auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int)); auto refBoxTy = fir::ReferenceType::get(boxTy); auto i1Ty = mlir::IntegerType::get(ctx, 1); return mlir::FunctionType::get( @@ -602,10 +728,31 @@ struct ForcedReduceComplex10 { auto ty = mlir::ComplexType::get(mlir::FloatType::getF80(ctx)); auto boxTy = fir::runtime::getModel()(ctx); - auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty); + auto refTy = fir::ReferenceType::get(ty); + auto opTy = mlir::FunctionType::get(ctx, {refTy, refTy}, refTy); auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8)); auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int)); + auto i1Ty = mlir::IntegerType::get(ctx, 1); + return mlir::FunctionType::get( + ctx, {refTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty}, + {}); + }; + } +}; + +/// Placeholder for complex(10) with value version of Reduce Intrinsic +struct ForcedReduceComplex10Value { + static constexpr const char *name = + ExpandAndQuoteKey(RTNAME(CppReduceComplex10Value)); + static constexpr 
fir::runtime::FuncTypeBuilderFunc getTypeModel() { + return [](mlir::MLIRContext *ctx) { + auto ty = mlir::ComplexType::get(mlir::FloatType::getF80(ctx)); + auto boxTy = + fir::runtime::getModel()(ctx); auto refTy = fir::ReferenceType::get(ty); + auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, refTy); + auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8)); + auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int)); auto i1Ty = mlir::IntegerType::get(ctx, 1); return mlir::FunctionType::get( ctx, {refTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty}, @@ -623,10 +770,32 @@ struct ForcedReduceComplex10Dim { auto ty = mlir::ComplexType::get(mlir::FloatType::getF80(ctx)); auto boxTy = fir::runtime::getModel()(ctx); - auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty); + auto refTy = fir::ReferenceType::get(ty); + auto opTy = mlir::FunctionType::get(ctx, {refTy, refTy}, refTy); auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8)); auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int)); + auto refBoxTy = fir::ReferenceType::get(boxTy); + auto i1Ty = mlir::IntegerType::get(ctx, 1); + return mlir::FunctionType::get( + ctx, {refBoxTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty}, + {}); + }; + } +}; + +/// Placeholder for Dim complex(10) with value version of Reduce Intrinsic +struct ForcedReduceComplex10DimValue { + static constexpr const char *name = + ExpandAndQuoteKey(RTNAME(CppReduceComplex10DimValue)); + static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() { + return [](mlir::MLIRContext *ctx) { + auto ty = mlir::ComplexType::get(mlir::FloatType::getF80(ctx)); + auto boxTy = + fir::runtime::getModel()(ctx); auto refTy = fir::ReferenceType::get(ty); + auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, refTy); + auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8)); + auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int)); auto refBoxTy = fir::ReferenceType::get(boxTy); auto i1Ty = 
mlir::IntegerType::get(ctx, 1); return mlir::FunctionType::get( @@ -645,10 +814,31 @@ struct ForcedReduceComplex16 { auto ty = mlir::ComplexType::get(mlir::FloatType::getF128(ctx)); auto boxTy = fir::runtime::getModel()(ctx); - auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty); + auto refTy = fir::ReferenceType::get(ty); + auto opTy = mlir::FunctionType::get(ctx, {refTy, refTy}, refTy); auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8)); auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int)); + auto i1Ty = mlir::IntegerType::get(ctx, 1); + return mlir::FunctionType::get( + ctx, {refTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty}, + {}); + }; + } +}; + +/// Placeholder for complex(16) with value version of Reduce Intrinsic +struct ForcedReduceComplex16Value { + static constexpr const char *name = + ExpandAndQuoteKey(RTNAME(CppReduceComplex16Value)); + static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() { + return [](mlir::MLIRContext *ctx) { + auto ty = mlir::ComplexType::get(mlir::FloatType::getF128(ctx)); + auto boxTy = + fir::runtime::getModel()(ctx); auto refTy = fir::ReferenceType::get(ty); + auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, refTy); + auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8)); + auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int)); auto i1Ty = mlir::IntegerType::get(ctx, 1); return mlir::FunctionType::get( ctx, {refTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty}, @@ -666,10 +856,32 @@ struct ForcedReduceComplex16Dim { auto ty = mlir::ComplexType::get(mlir::FloatType::getF128(ctx)); auto boxTy = fir::runtime::getModel()(ctx); - auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, ty); + auto refTy = fir::ReferenceType::get(ty); + auto opTy = mlir::FunctionType::get(ctx, {refTy, refTy}, refTy); auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8)); auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int)); + auto refBoxTy = 
fir::ReferenceType::get(boxTy); + auto i1Ty = mlir::IntegerType::get(ctx, 1); + return mlir::FunctionType::get( + ctx, {refBoxTy, boxTy, opTy, strTy, intTy, intTy, boxTy, refTy, i1Ty}, + {}); + }; + } +}; + +/// Placeholder for Dim complex(16) with value version of Reduce Intrinsic +struct ForcedReduceComplex16DimValue { + static constexpr const char *name = + ExpandAndQuoteKey(RTNAME(CppReduceComplex16DimValue)); + static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() { + return [](mlir::MLIRContext *ctx) { + auto ty = mlir::ComplexType::get(mlir::FloatType::getF128(ctx)); + auto boxTy = + fir::runtime::getModel()(ctx); auto refTy = fir::ReferenceType::get(ty); + auto opTy = mlir::FunctionType::get(ctx, {ty, ty}, refTy); + auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8)); + auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int)); auto refBoxTy = fir::ReferenceType::get(boxTy); auto i1Ty = mlir::IntegerType::get(ctx, 1); return mlir::FunctionType::get( @@ -1457,7 +1669,8 @@ GEN_IALL_IANY_IPARITY(IParity) void fir::runtime::genReduce(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value arrayBox, mlir::Value operation, mlir::Value maskBox, mlir::Value identity, - mlir::Value ordered, mlir::Value resultBox) { + mlir::Value ordered, mlir::Value resultBox, + bool argByRef) { mlir::func::FuncOp func; auto ty = arrayBox.getType(); auto arrTy = fir::dyn_cast_ptrOrBoxEleTy(ty); @@ -1472,22 +1685,40 @@ void fir::runtime::genReduce(fir::FirOpBuilder &builder, mlir::Location loc, mlir::MLIRContext *ctx = builder.getContext(); fir::factory::CharacterExprHelper charHelper{builder, loc}; - if (eleTy == fir::ComplexType::get(ctx, 2)) + if (eleTy == fir::ComplexType::get(ctx, 2) && argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); - else if (eleTy == fir::ComplexType::get(ctx, 3)) + else if (eleTy == fir::ComplexType::get(ctx, 2) && !argByRef) + func = fir::runtime::getRuntimeFunc( + loc, builder); + else if (eleTy == 
fir::ComplexType::get(ctx, 3) && argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); - else if (eleTy == fir::ComplexType::get(ctx, 4)) + else if (eleTy == fir::ComplexType::get(ctx, 3) && !argByRef) + func = fir::runtime::getRuntimeFunc( + loc, builder); + else if (eleTy == fir::ComplexType::get(ctx, 4) && argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); - else if (eleTy == fir::ComplexType::get(ctx, 8)) + else if (eleTy == fir::ComplexType::get(ctx, 4) && !argByRef) + func = fir::runtime::getRuntimeFunc( + loc, builder); + else if (eleTy == fir::ComplexType::get(ctx, 8) && argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); - else if (eleTy == fir::ComplexType::get(ctx, 10)) + else if (eleTy == fir::ComplexType::get(ctx, 8) && !argByRef) + func = fir::runtime::getRuntimeFunc( + loc, builder); + else if (eleTy == fir::ComplexType::get(ctx, 10) && argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); - else if (eleTy == fir::ComplexType::get(ctx, 16)) + else if (eleTy == fir::ComplexType::get(ctx, 10) && !argByRef) + func = + fir::runtime::getRuntimeFunc(loc, builder); + else if (eleTy == fir::ComplexType::get(ctx, 16) && argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); + else if (eleTy == fir::ComplexType::get(ctx, 16) && !argByRef) + func = + fir::runtime::getRuntimeFunc(loc, builder); else if (fir::isa_char(eleTy) && charHelper.getCharacterKind(eleTy) == 1) func = fir::runtime::getRuntimeFunc(loc, builder); else if (fir::isa_char(eleTy) && charHelper.getCharacterKind(eleTy) == 2) @@ -1516,7 +1747,8 @@ void fir::runtime::genReduce(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value fir::runtime::genReduce(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value arrayBox, mlir::Value operation, mlir::Value maskBox, - mlir::Value identity, mlir::Value ordered) { + mlir::Value identity, mlir::Value ordered, + bool argByRef) { mlir::func::FuncOp func; auto ty = arrayBox.getType(); auto arrTy = 
fir::dyn_cast_ptrOrBoxEleTy(ty); @@ -1530,44 +1762,97 @@ mlir::Value fir::runtime::genReduce(fir::FirOpBuilder &builder, mlir::isa(eleTy)) && "expect real, interger or logical"); - if (eleTy.isF16()) + if (eleTy.isF16() && argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); - else if (eleTy.isBF16()) + else if (eleTy.isF16() && !argByRef) + func = + fir::runtime::getRuntimeFunc(loc, builder); + else if (eleTy.isBF16() && argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); - else if (eleTy.isF32()) + else if (eleTy.isBF16() && !argByRef) + func = + fir::runtime::getRuntimeFunc(loc, builder); + else if (eleTy.isF32() && argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); - else if (eleTy.isF64()) + else if (eleTy.isF32() && !argByRef) + func = + fir::runtime::getRuntimeFunc(loc, builder); + else if (eleTy.isF64() && argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); - else if (eleTy.isF80()) + else if (eleTy.isF64() && !argByRef) + func = + fir::runtime::getRuntimeFunc(loc, builder); + else if (eleTy.isF80() && argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); - else if (eleTy.isF128()) + else if (eleTy.isF80() && !argByRef) + func = fir::runtime::getRuntimeFunc(loc, builder); + else if (eleTy.isF128() && argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); - else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(1))) + else if (eleTy.isF128() && !argByRef) + func = fir::runtime::getRuntimeFunc(loc, builder); + else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(1)) && + argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); - else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(2))) + else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(1)) && + !argByRef) + func = fir::runtime::getRuntimeFunc(loc, + builder); + else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(2)) && + argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); - else if 
(eleTy.isInteger(builder.getKindMap().getIntegerBitsize(4))) + else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(2)) && + !argByRef) + func = fir::runtime::getRuntimeFunc(loc, + builder); + else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(4)) && + argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); - else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(8))) + else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(4)) && + !argByRef) + func = fir::runtime::getRuntimeFunc(loc, + builder); + else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(8)) && + argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); - else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(16))) + else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(8)) && + !argByRef) + func = fir::runtime::getRuntimeFunc(loc, + builder); + else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(16)) && + argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); - else if (eleTy == fir::LogicalType::get(ctx, 1)) + else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(16)) && + !argByRef) + func = + fir::runtime::getRuntimeFunc(loc, builder); + else if (eleTy == fir::LogicalType::get(ctx, 1) && argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); - else if (eleTy == fir::LogicalType::get(ctx, 2)) + else if (eleTy == fir::LogicalType::get(ctx, 1) && !argByRef) + func = fir::runtime::getRuntimeFunc(loc, + builder); + else if (eleTy == fir::LogicalType::get(ctx, 2) && argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); - else if (eleTy == fir::LogicalType::get(ctx, 4)) + else if (eleTy == fir::LogicalType::get(ctx, 2) && !argByRef) + func = fir::runtime::getRuntimeFunc(loc, + builder); + else if (eleTy == fir::LogicalType::get(ctx, 4) && argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); - else if (eleTy == fir::LogicalType::get(ctx, 8)) + else if (eleTy == 
fir::LogicalType::get(ctx, 4) && !argByRef) + func = fir::runtime::getRuntimeFunc(loc, + builder); + else if (eleTy == fir::LogicalType::get(ctx, 8) && argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); + else if (eleTy == fir::LogicalType::get(ctx, 8) && !argByRef) + func = fir::runtime::getRuntimeFunc(loc, + builder); else fir::intrinsicTypeTODO(builder, eleTy, loc, "REDUCE"); @@ -1586,7 +1871,7 @@ void fir::runtime::genReduceDim(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value arrayBox, mlir::Value operation, mlir::Value dim, mlir::Value maskBox, mlir::Value identity, mlir::Value ordered, - mlir::Value resultBox) { + mlir::Value resultBox, bool argByRef) { mlir::func::FuncOp func; auto ty = arrayBox.getType(); auto arrTy = fir::dyn_cast_ptrOrBoxEleTy(ty); @@ -1595,64 +1880,137 @@ void fir::runtime::genReduceDim(fir::FirOpBuilder &builder, mlir::Location loc, mlir::MLIRContext *ctx = builder.getContext(); fir::factory::CharacterExprHelper charHelper{builder, loc}; - if (eleTy.isF16()) + if (eleTy.isF16() && argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); - else if (eleTy.isBF16()) + else if (eleTy.isF16() && !argByRef) + func = fir::runtime::getRuntimeFunc(loc, + builder); + else if (eleTy.isBF16() && argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); - else if (eleTy.isF32()) + else if (eleTy.isBF16() && !argByRef) + func = fir::runtime::getRuntimeFunc(loc, + builder); + else if (eleTy.isF32() && argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); - else if (eleTy.isF64()) + else if (eleTy.isF32() && !argByRef) + func = fir::runtime::getRuntimeFunc(loc, + builder); + else if (eleTy.isF64() && argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); - else if (eleTy.isF80()) + else if (eleTy.isF64() && !argByRef) + func = fir::runtime::getRuntimeFunc(loc, + builder); + else if (eleTy.isF80() && argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); - else if (eleTy.isF128()) + else if (eleTy.isF80() 
&& !argByRef) + func = + fir::runtime::getRuntimeFunc(loc, builder); + else if (eleTy.isF128() && argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); - else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(1))) + else if (eleTy.isF128() && !argByRef) + func = + fir::runtime::getRuntimeFunc(loc, builder); + else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(1)) && + argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); - else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(2))) + else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(1)) && + !argByRef) + func = fir::runtime::getRuntimeFunc( + loc, builder); + else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(2)) && + argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); - else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(4))) + else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(2)) && + !argByRef) + func = fir::runtime::getRuntimeFunc( + loc, builder); + else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(4)) && + argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); - else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(8))) + else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(4)) && + !argByRef) + func = fir::runtime::getRuntimeFunc( + loc, builder); + else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(8)) && + argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); - else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(16))) + else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(8)) && + !argByRef) + func = fir::runtime::getRuntimeFunc( + loc, builder); + else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(16)) && + argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); - else if (eleTy == fir::ComplexType::get(ctx, 2)) + else if (eleTy.isInteger(builder.getKindMap().getIntegerBitsize(16)) && + !argByRef) + 
func = fir::runtime::getRuntimeFunc(loc, + builder); + else if (eleTy == fir::ComplexType::get(ctx, 2) && argByRef) func = fir::runtime::getRuntimeFunc( loc, builder); - else if (eleTy == fir::ComplexType::get(ctx, 3)) + else if (eleTy == fir::ComplexType::get(ctx, 2) && !argByRef) + func = fir::runtime::getRuntimeFunc( + loc, builder); + else if (eleTy == fir::ComplexType::get(ctx, 3) && argByRef) func = fir::runtime::getRuntimeFunc( loc, builder); - else if (eleTy == fir::ComplexType::get(ctx, 4)) + else if (eleTy == fir::ComplexType::get(ctx, 3) && !argByRef) + func = fir::runtime::getRuntimeFunc( + loc, builder); + else if (eleTy == fir::ComplexType::get(ctx, 4) && argByRef) func = fir::runtime::getRuntimeFunc( loc, builder); - else if (eleTy == fir::ComplexType::get(ctx, 8)) + else if (eleTy == fir::ComplexType::get(ctx, 4) && !argByRef) + func = fir::runtime::getRuntimeFunc( + loc, builder); + else if (eleTy == fir::ComplexType::get(ctx, 8) && argByRef) func = fir::runtime::getRuntimeFunc( loc, builder); - else if (eleTy == fir::ComplexType::get(ctx, 10)) + else if (eleTy == fir::ComplexType::get(ctx, 8) && !argByRef) + func = fir::runtime::getRuntimeFunc( + loc, builder); + else if (eleTy == fir::ComplexType::get(ctx, 10) && argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); - else if (eleTy == fir::ComplexType::get(ctx, 16)) + else if (eleTy == fir::ComplexType::get(ctx, 10) && !argByRef) + func = fir::runtime::getRuntimeFunc(loc, + builder); + else if (eleTy == fir::ComplexType::get(ctx, 16) && argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); - else if (eleTy == fir::LogicalType::get(ctx, 1)) + else if (eleTy == fir::ComplexType::get(ctx, 16) && !argByRef) + func = fir::runtime::getRuntimeFunc(loc, + builder); + else if (eleTy == fir::LogicalType::get(ctx, 1) && argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); - else if (eleTy == fir::LogicalType::get(ctx, 2)) + else if (eleTy == fir::LogicalType::get(ctx, 1) && !argByRef) 
+ func = fir::runtime::getRuntimeFunc( + loc, builder); + else if (eleTy == fir::LogicalType::get(ctx, 2) && argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); - else if (eleTy == fir::LogicalType::get(ctx, 4)) + else if (eleTy == fir::LogicalType::get(ctx, 2) && !argByRef) + func = fir::runtime::getRuntimeFunc( + loc, builder); + else if (eleTy == fir::LogicalType::get(ctx, 4) && argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); - else if (eleTy == fir::LogicalType::get(ctx, 8)) + else if (eleTy == fir::LogicalType::get(ctx, 4) && !argByRef) + func = fir::runtime::getRuntimeFunc( + loc, builder); + else if (eleTy == fir::LogicalType::get(ctx, 8) && argByRef) func = fir::runtime::getRuntimeFunc(loc, builder); + else if (eleTy == fir::LogicalType::get(ctx, 8) && !argByRef) + func = fir::runtime::getRuntimeFunc( + loc, builder); else if (fir::isa_char(eleTy) && charHelper.getCharacterKind(eleTy) == 1) func = fir::runtime::getRuntimeFunc(loc, builder); diff --git a/flang/test/Lower/Intrinsics/reduce.f90 b/flang/test/Lower/Intrinsics/reduce.f90 index 7619edffd529e9..358897b05adcec 100644 --- a/flang/test/Lower/Intrinsics/reduce.f90 +++ b/flang/test/Lower/Intrinsics/reduce.f90 @@ -14,6 +14,12 @@ pure function red_int1(a,b) red_int1 = a + b end function +pure function red_int1_value(a,b) + integer(1), value, intent(in) :: a, b + integer(1) :: red_int1_value + red_int1_value = a + b +end function + subroutine integer1(a, id) integer(1), intent(in) :: a(:) integer(1) :: res, id @@ -25,6 +31,8 @@ subroutine integer1(a, id) res = reduce(a, red_int1, identity=id, ordered = .true.) res = reduce(a, red_int1, [.true., .true., .false.]) + + res = reduce(a, red_int1_value) end subroutine ! CHECK-LABEL: func.func @_QMreduce_modPinteger1( @@ -55,6 +63,7 @@ subroutine integer1(a, id) ! CHECK: %[[BOXED_MASK:.*]] = fir.embox %[[MASK]]#1(%[[SHAPE_C3]]) : (!fir.ref>>, !fir.shape<1>) -> !fir.box>> ! 
CHECK: %[[CONV_MASK:.*]] = fir.convert %[[BOXED_MASK]] : (!fir.box>>) -> !fir.box ! CHECK: fir.call @_FortranAReduceInteger1Ref(%{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}}, %[[CONV_MASK]], %{{.*}}, %false{{.*}}) +! CHECK: fir.call @_FortranAReduceInteger1Value pure function red_int2(a,b) integer(2), intent(in) :: a, b @@ -62,13 +71,21 @@ pure function red_int2(a,b) red_int2 = a + b end function +pure function red_int2_value(a,b) + integer(2), value, intent(in) :: a, b + integer(2) :: red_int2_value + red_int2_value = a + b +end function + subroutine integer2(a) integer(2), intent(in) :: a(:) integer(2) :: res res = reduce(a, red_int2) + res = reduce(a, red_int2_value) end subroutine ! CHECK: fir.call @_FortranAReduceInteger2Ref +! CHECK: fir.call @_FortranAReduceInteger2Value pure function red_int4(a,b) integer(4), intent(in) :: a, b @@ -76,13 +93,21 @@ pure function red_int4(a,b) red_int4 = a + b end function +pure function red_int4_value(a,b) + integer(4), value, intent(in) :: a, b + integer(4) :: red_int4_value + red_int4_value = a + b +end function + subroutine integer4(a) integer(4), intent(in) :: a(:) integer(4) :: res res = reduce(a, red_int4) + res = reduce(a, red_int4_value) end subroutine ! CHECK: fir.call @_FortranAReduceInteger4Ref +! CHECK: fir.call @_FortranAReduceInteger4Value pure function red_int8(a,b) integer(8), intent(in) :: a, b @@ -90,13 +115,21 @@ pure function red_int8(a,b) red_int8 = a + b end function +pure function red_int8_value(a,b) + integer(8), value, intent(in) :: a, b + integer(8) :: red_int8_value + red_int8_value = a + b +end function + subroutine integer8(a) integer(8), intent(in) :: a(:) integer(8) :: res res = reduce(a, red_int8) + res = reduce(a, red_int8_value) end subroutine ! CHECK: fir.call @_FortranAReduceInteger8Ref +! 
CHECK: fir.call @_FortranAReduceInteger8Value pure function red_int16(a,b) integer(16), intent(in) :: a, b @@ -104,13 +137,21 @@ pure function red_int16(a,b) red_int16 = a + b end function +pure function red_int16_value(a,b) + integer(16), value, intent(in) :: a, b + integer(16) :: red_int16_value + red_int16_value = a + b +end function + subroutine integer16(a) integer(16), intent(in) :: a(:) integer(16) :: res res = reduce(a, red_int16) + res = reduce(a, red_int16_value) end subroutine ! CHECK: fir.call @_FortranAReduceInteger16Ref +! CHECK: fir.call @_FortranAReduceInteger16Value pure function red_real2(a,b) real(2), intent(in) :: a, b @@ -118,13 +159,21 @@ pure function red_real2(a,b) red_real2 = a + b end function +pure function red_real2_value(a,b) + real(2), value, intent(in) :: a, b + real(2) :: red_real2_value + red_real2_value = a + b +end function + subroutine real2(a) real(2), intent(in) :: a(:) real(2) :: res res = reduce(a, red_real2) + res = reduce(a, red_real2_value) end subroutine ! CHECK: fir.call @_FortranAReduceReal2Ref +! CHECK: fir.call @_FortranAReduceReal2Value pure function red_real3(a,b) real(3), intent(in) :: a, b @@ -132,13 +181,21 @@ pure function red_real3(a,b) red_real3 = a + b end function +pure function red_real3_value(a,b) + real(3), value, intent(in) :: a, b + real(3) :: red_real3_value + red_real3_value = a + b +end function + subroutine real3(a) real(3), intent(in) :: a(:) real(3) :: res res = reduce(a, red_real3) + res = reduce(a, red_real3_value) end subroutine ! CHECK: fir.call @_FortranAReduceReal3Ref +! 
CHECK: fir.call @_FortranAReduceReal3Value pure function red_real4(a,b) real(4), intent(in) :: a, b @@ -146,13 +203,21 @@ pure function red_real4(a,b) red_real4 = a + b end function +pure function red_real4_value(a,b) + real(4), value, intent(in) :: a, b + real(4) :: red_real4_value + red_real4_value = a + b +end function + subroutine real4(a) real(4), intent(in) :: a(:) real(4) :: res res = reduce(a, red_real4) + res = reduce(a, red_real4_value) end subroutine ! CHECK: fir.call @_FortranAReduceReal4Ref +! CHECK: fir.call @_FortranAReduceReal4Value pure function red_real8(a,b) real(8), intent(in) :: a, b @@ -160,13 +225,21 @@ pure function red_real8(a,b) red_real8 = a + b end function +pure function red_real8_value(a,b) + real(8), value, intent(in) :: a, b + real(8) :: red_real8_value + red_real8_value = a + b +end function + subroutine real8(a) real(8), intent(in) :: a(:) real(8) :: res res = reduce(a, red_real8) + res = reduce(a, red_real8_value) end subroutine ! CHECK: fir.call @_FortranAReduceReal8Ref +! CHECK: fir.call @_FortranAReduceReal8Value pure function red_real10(a,b) real(10), intent(in) :: a, b @@ -174,13 +247,21 @@ pure function red_real10(a,b) red_real10 = a + b end function +pure function red_real10_value(a,b) + real(10), value, intent(in) :: a, b + real(10) :: red_real10_value + red_real10_value = a + b +end function + subroutine real10(a) real(10), intent(in) :: a(:) real(10) :: res res = reduce(a, red_real10) + res = reduce(a, red_real10_value) end subroutine ! CHECK: fir.call @_FortranAReduceReal10Ref +! 
CHECK: fir.call @_FortranAReduceReal10Value pure function red_real16(a,b) real(16), intent(in) :: a, b @@ -188,13 +269,21 @@ pure function red_real16(a,b) red_real16 = a + b end function +pure function red_real16_value(a,b) + real(16), value, intent(in) :: a, b + real(16) :: red_real16_value + red_real16_value = a + b +end function + subroutine real16(a) real(16), intent(in) :: a(:) real(16) :: res res = reduce(a, red_real16) + res = reduce(a, red_real16_value) end subroutine ! CHECK: fir.call @_FortranAReduceReal16Ref +! CHECK: fir.call @_FortranAReduceReal16Value pure function red_complex2(a,b) complex(2), intent(in) :: a, b @@ -202,13 +291,21 @@ pure function red_complex2(a,b) red_complex2 = a + b end function +pure function red_complex2_value(a,b) + complex(2), value, intent(in) :: a, b + complex(2) :: red_complex2_value + red_complex2_value = a + b +end function + subroutine complex2(a) complex(2), intent(in) :: a(:) complex(2) :: res res = reduce(a, red_complex2) + res = reduce(a, red_complex2_value) end subroutine -! CHECK: fir.call @_FortranACppReduceComplex2 +! CHECK: fir.call @_FortranACppReduceComplex2Ref +! CHECK: fir.call @_FortranACppReduceComplex2Value pure function red_complex3(a,b) complex(3), intent(in) :: a, b @@ -216,13 +313,21 @@ pure function red_complex3(a,b) red_complex3 = a + b end function +pure function red_complex3_value(a,b) + complex(3), value, intent(in) :: a, b + complex(3) :: red_complex3_value + red_complex3_value = a + b +end function + subroutine complex3(a) complex(3), intent(in) :: a(:) complex(3) :: res res = reduce(a, red_complex3) + res = reduce(a, red_complex3_value) end subroutine -! CHECK: fir.call @_FortranACppReduceComplex3 +! CHECK: fir.call @_FortranACppReduceComplex3Ref +! 
CHECK: fir.call @_FortranACppReduceComplex3Value pure function red_complex4(a,b) complex(4), intent(in) :: a, b @@ -230,13 +335,21 @@ pure function red_complex4(a,b) red_complex4 = a + b end function +pure function red_complex4_value(a,b) + complex(4), value, intent(in) :: a, b + complex(4) :: red_complex4_value + red_complex4_value = a + b +end function + subroutine complex4(a) complex(4), intent(in) :: a(:) complex(4) :: res res = reduce(a, red_complex4) + res = reduce(a, red_complex4_value) end subroutine -! CHECK: fir.call @_FortranACppReduceComplex4 +! CHECK: fir.call @_FortranACppReduceComplex4Ref +! CHECK: fir.call @_FortranACppReduceComplex4Value pure function red_complex8(a,b) complex(8), intent(in) :: a, b @@ -244,13 +357,21 @@ pure function red_complex8(a,b) red_complex8 = a + b end function +pure function red_complex8_value(a,b) + complex(8), value, intent(in) :: a, b + complex(8) :: red_complex8_value + red_complex8_value = a + b +end function + subroutine complex8(a) complex(8), intent(in) :: a(:) complex(8) :: res res = reduce(a, red_complex8) + res = reduce(a, red_complex8_value) end subroutine -! CHECK: fir.call @_FortranACppReduceComplex8 +! CHECK: fir.call @_FortranACppReduceComplex8Ref +! CHECK: fir.call @_FortranACppReduceComplex8Value pure function red_complex10(a,b) complex(10), intent(in) :: a, b @@ -258,13 +379,21 @@ pure function red_complex10(a,b) red_complex10 = a + b end function +pure function red_complex10_value(a,b) + complex(10), value, intent(in) :: a, b + complex(10) :: red_complex10_value + red_complex10_value = a + b +end function + subroutine complex10(a) complex(10), intent(in) :: a(:) complex(10) :: res res = reduce(a, red_complex10) + res = reduce(a, red_complex10_value) end subroutine -! CHECK: fir.call @_FortranACppReduceComplex10 +! CHECK: fir.call @_FortranACppReduceComplex10Ref +! 
CHECK: fir.call @_FortranACppReduceComplex10Value pure function red_complex16(a,b) complex(16), intent(in) :: a, b @@ -272,13 +401,21 @@ pure function red_complex16(a,b) red_complex16 = a + b end function +pure function red_complex16_value(a,b) + complex(16), value, intent(in) :: a, b + complex(16) :: red_complex16_value + red_complex16_value = a + b +end function + subroutine complex16(a) complex(16), intent(in) :: a(:) complex(16) :: res res = reduce(a, red_complex16) + res = reduce(a, red_complex16_value) end subroutine -! CHECK: fir.call @_FortranACppReduceComplex16 +! CHECK: fir.call @_FortranACppReduceComplex16Ref +! CHECK: fir.call @_FortranACppReduceComplex16Value pure function red_log1(a,b) logical(1), intent(in) :: a, b @@ -286,13 +423,21 @@ pure function red_log1(a,b) red_log1 = a .and. b end function +pure function red_log1_value(a,b) + logical(1), value, intent(in) :: a, b + logical(1) :: red_log1_value + red_log1_value = a .and. b +end function + subroutine log1(a) logical(1), intent(in) :: a(:) logical(1) :: res res = reduce(a, red_log1) + res = reduce(a, red_log1_value) end subroutine ! CHECK: fir.call @_FortranAReduceLogical1Ref +! CHECK: fir.call @_FortranAReduceLogical1Value pure function red_log2(a,b) logical(2), intent(in) :: a, b @@ -300,13 +445,21 @@ pure function red_log2(a,b) red_log2 = a .and. b end function +pure function red_log2_value(a,b) + logical(2), value, intent(in) :: a, b + logical(2) :: red_log2_value + red_log2_value = a .and. b +end function + subroutine log2(a) logical(2), intent(in) :: a(:) logical(2) :: res res = reduce(a, red_log2) + res = reduce(a, red_log2_value) end subroutine ! CHECK: fir.call @_FortranAReduceLogical2Ref +! CHECK: fir.call @_FortranAReduceLogical2Value pure function red_log4(a,b) logical(4), intent(in) :: a, b @@ -314,13 +467,21 @@ pure function red_log4(a,b) red_log4 = a .and. 
b end function +pure function red_log4_value(a,b) + logical(4), value, intent(in) :: a, b + logical(4) :: red_log4_value + red_log4_value = a .and. b +end function + subroutine log4(a) logical(4), intent(in) :: a(:) logical(4) :: res res = reduce(a, red_log4) + res = reduce(a, red_log4_value) end subroutine ! CHECK: fir.call @_FortranAReduceLogical4Ref +! CHECK: fir.call @_FortranAReduceLogical4Value pure function red_log8(a,b) logical(8), intent(in) :: a, b @@ -328,13 +489,21 @@ pure function red_log8(a,b) red_log8 = a .and. b end function +pure function red_log8_value(a,b) + logical(8), value, intent(in) :: a, b + logical(8) :: red_log8_value + red_log8_value = a .and. b +end function + subroutine log8(a) logical(8), intent(in) :: a(:) logical(8) :: res res = reduce(a, red_log8) + res = reduce(a, red_log8_value) end subroutine ! CHECK: fir.call @_FortranAReduceLogical8Ref +! CHECK: fir.call @_FortranAReduceLogical8Value pure function red_char1(a,b) character(1), intent(in) :: a, b @@ -401,189 +570,231 @@ subroutine integer1dim(a, id) integer(1), allocatable :: res(:) res = reduce(a, red_int1, 2) + res = reduce(a, red_int1_value, 2) end subroutine ! CHECK: fir.call @_FortranAReduceInteger1DimRef +! CHECK: fir.call @_FortranAReduceInteger1DimValue subroutine integer2dim(a, id) integer(2), intent(in) :: a(:,:) integer(2), allocatable :: res(:) res = reduce(a, red_int2, 2) + res = reduce(a, red_int2_value, 2) end subroutine ! CHECK: fir.call @_FortranAReduceInteger2DimRef +! CHECK: fir.call @_FortranAReduceInteger2DimValue subroutine integer4dim(a, id) integer(4), intent(in) :: a(:,:) integer(4), allocatable :: res(:) res = reduce(a, red_int4, 2) + res = reduce(a, red_int4_value, 2) end subroutine ! CHECK: fir.call @_FortranAReduceInteger4DimRef +! 
CHECK: fir.call @_FortranAReduceInteger4DimValue subroutine integer8dim(a, id) integer(8), intent(in) :: a(:,:) integer(8), allocatable :: res(:) res = reduce(a, red_int8, 2) + res = reduce(a, red_int8_value, 2) end subroutine ! CHECK: fir.call @_FortranAReduceInteger8DimRef +! CHECK: fir.call @_FortranAReduceInteger8DimValue subroutine integer16dim(a, id) integer(16), intent(in) :: a(:,:) integer(16), allocatable :: res(:) res = reduce(a, red_int16, 2) + res = reduce(a, red_int16_value, 2) end subroutine ! CHECK: fir.call @_FortranAReduceInteger16DimRef +! CHECK: fir.call @_FortranAReduceInteger16DimValue subroutine real2dim(a, id) real(2), intent(in) :: a(:,:) real(2), allocatable :: res(:) res = reduce(a, red_real2, 2) + res = reduce(a, red_real2_value, 2) end subroutine ! CHECK: fir.call @_FortranAReduceReal2DimRef +! CHECK: fir.call @_FortranAReduceReal2DimValue subroutine real3dim(a, id) real(3), intent(in) :: a(:,:) real(3), allocatable :: res(:) res = reduce(a, red_real3, 2) + res = reduce(a, red_real3_value, 2) end subroutine ! CHECK: fir.call @_FortranAReduceReal3DimRef +! CHECK: fir.call @_FortranAReduceReal3DimValue subroutine real4dim(a, id) real(4), intent(in) :: a(:,:) real(4), allocatable :: res(:) res = reduce(a, red_real4, 2) + res = reduce(a, red_real4_value, 2) end subroutine ! CHECK: fir.call @_FortranAReduceReal4DimRef +! CHECK: fir.call @_FortranAReduceReal4DimValue subroutine real8dim(a, id) real(8), intent(in) :: a(:,:) real(8), allocatable :: res(:) res = reduce(a, red_real8, 2) + res = reduce(a, red_real8_value, 2) end subroutine ! CHECK: fir.call @_FortranAReduceReal8DimRef +! CHECK: fir.call @_FortranAReduceReal8DimValue subroutine real10dim(a, id) real(10), intent(in) :: a(:,:) real(10), allocatable :: res(:) res = reduce(a, red_real10, 2) + res = reduce(a, red_real10_value, 2) end subroutine ! CHECK: fir.call @_FortranAReduceReal10DimRef +! 
CHECK: fir.call @_FortranAReduceReal10DimValue subroutine real16dim(a, id) real(16), intent(in) :: a(:,:) real(16), allocatable :: res(:) res = reduce(a, red_real16, 2) + res = reduce(a, red_real16_value, 2) end subroutine ! CHECK: fir.call @_FortranAReduceReal16DimRef +! CHECK: fir.call @_FortranAReduceReal16DimValue subroutine complex2dim(a, id) complex(2), intent(in) :: a(:,:) complex(2), allocatable :: res(:) res = reduce(a, red_complex2, 2) + res = reduce(a, red_complex2_value, 2) end subroutine -! CHECK: fir.call @_FortranACppReduceComplex2Dim +! CHECK: fir.call @_FortranACppReduceComplex2DimRef +! CHECK: fir.call @_FortranACppReduceComplex2DimValue subroutine complex3dim(a, id) complex(3), intent(in) :: a(:,:) complex(3), allocatable :: res(:) res = reduce(a, red_complex3, 2) + res = reduce(a, red_complex3_value, 2) end subroutine -! CHECK: fir.call @_FortranACppReduceComplex3Dim +! CHECK: fir.call @_FortranACppReduceComplex3DimRef +! CHECK: fir.call @_FortranACppReduceComplex3DimValue subroutine complex4dim(a, id) complex(4), intent(in) :: a(:,:) complex(4), allocatable :: res(:) res = reduce(a, red_complex4, 2) + res = reduce(a, red_complex4_value, 2) end subroutine -! CHECK: fir.call @_FortranACppReduceComplex4Dim +! CHECK: fir.call @_FortranACppReduceComplex4DimRef +! CHECK: fir.call @_FortranACppReduceComplex4DimValue subroutine complex8dim(a, id) complex(8), intent(in) :: a(:,:) complex(8), allocatable :: res(:) res = reduce(a, red_complex8, 2) + res = reduce(a, red_complex8_value, 2) end subroutine -! CHECK: fir.call @_FortranACppReduceComplex8Dim +! CHECK: fir.call @_FortranACppReduceComplex8DimRef +! CHECK: fir.call @_FortranACppReduceComplex8DimValue subroutine complex10dim(a, id) complex(10), intent(in) :: a(:,:) complex(10), allocatable :: res(:) res = reduce(a, red_complex10, 2) + res = reduce(a, red_complex10_value, 2) end subroutine -! CHECK: fir.call @_FortranACppReduceComplex10Dim +! CHECK: fir.call @_FortranACppReduceComplex10DimRef +! 
CHECK: fir.call @_FortranACppReduceComplex10DimValue subroutine complex16dim(a, id) complex(16), intent(in) :: a(:,:) complex(16), allocatable :: res(:) res = reduce(a, red_complex16, 2) + res = reduce(a, red_complex16_value, 2) end subroutine -! CHECK: fir.call @_FortranACppReduceComplex16Dim +! CHECK: fir.call @_FortranACppReduceComplex16DimRef +! CHECK: fir.call @_FortranACppReduceComplex16DimValue subroutine logical1dim(a, id) logical(1), intent(in) :: a(:,:) logical(1), allocatable :: res(:) res = reduce(a, red_log1, 2) + res = reduce(a, red_log1_value, 2) end subroutine ! CHECK: fir.call @_FortranAReduceLogical1DimRef +! CHECK: fir.call @_FortranAReduceLogical1DimValue subroutine logical2dim(a, id) logical(2), intent(in) :: a(:,:) logical(2), allocatable :: res(:) res = reduce(a, red_log2, 2) + res = reduce(a, red_log2_value, 2) end subroutine ! CHECK: fir.call @_FortranAReduceLogical2DimRef +! CHECK: fir.call @_FortranAReduceLogical2DimValue subroutine logical4dim(a, id) logical(4), intent(in) :: a(:,:) logical(4), allocatable :: res(:) res = reduce(a, red_log4, 2) + res = reduce(a, red_log4_value, 2) end subroutine ! CHECK: fir.call @_FortranAReduceLogical4DimRef +! CHECK: fir.call @_FortranAReduceLogical4DimValue subroutine logical8dim(a, id) logical(8), intent(in) :: a(:,:) logical(8), allocatable :: res(:) res = reduce(a, red_log8, 2) + res = reduce(a, red_log8_value, 2) end subroutine ! CHECK: fir.call @_FortranAReduceLogical8DimRef +! 
CHECK: fir.call @_FortranAReduceLogical8DimValue subroutine testtypeDim(a) type(t1), intent(in) :: a(:,:) From 29d857f183fe6159c3265d6cee8c87419eb615ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Fri, 14 Jun 2024 10:21:58 -0700 Subject: [PATCH 128/155] [flang] Add stack reclaim pass to reclaim allocas in loop (#95309) Some passes in the flang pipeline are creating `fir.alloca` operation like `hlfir.concat`. When these allocas are located in a loop, the stack can quickly be used too much leading to segfaults. This behavior can be seen in https://github.com/jacobwilliams/json-fortran/blob/master/src/tests/jf_test_36.F90 This patch insert a call to LLVM stacksave/stackrestore in the body of the loop to reclaim the alloca in its scope. This PR is an alternative implementation to #95173 --- .../flang/Optimizer/Transforms/Passes.h | 1 + .../flang/Optimizer/Transforms/Passes.td | 9 ++++ flang/include/flang/Tools/CLOptions.inc | 1 + flang/lib/Optimizer/Transforms/CMakeLists.txt | 1 + .../lib/Optimizer/Transforms/StackReclaim.cpp | 52 +++++++++++++++++++ flang/test/Driver/bbc-mlir-pass-pipeline.f90 | 4 ++ .../test/Driver/mlir-debug-pass-pipeline.f90 | 4 ++ flang/test/Driver/mlir-pass-pipeline.f90 | 4 ++ flang/test/Fir/basic-program.fir | 4 ++ flang/test/Transforms/stack-reclaime.fir | 14 +++++ 10 files changed, 94 insertions(+) create mode 100644 flang/lib/Optimizer/Transforms/StackReclaim.cpp create mode 100644 flang/test/Transforms/stack-reclaime.fir diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h index 9fa819e2bf502e..1ca1539e76fc63 100644 --- a/flang/include/flang/Optimizer/Transforms/Passes.h +++ b/flang/include/flang/Optimizer/Transforms/Passes.h @@ -49,6 +49,7 @@ namespace fir { #define GEN_PASS_DECL_OPENACCDATAOPERANDCONVERSION #define 
GEN_PASS_DECL_ADDDEBUGINFO #define GEN_PASS_DECL_STACKARRAYS +#define GEN_PASS_DECL_STACKRECLAIM #define GEN_PASS_DECL_LOOPVERSIONING #define GEN_PASS_DECL_ADDALIASTAGS #define GEN_PASS_DECL_OMPMAPINFOFINALIZATIONPASS diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td index 7a3baca4c19dac..27aee5650e75d5 100644 --- a/flang/include/flang/Optimizer/Transforms/Passes.td +++ b/flang/include/flang/Optimizer/Transforms/Passes.td @@ -260,6 +260,15 @@ def StackArrays : Pass<"stack-arrays", "mlir::ModuleOp"> { let dependentDialects = [ "fir::FIROpsDialect" ]; } +def StackReclaim : Pass<"stack-reclaim"> { + let summary = "Insert stacksave/stackrestore in region with allocas"; + let description = [{ + Insert stacksave/stackrestore in loop region to reclaim alloca done in its + scope. + }]; + let dependentDialects = [ "mlir::LLVM::LLVMDialect" ]; +} + def AddAliasTags : Pass<"fir-add-alias-tags", "mlir::ModuleOp"> { let summary = "Add tbaa tags to operations that implement FirAliasAnalysisOpInterface"; let description = [{ diff --git a/flang/include/flang/Tools/CLOptions.inc b/flang/include/flang/Tools/CLOptions.inc index 2a0cfc04aa3509..df396e04b2a762 100644 --- a/flang/include/flang/Tools/CLOptions.inc +++ b/flang/include/flang/Tools/CLOptions.inc @@ -295,6 +295,7 @@ inline void createDefaultFIROptimizerPassPipeline( if (pc.AliasAnalysis && !disableFirAliasTags && !useOldAliasTags) pm.addPass(fir::createAddAliasTags()); + addNestedPassToAllTopLevelOperations(pm, fir::createStackReclaim); // convert control flow to CFG form fir::addCfgConversionPass(pm, pc); pm.addPass(mlir::createConvertSCFToCFPass()); diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt index 5ef930fdb2c2f5..149afdf601c936 100644 --- a/flang/lib/Optimizer/Transforms/CMakeLists.txt +++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt @@ -21,6 +21,7 @@ add_flang_library(FIRTransforms 
OMPFunctionFiltering.cpp OMPMapInfoFinalization.cpp OMPMarkDeclareTarget.cpp + StackReclaim.cpp VScaleAttr.cpp FunctionAttr.cpp DebugTypeGenerator.cpp diff --git a/flang/lib/Optimizer/Transforms/StackReclaim.cpp b/flang/lib/Optimizer/Transforms/StackReclaim.cpp new file mode 100644 index 00000000000000..e5e0e4eab82982 --- /dev/null +++ b/flang/lib/Optimizer/Transforms/StackReclaim.cpp @@ -0,0 +1,52 @@ +//===- StackReclaim.cpp -- Insert stacksave/stackrestore in region --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "flang/Common/Fortran.h" +#include "flang/Optimizer/Dialect/FIRDialect.h" +#include "flang/Optimizer/Dialect/FIROps.h" +#include "flang/Optimizer/Transforms/Passes.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" +#include "mlir/IR/Matchers.h" +#include "mlir/Pass/Pass.h" + +namespace fir { +#define GEN_PASS_DEF_STACKRECLAIM +#include "flang/Optimizer/Transforms/Passes.h.inc" +} // namespace fir + +using namespace mlir; + +namespace { + +class StackReclaimPass : public fir::impl::StackReclaimBase { +public: + using StackReclaimBase::StackReclaimBase; + + void runOnOperation() override; +}; +} // namespace + +void StackReclaimPass::runOnOperation() { + auto *op = getOperation(); + auto *context = &getContext(); + mlir::OpBuilder builder(context); + mlir::Type voidPtr = mlir::LLVM::LLVMPointerType::get(context); + + op->walk([&](fir::DoLoopOp loopOp) { + mlir::Location loc = loopOp.getLoc(); + + if (!loopOp.getRegion().getOps().empty()) { + builder.setInsertionPointToStart(&loopOp.getRegion().front()); + auto stackSaveOp = builder.create(loc, voidPtr); + + auto *terminator = loopOp.getRegion().back().getTerminator(); + builder.setInsertionPoint(terminator); + builder.create(loc, 
stackSaveOp); + } + }); +} diff --git a/flang/test/Driver/bbc-mlir-pass-pipeline.f90 b/flang/test/Driver/bbc-mlir-pass-pipeline.f90 index c94b98c7c58053..5520d750e2ce1c 100644 --- a/flang/test/Driver/bbc-mlir-pass-pipeline.f90 +++ b/flang/test/Driver/bbc-mlir-pass-pipeline.f90 @@ -50,12 +50,16 @@ ! CHECK-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private'] ! CHECK-NEXT: 'fir.global' Pipeline +! CHECK-NEXT: StackReclaim ! CHECK-NEXT: CFGConversion ! CHECK-NEXT: 'func.func' Pipeline +! CHECK-NEXT: StackReclaim ! CHECK-NEXT: CFGConversion ! CHECK-NEXT: 'omp.declare_reduction' Pipeline +! CHECK-NEXT: StackReclaim ! CHECK-NEXT: CFGConversion ! CHECK-NEXT: 'omp.private' Pipeline +! CHECK-NEXT: StackReclaim ! CHECK-NEXT: CFGConversion ! CHECK-NEXT: SCFToControlFlow diff --git a/flang/test/Driver/mlir-debug-pass-pipeline.f90 b/flang/test/Driver/mlir-debug-pass-pipeline.f90 index 49b1f8c5c31343..6e9846fa422e55 100644 --- a/flang/test/Driver/mlir-debug-pass-pipeline.f90 +++ b/flang/test/Driver/mlir-debug-pass-pipeline.f90 @@ -77,12 +77,16 @@ ! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private'] ! ALL-NEXT: 'fir.global' Pipeline +! ALL-NEXT: StackReclaim ! ALL-NEXT: CFGConversion ! ALL-NEXT: 'func.func' Pipeline +! ALL-NEXT: StackReclaim ! ALL-NEXT: CFGConversion ! ALL-NEXT: 'omp.declare_reduction' Pipeline +! ALL-NEXT: StackReclaim ! ALL-NEXT: CFGConversion ! ALL-NEXT: 'omp.private' Pipeline +! ALL-NEXT: StackReclaim ! ALL-NEXT: CFGConversion ! ALL-NEXT: SCFToControlFlow ! ALL-NEXT: Canonicalizer diff --git a/flang/test/Driver/mlir-pass-pipeline.f90 b/flang/test/Driver/mlir-pass-pipeline.f90 index 8e1a3d43edd1c5..db4551e93fe64c 100644 --- a/flang/test/Driver/mlir-pass-pipeline.f90 +++ b/flang/test/Driver/mlir-pass-pipeline.f90 @@ -85,12 +85,16 @@ ! ALL-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private'] ! ALL-NEXT: 'fir.global' Pipeline +! 
ALL-NEXT: StackReclaim ! ALL-NEXT: CFGConversion ! ALL-NEXT: 'func.func' Pipeline +! ALL-NEXT: StackReclaim ! ALL-NEXT: CFGConversion ! ALL-NEXT: 'omp.declare_reduction' Pipeline +! ALL-NEXT: StackReclaim ! ALL-NEXT: CFGConversion ! ALL-NEXT: 'omp.private' Pipeline +! ALL-NEXT: StackReclaim ! ALL-NEXT: CFGConversion ! ALL-NEXT: SCFToControlFlow diff --git a/flang/test/Fir/basic-program.fir b/flang/test/Fir/basic-program.fir index dd184d99cb8096..7bbfd709b0aaf6 100644 --- a/flang/test/Fir/basic-program.fir +++ b/flang/test/Fir/basic-program.fir @@ -85,12 +85,16 @@ func.func @_QQmain() { // PASSES-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private'] // PASSES-NEXT: 'fir.global' Pipeline +// PASSES-NEXT: StackReclaim // PASSES-NEXT: CFGConversion // PASSES-NEXT: 'func.func' Pipeline +// PASSES-NEXT: StackReclaim // PASSES-NEXT: CFGConversion // PASSES-NEXT: 'omp.declare_reduction' Pipeline +// PASSES-NEXT: StackReclaim // PASSES-NEXT: CFGConversion // PASSES-NEXT: 'omp.private' Pipeline +// PASSES-NEXT: StackReclaim // PASSES-NEXT: CFGConversion // PASSES-NEXT: SCFToControlFlow diff --git a/flang/test/Transforms/stack-reclaime.fir b/flang/test/Transforms/stack-reclaime.fir new file mode 100644 index 00000000000000..b53cc960357512 --- /dev/null +++ b/flang/test/Transforms/stack-reclaime.fir @@ -0,0 +1,14 @@ +// RUN: fir-opt --split-input-file --stack-reclaim %s | FileCheck %s + +func.func @alloca_in_loop(%lb : index, %ub : index, %step : index, %b : i1, %addr : !fir.ref) { + fir.do_loop %iv = %lb to %ub step %step unordered { + %0 = fir.alloca !fir.box>> + } + return +} + +// CHECK-LABEL: func.func @alloca_in_loop +// CHECK: fir.do_loop +// CHECK: %[[STACKPTR:.*]] = llvm.intr.stacksave : !llvm.ptr +// CHECK: %{{.*}} = fir.alloca !fir.box>> +// CHECK: llvm.intr.stackrestore %0 : !llvm.ptr From a43d79af782e2730b170c04db49f4c3040399d97 Mon Sep 17 00:00:00 2001 From: Peiming Liu Date: Fri, 14 Jun 2024 10:31:29 -0700 Subject: 
[PATCH 129/155] [mlir][sparse] add canonicalization patterns for IterateOp. (#95569) --- .../SparseTensor/IR/SparseTensorOps.td | 8 +++++ .../SparseTensor/IR/SparseTensorDialect.cpp | 34 +++++++++++++++++++ .../Dialect/SparseTensor/canonicalize.mlir | 18 ++++++++++ 3 files changed, 60 insertions(+) diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td index 5ae6f9f3443f8c..b2089924291cdf 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td @@ -1601,6 +1601,13 @@ def IterateOp : SparseTensor_Op<"iterate", BlockArgument getIterator() { return getRegion().getArguments().front(); } + std::optional getLvlCrd(Level lvl) { + if (getCrdUsedLvls()[lvl]) { + uint64_t mask = (static_cast(0x01u) << lvl) - 1; + return getCrds()[llvm::popcount(mask & getCrdUsedLvls())]; + } + return std::nullopt; + } Block::BlockArgListType getCrds() { // The first block argument is iterator, the remaining arguments are // referenced coordinates. 
@@ -1613,6 +1620,7 @@ def IterateOp : SparseTensor_Op<"iterate", let hasVerifier = 1; let hasRegionVerifier = 1; + let hasCanonicalizer = 1; let hasCustomAssemblyFormat = 1; } diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp index 232d25d718c652..ac711769ed2eab 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp +++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp @@ -24,6 +24,7 @@ #include "mlir/IR/Matchers.h" #include "mlir/IR/OpImplementation.h" #include "mlir/IR/PatternMatch.h" +#include "llvm/ADT/Bitset.h" #include "llvm/ADT/TypeSwitch.h" #include "llvm/Support/FormatVariadic.h" @@ -2266,6 +2267,39 @@ LogicalResult ExtractIterSpaceOp::verify() { return success(); } +struct RemoveUnusedLvlCrds : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(IterateOp iterateOp, + PatternRewriter &rewriter) const override { + LevelSet newUsedLvls(0); + llvm::BitVector toRemove(iterateOp.getBody()->getNumArguments()); + for (unsigned i = 0, e = iterateOp.getSpaceDim(); i < e; i++) { + if (auto crd = iterateOp.getLvlCrd(i)) { + if (crd->getUsers().empty()) + toRemove.set(crd->getArgNumber()); + else + newUsedLvls.set(i); + } + } + + // All coordinates are used. 
+ if (toRemove.none()) + return failure(); + + rewriter.startOpModification(iterateOp); + iterateOp.setCrdUsedLvls(newUsedLvls); + iterateOp.getBody()->eraseArguments(toRemove); + rewriter.finalizeOpModification(iterateOp); + return success(); + } +}; + +void IterateOp::getCanonicalizationPatterns(mlir::RewritePatternSet &results, + mlir::MLIRContext *context) { + results.add(context); +} + ParseResult IterateOp::parse(OpAsmParser &parser, OperationState &result) { OpAsmParser::Argument iterator; OpAsmParser::UnresolvedOperand iterSpace; diff --git a/mlir/test/Dialect/SparseTensor/canonicalize.mlir b/mlir/test/Dialect/SparseTensor/canonicalize.mlir index b1d3d7916c142d..ceb82cab516ed2 100644 --- a/mlir/test/Dialect/SparseTensor/canonicalize.mlir +++ b/mlir/test/Dialect/SparseTensor/canonicalize.mlir @@ -21,3 +21,21 @@ func.func @sparse_slice_canonicalize(%arg0 : tensor, %arg1 : i %0 = tensor.extract_slice %arg0[%c0, %arg1, %c1] [%c4, %c1, %arg2] [%c1, %c1, %c1] : tensor to tensor return %0 : tensor } + +// ----- + +#CSR = #sparse_tensor.encoding<{ + map = (i, j) -> (i : dense, j : compressed) +}> + +// Make sure that the first unused coordinate is optimized. +// CHECK-LABEL: @sparse_iterate_canonicalize +// CHECK: sparse_tensor.iterate {{.*}} at(_, %{{.*}}) +func.func @sparse_iterate_canonicalize(%sp : tensor) { + %l1 = sparse_tensor.extract_iteration_space %sp lvls = 0 to 2 + : tensor -> !sparse_tensor.iter_space<#CSR, lvls = 0 to 2> + sparse_tensor.iterate %it1 in %l1 at (%coord0, %coord1) : !sparse_tensor.iter_space<#CSR, lvls = 0 to 2> { + "test.op"(%coord1) : (index) -> () + } + return +} From eca988aa4420f33810f9830c80ff9f149b7928ff Mon Sep 17 00:00:00 2001 From: Haowei Wu Date: Fri, 14 Jun 2024 10:31:14 -0700 Subject: [PATCH 130/155] Revert "[libc] printf, putchar and vprintf in bareemetal entrypoints (#95436)" This reverts commit b1de42a81d838bb0c6dea7d2436820a2456c730b, which breaks libc build for baremetal targets. 
--- libc/config/baremetal/arm/entrypoints.txt | 3 --- libc/config/baremetal/riscv/entrypoints.txt | 3 --- 2 files changed, 6 deletions(-) diff --git a/libc/config/baremetal/arm/entrypoints.txt b/libc/config/baremetal/arm/entrypoints.txt index 2930d718fdb210..7fb82c60a1bb85 100644 --- a/libc/config/baremetal/arm/entrypoints.txt +++ b/libc/config/baremetal/arm/entrypoints.txt @@ -80,11 +80,8 @@ set(TARGET_LIBC_ENTRYPOINTS # stdio.h entrypoints libc.src.stdio.remove - libc.src.stdio.printf - libc.src.stdio.putchar libc.src.stdio.sprintf libc.src.stdio.snprintf - libc.src.stdio.vprintf libc.src.stdio.vsprintf libc.src.stdio.vsnprintf diff --git a/libc/config/baremetal/riscv/entrypoints.txt b/libc/config/baremetal/riscv/entrypoints.txt index 6d38676b1e8a81..b769b43f03a2c6 100644 --- a/libc/config/baremetal/riscv/entrypoints.txt +++ b/libc/config/baremetal/riscv/entrypoints.txt @@ -80,11 +80,8 @@ set(TARGET_LIBC_ENTRYPOINTS # stdio.h entrypoints libc.src.stdio.remove - libc.src.stdio.printf - libc.src.stdio.putchar libc.src.stdio.sprintf libc.src.stdio.snprintf - libc.src.stdio.vprintf libc.src.stdio.vsprintf libc.src.stdio.vsnprintf From 6f538f6a2d3224efda985e9eb09012fa4275ea92 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Fri, 14 Jun 2024 17:41:47 +0000 Subject: [PATCH 131/155] Revert "Recommit "[VPlan] First step towards VPlan cost modeling. (#92555)"" This reverts commit 90fd99c0795711e1cf762a02b29b0a702f86a264. This reverts commit 43e6f46936e177e47de6627a74b047ba27561b44. Causes crashes, see comments on https://github.com/llvm/llvm-project/pull/92555. 
--- .../Vectorize/LoopVectorizationPlanner.h | 17 +- .../Transforms/Vectorize/LoopVectorize.cpp | 236 ++---------------- llvm/lib/Transforms/Vectorize/VPlan.cpp | 86 ------- llvm/lib/Transforms/Vectorize/VPlan.h | 71 +----- .../lib/Transforms/Vectorize/VPlanRecipes.cpp | 35 --- .../Transforms/Vectorize/VPlanTransforms.cpp | 5 - llvm/lib/Transforms/Vectorize/VPlanValue.h | 3 +- .../RISCV/riscv-vector-reverse.ll | 2 - 8 files changed, 27 insertions(+), 428 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h index 6011e160762202..c03c278fcebe78 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h @@ -344,16 +344,6 @@ class LoopVectorizationPlanner { /// A builder used to construct the current plan. VPBuilder Builder; - /// Computes the cost of \p Plan for vectorization factor \p VF. - /// - /// The current implementation requires access to the - /// LoopVectorizationLegality to handle inductions and reductions, which is - /// why it is kept separate from the VPlan-only cost infrastructure. - /// - /// TODO: Move to VPlan::cost once the use of LoopVectorizationLegality has - /// been retired. - InstructionCost cost(VPlan &Plan, ElementCount VF) const; - public: LoopVectorizationPlanner( Loop *L, LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI, @@ -375,9 +365,6 @@ class LoopVectorizationPlanner { /// Return the best VPlan for \p VF. VPlan &getBestPlanFor(ElementCount VF) const; - /// Return the most profitable plan and fix its VF to the most profitable one. - VPlan &getBestPlan() const; - /// Generate the IR code for the vectorized loop captured in VPlan \p BestPlan /// according to the best selected \p VF and \p UF. /// @@ -456,9 +443,7 @@ class LoopVectorizationPlanner { ElementCount MinVF); /// \return The most profitable vectorization factor and the cost of that VF. 
- /// This method checks every VF in \p CandidateVFs. This is now only used to - /// verify the decisions by the new VPlan-based cost-model and will be retired - /// once the VPlan-based cost-model is stabilized. + /// This method checks every VF in \p CandidateVFs. VectorizationFactor selectVectorizationFactor(const ElementCountSet &CandidateVFs); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 9fc068a0689268..37b8023e1fcf2f 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -290,7 +290,7 @@ static cl::opt ForceTargetMaxVectorInterleaveFactor( cl::desc("A flag that overrides the target's max interleave factor for " "vectorized loops.")); -cl::opt ForceTargetInstructionCost( +static cl::opt ForceTargetInstructionCost( "force-target-instruction-cost", cl::init(0), cl::Hidden, cl::desc("A flag that overrides the target's expected cost for " "an instruction to a single constant value. Mostly " @@ -412,6 +412,14 @@ static bool hasIrregularType(Type *Ty, const DataLayout &DL) { return DL.getTypeAllocSizeInBits(Ty) != DL.getTypeSizeInBits(Ty); } +/// A helper function that returns the reciprocal of the block probability of +/// predicated blocks. If we return X, we are assuming the predicated block +/// will execute once for every X iterations of the loop header. +/// +/// TODO: We should use actual block probability here, if available. Currently, +/// we always assume predicated blocks have a 50% chance of executing. +static unsigned getReciprocalPredBlockProb() { return 2; } + /// Returns "best known" trip count for the specified loop \p L as defined by /// the following procedure: /// 1) Returns exact trip count if it is known. @@ -1613,16 +1621,6 @@ class LoopVectorizationCostModel { /// \p VF is the vectorization factor chosen for the original loop. 
bool isEpilogueVectorizationProfitable(const ElementCount VF) const; - /// Return the cost of instructions in an inloop reduction pattern, if I is - /// part of that pattern. - std::optional - getReductionPatternCost(Instruction *I, ElementCount VF, Type *VectorTy, - TTI::TargetCostKind CostKind) const; - - /// Returns the execution time cost of an instruction for a given vector - /// width. Vector width of one means scalar. - VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF); - private: unsigned NumPredStores = 0; @@ -1648,11 +1646,21 @@ class LoopVectorizationCostModel { /// of elements. ElementCount getMaxLegalScalableVF(unsigned MaxSafeElements); + /// Returns the execution time cost of an instruction for a given vector + /// width. Vector width of one means scalar. + VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF); + /// The cost-computation logic from getInstructionCost which provides /// the vector type as an output parameter. InstructionCost getInstructionCost(Instruction *I, ElementCount VF, Type *&VectorTy); + /// Return the cost of instructions in an inloop reduction pattern, if I is + /// part of that pattern. + std::optional + getReductionPatternCost(Instruction *I, ElementCount VF, Type *VectorTy, + TTI::TargetCostKind CostKind) const; + /// Calculate vectorization cost of memory instruction \p I. InstructionCost getMemoryInstructionCost(Instruction *I, ElementCount VF); @@ -7280,10 +7288,7 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) { if (!MaxFactors.hasVector()) return VectorizationFactor::Disabled(); - // Select the optimal vectorization factor according to the legacy cost-model. - // This is now only used to verify the decisions by the new VPlan-based - // cost-model and will be retired once the VPlan-based cost-model is - // stabilized. + // Select the optimal vectorization factor. 
VectorizationFactor VF = selectVectorizationFactor(VFCandidates); assert((VF.Width.isScalar() || VF.ScalarCost > 0) && "when vectorizing, the scalar cost must be non-zero."); if (!hasPlanWithVF(VF.Width)) { @@ -7294,196 +7299,6 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) { return VF; } -InstructionCost VPCostContext::getLegacyCost(Instruction *UI, - ElementCount VF) const { - return CM.getInstructionCost(UI, VF).first; -} - -bool VPCostContext::skipCostComputation(Instruction *UI, bool IsVector) const { - return (IsVector && CM.VecValuesToIgnore.contains(UI)) || - SkipCostComputation.contains(UI); -} - -InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan, - ElementCount VF) const { - InstructionCost Cost = 0; - LLVMContext &LLVMCtx = OrigLoop->getHeader()->getContext(); - VPCostContext CostCtx(CM.TTI, Legal->getWidestInductionType(), LLVMCtx, CM); - - // Cost modeling for inductions is inaccurate in the legacy cost model - // compared to the recipes that are generated. To match here initially during - // VPlan cost model bring up directly use the induction costs from the legacy - // cost model. Note that we do this as pre-processing; the VPlan may not have - // any recipes associated with the original induction increment instruction - // and may replace truncates with VPWidenIntOrFpInductionRecipe. We precompute - // the cost of both induction increment instructions that are represented by - // recipes and those that are not, to avoid distinguishing between them here, - // and skip all recipes that represent induction increments (the former case) - // later on, if they exist, to avoid counting them twice. Similarly we - // pre-compute the cost of any optimized truncates. - // TODO: Switch to more accurate costing based on VPlan. 
- for (const auto &[IV, IndDesc] : Legal->getInductionVars()) { - Instruction *IVInc = cast( - IV->getIncomingValueForBlock(OrigLoop->getLoopLatch())); - if (CostCtx.SkipCostComputation.insert(IVInc).second) { - InstructionCost InductionCost = CostCtx.getLegacyCost(IVInc, VF); - LLVM_DEBUG({ - dbgs() << "Cost of " << InductionCost << " for VF " << VF - << ":\n induction increment " << *IVInc << "\n"; - IVInc->dump(); - }); - Cost += InductionCost; - } - for (User *U : IV->users()) { - auto *CI = cast(U); - if (!CostCtx.CM.isOptimizableIVTruncate(CI, VF)) - continue; - assert(!CostCtx.SkipCostComputation.contains(CI) && - "Same cast for multiple inductions?"); - CostCtx.SkipCostComputation.insert(CI); - InstructionCost CastCost = CostCtx.getLegacyCost(CI, VF); - LLVM_DEBUG({ - dbgs() << "Cost of " << CastCost << " for VF " << VF - << ":\n induction cast " << *CI << "\n"; - CI->dump(); - }); - Cost += CastCost; - } - } - - /// Compute the cost of all exiting conditions of the loop using the legacy - /// cost model. This is to match the legacy behavior, which adds the cost of - /// all exit conditions. Note that this over-estimates the cost, as there will - /// be a single condition to control the vector loop. - SmallVector Exiting; - CM.TheLoop->getExitingBlocks(Exiting); - SetVector ExitInstrs; - // Collect all exit conditions. - for (BasicBlock *EB : Exiting) { - auto *Term = dyn_cast(EB->getTerminator()); - if (!Term) - continue; - if (auto *CondI = dyn_cast(Term->getOperand(0))) { - ExitInstrs.insert(CondI); - } - } - // Compute the cost of all instructions only feeding the exit conditions. 
- for (unsigned I = 0; I != ExitInstrs.size(); ++I) { - Instruction *CondI = ExitInstrs[I]; - if (!OrigLoop->contains(CondI) || - !CostCtx.SkipCostComputation.insert(CondI).second) - continue; - Cost += CostCtx.getLegacyCost(CondI, VF); - for (Value *Op : CondI->operands()) { - auto *OpI = dyn_cast(Op); - if (!OpI || any_of(OpI->users(), [&ExitInstrs](User *U) { - return !ExitInstrs.contains(cast(U)); - })) - continue; - ExitInstrs.insert(OpI); - } - } - - // The legacy cost model has special logic to compute the cost of in-loop - // reductions, which may be smaller than the sum of all instructions involved - // in the reduction. For AnyOf reductions, VPlan codegen may remove the select - // which the legacy cost model uses to assign cost. Pre-compute their costs - // for now. - // TODO: Switch to costing based on VPlan once the logic has been ported. - for (const auto &[RedPhi, RdxDesc] : Legal->getReductionVars()) { - if (!CM.isInLoopReduction(RedPhi) && - !RecurrenceDescriptor::isAnyOfRecurrenceKind( - RdxDesc.getRecurrenceKind())) - continue; - - // AnyOf reduction codegen may remove the select. To match the legacy cost - // model, pre-compute the cost for AnyOf reductions here. - if (RecurrenceDescriptor::isAnyOfRecurrenceKind( - RdxDesc.getRecurrenceKind())) { - auto *Select = cast(*find_if( - RedPhi->users(), [](User *U) { return isa(U); })); - assert(!CostCtx.SkipCostComputation.contains(Select) && - "reduction op visited multiple times"); - CostCtx.SkipCostComputation.insert(Select); - auto ReductionCost = CostCtx.getLegacyCost(Select, VF); - LLVM_DEBUG(dbgs() << "Cost of " << ReductionCost << " for VF " << VF - << ":\n any-of reduction " << *Select << "\n"); - Cost += ReductionCost; - continue; - } - - const auto &ChainOps = RdxDesc.getReductionOpChain(RedPhi, OrigLoop); - SetVector ChainOpsAndOperands(ChainOps.begin(), - ChainOps.end()); - // Also include the operands of instructions in the chain, as the cost-model - // may mark extends as free. 
- for (auto *ChainOp : ChainOps) { - for (Value *Op : ChainOp->operands()) { - if (auto *I = dyn_cast(Op)) - ChainOpsAndOperands.insert(I); - } - } - - // Pre-compute the cost for I, if it has a reduction pattern cost. - for (Instruction *I : ChainOpsAndOperands) { - auto ReductionCost = CM.getReductionPatternCost( - I, VF, ToVectorTy(I->getType(), VF), TTI::TCK_RecipThroughput); - if (!ReductionCost) - continue; - - assert(!CostCtx.SkipCostComputation.contains(I) && - "reduction op visited multiple times"); - CostCtx.SkipCostComputation.insert(I); - LLVM_DEBUG(dbgs() << "Cost of " << ReductionCost << " for VF " << VF - << ":\n in-loop reduction " << *I << "\n"); - Cost += *ReductionCost; - } - } - - // Now compute and add the VPlan-based cost. - Cost += Plan.cost(VF, CostCtx); - LLVM_DEBUG(dbgs() << "Cost for VF " << VF << ": " << Cost << "\n"); - return Cost; -} - -VPlan &LoopVectorizationPlanner::getBestPlan() const { - // If there is a single VPlan with a single VF, return it directly. - VPlan &FirstPlan = *VPlans[0]; - if (VPlans.size() == 1 && size(FirstPlan.vectorFactors()) == 1) - return FirstPlan; - - VPlan *BestPlan = &FirstPlan; - ElementCount ScalarVF = ElementCount::getFixed(1); - assert(hasPlanWithVF(ScalarVF) && - "More than a single plan/VF w/o any plan having scalar VF"); - - InstructionCost ScalarCost = cost(getBestPlanFor(ScalarVF), ScalarVF); - VectorizationFactor BestFactor(ScalarVF, ScalarCost, ScalarCost); - - bool ForceVectorization = Hints.getForce() == LoopVectorizeHints::FK_Enabled; - if (ForceVectorization) { - // Ignore scalar width, because the user explicitly wants vectorization. - // Initialize cost to max so that VF = 2 is, at least, chosen during cost - // evaluation. 
- BestFactor.Cost = InstructionCost::getMax(); - } - - for (auto &P : VPlans) { - for (ElementCount VF : P->vectorFactors()) { - if (VF.isScalar()) - continue; - InstructionCost Cost = cost(*P, VF); - VectorizationFactor CurrentFactor(VF, Cost, ScalarCost); - if (isMoreProfitable(CurrentFactor, BestFactor)) { - BestFactor = CurrentFactor; - BestPlan = &*P; - } - } - } - BestPlan->setVF(BestFactor.Width); - return *BestPlan; -} - VPlan &LoopVectorizationPlanner::getBestPlanFor(ElementCount VF) const { assert(count_if(VPlans, [VF](const VPlanPtr &Plan) { return Plan->hasVF(VF); }) == @@ -10342,15 +10157,8 @@ bool LoopVectorizePass::processLoop(Loop *L) { VF.MinProfitableTripCount, IC, &LVL, &CM, BFI, PSI, Checks); - VPlan &BestPlan = LVP.getBestPlan(); - assert(size(BestPlan.vectorFactors()) == 1 && - "Plan should have a single VF"); - ElementCount Width = *BestPlan.vectorFactors().begin(); - LLVM_DEBUG(dbgs() << "VF picked by VPlan cost model: " << Width - << "\n"); - assert(VF.Width == Width && - "VPlan cost model and legacy cost model disagreed"); - LVP.executePlan(Width, IC, BestPlan, LB, DT, false); + VPlan &BestPlan = LVP.getBestPlanFor(VF.Width); + LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false); ++LoopsVectorized; // Add metadata to disable runtime unrolling a scalar loop when there diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index ad6a7183208300..f17be451e6846a 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -747,64 +747,6 @@ void VPRegionBlock::execute(VPTransformState *State) { State->Instance.reset(); } -InstructionCost VPBasicBlock::cost(ElementCount VF, VPCostContext &Ctx) { - InstructionCost Cost = 0; - for (VPRecipeBase &R : Recipes) - Cost += R.cost(VF, Ctx); - return Cost; -} - -InstructionCost VPRegionBlock::cost(ElementCount VF, VPCostContext &Ctx) { - if (!isReplicator()) { - InstructionCost Cost = 0; - for (VPBlockBase *Block : 
vp_depth_first_shallow(getEntry())) - Cost += Block->cost(VF, Ctx); - return Cost; - } - - // Compute the cost of a replicate region. Replicating isn't supported for - // scalable vectors, return an invalid cost for them. - // TODO: Discard scalable VPlans with replicate recipes earlier after - // construction. - if (VF.isScalable()) - return InstructionCost::getInvalid(); - - // First compute the cost of the conditionally executed recipes, followed by - // account for the branching cost, except if the mask is a header mask or - // uniform condition. - using namespace llvm::VPlanPatternMatch; - VPBasicBlock *Then = cast(getEntry()->getSuccessors()[0]); - InstructionCost ThenCost = Then->cost(VF, Ctx); - - // Note the cost estimates below closely match the current legacy cost model. - auto *BOM = cast(&getEntryBasicBlock()->front()); - VPValue *Cond = BOM->getOperand(0); - - // Check if Cond is a uniform compare or a header mask and don't account for - // branching costs. A uniform condition corresponding to a single branch per - // VF, and the header mask will always be true except in the last iteration. - if (vputils::isUniformBoolean(Cond) || - vputils::isHeaderMask(Cond, *getPlan())) - return ThenCost; - - // For the scalar case, we may not always execute the original predicated - // block, Thus, scale the block's cost by the probability of executing it. - if (VF.isScalar()) - return ThenCost / getReciprocalPredBlockProb(); - - // Add the cost for branches around scalarized and predicated blocks. - TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; - - auto *Vec_i1Ty = VectorType::get(IntegerType::getInt1Ty(Ctx.LLVMCtx), VF); - auto FixedVF = VF.getFixedValue(); // Known to be non scalable. 
- InstructionCost Cost = ThenCost; - Cost += Ctx.TTI.getScalarizationOverhead(Vec_i1Ty, APInt::getAllOnes(FixedVF), - /*Insert*/ false, /*Extract*/ true, - CostKind); - Cost += Ctx.TTI.getCFInstrCost(Instruction::Br, CostKind) * FixedVF; - return Cost; -} - #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPRegionBlock::print(raw_ostream &O, const Twine &Indent, VPSlotTracker &SlotTracker) const { @@ -978,12 +920,6 @@ void VPlan::execute(VPTransformState *State) { "DT not preserved correctly"); } -InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) { - // For now only return the cost of the vector loop region, ignoring any other - // blocks, like the preheader or middle blocks. - return getVectorLoopRegion()->cost(VF, Ctx); -} - #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void VPlan::printLiveIns(raw_ostream &O) const { VPSlotTracker SlotTracker(this); @@ -1518,25 +1454,3 @@ VPValue *vputils::getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr, Plan.addSCEVExpansion(Expr, Expanded); return Expanded; } - -bool vputils::isUniformBoolean(VPValue *Cond) { - if (match(Cond, m_Not(m_VPValue()))) - Cond = Cond->getDefiningRecipe()->getOperand(0); - auto *R = Cond->getDefiningRecipe(); - if (!R) - return true; - // TODO: match additional patterns preserving uniformity of booleans, e.g., - // AND/OR/etc. 
- return match(R, m_Binary(m_VPValue(), m_VPValue())) && - all_of(R->operands(), [](VPValue *Op) { - return vputils::isUniformAfterVectorization(Op); - }); -} - -bool vputils::isHeaderMask(VPValue *V, VPlan &Plan) { - VPValue *Op; - return isa(V) || - match(V, m_ActiveLaneMask(m_VPValue(), m_VPValue())) || - (match(V, m_Binary(m_VPValue(), m_VPValue(Op))) && - Op == Plan.getOrCreateBackedgeTakenCount()); -} diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 4c0972e517263c..5bb88e4a57dc37 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -42,7 +42,6 @@ #include "llvm/IR/DebugLoc.h" #include "llvm/IR/FMF.h" #include "llvm/IR/Operator.h" -#include "llvm/Support/InstructionCost.h" #include #include #include @@ -65,11 +64,8 @@ class VPlan; class VPReplicateRecipe; class VPlanSlp; class Value; -class LoopVectorizationCostModel; class LoopVersioning; -struct VPCostContext; - namespace Intrinsic { typedef unsigned ID; } @@ -86,14 +82,6 @@ Value *createStepForVF(IRBuilderBase &B, Type *Ty, ElementCount VF, const SCEV *createTripCountSCEV(Type *IdxTy, PredicatedScalarEvolution &PSE, Loop *CurLoop = nullptr); -/// A helper function that returns the reciprocal of the block probability of -/// predicated blocks. If we return X, we are assuming the predicated block -/// will execute once for every X iterations of the loop header. -/// -/// TODO: We should use actual block probability here, if available. Currently, -/// we always assume predicated blocks have a 50% chance of executing. -inline unsigned getReciprocalPredBlockProb() { return 2; } - /// A range of powers-of-2 vectorization factors with fixed start and /// adjustable end. The range includes start and excludes end, e.g.,: /// [1, 16) = {1, 2, 4, 8} @@ -636,9 +624,6 @@ class VPBlockBase { /// VPBlockBase, thereby "executing" the VPlan. virtual void execute(VPTransformState *State) = 0; - /// Return the cost of the block. 
- virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx) = 0; - /// Delete all blocks reachable from a given VPBlockBase, inclusive. static void deleteCFG(VPBlockBase *Entry); @@ -722,27 +707,6 @@ class VPLiveOut : public VPUser { #endif }; -/// Struct to hold various analysis needed for cost computations. -struct VPCostContext { - const TargetTransformInfo &TTI; - VPTypeAnalysis Types; - LLVMContext &LLVMCtx; - LoopVectorizationCostModel &CM; - SmallPtrSet SkipCostComputation; - - VPCostContext(const TargetTransformInfo &TTI, Type *CanIVTy, - LLVMContext &LLVMCtx, LoopVectorizationCostModel &CM) - : TTI(TTI), Types(CanIVTy, LLVMCtx), LLVMCtx(LLVMCtx), CM(CM) {} - - /// Return the cost for \p UI with \p VF using the legacy cost model as - /// fallback until computing the cost of all recipes migrates to VPlan. - InstructionCost getLegacyCost(Instruction *UI, ElementCount VF) const; - - /// Return true if the cost for \p UI shouldn't be computed, e.g. because it - /// has already been pre-computed. - bool skipCostComputation(Instruction *UI, bool IsVector) const; -}; - /// VPRecipeBase is a base class modeling a sequence of one or more output IR /// instructions. VPRecipeBase owns the VPValues it defines through VPDef /// and is responsible for deleting its defined values. Single-value @@ -782,11 +746,6 @@ class VPRecipeBase : public ilist_node_with_parent, /// this VPRecipe, thereby "executing" the VPlan. virtual void execute(VPTransformState &State) = 0; - /// Return the cost of this recipe, taking into account if the cost - /// computation should be skipped and the ForceTargetInstructionCost flag. - /// Also takes care of printing the cost for debugging. - virtual InstructionCost cost(ElementCount VF, VPCostContext &Ctx); - /// Insert an unlinked recipe into a basic block immediately before /// the specified recipe. 
void insertBefore(VPRecipeBase *InsertPos); @@ -847,11 +806,6 @@ class VPRecipeBase : public ilist_node_with_parent, /// Returns the debug location of the recipe. DebugLoc getDebugLoc() const { return DL; } - -protected: - /// Compute the cost of this recipe using the legacy cost model and the - /// underlying instructions. - InstructionCost computeCost(ElementCount VF, VPCostContext &Ctx) const; }; // Helper macro to define common classof implementations for recipes. @@ -1427,6 +1381,8 @@ class VPWidenCastRecipe : public VPRecipeWithIRFlags { ResultTy(ResultTy) { assert(UI.getOpcode() == Opcode && "opcode of underlying cast doesn't match"); + assert(UI.getType() == ResultTy && + "result type of underlying cast doesn't match"); } VPWidenCastRecipe(Instruction::CastOps Opcode, VPValue *Op, Type *ResultTy) @@ -2140,8 +2096,6 @@ class VPInterleaveRecipe : public VPRecipeBase { "Op must be an operand of the recipe"); return Op == getAddr() && !llvm::is_contained(getStoredValues(), Op); } - - Instruction *getInsertPos() const { return IG->getInsertPos(); } }; /// A recipe to represent inloop reduction operations, performing a reduction on @@ -2956,9 +2910,6 @@ class VPBasicBlock : public VPBlockBase { /// this VPBasicBlock, thereby "executing" the VPlan. void execute(VPTransformState *State) override; - /// Return the cost of this VPBasicBlock. - InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override; - /// Return the position of the first non-phi node recipe in the block. iterator getFirstNonPhi(); @@ -3133,9 +3084,6 @@ class VPRegionBlock : public VPBlockBase { /// this VPRegionBlock, thereby "executing" the VPlan. void execute(VPTransformState *State) override; - // Return the cost of this region. - InstructionCost cost(ElementCount VF, VPCostContext &Ctx) override; - void dropAllReferences(VPValue *NewValue) override; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -3255,9 +3203,6 @@ class VPlan { /// Generate the IR code for this VPlan. 
void execute(VPTransformState *State); - /// Return the cost of this plan. - InstructionCost cost(ElementCount VF, VPCostContext &Ctx); - VPBasicBlock *getEntry() { return Entry; } const VPBasicBlock *getEntry() const { return Entry; } @@ -3301,11 +3246,6 @@ class VPlan { return any_of(VFs, [](ElementCount VF) { return VF.isScalable(); }); } - iterator_range::iterator> - vectorFactors() const { - return {VFs.begin(), VFs.end()}; - } - bool hasScalarVFOnly() const { return VFs.size() == 1 && VFs[0].isScalar(); } bool hasUF(unsigned UF) const { return UFs.empty() || UFs.contains(UF); } @@ -3725,13 +3665,6 @@ inline bool isUniformAfterVectorization(VPValue *VPV) { return VPI->isVectorToScalar(); return false; } - -/// Return true if \p Cond is a uniform boolean. -bool isUniformBoolean(VPValue *Cond); - -/// Return true if \p V is a header mask in \p Plan. -bool isHeaderMask(VPValue *V, VPlan &Plan); - } // end namespace vputils } // end namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index b491ea5a18b543..7a482455473e40 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -39,7 +39,6 @@ using VectorParts = SmallVector; namespace llvm { extern cl::opt EnableVPlanNativePath; } -extern cl::opt ForceTargetInstructionCost; #define LV_NAME "loop-vectorize" #define DEBUG_TYPE LV_NAME @@ -256,40 +255,6 @@ void VPRecipeBase::moveBefore(VPBasicBlock &BB, insertBefore(BB, I); } -InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) { - if (auto *S = dyn_cast(this)) { - auto *UI = dyn_cast_or_null(S->getUnderlyingValue()); - if (UI && Ctx.skipCostComputation(UI, VF.isVector())) - return 0; - } - - InstructionCost RecipeCost = computeCost(VF, Ctx); - if (ForceTargetInstructionCost.getNumOccurrences() > 0 && - RecipeCost.isValid()) - RecipeCost = InstructionCost(ForceTargetInstructionCost); - - LLVM_DEBUG({ - dbgs() << "Cost of 
" << RecipeCost << " for VF " << VF << ": "; - dump(); - }); - return RecipeCost; -} - -InstructionCost VPRecipeBase::computeCost(ElementCount VF, - VPCostContext &Ctx) const { - // Compute the cost for the recipe falling back to the legacy cost model using - // the underlying instruction. If there is no underlying instruction, returns - // 0. - Instruction *UI = nullptr; - if (auto *S = dyn_cast(this)) - UI = dyn_cast_or_null(S->getUnderlyingValue()); - else if (auto *IG = dyn_cast(this)) - UI = IG->getInsertPos(); - else if (auto *WidenMem = dyn_cast(this)) - UI = &WidenMem->getIngredient(); - return UI ? Ctx.getLegacyCost(UI, VF) : 0; -} - FastMathFlags VPRecipeWithIRFlags::getFastMathFlags() const { assert(OpType == OperationType::FPMathOp && "recipe doesn't have fast math flags"); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 082a442bf399d2..8ec67eb2f54bda 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -999,10 +999,6 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { : Instruction::ZExt; auto *VPC = new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy); - if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) { - // UnderlyingExt has distinct return type, used to retain legacy cost. 
- VPC->setUnderlyingValue(UnderlyingExt); - } VPC->insertBefore(&R); Trunc->replaceAllUsesWith(VPC); } else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) { @@ -1522,7 +1518,6 @@ void VPlanTransforms::dropPoisonGeneratingRecipes( VPInstruction *New = Builder.createOverflowingOp( Instruction::Add, {A, B}, {false, false}, RecWithFlags->getDebugLoc()); - New->setUnderlyingValue(RecWithFlags->getUnderlyingValue()); RecWithFlags->replaceAllUsesWith(New); RecWithFlags->eraseFromParent(); CurRec = New; diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index fa6a65ff2f3ada..8d945f6f2b8ea8 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -74,7 +74,8 @@ class VPValue { public: /// Return the underlying Value attached to this VPValue. - Value *getUnderlyingValue() const { return UnderlyingVal; } + Value *getUnderlyingValue() { return UnderlyingVal; } + const Value *getUnderlyingValue() const { return UnderlyingVal; } /// An enumeration for keeping track of the concrete subclass of VPValue that /// are actually instantiated. diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll index 41879f3ebef5a5..b5aa96eb23f5e5 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -119,7 +119,6 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: LV: Interleaving is not beneficial. ; CHECK-NEXT: LV: Found a vectorizable loop (vscale x 4) in ; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop -; CHECK-NEXT: VF picked by VPlan cost model: vscale x 4 ; CHECK-NEXT: Executing best plan with VF=vscale x 4, UF=1 ; CHECK: LV: Interleaving disabled by the pass manager ; CHECK-NEXT: LV: Vectorizing: innermost loop. 
@@ -261,7 +260,6 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; CHECK-NEXT: LV: Interleaving is not beneficial. ; CHECK-NEXT: LV: Found a vectorizable loop (vscale x 4) in ; CHECK-NEXT: LEV: Epilogue vectorization is not profitable for this loop -; CHECK-NEXT: VF picked by VPlan cost model: vscale x 4 ; CHECK-NEXT: Executing best plan with VF=vscale x 4, UF=1 ; CHECK: LV: Interleaving disabled by the pass manager ; CHECK-NEXT: LV: Vectorizing: innermost loop. From 3a47d948ba1b0ebe99ff068ddf28fe9e6043e932 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Fri, 14 Jun 2024 10:49:18 -0700 Subject: [PATCH 132/155] [flang][cuda] Propagate data attribute to global with initialization (#95504) Global with initial value were missing the CUDA data attribute. --- flang/include/flang/Lower/ConvertConstant.h | 3 ++- flang/lib/Lower/ConvertConstant.cpp | 23 +++++++++++-------- flang/lib/Lower/ConvertVariable.cpp | 12 +++++----- flang/test/Lower/CUDA/cuda-data-attribute.cuf | 17 +++++++++++++- 4 files changed, 37 insertions(+), 18 deletions(-) diff --git a/flang/include/flang/Lower/ConvertConstant.h b/flang/include/flang/Lower/ConvertConstant.h index c49cbbc6e7426b..1bd11e9bacd618 100644 --- a/flang/include/flang/Lower/ConvertConstant.h +++ b/flang/include/flang/Lower/ConvertConstant.h @@ -64,7 +64,8 @@ fir::GlobalOp tryCreatingDenseGlobal(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Type symTy, llvm::StringRef globalName, mlir::StringAttr linkage, bool isConst, - const Fortran::lower::SomeExpr &initExpr); + const Fortran::lower::SomeExpr &initExpr, + cuf::DataAttributeAttr dataAttr = {}); /// Lower a StructureConstructor that must be lowered in read only data although /// it may not be wrapped into a Constant (this may be the case for derived diff --git 
a/flang/lib/Lower/ConvertConstant.cpp b/flang/lib/Lower/ConvertConstant.cpp index 653e874a969c5e..a4ace40a3a1c41 100644 --- a/flang/lib/Lower/ConvertConstant.cpp +++ b/flang/lib/Lower/ConvertConstant.cpp @@ -102,7 +102,8 @@ class DenseGlobalBuilder { mlir::Location loc, mlir::Type symTy, llvm::StringRef globalName, mlir::StringAttr linkage, bool isConst, - const Fortran::lower::SomeExpr &initExpr) { + const Fortran::lower::SomeExpr &initExpr, + cuf::DataAttributeAttr dataAttr) { DenseGlobalBuilder globalBuilder; std::visit( Fortran::common::visitors{ @@ -119,7 +120,7 @@ class DenseGlobalBuilder { }, initExpr.u); return globalBuilder.tryCreatingGlobal(builder, loc, symTy, globalName, - linkage, isConst); + linkage, isConst, dataAttr); } template @@ -127,11 +128,12 @@ class DenseGlobalBuilder { fir::FirOpBuilder &builder, mlir::Location loc, mlir::Type symTy, llvm::StringRef globalName, mlir::StringAttr linkage, bool isConst, const Fortran::evaluate::Constant> - &constant) { + &constant, + cuf::DataAttributeAttr dataAttr) { DenseGlobalBuilder globalBuilder; globalBuilder.tryConvertingToAttributes(builder, constant); return globalBuilder.tryCreatingGlobal(builder, loc, symTy, globalName, - linkage, isConst); + linkage, isConst, dataAttr); } private: @@ -178,8 +180,8 @@ class DenseGlobalBuilder { fir::GlobalOp tryCreatingGlobal(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Type symTy, llvm::StringRef globalName, - mlir::StringAttr linkage, - bool isConst) const { + mlir::StringAttr linkage, bool isConst, + cuf::DataAttributeAttr dataAttr) const { // Not a "trivial" intrinsic constant array, or empty array. 
if (!attributeElementType || attributes.empty()) return {}; @@ -191,7 +193,8 @@ class DenseGlobalBuilder { auto tensorTy = mlir::RankedTensorType::get(tensorShape, attributeElementType); auto init = mlir::DenseElementsAttr::get(tensorTy, attributes); - return builder.createGlobal(loc, symTy, globalName, linkage, init, isConst); + return builder.createGlobal(loc, symTy, globalName, linkage, init, isConst, + /*isTarget=*/false, dataAttr); } llvm::SmallVector attributes; @@ -202,9 +205,9 @@ class DenseGlobalBuilder { fir::GlobalOp Fortran::lower::tryCreatingDenseGlobal( fir::FirOpBuilder &builder, mlir::Location loc, mlir::Type symTy, llvm::StringRef globalName, mlir::StringAttr linkage, bool isConst, - const Fortran::lower::SomeExpr &initExpr) { + const Fortran::lower::SomeExpr &initExpr, cuf::DataAttributeAttr dataAttr) { return DenseGlobalBuilder::tryCreating(builder, loc, symTy, globalName, - linkage, isConst, initExpr); + linkage, isConst, initExpr, dataAttr); } //===----------------------------------------------------------------------===// @@ -661,7 +664,7 @@ genOutlineArrayLit(Fortran::lower::AbstractConverter &converter, T::category == Fortran::common::TypeCategory::Complex) { global = DenseGlobalBuilder::tryCreating( builder, loc, arrayTy, globalName, builder.createInternalLinkage(), - true, constant); + true, constant, {}); } if (!global) // If the number of elements of the array is huge, the compilation may diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp index 80a80fd1d92ef5..8c96123b14976d 100644 --- a/flang/lib/Lower/ConvertVariable.cpp +++ b/flang/lib/Lower/ConvertVariable.cpp @@ -165,12 +165,15 @@ static fir::GlobalOp declareGlobal(Fortran::lower::AbstractConverter &converter, fir::FirOpBuilder &builder = converter.getFirOpBuilder(); if (fir::GlobalOp global = builder.getNamedGlobal(globalName)) return global; + const Fortran::semantics::Symbol &sym = var.getSymbol(); + cuf::DataAttributeAttr dataAttr = + 
Fortran::lower::translateSymbolCUFDataAttribute( + converter.getFirOpBuilder().getContext(), sym); // Always define linkonce data since it may be optimized out from the module // that actually owns the variable if it does not refers to it. if (linkage == builder.createLinkOnceODRLinkage() || linkage == builder.createLinkOnceLinkage()) - return defineGlobal(converter, var, globalName, linkage); - const Fortran::semantics::Symbol &sym = var.getSymbol(); + return defineGlobal(converter, var, globalName, linkage, dataAttr); mlir::Location loc = genLocation(converter, sym); // Resolve potential host and module association before checking that this // symbol is an object of a function pointer. @@ -179,9 +182,6 @@ static fir::GlobalOp declareGlobal(Fortran::lower::AbstractConverter &converter, !Fortran::semantics::IsProcedurePointer(ultimate)) mlir::emitError(loc, "processing global declaration: symbol '") << toStringRef(sym.name()) << "' has unexpected details\n"; - cuf::DataAttributeAttr dataAttr = - Fortran::lower::translateSymbolCUFDataAttribute( - converter.getFirOpBuilder().getContext(), sym); return builder.createGlobal(loc, converter.genType(var), globalName, linkage, mlir::Attribute{}, isConstant(ultimate), var.isTarget(), dataAttr); @@ -510,7 +510,7 @@ static fir::GlobalOp defineGlobal(Fortran::lower::AbstractConverter &converter, if (details->init()) { global = Fortran::lower::tryCreatingDenseGlobal( builder, loc, symTy, globalName, linkage, isConst, - details->init().value()); + details->init().value(), dataAttr); if (global) { global.setVisibility(mlir::SymbolTable::Visibility::Public); return global; diff --git a/flang/test/Lower/CUDA/cuda-data-attribute.cuf b/flang/test/Lower/CUDA/cuda-data-attribute.cuf index f7f58a43a14393..192ef044913b9e 100644 --- a/flang/test/Lower/CUDA/cuda-data-attribute.cuf +++ b/flang/test/Lower/CUDA/cuda-data-attribute.cuf @@ -4,15 +4,30 @@ ! Test lowering of CUDA attribute on variables. 
module cuda_var + + type :: t1 + integer :: a + end type + real, constant :: mod_a_rc ! CHECK: fir.global @_QMcuda_varEmod_a_rc {data_attr = #cuf.cuda} : f32 real, device :: mod_b_ra ! CHECK: fir.global @_QMcuda_varEmod_b_ra {data_attr = #cuf.cuda} : f32 real, allocatable, managed :: mod_c_rm ! CHECK: fir.global @_QMcuda_varEmod_c_rm {data_attr = #cuf.cuda} : !fir.box> + + integer, device, dimension(10) :: mod_d_i_init = (/ (i, i = 1, 10) /) +! CHECK: fir.global @_QMcuda_varEmod_d_i_init(dense<[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]> : tensor<10xi32>) {data_attr = #cuf.cuda} : !fir.array<10xi32> + + real, device, dimension(10) :: mod_d_rinit = (/ (i, i = 1, 10) /) +! CHECK: fir.global @_QMcuda_varEmod_d_rinit(dense<[{{.*}}]> : tensor<10xf32>) {data_attr = #cuf.cuda} : !fir.array<10xf32> + real, allocatable, pinned :: mod_d_rp ! CHECK: fir.global @_QMcuda_varEmod_d_rp {data_attr = #cuf.cuda} : !fir.box> + type(t1), device :: mod_d_t(2) +! CHECK: fir.global @_QMcuda_varEmod_d_t {data_attr = #cuf.cuda} : !fir.array<2x!fir.type<_QMcuda_varTt1{a:i32}>> + contains subroutine local_var_attrs @@ -71,7 +86,7 @@ end ! CHECK-LABEL: func.func @_QMcuda_varPcuda_alloc_free ! CHECK: %[[ALLOC_A:.*]] = cuf.alloc !fir.array<10xf32> {bindc_name = "a", data_attr = #cuf.cuda, uniq_name = "_QMcuda_varFcuda_alloc_freeEa"} -> !fir.ref> -! CHECK: %[[SHAPE:.*]] = fir.shape %c10 : (index) -> !fir.shape<1> +! CHECK: %[[SHAPE:.*]] = fir.shape %c10{{.*}} : (index) -> !fir.shape<1> ! CHECK: %[[DECL_A:.*]]:2 = hlfir.declare %[[ALLOC_A]](%[[SHAPE]]) {data_attr = #cuf.cuda, uniq_name = "_QMcuda_varFcuda_alloc_freeEa"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) ! 
CHECK: %[[ALLOC_U:.*]] = cuf.alloc i32 {bindc_name = "u", data_attr = #cuf.cuda, uniq_name = "_QMcuda_varFcuda_alloc_freeEu"} -> !fir.ref From e84ecf26fa5d9a4be4da078a1f85e988731308af Mon Sep 17 00:00:00 2001 From: Stefan Pintilie Date: Fri, 14 Jun 2024 13:49:37 -0400 Subject: [PATCH 133/155] [NFC][PowerPC] Add test to check lanemasks for subregisters. (#94363) This change adds a test case to check the lanemasks for a variety of subregisters. --- .../test/CodeGen/PowerPC/subreg-lanemasks.mir | 67 +++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 llvm/test/CodeGen/PowerPC/subreg-lanemasks.mir diff --git a/llvm/test/CodeGen/PowerPC/subreg-lanemasks.mir b/llvm/test/CodeGen/PowerPC/subreg-lanemasks.mir new file mode 100644 index 00000000000000..1bc8766cf78d4a --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/subreg-lanemasks.mir @@ -0,0 +1,67 @@ +# RUN: llc -mcpu=pwr10 -ppc-track-subreg-liveness -filetype=null \ +# RUN: -mtriple=powerpc64le-unknown-linux-gnu -run-pass=greedy,virtregrewriter \ +# RUN: -debug-only=regalloc -o - %s 2>&1 | FileCheck %s +# REQUIRES: asserts + +# Keep track of all of the lanemasks for various subregisters. +# +# TODO: The mask for %6.sub_vsx1:accrc is the same as the mask for %10.sub_vsx1_then_sub_64:accrc. +# Ideally on PowerPC these masks should be different. To be addressed in a later patch.
+# +# CHECK: %3 [80r,80d:0) 0@80r L0000000000000004 [80r,80d:0) 0@80r weight:0.000000e+00 +# CHECK: %4 [96r,96d:0) 0@96r L0000000000000800 [96r,96d:0) 0@96r weight:0.000000e+00 +# CHECK: %5 [112r,112d:0) 0@112r L0000000000000004 [112r,112d:0) 0@112r weight:0.000000e+00 +# CHECK: %6 [128r,128d:0) 0@128r L0000000000000800 [128r,128d:0) 0@128r weight:0.000000e+00 +# CHECK: %7 [144r,144d:0) 0@144r L0000000000000004 [144r,144d:0) 0@144r weight:0.000000e+00 +# CHECK: %8 [160r,160d:0) 0@160r L0000000000000800 [160r,160d:0) 0@160r weight:0.000000e+00 +# CHECK: %9 [176r,176d:0) 0@176r L0000000000000004 [176r,176d:0) 0@176r weight:0.000000e+00 +# CHECK: %10 [192r,192d:0) 0@192r L0000000000000800 [192r,192d:0) 0@192r weight:0.000000e+00 +# CHECK: %11 [208r,208d:0) 0@208r L0000000000001000 [208r,208d:0) 0@208r weight:0.000000e+00 +# CHECK: %12 [224r,224d:0) 0@224r L0000000000002000 [224r,224d:0) 0@224r weight:0.000000e+00 +# CHECK: %13 [240r,240d:0) 0@240r L0000000000000804 [240r,240d:0) 0@240r weight:0.000000e+00 +# CHECK: %14 [256r,256d:0) 0@256r L0000000000003000 [256r,256d:0) 0@256r weight:0.000000e+00 + + +# CHECK: 0B bb.0 +# CHECK-NEXT: liveins +# CHECK-NEXT: 16B %0:vsrc = COPY $v2 +# CHECK-NEXT: 32B %float:fprc = COPY %0.sub_64:vsrc +# CHECK-NEXT: 48B dead undef %pair.sub_vsx0:vsrprc = COPY $v2 +# CHECK-NEXT: 64B undef %15.sub_vsx1:vsrprc = COPY $v3 +# CHECK-NEXT: 80B dead undef %3.sub_vsx0:vsrprc = COPY %0:vsrc +# CHECK-NEXT: 96B dead undef %4.sub_vsx1:vsrprc = COPY %0:vsrc +# CHECK-NEXT: 112B dead undef %5.sub_vsx0:accrc = COPY %0:vsrc +# CHECK-NEXT: 128B dead undef %6.sub_vsx1:accrc = COPY %0:vsrc +# CHECK-NEXT: 144B dead undef %7.sub_64:vsrprc = COPY %float:fprc +# CHECK-NEXT: 160B dead undef %8.sub_vsx1_then_sub_64:vsrprc = COPY %float:fprc +# CHECK-NEXT: 176B dead undef %9.sub_64:accrc = COPY %float:fprc +# CHECK-NEXT: 192B dead undef %10.sub_vsx1_then_sub_64:accrc = COPY %float:fprc +# CHECK-NEXT: 208B dead undef %11.sub_pair1_then_sub_64:accrc = COPY %float:fprc 
+# CHECK-NEXT: 224B dead undef %12.sub_pair1_then_sub_vsx1_then_sub_64:accrc = COPY %float:fprc +# CHECK-NEXT: 240B dead undef %13.sub_pair0:accrc = COPY %15:vsrprc +# CHECK-NEXT: 256B dead undef %14.sub_pair1:accrc = COPY %15:vsrprc + + +--- +name: test +tracksRegLiveness: true +body: | + bb.0: + liveins: $v2, $v3 + %0:vsrc = COPY $v2 + %float:fprc = COPY %0.sub_64 + undef %pair.sub_vsx0:vsrprc = COPY $v2 + undef %pair.sub_vsx1:vsrprc = COPY $v3 + undef %1.sub_vsx0:vsrprc = COPY %0 + undef %2.sub_vsx1:vsrprc = COPY %0 + undef %3.sub_vsx0:accrc = COPY %0 + undef %4.sub_vsx1:accrc = COPY %0 + undef %5.sub_64:vsrprc = COPY %float + undef %6.sub_vsx1_then_sub_64:vsrprc = COPY %float + undef %7.sub_64:accrc = COPY %float + undef %8.sub_vsx1_then_sub_64:accrc = COPY %float + undef %9.sub_pair1_then_sub_64:accrc = COPY %float + undef %10.sub_pair1_then_sub_vsx1_then_sub_64:accrc = COPY %float + undef %11.sub_pair0:accrc = COPY %pair + undef %12.sub_pair1:accrc = COPY %pair +... From 3ecba1ad1f6d2e961948ddfdb165f22621d0788f Mon Sep 17 00:00:00 2001 From: Youngsuk Kim Date: Fri, 14 Jun 2024 14:11:43 -0400 Subject: [PATCH 134/155] [compiler-rt] Remove 'memprof_meminfoblock.h' from MEMPROF_HEADERS (NFC) (#95334) Commit 8306968b592d942cc49bde2e387061e673a9fbb7 deleted file `compiler-rt/lib/memprof/memprof_meminfoblock.h`, but didn't remove it from MEMPROF_HEADERS in `compiler-rt/lib/memprof/CMakeLists.txt`. Remove unneeded leftover line in `compiler-rt/lib/memprof/CMakeLists.txt`. p.s. GH #54777 reported a llvm14 build failure due to the existence of the leftover line, but I'm unable to reproduce the build failure with llvm19 trunk. 
--- compiler-rt/lib/memprof/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/compiler-rt/lib/memprof/CMakeLists.txt b/compiler-rt/lib/memprof/CMakeLists.txt index 3f55c2f5e075ee..e6d99daca6ee7d 100644 --- a/compiler-rt/lib/memprof/CMakeLists.txt +++ b/compiler-rt/lib/memprof/CMakeLists.txt @@ -37,7 +37,6 @@ SET(MEMPROF_HEADERS memprof_interface_internal.h memprof_internal.h memprof_mapping.h - memprof_meminfoblock.h memprof_mibmap.h memprof_rawprofile.h memprof_stack.h From b6fd6d4cc53d263c586264d1476265fbdcc0ba21 Mon Sep 17 00:00:00 2001 From: Chris B Date: Fri, 14 Jun 2024 13:13:25 -0500 Subject: [PATCH 135/155] [HLSL] Use hlsl vector template in type printer (#95489) In HLSL we really want to be using the HLSL vector template and other built-in sugared spellings for some builtin types. This updates the type printer to take an option to use HLSL type spellings. This changes printing vector type names from: ``` T __attribute__((ext_vector_type(N))) ``` To: ``` vector ``` --- clang/include/clang/AST/PrettyPrinter.h | 7 +- clang/lib/AST/TypePrinter.cpp | 32 +++++-- clang/test/AST/HLSL/pch.hlsl | 2 +- clang/test/AST/HLSL/pch_with_buf.hlsl | 2 +- clang/test/AST/HLSL/vector-alias.hlsl | 16 ++-- clang/test/AST/HLSL/vector-constructors.hlsl | 22 ++--- clang/test/SemaHLSL/BuiltIns/RWBuffers.hlsl | 2 +- .../test/SemaHLSL/BuiltIns/clamp-errors.hlsl | 2 +- clang/test/SemaHLSL/BuiltIns/dot-errors.hlsl | 2 +- clang/test/SemaHLSL/BuiltIns/lerp-errors.hlsl | 2 +- clang/test/SemaHLSL/BuiltIns/mad-errors.hlsl | 2 +- .../test/SemaHLSL/BuiltIns/vector-errors.hlsl | 4 +- .../BuiltinVector/ScalarSwizzleErrors.hlsl | 4 +- .../Types/BuiltinVector/ScalarSwizzles.hlsl | 56 ++++++------ .../SemaHLSL/VectorOverloadResolution.hlsl | 30 +++---- .../standard_conversion_sequences.hlsl | 90 +++++++++---------- 16 files changed, 149 insertions(+), 126 deletions(-) diff --git a/clang/include/clang/AST/PrettyPrinter.h b/clang/include/clang/AST/PrettyPrinter.h index 
da276e26049b00..332ac3c6a004a9 100644 --- a/clang/include/clang/AST/PrettyPrinter.h +++ b/clang/include/clang/AST/PrettyPrinter.h @@ -77,7 +77,7 @@ struct PrintingPolicy { PrintCanonicalTypes(false), PrintInjectedClassNameWithArguments(true), UsePreferredNames(true), AlwaysIncludeTypeForTemplateArgument(false), CleanUglifiedParameters(false), EntireContentsOfLargeArray(true), - UseEnumerators(true) {} + UseEnumerators(true), UseHLSLTypes(LO.HLSL) {} /// Adjust this printing policy for cases where it's known that we're /// printing C++ code (for instance, if AST dumping reaches a C++-only @@ -342,6 +342,11 @@ struct PrintingPolicy { LLVM_PREFERRED_TYPE(bool) unsigned UseEnumerators : 1; + /// Whether or not we're printing known HLSL code and should print HLSL + /// sugared types when possible. + LLVM_PREFERRED_TYPE(bool) + unsigned UseHLSLTypes : 1; + /// Callbacks to use to allow the behavior of printing to be customized. const PrintingCallbacks *Callbacks = nullptr; }; diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index 58d01705d607b2..4add4d3af69a30 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -644,16 +644,25 @@ void TypePrinter::printDependentAddressSpaceAfter( void TypePrinter::printDependentSizedExtVectorBefore( const DependentSizedExtVectorType *T, raw_ostream &OS) { + if (Policy.UseHLSLTypes) + OS << "vector<"; printBefore(T->getElementType(), OS); } void TypePrinter::printDependentSizedExtVectorAfter( const DependentSizedExtVectorType *T, raw_ostream &OS) { - OS << " __attribute__((ext_vector_type("; - if (T->getSizeExpr()) - T->getSizeExpr()->printPretty(OS, nullptr, Policy); - OS << ")))"; + if (Policy.UseHLSLTypes) { + OS << ", "; + if (T->getSizeExpr()) + T->getSizeExpr()->printPretty(OS, nullptr, Policy); + OS << ">"; + } else { + OS << " __attribute__((ext_vector_type("; + if (T->getSizeExpr()) + T->getSizeExpr()->printPretty(OS, nullptr, Policy); + OS << ")))"; + } 
printAfter(T->getElementType(), OS); } @@ -815,14 +824,23 @@ void TypePrinter::printDependentVectorAfter( void TypePrinter::printExtVectorBefore(const ExtVectorType *T, raw_ostream &OS) { + if (Policy.UseHLSLTypes) + OS << "vector<"; printBefore(T->getElementType(), OS); } void TypePrinter::printExtVectorAfter(const ExtVectorType *T, raw_ostream &OS) { printAfter(T->getElementType(), OS); - OS << " __attribute__((ext_vector_type("; - OS << T->getNumElements(); - OS << ")))"; + + if (Policy.UseHLSLTypes) { + OS << ", "; + OS << T->getNumElements(); + OS << ">"; + } else { + OS << " __attribute__((ext_vector_type("; + OS << T->getNumElements(); + OS << ")))"; + } } void TypePrinter::printConstantMatrixBefore(const ConstantMatrixType *T, diff --git a/clang/test/AST/HLSL/pch.hlsl b/clang/test/AST/HLSL/pch.hlsl index 839a13093bd150..483af0f5b4c790 100644 --- a/clang/test/AST/HLSL/pch.hlsl +++ b/clang/test/AST/HLSL/pch.hlsl @@ -10,7 +10,7 @@ hlsl::RWBuffer Buffer; float2 bar(float2 a, float2 b) { -// CHECK:CallExpr 0x{{[0-9a-f]+}} 'float2':'float __attribute__((ext_vector_type(2)))' +// CHECK:CallExpr 0x{{[0-9a-f]+}} 'float2':'vector' // CHECK-NEXT:ImplicitCastExpr 0x{{[0-9a-f]+}} 'float2 (*)(float2, float2)' // CHECK-NEXT:`-DeclRefExpr 0x{{[0-9a-f]+}} 'float2 (float2, float2)' lvalue Function 0x[[FOO]] 'foo' 'float2 (float2, float2)' return foo(a, b); diff --git a/clang/test/AST/HLSL/pch_with_buf.hlsl b/clang/test/AST/HLSL/pch_with_buf.hlsl index 63b7ed508a5fba..7fb5e2a3812ead 100644 --- a/clang/test/AST/HLSL/pch_with_buf.hlsl +++ b/clang/test/AST/HLSL/pch_with_buf.hlsl @@ -11,7 +11,7 @@ hlsl::RWBuffer Buf2; float2 bar(float2 a, float2 b) { -// CHECK:CallExpr 0x{{[0-9a-f]+}} 'float2':'float __attribute__((ext_vector_type(2)))' +// CHECK:CallExpr 0x{{[0-9a-f]+}} 'float2':'vector' // CHECK-NEXT:ImplicitCastExpr 0x{{[0-9a-f]+}} 'float2 (*)(float2, float2)' // CHECK-NEXT:`-DeclRefExpr 0x{{[0-9a-f]+}} 'float2 (float2, float2)' lvalue Function 0x[[FOO]] 'foo' 'float2 (float2, 
float2)' return foo(a, b); diff --git a/clang/test/AST/HLSL/vector-alias.hlsl b/clang/test/AST/HLSL/vector-alias.hlsl index effa1aa53db49a..3d112ee1b22303 100644 --- a/clang/test/AST/HLSL/vector-alias.hlsl +++ b/clang/test/AST/HLSL/vector-alias.hlsl @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -ast-dump -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -x hlsl -ast-dump -o - %s | FileCheck %s // CHECK: NamespaceDecl 0x{{[0-9a-fA-F]+}} <> implicit hlsl // CHECK-NEXT: TypeAliasTemplateDecl 0x{{[0-9a-fA-F]+}} <> implicit vector @@ -8,8 +8,8 @@ // CHECK-NEXT: NonTypeTemplateParmDecl 0x{{[0-9a-fA-F]+}} <> 'int' depth 0 index 1 element_count // CHECK-NEXT: TemplateArgument expr // CHECK-NEXT: IntegerLiteral 0x{{[0-9a-fA-F]+}} <> 'int' 4 -// CHECK-NEXT: TypeAliasDecl 0x{{[0-9a-fA-F]+}} <> implicit vector 'element __attribute__((ext_vector_type(element_count)))' -// CHECK-NEXT: DependentSizedExtVectorType 0x{{[0-9a-fA-F]+}} 'element __attribute__((ext_vector_type(element_count)))' dependent +// CHECK-NEXT: TypeAliasDecl 0x{{[0-9a-fA-F]+}} <> implicit vector 'vector' +// CHECK-NEXT: DependentSizedExtVectorType 0x{{[0-9a-fA-F]+}} 'vector' dependent // CHECK-NEXT: TemplateTypeParmType 0x{{[0-9a-fA-F]+}} 'element' dependent depth 0 index 0 // CHECK-NEXT: TemplateTypeParm 0x{{[0-9a-fA-F]+}} 'element' // CHECK-NEXT: DeclRefExpr 0x{{[0-9a-fA-F]+}} <> 'int' lvalue @@ -24,30 +24,30 @@ int entry() { hlsl::vector Vec2 = {1.0, 2.0}; // CHECK: DeclStmt 0x{{[0-9a-fA-F]+}} - // CHECK-NEXT: VarDecl 0x{{[0-9a-fA-F]+}} col:26 Vec2 'hlsl::vector':'float __attribute__((ext_vector_type(2)))' cinit + // CHECK-NEXT: VarDecl 0x{{[0-9a-fA-F]+}} col:26 Vec2 'hlsl::vector':'vector' cinit // Verify that you don't need to specify the namespace. 
vector Vec2a = {1, 2}; // CHECK: DeclStmt 0x{{[0-9a-fA-F]+}} - // CHECK-NEXT: VarDecl 0x{{[0-9a-fA-F]+}} col:18 Vec2a 'vector':'int __attribute__((ext_vector_type(2)))' cinit + // CHECK-NEXT: VarDecl 0x{{[0-9a-fA-F]+}} col:18 Vec2a 'vector' cinit // Build a bigger vector. vector Vec4 = {1.0, 2.0, 3.0, 4.0}; // CHECK: DeclStmt 0x{{[0-9a-fA-F]+}} - // CHECK-NEXT: VarDecl 0x{{[0-9a-fA-F]+}} col:21 used Vec4 'vector':'double __attribute__((ext_vector_type(4)))' cinit + // CHECK-NEXT: VarDecl 0x{{[0-9a-fA-F]+}} col:21 used Vec4 'vector' cinit // Verify that swizzles still work. vector Vec3 = Vec4.xyz; // CHECK: DeclStmt 0x{{[0-9a-fA-F]+}} - // CHECK-NEXT: VarDecl 0x{{[0-9a-fA-F]+}} col:21 Vec3 'vector':'double __attribute__((ext_vector_type(3)))' cinit + // CHECK-NEXT: VarDecl 0x{{[0-9a-fA-F]+}} col:21 Vec3 'vector' cinit // Verify that the implicit arguments generate the correct type. vector<> ImpVec4 = {1.0, 2.0, 3.0, 4.0}; // CHECK: DeclStmt 0x{{[0-9a-fA-F]+}} - // CHECK-NEXT: VarDecl 0x{{[0-9a-fA-F]+}} col:12 ImpVec4 'vector<>':'float __attribute__((ext_vector_type(4)))' cinit + // CHECK-NEXT: VarDecl 0x{{[0-9a-fA-F]+}} col:12 ImpVec4 'vector<>':'vector' cinit return 1; } diff --git a/clang/test/AST/HLSL/vector-constructors.hlsl b/clang/test/AST/HLSL/vector-constructors.hlsl index 5e0900bb623693..905f11d9223248 100644 --- a/clang/test/AST/HLSL/vector-constructors.hlsl +++ b/clang/test/AST/HLSL/vector-constructors.hlsl @@ -11,9 +11,9 @@ void entry() { // For the float2 vector, we just expect a conversion from constructor // parameters to an initialization list -// CHECK-LABEL: VarDecl 0x{{[0-9a-fA-F]+}} {{.*}} used Vec2 'float2':'float __attribute__((ext_vector_type(2)))' cinit -// CHECK-NEXT: CXXFunctionalCastExpr 0x{{[0-9a-fA-F]+}} {{.*}} 'float2':'float __attribute__((ext_vector_type(2)))' functional cast to float2 -// CHECK-NEXT: InitListExpr 0x{{[0-9a-fA-F]+}} {{.*}} 'float2':'float __attribute__((ext_vector_type(2)))' +// CHECK-LABEL: VarDecl 0x{{[0-9a-fA-F]+}} 
{{.*}} used Vec2 'float2':'vector' cinit +// CHECK-NEXT: CXXFunctionalCastExpr 0x{{[0-9a-fA-F]+}} {{.*}} 'float2':'vector' functional cast to float2 +// CHECK-NEXT: InitListExpr 0x{{[0-9a-fA-F]+}} {{.*}} 'float2':'vector' // CHECK-NEXT: FloatingLiteral 0x{{[0-9a-fA-F]+}} {{.*}} 'float' 1.000000e+00 // CHECK-NEXT: FloatingLiteral 0x{{[0-9a-fA-F]+}} {{.*}} 'float' 2.000000e+00 @@ -21,22 +21,22 @@ void entry() { // For the float 3 things get fun... // Here we expect accesses to the vec2 to provide the first and second // components using ArraySubscriptExpr -// CHECK-LABEL: VarDecl 0x{{[0-9a-fA-F]+}} {{.*}} col:10 Vec3 'float3':'float __attribute__((ext_vector_type(3)))' cinit -// CHECK-NEXT: CXXFunctionalCastExpr 0x{{[0-9a-fA-F]+}} {{.*}} 'float3':'float __attribute__((ext_vector_type(3)))' functional cast to float3 -// CHECK-NEXT: InitListExpr 0x{{[0-9a-fA-F]+}} {{.*}} 'float3':'float __attribute__((ext_vector_type(3)))' +// CHECK-LABEL: VarDecl 0x{{[0-9a-fA-F]+}} {{.*}} col:10 Vec3 'float3':'vector' cinit +// CHECK-NEXT: CXXFunctionalCastExpr 0x{{[0-9a-fA-F]+}} {{.*}} 'float3':'vector' functional cast to float3 +// CHECK-NEXT: InitListExpr 0x{{[0-9a-fA-F]+}} {{.*}} 'float3':'vector' // CHECK-NEXT: ImplicitCastExpr 0x{{[0-9a-fA-F]+}} > 'float' // CHECK-NEXT: ArraySubscriptExpr 0x{{[0-9a-fA-F]+}} > 'float' lvalue -// CHECK-NEXT: DeclRefExpr 0x{{[0-9a-fA-F]+}} {{.*}} 'float2':'float __attribute__((ext_vector_type(2)))' lvalue Var 0x{{[0-9a-fA-F]+}} 'Vec2' 'float2':'float __attribute__((ext_vector_type(2)))' +// CHECK-NEXT: DeclRefExpr 0x{{[0-9a-fA-F]+}} {{.*}} 'float2':'vector' lvalue Var 0x{{[0-9a-fA-F]+}} 'Vec2' 'float2':'vector' // CHECK-NEXT: IntegerLiteral 0x{{[0-9a-fA-F]+}} <> 'int' 0 // CHECK-NEXT: ImplicitCastExpr 0x{{[0-9a-fA-F]+}} > 'float' // CHECK-NEXT: ArraySubscriptExpr 0x{{[0-9a-fA-F]+}} > 'float' lvalue -// CHECK-NEXT: DeclRefExpr 0x{{[0-9a-fA-F]+}} {{.*}} 'float2':'float __attribute__((ext_vector_type(2)))' lvalue Var 0x{{[0-9a-fA-F]+}} 'Vec2' 
'float2':'float __attribute__((ext_vector_type(2)))' +// CHECK-NEXT: DeclRefExpr 0x{{[0-9a-fA-F]+}} {{.*}} 'float2':'vector' lvalue Var 0x{{[0-9a-fA-F]+}} 'Vec2' 'float2':'vector' // CHECK-NEXT: IntegerLiteral 0x{{[0-9a-fA-F]+}} <> 'int' 1 // CHECK-NEXT: FloatingLiteral 0x{{[0-9a-fA-F]+}} {{.*}} 'float' 3.000000e+00 -// CHECK: VarDecl 0x{{[0-9a-fA-F]+}} {{.*}} col:10 Vec3b 'float3':'float __attribute__((ext_vector_type(3)))' cinit -// CHECK-NEXT: CXXFunctionalCastExpr 0x{{[0-9a-fA-F]+}} {{.*}} 'float3':'float __attribute__((ext_vector_type(3)))' functional cast to float3 -// CHECK-NEXT: InitListExpr 0x{{[0-9a-fA-F]+}} {{.*}} 'float3':'float __attribute__((ext_vector_type(3)))' +// CHECK: VarDecl 0x{{[0-9a-fA-F]+}} {{.*}} col:10 Vec3b 'float3':'vector' cinit +// CHECK-NEXT: CXXFunctionalCastExpr 0x{{[0-9a-fA-F]+}} {{.*}} 'float3':'vector' functional cast to float3 +// CHECK-NEXT: InitListExpr 0x{{[0-9a-fA-F]+}} {{.*}} 'float3':'vector' // CHECK-NEXT: FloatingLiteral 0x{{[0-9a-fA-F]+}} {{.*}} 'float' 1.000000e+00 // CHECK-NEXT: FloatingLiteral 0x{{[0-9a-fA-F]+}} {{.*}} 'float' 2.000000e+00 diff --git a/clang/test/SemaHLSL/BuiltIns/RWBuffers.hlsl b/clang/test/SemaHLSL/BuiltIns/RWBuffers.hlsl index b1a15c43191829..fecf3b76ff7bb6 100644 --- a/clang/test/SemaHLSL/BuiltIns/RWBuffers.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/RWBuffers.hlsl @@ -15,6 +15,6 @@ RWBuffer<> BufferErr2; [numthreads(1,1,1)] void main() { - (void)Buffer.h; // expected-error {{'h' is a private member of 'hlsl::RWBuffer'}} + (void)Buffer.h; // expected-error {{'h' is a private member of 'hlsl::RWBuffer >'}} // expected-note@* {{implicitly declared private here}} } diff --git a/clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl index 8e0709eb030290..036f04cdac0b51 100644 --- a/clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/clamp-errors.hlsl @@ -22,7 +22,7 @@ float2 test_clamp_no_second_arg(float2 p0) { float2 
test_clamp_vector_size_mismatch(float3 p0, float2 p1) { return clamp(p0, p0, p1); - // expected-warning@-1 {{implicit conversion truncates vector: 'float3' (aka 'vector') to 'float __attribute__((ext_vector_type(2)))' (vector of 2 'float' values)}} + // expected-warning@-1 {{implicit conversion truncates vector: 'float3' (aka 'vector') to 'vector' (vector of 2 'float' values)}} } float2 test_clamp_builtin_vector_size_mismatch(float3 p0, float2 p1) { diff --git a/clang/test/SemaHLSL/BuiltIns/dot-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/dot-errors.hlsl index 58722aaeb9246e..cc42f0cb0a572d 100644 --- a/clang/test/SemaHLSL/BuiltIns/dot-errors.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/dot-errors.hlsl @@ -17,7 +17,7 @@ float test_dot_no_second_arg(float2 p0) { float test_dot_vector_size_mismatch(float3 p0, float2 p1) { return dot(p0, p1); - // expected-warning@-1 {{implicit conversion truncates vector: 'float3' (aka 'vector') to 'float __attribute__((ext_vector_type(2)))' (vector of 2 'float' values)}} + // expected-warning@-1 {{implicit conversion truncates vector: 'float3' (aka 'vector') to 'vector' (vector of 2 'float' values)}} } float test_dot_builtin_vector_size_mismatch(float3 p0, float2 p1) { diff --git a/clang/test/SemaHLSL/BuiltIns/lerp-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/lerp-errors.hlsl index 868ba8a1a4713d..56c8b32cc14e0e 100644 --- a/clang/test/SemaHLSL/BuiltIns/lerp-errors.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/lerp-errors.hlsl @@ -22,7 +22,7 @@ float2 test_lerp_no_second_arg(float2 p0) { float2 test_lerp_vector_size_mismatch(float3 p0, float2 p1) { return lerp(p0, p0, p1); - // expected-warning@-1 {{implicit conversion truncates vector: 'float3' (aka 'vector') to 'float __attribute__((ext_vector_type(2)))' (vector of 2 'float' values)}} + // expected-warning@-1 {{implicit conversion truncates vector: 'float3' (aka 'vector') to 'vector' (vector of 2 'float' values)}} } float2 test_lerp_builtin_vector_size_mismatch(float3 p0, float2 p1) { diff --git 
a/clang/test/SemaHLSL/BuiltIns/mad-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/mad-errors.hlsl index 5dfbc23f8defa7..ee4605528f4109 100644 --- a/clang/test/SemaHLSL/BuiltIns/mad-errors.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/mad-errors.hlsl @@ -22,7 +22,7 @@ float2 test_mad_no_second_arg(float2 p0) { float2 test_mad_vector_size_mismatch(float3 p0, float2 p1) { return mad(p0, p0, p1); - // expected-warning@-1 {{implicit conversion truncates vector: 'float3' (aka 'vector') to 'float __attribute__((ext_vector_type(2)))' (vector of 2 'float' values)}} + // expected-warning@-1 {{implicit conversion truncates vector: 'float3' (aka 'vector') to 'vector' (vector of 2 'float' values)}} } float2 test_mad_builtin_vector_size_mismatch(float3 p0, float2 p1) { diff --git a/clang/test/SemaHLSL/BuiltIns/vector-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/vector-errors.hlsl index 6aedb9304ed933..7af10a05f76f37 100644 --- a/clang/test/SemaHLSL/BuiltIns/vector-errors.hlsl +++ b/clang/test/SemaHLSL/BuiltIns/vector-errors.hlsl @@ -2,7 +2,7 @@ // Some bad declarations hlsl::vector ShouldWorkSomeday; // expected-error{{use of alias template 'hlsl::vector' requires template arguments}} -// expected-note@*:* {{template declaration from hidden source: template using vector = element __attribute__((ext_vector_type(element_count)))}} +// expected-note@*:* {{template declaration from hidden source: template using vector = vector}} hlsl::vector<1> BadVec; // expected-error{{template argument for template type parameter must be a type}} // expected-note@*:* {{template parameter from hidden source: class element = float}} @@ -11,7 +11,7 @@ hlsl::vector AnotherBadVec; // expected-error{{template argument for // expected-note@*:* {{template parameter from hidden source: int element_count = 4}} hlsl::vector YABV; // expected-error{{too many template arguments for alias template 'vector'}} -// expected-note@*:* {{template declaration from hidden source: template using vector = element 
__attribute__((ext_vector_type(element_count)))}} +// expected-note@*:* {{template declaration from hidden source: template using vector = vector}} // This code is rejected by clang because clang puts the HLSL built-in types // into the HLSL namespace. diff --git a/clang/test/SemaHLSL/Types/BuiltinVector/ScalarSwizzleErrors.hlsl b/clang/test/SemaHLSL/Types/BuiltinVector/ScalarSwizzleErrors.hlsl index 170be6a13c3660..5088991f2e28ac 100644 --- a/clang/test/SemaHLSL/Types/BuiltinVector/ScalarSwizzleErrors.hlsl +++ b/clang/test/SemaHLSL/Types/BuiltinVector/ScalarSwizzleErrors.hlsl @@ -1,11 +1,11 @@ // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.6-library -x hlsl -finclude-default-header -verify %s int2 ToTwoInts(int V) { - return V.xy; // expected-error{{vector component access exceeds type 'int __attribute__((ext_vector_type(1)))' (vector of 1 'int' value)}} + return V.xy; // expected-error{{vector component access exceeds type 'vector' (vector of 1 'int' value)}} } float2 ToTwoFloats(float V) { - return V.rg; // expected-error{{vector component access exceeds type 'float __attribute__((ext_vector_type(1)))' (vector of 1 'float' value)}} + return V.rg; // expected-error{{vector component access exceeds type 'vector' (vector of 1 'float' value)}} } int4 SomeNonsense(int V) { diff --git a/clang/test/SemaHLSL/Types/BuiltinVector/ScalarSwizzles.hlsl b/clang/test/SemaHLSL/Types/BuiltinVector/ScalarSwizzles.hlsl index 4fa04f3d598898..683c05b20c34ed 100644 --- a/clang/test/SemaHLSL/Types/BuiltinVector/ScalarSwizzles.hlsl +++ b/clang/test/SemaHLSL/Types/BuiltinVector/ScalarSwizzles.hlsl @@ -3,8 +3,8 @@ // CHECK-LABEL: ToTwoInts -// CHECK: ExtVectorElementExpr {{.*}} 'int __attribute__((ext_vector_type(2)))' xx -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int __attribute__((ext_vector_type(1)))' lvalue +// CHECK: ExtVectorElementExpr {{.*}} 'vector' xx +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' lvalue // CHECK-NEXT: DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'V' 'int' 
int2 ToTwoInts(int V){ @@ -12,8 +12,8 @@ int2 ToTwoInts(int V){ } // CHECK-LABEL: ToFourFloats -// CHECK: ExtVectorElementExpr {{.*}} 'float __attribute__((ext_vector_type(4)))' rrrr -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float __attribute__((ext_vector_type(1)))' lvalue +// CHECK: ExtVectorElementExpr {{.*}} 'vector' rrrr +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' lvalue // CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'V' 'float' @@ -22,8 +22,8 @@ float4 ToFourFloats(float V){ } // CHECK-LABEL: FillOne -// CHECK: ExtVectorElementExpr {{.*}} 'int __attribute__((ext_vector_type(2)))' xx -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int __attribute__((ext_vector_type(1)))' +// CHECK: ExtVectorElementExpr {{.*}} 'vector' xx +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' // CHECK-NEXT: IntegerLiteral {{.*}} 'int' 1 int2 FillOne(){ @@ -31,8 +31,8 @@ int2 FillOne(){ } // CHECK-LABEL: FillOneUnsigned -// CHECK: ExtVectorElementExpr {{.*}} 'unsigned int __attribute__((ext_vector_type(3)))' xxx -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned int __attribute__((ext_vector_type(1)))' +// CHECK: ExtVectorElementExpr {{.*}} 'vector' xxx +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' // CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned int' 1 uint3 FillOneUnsigned(){ @@ -40,8 +40,8 @@ uint3 FillOneUnsigned(){ } // CHECK-LABEL: FillOneUnsignedLong -// CHECK: ExtVectorElementExpr {{.*}} 'unsigned long __attribute__((ext_vector_type(4)))' xxxx -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'unsigned long __attribute__((ext_vector_type(1)))' +// CHECK: ExtVectorElementExpr {{.*}} 'vector' xxxx +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' // CHECK-NEXT: IntegerLiteral {{.*}} 'unsigned long' 1 vector FillOneUnsignedLong(){ @@ -49,8 +49,8 @@ vector FillOneUnsignedLong(){ } // CHECK-LABEL: FillTwoPointFive -// CHECK: ExtVectorElementExpr {{.*}} 'double __attribute__((ext_vector_type(2)))' rr -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'double 
__attribute__((ext_vector_type(1)))' +// CHECK: ExtVectorElementExpr {{.*}} 'vector' rr +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' // CHECK-NEXT: FloatingLiteral {{.*}} 'double' 2.500000e+00 double2 FillTwoPointFive(){ @@ -58,8 +58,8 @@ double2 FillTwoPointFive(){ } // CHECK-LABEL: FillOneHalf -// CHECK: ExtVectorElementExpr {{.*}} 'double __attribute__((ext_vector_type(3)))' rrr -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'double __attribute__((ext_vector_type(1)))' +// CHECK: ExtVectorElementExpr {{.*}} 'vector' rrr +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' // CHECK-NEXT: FloatingLiteral {{.*}} 'double' 5.000000e-01 double3 FillOneHalf(){ @@ -67,8 +67,8 @@ double3 FillOneHalf(){ } // CHECK-LABEL: FillTwoPointFiveFloat -// CHECK: ExtVectorElementExpr {{.*}} 'float __attribute__((ext_vector_type(4)))' rrrr -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float __attribute__((ext_vector_type(1)))' +// CHECK: ExtVectorElementExpr {{.*}} 'vector' rrrr +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' // CHECK-NEXT: FloatingLiteral {{.*}} 'float' 2.500000e+00 float4 FillTwoPointFiveFloat(){ @@ -80,9 +80,9 @@ float4 FillTwoPointFiveFloat(){ // initialze the returned vector. 
// CHECK-LABEL: FillOneHalfFloat -// CHECK: ImplicitCastExpr {{.*}} 'vector':'float __attribute__((ext_vector_type(1)))' +// CHECK: ImplicitCastExpr {{.*}} 'vector' // CHECK-NEXT: ExtVectorElementExpr {{.*}} 'float' r -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float __attribute__((ext_vector_type(1)))' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' // CHECK-NEXT: FloatingLiteral {{.*}} 'float' 5.000000e-01 vector FillOneHalfFloat(){ @@ -90,9 +90,9 @@ vector FillOneHalfFloat(){ } // CHECK-LABEL: HowManyFloats -// CHECK: ExtVectorElementExpr {{.*}} 'float __attribute__((ext_vector_type(2)))' rr -// CHECK-NEXT: ExtVectorElementExpr {{.*}} 'float __attribute__((ext_vector_type(2)))' rr -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float __attribute__((ext_vector_type(1)))' lvalue +// CHECK: ExtVectorElementExpr {{.*}} 'vector' rr +// CHECK-NEXT: ExtVectorElementExpr {{.*}} 'vector' rr +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' lvalue // CHECK-NEXT: DeclRefExpr {{.*}} 'float' lvalue ParmVar {{.*}} 'V' 'float' float2 HowManyFloats(float V) { @@ -100,8 +100,8 @@ float2 HowManyFloats(float V) { } // CHECK-LABEL: HooBoy -// CHECK: ExtVectorElementExpr {{.*}} 'long __attribute__((ext_vector_type(4)))' xxxx -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'long __attribute__((ext_vector_type(1)))' +// CHECK: ExtVectorElementExpr {{.*}} 'vector' xxxx +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' // CHECK-NEXT: IntegerLiteral {{.*}} 'long' 4 int64_t4 HooBoy() { @@ -113,9 +113,9 @@ int64_t4 HooBoy() { // list with float truncation casts. 
// CHECK-LABEL: AllRighty -// CHECK: ImplicitCastExpr {{.*}} 'float3':'float __attribute__((ext_vector_type(3)))' -// CHECK-NEXT: ExtVectorElementExpr {{.*}} 'double __attribute__((ext_vector_type(3)))' rrr -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'double __attribute__((ext_vector_type(1)))' +// CHECK: ImplicitCastExpr {{.*}} 'float3':'vector' +// CHECK-NEXT: ExtVectorElementExpr {{.*}} 'vector' rrr +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' // CHECK-NEXT: FloatingLiteral {{.*}} 'double' 1.000000e+00 float3 AllRighty() { @@ -123,8 +123,8 @@ float3 AllRighty() { } // CHECK-LABEL: AllRighty2 -// CHECK: ExtVectorElementExpr {{.*}} 'float __attribute__((ext_vector_type(3)))' rrr -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float __attribute__((ext_vector_type(1)))' +// CHECK: ExtVectorElementExpr {{.*}} 'vector' rrr +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' // CHECK-NEXT: FloatingLiteral {{.*}} 'float' 1.000000e+00 float3 AllRighty2() { diff --git a/clang/test/SemaHLSL/VectorOverloadResolution.hlsl b/clang/test/SemaHLSL/VectorOverloadResolution.hlsl index 2ea7d14e80eebf..485094fd09b3c2 100644 --- a/clang/test/SemaHLSL/VectorOverloadResolution.hlsl +++ b/clang/test/SemaHLSL/VectorOverloadResolution.hlsl @@ -7,9 +7,9 @@ void Fn(half2 H); // CHECK: CallExpr {{.*}}'void' // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(double2)' // CHECK-NEXT: DeclRefExpr {{.*}}'void (double2)' lvalue Function {{.*}} 'Fn' 'void (double2)' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'double2':'double __attribute__((ext_vector_type(2)))' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2':'float __attribute__((ext_vector_type(2)))' -// CHECK-NEXT: DeclRefExpr {{.*}} 'float2':'float __attribute__((ext_vector_type(2)))' lvalue ParmVar {{.*}} 'F' 'float2':'float __attribute__((ext_vector_type(2)))' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'double2':'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2':'vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float2':'vector' lvalue ParmVar {{.*}} 'F' 
'float2':'vector' void Call(float2 F) { Fn(F); @@ -22,9 +22,9 @@ void Fn2(int16_t2 S); // CHECK: CallExpr {{.*}} 'void' // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(int64_t2)' // CHECK-NEXT: DeclRefExpr {{.*}} 'void (int64_t2)' lvalue Function {{.*}} 'Fn2' 'void (int64_t2)' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int64_t2':'long __attribute__((ext_vector_type(2)))' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int2':'int __attribute__((ext_vector_type(2)))' -// CHECK-NEXT: DeclRefExpr {{.*}} 'int2':'int __attribute__((ext_vector_type(2)))' lvalue ParmVar {{.*}} 'I' 'int2':'int __attribute__((ext_vector_type(2)))' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int64_t2':'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int2':'vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'int2':'vector' lvalue ParmVar {{.*}} 'I' 'int2':'vector' void Call2(int2 I) { Fn2(I); @@ -36,9 +36,9 @@ void Fn3( int64_t2 p0); // CHECK: CallExpr {{.*}} 'void' // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(int64_t2)' // CHECK-NEXT: DeclRefExpr {{.*}} 'void (int64_t2)' lvalue Function {{.*}} 'Fn3' 'void (int64_t2)' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int64_t2':'long __attribute__((ext_vector_type(2)))' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'half2':'half __attribute__((ext_vector_type(2)))' -// CHECK-NEXT: DeclRefExpr {{.*}} 'half2':'half __attribute__((ext_vector_type(2)))' lvalue ParmVar {{.*}} 'p0' 'half2':'half __attribute__((ext_vector_type(2)))' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int64_t2':'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'half2':'vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'half2':'vector' lvalue ParmVar {{.*}} 'p0' 'half2':'vector' // CHECKIR-LABEL: Call3 // CHECKIR: {{.*}} = fptosi <2 x half> {{.*}} to <2 x i64> void Call3(half2 p0) { @@ -49,9 +49,9 @@ void Call3(half2 p0) { // CHECK: CallExpr {{.*}} 'void' // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(int64_t2)' // CHECK-NEXT: DeclRefExpr {{.*}} 'void (int64_t2)' lvalue Function {{.*}} 'Fn3' 'void (int64_t2)' -// 
CHECK-NEXT: ImplicitCastExpr {{.*}} 'int64_t2':'long __attribute__((ext_vector_type(2)))' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2':'float __attribute__((ext_vector_type(2)))' -// CHECK-NEXT: DeclRefExpr {{.*}} 'float2':'float __attribute__((ext_vector_type(2)))' lvalue ParmVar {{.*}} 'p0' 'float2':'float __attribute__((ext_vector_type(2)))' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int64_t2':'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2':'vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float2':'vector' lvalue ParmVar {{.*}} 'p0' 'float2':'vector' // CHECKIR-LABEL: Call4 // CHECKIR: {{.*}} = fptosi <2 x float> {{.*}} to <2 x i64> void Call4(float2 p0) { @@ -64,9 +64,9 @@ void Fn4( float2 p0); // CHECK: CallExpr {{.*}} 'void' // CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(float2)' // CHECK-NEXT: DeclRefExpr {{.*}} 'void (float2)' lvalue Function {{.*}} 'Fn4' 'void (float2)' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2':'float __attribute__((ext_vector_type(2)))' -// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int64_t2':'long __attribute__((ext_vector_type(2)))' -// CHECK-NEXT: DeclRefExpr {{.*}} 'int64_t2':'long __attribute__((ext_vector_type(2)))' lvalue ParmVar {{.*}} 'p0' 'int64_t2':'long __attribute__((ext_vector_type(2)))' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float2':'vector' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int64_t2':'vector' +// CHECK-NEXT: DeclRefExpr {{.*}} 'int64_t2':'vector' lvalue ParmVar {{.*}} 'p0' 'int64_t2':'vector' // CHECKIR-LABEL: Call5 // CHECKIR: {{.*}} = sitofp <2 x i64> {{.*}} to <2 x float> void Call5(int64_t2 p0) { diff --git a/clang/test/SemaHLSL/standard_conversion_sequences.hlsl b/clang/test/SemaHLSL/standard_conversion_sequences.hlsl index 256981d2c1e2e0..c8d9f2c156e310 100644 --- a/clang/test/SemaHLSL/standard_conversion_sequences.hlsl +++ b/clang/test/SemaHLSL/standard_conversion_sequences.hlsl @@ -2,79 +2,79 @@ // RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -Wno-conversion -DNO_ERR -ast-dump %s | 
FileCheck %s void test() { - - // CHECK: VarDecl {{.*}} used f3 'vector':'float __attribute__((ext_vector_type(3)))' cinit - // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector':'float __attribute__((ext_vector_type(3)))' + + // CHECK: VarDecl {{.*}} used f3 'vector' cinit + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' // CHECK-NEXT: FloatingLiteral {{.*}} 'float' 1.000000e+00 vector f3 = 1.0; // No warning for splatting to a vector from a literal. - // CHECK: VarDecl {{.*}} used d4 'vector':'double __attribute__((ext_vector_type(4)))' cinit - // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector':'double __attribute__((ext_vector_type(4)))' - // CHECK-NEXT: ExtVectorElementExpr {{.*}} 'float __attribute__((ext_vector_type(4)))' xyzx - // CHECK-NEXT: DeclRefExpr {{.*}} 'vector':'float __attribute__((ext_vector_type(3)))' lvalue Var {{.*}} 'f3' 'vector':'float __attribute__((ext_vector_type(3)))' + // CHECK: VarDecl {{.*}} used d4 'vector' cinit + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' + // CHECK-NEXT: ExtVectorElementExpr {{.*}} 'vector' xyzx + // CHECK-NEXT: DeclRefExpr {{.*}} 'vector' lvalue Var {{.*}} 'f3' 'vector' vector d4 = f3.xyzx; // No warnings for promotion or explicit extension. 
- // CHECK: VarDecl {{.*}} used f2 'vector':'float __attribute__((ext_vector_type(2)))' cinit - // CHECK-NEXT: ImplicitCastExpr {{.*}} 'float __attribute__((ext_vector_type(2)))' - // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector':'float __attribute__((ext_vector_type(3)))' - // CHECK-NEXT: DeclRefExpr {{.*}} 'vector':'float __attribute__((ext_vector_type(3)))' lvalue Var {{.*}} 'f3' 'vector':'float __attribute__((ext_vector_type(3)))' - // expected-warning@#f2{{implicit conversion truncates vector: 'vector' (vector of 3 'float' values) to 'float __attribute__((ext_vector_type(2)))' (vector of 2 'float' values)}} + // CHECK: VarDecl {{.*}} used f2 'vector' cinit + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' + // CHECK-NEXT: DeclRefExpr {{.*}} 'vector' lvalue Var {{.*}} 'f3' 'vector' + // expected-warning@#f2{{implicit conversion truncates vector: 'vector' (vector of 3 'float' values) to 'vector' (vector of 2 'float' values)}} vector f2 = f3; // #f2 - // CHECK: VarDecl {{.*}} f2_2 'vector':'float __attribute__((ext_vector_type(2)))' cinit - // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector':'float __attribute__((ext_vector_type(2)))' - // CHECK-NEXT: ImplicitCastExpr {{.*}} 'double __attribute__((ext_vector_type(2)))' - // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector':'double __attribute__((ext_vector_type(4)))' - // CHECK-NEXT: DeclRefExpr {{.*}} 'vector':'double __attribute__((ext_vector_type(4)))' lvalue Var {{.*}} 'd4' 'vector':'double __attribute__((ext_vector_type(4)))' + // CHECK: VarDecl {{.*}} f2_2 'vector' cinit + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' + // CHECK-NEXT: DeclRefExpr {{.*}} 'vector' lvalue Var {{.*}} 'd4' 'vector' // expected-warning@#f2_2{{implicit conversion truncates vector: 'vector' (vector of 4 'double' values) to 'vector' (vector of 2 'float' values)}} // 
expected-warning@#f2_2{{implicit conversion loses floating-point precision: 'vector' (vector of 4 'double' values) to 'vector' (vector of 2 'float' values)}} vector f2_2 = d4; // #f2_2 - // CHECK: VarDecl {{.*}} i2 'vector':'int __attribute__((ext_vector_type(2)))' cinit - // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector':'int __attribute__((ext_vector_type(2)))' - // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector':'float __attribute__((ext_vector_type(2)))' - // CHECK-NEXT: DeclRefExpr {{.*}} 'vector':'float __attribute__((ext_vector_type(2)))' lvalue Var {{.*}} 'f2' 'vector':'float __attribute__((ext_vector_type(2)))' + // CHECK: VarDecl {{.*}} i2 'vector' cinit + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' + // CHECK-NEXT: DeclRefExpr {{.*}} 'vector' lvalue Var {{.*}} 'f2' 'vector' // expected-warning@#i2{{mplicit conversion turns floating-point number into integer: 'vector' (vector of 2 'float' values) to 'vector' (vector of 2 'int' values)}} vector i2 = f2; // #i2 - - // CHECK: VarDecl {{.*}} i2_2 'vector':'int __attribute__((ext_vector_type(2)))' cinit - // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector':'int __attribute__((ext_vector_type(2)))' - // CHECK-NEXT: ImplicitCastExpr {{.*}} 'double __attribute__((ext_vector_type(2)))' - // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector':'double __attribute__((ext_vector_type(4)))' - // CHECK-NEXT: DeclRefExpr {{.*}} 'vector':'double __attribute__((ext_vector_type(4)))' lvalue Var {{.*}} 'd4' 'vector':'double __attribute__((ext_vector_type(4)))' + + // CHECK: VarDecl {{.*}} i2_2 'vector' cinit + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' + // CHECK-NEXT: DeclRefExpr {{.*}} 'vector' lvalue Var {{.*}} 'd4' 'vector' // expected-warning@#i2_2{{implicit conversion truncates vector: 'vector' (vector of 4 'double' values) to 'vector' (vector of 2 'int' values)}} // 
expected-warning@#i2_2{{implicit conversion turns floating-point number into integer: 'vector' (vector of 4 'double' values) to 'vector' (vector of 2 'int' values)}} vector i2_2 = d4; // #i2_2 - // CHECK: VarDecl {{.*}} used i64_4 'vector':'long __attribute__((ext_vector_type(4)))' cinit - // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector':'long __attribute__((ext_vector_type(4)))' - // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector':'double __attribute__((ext_vector_type(4)))' - // CHECK-NEXT: DeclRefExpr {{.*}} 'vector':'double __attribute__((ext_vector_type(4)))' lvalue Var {{.*}} 'd4' 'vector':'double __attribute__((ext_vector_type(4)))' + // CHECK: VarDecl {{.*}} used i64_4 'vector' cinit + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' + // CHECK-NEXT: DeclRefExpr {{.*}} 'vector' lvalue Var {{.*}} 'd4' 'vector' // expected-warning@#i64_4{{implicit conversion turns floating-point number into integer: 'vector' (vector of 4 'double' values) to 'vector' (vector of 4 'long' values)}} vector i64_4 = d4; // #i64_4 - // CHECK: VarDecl {{.*}} used i2_3 'vector':'int __attribute__((ext_vector_type(2)))' cinit - // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector':'int __attribute__((ext_vector_type(2)))' - // CHECK-NEXT: ImplicitCastExpr {{.*}} 'long __attribute__((ext_vector_type(2)))' - // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector':'long __attribute__((ext_vector_type(4)))' - // CHECK-NEXT: DeclRefExpr {{.*}} 'vector':'long __attribute__((ext_vector_type(4)))' lvalue Var {{.*}} 'i64_4' 'vector':'long __attribute__((ext_vector_type(4)))' + // CHECK: VarDecl {{.*}} used i2_3 'vector' cinit + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' + // CHECK-NEXT: DeclRefExpr {{.*}} 'vector' lvalue Var {{.*}} 'i64_4' 'vector' // expected-warning@#i2_3{{implicit conversion loses integer precision: 'vector' (vector of 4 'long' values) to 
'vector' (vector of 2 'int' values)}} // expected-warning@#i2_3{{implicit conversion truncates vector: 'vector' (vector of 4 'long' values) to 'vector' (vector of 2 'int' values)}} vector i2_3 = i64_4; // #i2_3 - //CHECK: VarDecl {{.*}} b2 'vector':'bool __attribute__((ext_vector_type(2)))' cinit - //CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector':'bool __attribute__((ext_vector_type(2)))' - //CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector':'int __attribute__((ext_vector_type(2)))' - //CHECK-NEXT: DeclRefExpr {{.*}} 'vector':'int __attribute__((ext_vector_type(2)))' lvalue Var {{.*}} 'i2_3' 'vector':'int __attribute__((ext_vector_type(2)))' + //CHECK: VarDecl {{.*}} b2 'vector' cinit + //CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' + //CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' + //CHECK-NEXT: DeclRefExpr {{.*}} 'vector' lvalue Var {{.*}} 'i2_3' 'vector' vector b2 = i2_3; // No warning for integer to bool conversion. - // CHECK: VarDecl {{.*}} b2_2 'vector':'bool __attribute__((ext_vector_type(2)))' cinit - // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector':'bool __attribute__((ext_vector_type(2)))' - // CHECK-NEXT: ImplicitCastExpr {{.*}} 'double __attribute__((ext_vector_type(2)))' - // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector':'double __attribute__((ext_vector_type(4)))' - // CHECK-NEXT: DeclRefExpr {{.*}} 'vector':'double __attribute__((ext_vector_type(4)))' lvalue Var {{.*}} 'd4' 'vector':'double __attribute__((ext_vector_type(4)))' + // CHECK: VarDecl {{.*}} b2_2 'vector' cinit + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' + // CHECK-NEXT: ImplicitCastExpr {{.*}} 'vector' + // CHECK-NEXT: DeclRefExpr {{.*}} 'vector' lvalue Var {{.*}} 'd4' 'vector' // expected-warning@#b2_2{{implicit conversion truncates vector: 'vector' (vector of 4 'double' values) to 'vector' (vector of 2 'bool' values)}} // expected-warning@#b2_2{{implicit conversion turns floating-point number into integer: 'vector' (vector of 4 'double' 
values) to 'vector' (vector of 2 'bool' values)}} vector b2_2 = d4; // #b2_2 From 46c8f25b0a7d664d4ef3b8d6376815a877788463 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Fri, 14 Jun 2024 11:22:09 -0700 Subject: [PATCH 136/155] Check whether EvaluatedStmt::Value is valid in VarDecl::hasInit (#94515) VarDecl::isNull() doesn't tell whether the VarDecl has an initializer as methods like ensureEvaluatedStmt can create an EvaluatedStmt even when there isn't an initializer. Revert e1c3e16d24b5cc097ff08e9283f53319acd3f245 as the change isn't needed anymore with this change. See the discussion in https://github.com/llvm/llvm-project/pull/93749. --- clang/lib/AST/Decl.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 1f19dadafa44e8..9d0a835a12c458 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -2390,6 +2390,9 @@ bool VarDecl::hasInit() const { if (P->hasUnparsedDefaultArg() || P->hasUninstantiatedDefaultArg()) return false; + if (auto *Eval = getEvaluatedStmt()) + return Eval->Value.isValid(); + return !Init.isNull(); } @@ -2402,10 +2405,8 @@ Expr *VarDecl::getInit() { auto *Eval = getEvaluatedStmt(); - return cast_if_present( - Eval->Value.isOffset() - ? Eval->Value.get(getASTContext().getExternalSource()) - : Eval->Value.get(nullptr)); + return cast(Eval->Value.get( + Eval->Value.isOffset() ? getASTContext().getExternalSource() : nullptr)); } Stmt **VarDecl::getInitAddress() { From a66e2a1988cb14ec2feadfbc3dd1ff4bd77f4be9 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 14 Jun 2024 14:34:56 -0400 Subject: [PATCH 137/155] [libc++] Revert temporary attempt to implement LWG 4110 (#95263) When I filed LWG4110 after the discussion in #93071, I thought it was going to be a straightforward fix. It turns out that it isn't, so we should stay in the state where libc++ is Standards conforming even if that state leads to some reasonable code being rejected by the library. 
Once WG21 figures out what to do with this issue and votes on it, we'll implement it through our normal means. This reverts f638f7b6a7c2 and 16f2aa1a2ddf. --- libcxx/docs/Status/Cxx2cIssues.csv | 1 - libcxx/include/__memory/shared_ptr.h | 2 +- .../nullptr_t_deleter.pass.cpp | 7 ++++--- .../nullptr_t_deleter_allocator.pass.cpp | 12 ++++-------- .../pointer_deleter.pass.cpp | 8 -------- .../pointer_deleter_allocator.pass.cpp | 8 -------- 6 files changed, 9 insertions(+), 29 deletions(-) diff --git a/libcxx/docs/Status/Cxx2cIssues.csv b/libcxx/docs/Status/Cxx2cIssues.csv index 28359b7bb49ac4..8d24457186310c 100644 --- a/libcxx/docs/Status/Cxx2cIssues.csv +++ b/libcxx/docs/Status/Cxx2cIssues.csv @@ -65,5 +65,4 @@ "`3343 `__","Ordering of calls to ``unlock()`` and ``notify_all()`` in Effects element of ``notify_all_at_thread_exit()`` should be reversed","Not Yet Adopted","|Complete|","16.0","" "XXXX","","The sys_info range should be affected by save","Not Yet Adopted","|Complete|","19.0" "`4071 `__","","``reference_wrapper`` comparisons are not SFINAE-friendly","Not Yet Adopted","|Complete|","19.0" -"`4110 `__","","``shared_ptr(nullptr_t, Deleter)`` is overconstrained, breaking some sensible deleters","Not Yet Adopted","|Complete|","19.0" "","","","","","" diff --git a/libcxx/include/__memory/shared_ptr.h b/libcxx/include/__memory/shared_ptr.h index 7b5002cb95d32b..00db96185be7c6 100644 --- a/libcxx/include/__memory/shared_ptr.h +++ b/libcxx/include/__memory/shared_ptr.h @@ -404,7 +404,7 @@ struct __shared_ptr_deleter_ctor_reqs { }; template -using __shared_ptr_nullptr_deleter_ctor_reqs = _And, __well_formed_deleter<_Dp, _Tp*> >; +using __shared_ptr_nullptr_deleter_ctor_reqs = _And, __well_formed_deleter<_Dp, nullptr_t> >; #if defined(_LIBCPP_ABI_ENABLE_SHARED_PTR_TRIVIAL_ABI) # define _LIBCPP_SHARED_PTR_TRIVIAL_ABI __attribute__((__trivial_abi__)) diff --git 
a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter.pass.cpp index 4ea752b36bd018..13340ed5294c05 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter.pass.cpp @@ -32,16 +32,17 @@ int A::count = 0; // LWG 3233. Broken requirements for shared_ptr converting constructors // https://cplusplus.github.io/LWG/issue3233 static_assert( std::is_constructible, std::nullptr_t, test_deleter >::value, ""); -static_assert(!std::is_constructible, std::nullptr_t, bad_deleter>::value, ""); +static_assert(!std::is_constructible, std::nullptr_t, bad_deleter>::value, ""); +static_assert(!std::is_constructible, std::nullptr_t, no_nullptr_deleter>::value, ""); static_assert(!std::is_constructible, std::nullptr_t, no_move_deleter>::value, ""); #if TEST_STD_VER >= 17 -static_assert(std::is_constructible, std::nullptr_t, test_deleter >::value, ""); +static_assert( std::is_constructible, std::nullptr_t, test_deleter >::value, ""); static_assert(!std::is_constructible, std::nullptr_t, bad_deleter>::value, ""); static_assert(!std::is_constructible, std::nullptr_t, no_nullptr_deleter>::value, ""); static_assert(!std::is_constructible, std::nullptr_t, no_move_deleter>::value, ""); -static_assert(std::is_constructible, std::nullptr_t, test_deleter >::value, ""); +static_assert( std::is_constructible, std::nullptr_t, test_deleter >::value, ""); static_assert(!std::is_constructible, std::nullptr_t, bad_deleter>::value, ""); static_assert(!std::is_constructible, std::nullptr_t, no_nullptr_deleter>::value, ""); static_assert(!std::is_constructible, std::nullptr_t, no_move_deleter>::value, ""); diff --git 
a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator.pass.cpp index a479b24c4595ab..53ca6fb5b234d4 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator.pass.cpp @@ -33,21 +33,17 @@ int A::count = 0; // LWG 3233. Broken requirements for shared_ptr converting constructors // https://cplusplus.github.io/LWG/issue3233 static_assert( std::is_constructible, std::nullptr_t, test_deleter, test_allocator >::value, ""); -static_assert(!std::is_constructible, std::nullptr_t, bad_deleter, test_allocator >::value, - ""); +static_assert(!std::is_constructible, std::nullptr_t, bad_deleter, test_allocator >::value, ""); +static_assert(!std::is_constructible, std::nullptr_t, no_nullptr_deleter, test_allocator >::value, ""); static_assert(!std::is_constructible, std::nullptr_t, no_move_deleter, test_allocator >::value, ""); #if TEST_STD_VER >= 17 -static_assert( - std::is_constructible, std::nullptr_t, test_deleter, test_allocator >::value, - ""); +static_assert( std::is_constructible, std::nullptr_t, test_deleter, test_allocator >::value, ""); static_assert(!std::is_constructible, std::nullptr_t, bad_deleter, test_allocator >::value, ""); static_assert(!std::is_constructible, std::nullptr_t, no_nullptr_deleter, test_allocator >::value, ""); static_assert(!std::is_constructible, std::nullptr_t, no_move_deleter, test_allocator >::value, ""); -static_assert( - std::is_constructible, std::nullptr_t, test_deleter, test_allocator >::value, - ""); +static_assert( std::is_constructible, std::nullptr_t, test_deleter, test_allocator >::value, ""); 
static_assert(!std::is_constructible, std::nullptr_t, bad_deleter, test_allocator >::value, ""); static_assert(!std::is_constructible, std::nullptr_t, no_nullptr_deleter, test_allocator >::value, ""); static_assert(!std::is_constructible, std::nullptr_t, no_move_deleter, test_allocator >::value, ""); diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter.pass.cpp index 95dcb92b51993c..562acf56d96fe1 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter.pass.cpp @@ -115,14 +115,6 @@ int main(int, char**) } #endif // TEST_STD_VER >= 11 -#if TEST_STD_VER >= 14 - { - // LWG 4110 - auto deleter = [](auto pointer) { delete pointer; }; - std::shared_ptr p(new int, deleter); - } -#endif - test_function_type(); return 0; } diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp index 89e7d0b02d421b..9dffbcdd59a735 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp @@ -165,13 +165,5 @@ int main(int, char**) test_allocator >::value, ""); } -#if TEST_STD_VER >= 14 - { - // LWG 4110 - auto deleter = [](auto pointer) { delete pointer; }; - std::shared_ptr p(new int, deleter, std::allocator()); - } -#endif - return 0; } From 
ade28a77ed17760bf2fde57c6571b69489b0bac0 Mon Sep 17 00:00:00 2001 From: Paul Kirth Date: Fri, 14 Jun 2024 11:35:19 -0700 Subject: [PATCH 138/155] [clang-doc][cmake] Copy assets to build directory (#95187) While we copy the asset files, like index.js, into the correct location in the install step, tests do not have access to those resources in the build directory. This patch copies the contents of the clang-doc/assets directory into the build folder, so that they can be used in testing. Pull Request: https://github.com/llvm/llvm-project/pull/95185 --- .../clang-doc/tool/CMakeLists.txt | 40 ++++++++++++++++--- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/clang-tools-extra/clang-doc/tool/CMakeLists.txt b/clang-tools-extra/clang-doc/tool/CMakeLists.txt index fb8317b272932f..4944251245c6bc 100644 --- a/clang-tools-extra/clang-doc/tool/CMakeLists.txt +++ b/clang-tools-extra/clang-doc/tool/CMakeLists.txt @@ -18,10 +18,38 @@ target_link_libraries(clang-doc clangDoc ) -install(FILES ../assets/clang-doc-default-stylesheet.css - DESTINATION "${CMAKE_INSTALL_DATADIR}/clang" - COMPONENT clang-doc) -install(FILES ../assets/index.js - DESTINATION "${CMAKE_INSTALL_DATADIR}/clang" - COMPONENT clang-doc) +set(assets + index.js + clang-doc-default-stylesheet.css +) + +set(asset_dir "${CMAKE_CURRENT_SOURCE_DIR}/../assets") +set(resource_dir "${CMAKE_BINARY_DIR}/share/clang") +set(out_files) + +function(copy_files_to_dst src_dir dst_dir file) + set(src "${src_dir}/${file}") + set(dst "${dst_dir}/${file}") + add_custom_command(OUTPUT ${dst} + DEPENDS ${src} + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${src} ${dst} + COMMENT "Copying ${file} to ${dst_dir}" + ) + list(APPEND out_files ${dst}) + set(out_files ${out_files} PARENT_SCOPE) +endfunction(copy_files_to_dst) + +foreach(f ${assets}) + install(FILES ${asset_dir}/${f} + DESTINATION "${CMAKE_INSTALL_DATADIR}/clang" + COMPONENT clang-doc) + copy_files_to_dst(${asset_dir} ${resource_dir} ${f}) +endforeach(f) + 
+add_custom_target(copy-clang-doc-assets + DEPENDS ${out_files} + COMMENT "Copying Clang-Doc Assets" +) +set_target_properties(copy-clang-doc-assets PROPERTIES FOLDER "Clang-Doc/Assets") +add_dependencies(clang-doc copy-clang-doc-assets) From e7d563501f056d0ae54440468df04b04c10f8070 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 14 Jun 2024 14:41:18 -0400 Subject: [PATCH 139/155] [libc++] Revert LWG3223 Broken requirements for shared_ptr converting constructors (#93071) This reverts commit d868f0970, which was shown to break some code and we don't know yet whether the code should be valid or not. Reverting until we've had time to figure it out next week. --- libcxx/docs/Status/Cxx20Issues.csv | 2 +- libcxx/include/__memory/shared_ptr.h | 7 +-- .../nullptr_t_deleter.pass.cpp | 20 -------- .../nullptr_t_deleter_allocator.pass.cpp | 21 -------- .../pointer_deleter.pass.cpp | 44 ++++++++++++++--- .../pointer_deleter_allocator.pass.cpp | 47 ++++++++++++++---- .../util.smartptr.shared.const/types.h | 49 ------------------- 7 files changed, 77 insertions(+), 113 deletions(-) delete mode 100644 libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/types.h diff --git a/libcxx/docs/Status/Cxx20Issues.csv b/libcxx/docs/Status/Cxx20Issues.csv index e748ff6ad749b7..6453f227cfcc24 100644 --- a/libcxx/docs/Status/Cxx20Issues.csv +++ b/libcxx/docs/Status/Cxx20Issues.csv @@ -200,7 +200,7 @@ "`3200 `__","``midpoint``\ should not constrain ``T``\ is complete","Prague","|Nothing To Do|","" "`3201 `__","``lerp``\ should be marked as ``noexcept``\ ","Prague","|Complete|","" "`3226 `__","``zoned_time``\ constructor from ``string_view``\ should accept ``zoned_time``\ ","Prague","","","|chrono|" -"`3233 `__","Broken requirements for ``shared_ptr``\ converting constructors","Prague","|Complete|","19.0" +"`3233 `__","Broken requirements for ``shared_ptr``\ converting constructors","Prague","","" "`3237 `__","LWG 3038 and 3190 have inconsistent 
PRs","Prague","|Complete|","16.0" "`3238 `__","Insufficiently-defined behavior of ``std::function``\ deduction guides","Prague","|Nothing To Do|","" "`3242 `__","``std::format``\ : missing rules for ``arg-id``\ in ``width``\ and ``precision``\ ","Prague","|Complete|","14.0","|format|" diff --git a/libcxx/include/__memory/shared_ptr.h b/libcxx/include/__memory/shared_ptr.h index 00db96185be7c6..a8ff189df2aa52 100644 --- a/libcxx/include/__memory/shared_ptr.h +++ b/libcxx/include/__memory/shared_ptr.h @@ -403,9 +403,6 @@ struct __shared_ptr_deleter_ctor_reqs { __well_formed_deleter<_Dp, _Yp*>::value; }; -template -using __shared_ptr_nullptr_deleter_ctor_reqs = _And, __well_formed_deleter<_Dp, nullptr_t> >; - #if defined(_LIBCPP_ABI_ENABLE_SHARED_PTR_TRIVIAL_ABI) # define _LIBCPP_SHARED_PTR_TRIVIAL_ABI __attribute__((__trivial_abi__)) #else @@ -501,7 +498,7 @@ class _LIBCPP_SHARED_PTR_TRIVIAL_ABI _LIBCPP_TEMPLATE_VIS shared_ptr { #endif // _LIBCPP_HAS_NO_EXCEPTIONS } - template ::value, int> = 0 > + template _LIBCPP_HIDE_FROM_ABI shared_ptr(nullptr_t __p, _Dp __d) : __ptr_(nullptr) { #ifndef _LIBCPP_HAS_NO_EXCEPTIONS try { @@ -521,7 +518,7 @@ class _LIBCPP_SHARED_PTR_TRIVIAL_ABI _LIBCPP_TEMPLATE_VIS shared_ptr { #endif // _LIBCPP_HAS_NO_EXCEPTIONS } - template ::value, int> = 0 > + template _LIBCPP_HIDE_FROM_ABI shared_ptr(nullptr_t __p, _Dp __d, _Alloc __a) : __ptr_(nullptr) { #ifndef _LIBCPP_HAS_NO_EXCEPTIONS try { diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter.pass.cpp index 13340ed5294c05..49497b6956b9fb 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter.pass.cpp @@ 
-17,7 +17,6 @@ #include "test_macros.h" #include "deleter_types.h" -#include "types.h" struct A { static int count; @@ -29,25 +28,6 @@ struct A int A::count = 0; -// LWG 3233. Broken requirements for shared_ptr converting constructors -// https://cplusplus.github.io/LWG/issue3233 -static_assert( std::is_constructible, std::nullptr_t, test_deleter >::value, ""); -static_assert(!std::is_constructible, std::nullptr_t, bad_deleter>::value, ""); -static_assert(!std::is_constructible, std::nullptr_t, no_nullptr_deleter>::value, ""); -static_assert(!std::is_constructible, std::nullptr_t, no_move_deleter>::value, ""); - -#if TEST_STD_VER >= 17 -static_assert( std::is_constructible, std::nullptr_t, test_deleter >::value, ""); -static_assert(!std::is_constructible, std::nullptr_t, bad_deleter>::value, ""); -static_assert(!std::is_constructible, std::nullptr_t, no_nullptr_deleter>::value, ""); -static_assert(!std::is_constructible, std::nullptr_t, no_move_deleter>::value, ""); - -static_assert( std::is_constructible, std::nullptr_t, test_deleter >::value, ""); -static_assert(!std::is_constructible, std::nullptr_t, bad_deleter>::value, ""); -static_assert(!std::is_constructible, std::nullptr_t, no_nullptr_deleter>::value, ""); -static_assert(!std::is_constructible, std::nullptr_t, no_move_deleter>::value, ""); -#endif - int main(int, char**) { { diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator.pass.cpp index 53ca6fb5b234d4..4e9fc227b99e81 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/nullptr_t_deleter_allocator.pass.cpp @@ -17,8 +17,6 @@ #include 
"test_allocator.h" #include "min_allocator.h" -#include "types.h" - struct A { static int count; @@ -30,25 +28,6 @@ struct A int A::count = 0; -// LWG 3233. Broken requirements for shared_ptr converting constructors -// https://cplusplus.github.io/LWG/issue3233 -static_assert( std::is_constructible, std::nullptr_t, test_deleter, test_allocator >::value, ""); -static_assert(!std::is_constructible, std::nullptr_t, bad_deleter, test_allocator >::value, ""); -static_assert(!std::is_constructible, std::nullptr_t, no_nullptr_deleter, test_allocator >::value, ""); -static_assert(!std::is_constructible, std::nullptr_t, no_move_deleter, test_allocator >::value, ""); - -#if TEST_STD_VER >= 17 -static_assert( std::is_constructible, std::nullptr_t, test_deleter, test_allocator >::value, ""); -static_assert(!std::is_constructible, std::nullptr_t, bad_deleter, test_allocator >::value, ""); -static_assert(!std::is_constructible, std::nullptr_t, no_nullptr_deleter, test_allocator >::value, ""); -static_assert(!std::is_constructible, std::nullptr_t, no_move_deleter, test_allocator >::value, ""); - -static_assert( std::is_constructible, std::nullptr_t, test_deleter, test_allocator >::value, ""); -static_assert(!std::is_constructible, std::nullptr_t, bad_deleter, test_allocator >::value, ""); -static_assert(!std::is_constructible, std::nullptr_t, no_nullptr_deleter, test_allocator >::value, ""); -static_assert(!std::is_constructible, std::nullptr_t, no_move_deleter, test_allocator >::value, ""); -#endif - int main(int, char**) { test_allocator_statistics alloc_stats; diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter.pass.cpp index 562acf56d96fe1..0f4aa0f5c06893 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter.pass.cpp 
+++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter.pass.cpp @@ -17,8 +17,6 @@ #include "test_macros.h" #include "deleter_types.h" -#include "types.h" - struct A { static int count; @@ -30,8 +28,38 @@ struct A int A::count = 0; -// LWG 3233. Broken requirements for shared_ptr converting constructors -// https://cplusplus.github.io/LWG/issue3233 +struct bad_ty { }; + +struct bad_deleter +{ + void operator()(bad_ty) { } +}; + +struct no_move_deleter +{ + no_move_deleter(no_move_deleter const&) = delete; + no_move_deleter(no_move_deleter &&) = delete; + void operator()(int*) { } +}; + +static_assert(!std::is_move_constructible::value, ""); + +struct Base { }; +struct Derived : Base { }; + +template +class MoveDeleter +{ + MoveDeleter(); + MoveDeleter(MoveDeleter const&); +public: + MoveDeleter(MoveDeleter&&) {} + + explicit MoveDeleter(int) {} + + void operator()(T* ptr) { delete ptr; } +}; + // https://llvm.org/PR60258 // Invalid constructor SFINAE for std::shared_ptr's array ctors static_assert( std::is_constructible, int*, test_deleter >::value, ""); @@ -40,12 +68,12 @@ static_assert( std::is_constructible, Derived*, test_dele static_assert(!std::is_constructible, int*, test_deleter >::value, ""); #if TEST_STD_VER >= 17 -static_assert( std::is_constructible, int*, test_deleter >::value, ""); +static_assert( std::is_constructible, int*, test_deleter>::value, ""); static_assert(!std::is_constructible, int*, bad_deleter>::value, ""); -static_assert(!std::is_constructible, int(*)[], test_deleter >::value, ""); -static_assert( std::is_constructible, int*, test_deleter >::value, ""); +static_assert(!std::is_constructible, int(*)[], test_deleter>::value, ""); +static_assert( std::is_constructible, int*, test_deleter>::value, ""); static_assert(!std::is_constructible, int*, bad_deleter>::value, ""); -static_assert(!std::is_constructible, int(*)[5], test_deleter >::value, ""); 
+static_assert(!std::is_constructible, int(*)[5], test_deleter>::value, ""); #endif int f() { return 5; } diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp index 9dffbcdd59a735..a110525b9b922d 100644 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp +++ b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/pointer_deleter_allocator.pass.cpp @@ -17,7 +17,6 @@ #include "test_allocator.h" #include "min_allocator.h" -#include "types.h" struct A { static int count; @@ -29,8 +28,38 @@ struct A int A::count = 0; -// LWG 3233. Broken requirements for shared_ptr converting constructors -// https://cplusplus.github.io/LWG/issue3233 +struct bad_ty { }; + +struct bad_deleter +{ + void operator()(bad_ty) { } +}; + +struct no_move_deleter +{ + no_move_deleter(no_move_deleter const&) = delete; + no_move_deleter(no_move_deleter &&) = delete; + void operator()(int*) { } +}; + +static_assert(!std::is_move_constructible::value, ""); + +struct Base { }; +struct Derived : Base { }; + +template +class MoveDeleter +{ + MoveDeleter(); + MoveDeleter(MoveDeleter const&); +public: + MoveDeleter(MoveDeleter&&) {} + + explicit MoveDeleter(int) {} + + void operator()(T* ptr) { delete ptr; } +}; + // https://llvm.org/PR60258 // Invalid constructor SFINAE for std::shared_ptr's array ctors static_assert( std::is_constructible, int*, test_deleter, test_allocator >::value, ""); @@ -39,12 +68,12 @@ static_assert( std::is_constructible, Derived*, test_dele static_assert(!std::is_constructible, int*, test_deleter, test_allocator >::value, ""); #if TEST_STD_VER >= 17 -static_assert( std::is_constructible, int*, test_deleter, test_allocator >::value, ""); 
-static_assert(!std::is_constructible, int*, bad_deleter, test_allocator >::value, ""); -static_assert(!std::is_constructible, int(*)[], test_deleter, test_allocator >::value, ""); -static_assert( std::is_constructible, int*, test_deleter, test_allocator >::value, ""); -static_assert(!std::is_constructible, int*, bad_deleter, test_allocator >::value, ""); -static_assert(!std::is_constructible, int(*)[5], test_deleter, test_allocator >::value, ""); +static_assert( std::is_constructible, int*, test_deleter, test_allocator>::value, ""); +static_assert(!std::is_constructible, int*, bad_deleter, test_allocator>::value, ""); +static_assert(!std::is_constructible, int(*)[], test_deleter, test_allocator>::value, ""); +static_assert( std::is_constructible, int*, test_deleter, test_allocator>::value, ""); +static_assert(!std::is_constructible, int*, bad_deleter, test_allocator>::value, ""); +static_assert(!std::is_constructible, int(*)[5], test_deleter, test_allocator>::value, ""); #endif diff --git a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/types.h b/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/types.h deleted file mode 100644 index 5bfb3d70febea0..00000000000000 --- a/libcxx/test/std/utilities/memory/util.smartptr/util.smartptr.shared/util.smartptr.shared.const/types.h +++ /dev/null @@ -1,49 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef TEST_STD_UTILITIES_MEMORY_UTIL_SMARTPTR_SHARED_CONST_TYPES_H -#define TEST_STD_UTILITIES_MEMORY_UTIL_SMARTPTR_SHARED_CONST_TYPES_H - -#include - -struct bad_ty {}; - -struct bad_deleter { - void operator()(bad_ty) {} -}; - -struct no_move_deleter { - no_move_deleter(no_move_deleter const&) = delete; - no_move_deleter(no_move_deleter&&) = delete; - void operator()(int*) {} -}; - -static_assert(!std::is_move_constructible::value, ""); - -struct no_nullptr_deleter { - void operator()(int*) const {} - void operator()(std::nullptr_t) const = delete; -}; - -struct Base {}; -struct Derived : Base {}; - -template -class MoveDeleter { - MoveDeleter(); - MoveDeleter(MoveDeleter const&); - -public: - MoveDeleter(MoveDeleter&&) {} - - explicit MoveDeleter(int) {} - - void operator()(T* ptr) { delete ptr; } -}; - -#endif // TEST_STD_UTILITIES_MEMORY_UTIL_SMARTPTR_SHARED_CONST_TYPES_H From 1551c094e8afe674eaafc20e17bcd9b2f830b7de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Fri, 14 Jun 2024 11:44:55 -0700 Subject: [PATCH 140/155] [flang] Lower function/subroutine attribute to func op (#95468) Keep track of the Fortran procedure attributes on the func operation. 
--- .../flang/Optimizer/Dialect/FIROpsSupport.h | 12 +++++++ flang/lib/Lower/CallInterface.cpp | 11 +++++++ .../OpenMP/declare-target-func-and-subr.f90 | 4 +-- ...arget-implicit-func-and-subr-cap-enter.f90 | 4 +-- ...lare-target-implicit-func-and-subr-cap.f90 | 4 +-- .../declare-target-implicit-tarop-cap.f90 | 2 +- flang/test/Lower/func-attrs.f90 | 31 +++++++++++++++++++ flang/test/Lower/host-associated.f90 | 2 +- 8 files changed, 62 insertions(+), 8 deletions(-) create mode 100644 flang/test/Lower/func-attrs.f90 diff --git a/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h b/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h index 47b80cca5d6497..116c1dfea51b49 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h +++ b/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h @@ -148,6 +148,18 @@ static constexpr llvm::StringRef getAdaptToByRefAttrName() { return "adapt.valuebyref"; } +static constexpr llvm::StringRef getFuncPureAttrName() { + return "fir.func_pure"; +} + +static constexpr llvm::StringRef getFuncElementAttrName() { + return "fir.func_elemental"; +} + +static constexpr llvm::StringRef getFuncRecursiveAttrName() { + return "fir.func_recursive"; +} + // Attribute for an alloca that is a trivial adaptor for converting a value to // pass-by-ref semantics for a VALUE parameter. The optimizer may be able to // eliminate these. diff --git a/flang/lib/Lower/CallInterface.cpp b/flang/lib/Lower/CallInterface.cpp index cfbb7c7f6b4f4e..5f1d69c1de7acf 100644 --- a/flang/lib/Lower/CallInterface.cpp +++ b/flang/lib/Lower/CallInterface.cpp @@ -610,6 +610,17 @@ static void addSymbolAttribute(mlir::func::FuncOp func, } } + // Set procedure attributes to the func op. 
+ if (IsPureProcedure(sym)) + func->setAttr(fir::getFuncPureAttrName(), + mlir::UnitAttr::get(&mlirContext)); + if (IsElementalProcedure(sym)) + func->setAttr(fir::getFuncElementAttrName(), + mlir::UnitAttr::get(&mlirContext)); + if (sym.attrs().test(Fortran::semantics::Attr::RECURSIVE)) + func->setAttr(fir::getFuncRecursiveAttrName(), + mlir::UnitAttr::get(&mlirContext)); + // Only add this on bind(C) functions for which the symbol is not reflected in // the current context. if (!Fortran::semantics::IsBindCProcedure(sym)) diff --git a/flang/test/Lower/OpenMP/declare-target-func-and-subr.f90 b/flang/test/Lower/OpenMP/declare-target-func-and-subr.f90 index 3d2c4067dab716..0d138321445ce6 100644 --- a/flang/test/Lower/OpenMP/declare-target-func-and-subr.f90 +++ b/flang/test/Lower/OpenMP/declare-target-func-and-subr.f90 @@ -154,7 +154,7 @@ SUBROUTINE SUBR_DEFAULT_EXTENDEDLIST() !! ----- ! DEVICE-LABEL: func.func @_QPrecursive_declare_target -! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} +! DEVICE-SAME: {{.*}}attributes {fir.func_recursive, omp.declare_target = #omp.declaretarget{{.*}} RECURSIVE FUNCTION RECURSIVE_DECLARE_TARGET(INCREMENT) RESULT(K) !$omp declare target to(RECURSIVE_DECLARE_TARGET) device_type(nohost) INTEGER :: INCREMENT, K @@ -166,7 +166,7 @@ RECURSIVE FUNCTION RECURSIVE_DECLARE_TARGET(INCREMENT) RESULT(K) END FUNCTION RECURSIVE_DECLARE_TARGET ! DEVICE-LABEL: func.func @_QPrecursive_declare_target_enter -! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} +! 
DEVICE-SAME: {{.*}}attributes {fir.func_recursive, omp.declare_target = #omp.declaretarget{{.*}} RECURSIVE FUNCTION RECURSIVE_DECLARE_TARGET_ENTER(INCREMENT) RESULT(K) !$omp declare target enter(RECURSIVE_DECLARE_TARGET_ENTER) device_type(nohost) INTEGER :: INCREMENT, K diff --git a/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap-enter.f90 b/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap-enter.f90 index ed718a485e3ddc..0ca2bcbd66a960 100644 --- a/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap-enter.f90 +++ b/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap-enter.f90 @@ -105,7 +105,7 @@ end function target_function_test_host !! ----- ! DEVICE-LABEL: func.func @_QPimplicitly_captured_with_dev_type_recursive -! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +! DEVICE-SAME: {{.*}}attributes {fir.func_recursive, omp.declare_target = #omp.declaretarget{{.*}}} recursive function implicitly_captured_with_dev_type_recursive(increment) result(k) !$omp declare target enter(implicitly_captured_with_dev_type_recursive) device_type(host) integer :: increment, k @@ -174,7 +174,7 @@ recursive subroutine implicitly_captured_recursive(increment) end program ! DEVICE-LABEL: func.func @_QPimplicitly_captured_recursive -! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +! 
DEVICE-SAME: {{.*}}attributes {fir.func_recursive, omp.declare_target = #omp.declaretarget{{.*}}} recursive subroutine implicitly_captured_recursive(increment) integer :: increment if (increment == 10) then diff --git a/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap.f90 b/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap.f90 index df81c43a2fe69b..ffca5c3ff25000 100644 --- a/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap.f90 +++ b/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap.f90 @@ -131,7 +131,7 @@ end function target_function_test_host !! ----- ! DEVICE-LABEL: func.func @_QPimplicitly_captured_with_dev_type_recursive -! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +! DEVICE-SAME: {{.*}}attributes {fir.func_recursive, omp.declare_target = #omp.declaretarget{{.*}}} recursive function implicitly_captured_with_dev_type_recursive(increment) result(k) !$omp declare target to(implicitly_captured_with_dev_type_recursive) device_type(host) integer :: increment, k @@ -200,7 +200,7 @@ recursive subroutine implicitly_captured_recursive(increment) end program ! DEVICE-LABEL: func.func @_QPimplicitly_captured_recursive -! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +! DEVICE-SAME: {{.*}}attributes {fir.func_recursive, omp.declare_target = #omp.declaretarget{{.*}}} recursive subroutine implicitly_captured_recursive(increment) integer :: increment if (increment == 10) then diff --git a/flang/test/Lower/OpenMP/declare-target-implicit-tarop-cap.f90 b/flang/test/Lower/OpenMP/declare-target-implicit-tarop-cap.f90 index 7d1ae06c80561d..9b85a32036ca52 100644 --- a/flang/test/Lower/OpenMP/declare-target-implicit-tarop-cap.f90 +++ b/flang/test/Lower/OpenMP/declare-target-implicit-tarop-cap.f90 @@ -67,7 +67,7 @@ end function target_function_test_device !! ----- ! DEVICE-LABEL: func.func @_QPimplicitly_captured_recursive -! 
DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +! DEVICE-SAME: {{.*}}attributes {fir.func_recursive, omp.declare_target = #omp.declaretarget{{.*}}} recursive function implicitly_captured_recursive(increment) result(k) integer :: increment, k if (increment == 10) then diff --git a/flang/test/Lower/func-attrs.f90 b/flang/test/Lower/func-attrs.f90 new file mode 100644 index 00000000000000..7ab549a0ac7ce8 --- /dev/null +++ b/flang/test/Lower/func-attrs.f90 @@ -0,0 +1,31 @@ +! RUN: bbc -emit-hlfir %s -o - | FileCheck %s + +pure subroutine sub1() +end + +! CHECK: func.func @_QPsub1() attributes {fir.func_pure} + +elemental subroutine sub2() +end + +! CHECK: func.func @_QPsub2() attributes {fir.func_elemental, fir.func_pure} + +recursive subroutine sub3() +end + +! CHECK: func.func @_QPsub3() attributes {fir.func_recursive} + +pure function fct1() +end + +! CHECK: func.func @_QPfct1() -> f32 attributes {fir.func_pure} + +elemental function fct2() +end + +! CHECK: func.func @_QPfct2() -> f32 attributes {fir.func_elemental, fir.func_pure} + +recursive function fct3() +end + +! CHECK: func.func @_QPfct3() -> f32 attributes {fir.func_recursive} diff --git a/flang/test/Lower/host-associated.f90 b/flang/test/Lower/host-associated.f90 index cdc7e6a05288a7..b0195108238d7c 100644 --- a/flang/test/Lower/host-associated.f90 +++ b/flang/test/Lower/host-associated.f90 @@ -309,7 +309,7 @@ subroutine test7(j, k) contains ! CHECK-LABEL: func private @_QFtest7Ptest7_inner( -! CHECK-SAME: %[[i:.*]]: !fir.ref{{.*}}, %[[tup:.*]]: !fir.ref>> {fir.host_assoc}) -> i32 attributes {fir.host_symbol = {{.*}}, llvm.linkage = #llvm.linkage} { +! 
CHECK-SAME: %[[i:.*]]: !fir.ref{{.*}}, %[[tup:.*]]: !fir.ref>> {fir.host_assoc}) -> i32 attributes {fir.func_elemental, fir.func_pure, fir.host_symbol = {{.*}}, llvm.linkage = #llvm.linkage} { elemental integer function test7_inner(i) implicit none integer, intent(in) :: i From f6947e479e14e7904aa0b2539a95f5dfdc8f9295 Mon Sep 17 00:00:00 2001 From: Florian Mayer Date: Fri, 14 Jun 2024 11:46:56 -0700 Subject: [PATCH 141/155] Revert " [AArch64][SME] Enable subreg liveness tracking when SME is available" (#95574) Reverts llvm/llvm-project#92142 For now sending this to run on CI --- llvm/lib/Target/AArch64/AArch64Subtarget.h | 1 - .../Atomics/aarch64-atomicrmw-lse2_lse128.ll | 90 +- .../Atomics/aarch64-atomicrmw-v8_1a.ll | 110 +- .../aarch64_be-atomicrmw-lse2_lse128.ll | 150 +- .../Atomics/aarch64_be-atomicrmw-v8_1a.ll | 170 +- .../AArch64/GlobalISel/arm64-atomic-128.ll | 20 + .../aarch64-interleaved-access-w-undef.ll | 29 +- .../aarch64-neon-vector-insert-uaddlv.ll | 24 +- .../test/CodeGen/AArch64/aarch64-sysreg128.ll | 2 + llvm/test/CodeGen/AArch64/arm64-atomic-128.ll | 58 +- llvm/test/CodeGen/AArch64/arm64-dup.ll | 15 +- .../AArch64/arm64-indexed-vector-ldst.ll | 1872 ++++++++++++++++- llvm/test/CodeGen/AArch64/arm64-ld1.ll | 228 +- llvm/test/CodeGen/AArch64/arm64-neon-copy.ll | 18 +- .../AArch64/arm64-neon-copyPhysReg-tuple.ll | 35 +- llvm/test/CodeGen/AArch64/arm64-tbl.ll | 300 ++- llvm/test/CodeGen/AArch64/arm64-zip.ll | 28 +- .../test/CodeGen/AArch64/atomicrmw-xchg-fp.ll | 4 +- llvm/test/CodeGen/AArch64/bf16-shuffle.ll | 27 +- .../CodeGen/AArch64/build-vector-two-dup.ll | 2 +- .../complex-deinterleaving-multiuses.ll | 28 +- .../CodeGen/AArch64/extract-vector-elt.ll | 2 + .../CodeGen/AArch64/fp-conversion-to-tbl.ll | 34 +- llvm/test/CodeGen/AArch64/fptoi.ll | 246 ++- .../test/CodeGen/AArch64/fptosi-sat-vector.ll | 100 +- .../test/CodeGen/AArch64/fptoui-sat-vector.ll | 152 +- llvm/test/CodeGen/AArch64/insert-subvector.ll | 16 +- 
.../AArch64/neon-bitwise-instructions.ll | 12 +- .../CodeGen/AArch64/neon-extracttruncate.ll | 4 + .../CodeGen/AArch64/neon-reverseshuffle.ll | 4 + .../CodeGen/AArch64/neon-widen-shuffle.ll | 6 + llvm/test/CodeGen/AArch64/seqpairspill.mir | 12 +- llvm/test/CodeGen/AArch64/shuffle-tbl34.ll | 83 +- llvm/test/CodeGen/AArch64/shuffles.ll | 60 +- llvm/test/CodeGen/AArch64/shufflevector.ll | 115 +- .../AArch64/sme2-intrinsics-add-sub-za16.ll | 24 + .../CodeGen/AArch64/sme2-intrinsics-add.ll | 60 + .../CodeGen/AArch64/sme2-intrinsics-cvtn.ll | 4 + .../CodeGen/AArch64/sme2-intrinsics-fmlas.ll | 100 +- .../AArch64/sme2-intrinsics-fp-dots.ll | 52 +- .../AArch64/sme2-intrinsics-insert-mova.ll | 144 ++ .../AArch64/sme2-intrinsics-int-dots.ll | 576 ++--- .../CodeGen/AArch64/sme2-intrinsics-max.ll | 400 ++-- .../CodeGen/AArch64/sme2-intrinsics-min.ll | 400 ++-- .../CodeGen/AArch64/sme2-intrinsics-mlall.ll | 244 ++- .../CodeGen/AArch64/sme2-intrinsics-mlals.ll | 193 +- .../CodeGen/AArch64/sme2-intrinsics-rshl.ll | 208 +- .../sme2-intrinsics-select-sme-tileslice.ll | 2 + .../AArch64/sme2-intrinsics-sqdmulh.ll | 104 +- .../CodeGen/AArch64/sme2-intrinsics-sub.ll | 60 + .../CodeGen/AArch64/sme2-intrinsics-vdot.ll | 382 +--- .../AArch64/sve-fixed-length-shuffles.ll | 2 +- .../sve-intrinsics-stN-reg-imm-addr-mode.ll | 119 ++ .../sve-intrinsics-stN-reg-reg-addr-mode.ll | 63 + .../CodeGen/AArch64/sve-intrinsics-stores.ll | 81 + .../CodeGen/AArch64/sve-merging-stores.ll | 13 +- ...-streaming-mode-fixed-length-ld2-alloca.ll | 8 +- ...sve-streaming-mode-fixed-length-shuffle.ll | 11 +- .../AArch64/sve2-intrinsics-perm-tb.ll | 40 +- .../AArch64/sve2p1-intrinsics-bfclamp.ll | 6 + .../AArch64/sve2p1-intrinsics-fclamp.ll | 18 + .../sve2p1-intrinsics-multivec-stores.ll | 153 ++ .../AArch64/sve2p1-intrinsics-sclamp.ll | 24 + .../AArch64/sve2p1-intrinsics-selx4.ll | 128 +- .../AArch64/sve2p1-intrinsics-stores.ll | 96 +- .../AArch64/sve2p1-intrinsics-uclamp.ll | 24 + 
.../AArch64/sve2p1-intrinsics-uzpx4.ll | 20 +- .../AArch64/sve2p1-intrinsics-while-pp.ll | 32 + .../AArch64/swift-error-unreachable-use.ll | 1 - llvm/test/CodeGen/AArch64/tbl-loops.ll | 79 +- llvm/test/CodeGen/AArch64/trunc-to-tbl.ll | 12 +- llvm/test/CodeGen/AArch64/vldn_shuffle.ll | 84 +- 72 files changed, 5465 insertions(+), 2559 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index 9912190e1bcede..7ef7a89b5749fe 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -149,7 +149,6 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { const Triple &getTargetTriple() const { return TargetTriple; } bool enableMachineScheduler() const override { return true; } bool enablePostRAScheduler() const override { return usePostRAScheduler(); } - bool enableSubRegLiveness() const override { return true; } bool enableMachinePipeliner() const override; bool useDFAforSMS() const override { return false; } diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll index 444f579f232420..a1712a5ec7a27c 100644 --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll @@ -2273,10 +2273,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x5, x3 -; -O1: mvn x8, x8 -; -O1: mvn x9, x9 -; -O1: casp x4, x5, x8, x9, [x0] +; -O1: and x9, x7, x3 +; -O1: mvn x10, x8 +; -O1: mvn x11, x9 +; -O1: casp x4, x5, x10, x11, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value monotonic, align 16 @@ -2298,10 +2298,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value ; -O1-LABEL: 
atomicrmw_nand_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x5, x3 -; -O1: mvn x8, x8 -; -O1: mvn x9, x9 -; -O1: caspa x4, x5, x8, x9, [x0] +; -O1: and x9, x7, x3 +; -O1: mvn x10, x8 +; -O1: mvn x11, x9 +; -O1: caspa x4, x5, x10, x11, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value acquire, align 16 @@ -2323,10 +2323,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x5, x3 -; -O1: mvn x8, x8 -; -O1: mvn x9, x9 -; -O1: caspl x4, x5, x8, x9, [x0] +; -O1: and x9, x7, x3 +; -O1: mvn x10, x8 +; -O1: mvn x11, x9 +; -O1: caspl x4, x5, x10, x11, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value release, align 16 @@ -2348,10 +2348,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x5, x3 -; -O1: mvn x8, x8 -; -O1: mvn x9, x9 -; -O1: caspal x4, x5, x8, x9, [x0] +; -O1: and x9, x7, x3 +; -O1: mvn x10, x8 +; -O1: mvn x11, x9 +; -O1: caspal x4, x5, x10, x11, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value acq_rel, align 16 @@ -2373,10 +2373,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x5, x3 -; -O1: mvn x8, x8 -; -O1: mvn x9, x9 -; -O1: caspal x4, x5, x8, x9, [x0] +; -O1: and x9, x7, x3 +; -O1: mvn x10, x8 +; -O1: mvn x11, x9 +; -O1: caspal x4, x5, x10, x11, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value seq_cst, align 16 @@ -3406,7 +3406,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu ; 
-O1-LABEL: atomicrmw_xor_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x5, x3 +; -O1: eor x9, x7, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3427,7 +3427,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x5, x3 +; -O1: eor x9, x7, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3448,7 +3448,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x5, x3 +; -O1: eor x9, x7, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3469,7 +3469,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x5, x3 +; -O1: eor x9, x7, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3490,7 +3490,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x5, x3 +; -O1: eor x9, x7, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3947,7 +3947,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu ; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, lt +; -O1: csel x9, x7, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -3975,7 +3975,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_max_i128_aligned_acquire: ; 
-O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, lt +; -O1: csel x9, x7, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4003,7 +4003,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_max_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, lt +; -O1: csel x9, x7, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4031,7 +4031,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, lt +; -O1: csel x9, x7, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4059,7 +4059,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, lt +; -O1: csel x9, x7, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4592,7 +4592,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu ; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, ge +; -O1: csel x9, x7, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4620,7 +4620,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_min_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, ge +; -O1: csel x9, x7, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4648,7 +4648,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_min_i128_aligned_release: ; -O1: ldp x4, x5, [x0] 
; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, ge +; -O1: csel x9, x7, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4676,7 +4676,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, ge +; -O1: csel x9, x7, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4704,7 +4704,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, ge +; -O1: csel x9, x7, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5237,7 +5237,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val ; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, lo +; -O1: csel x9, x7, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5265,7 +5265,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, lo +; -O1: csel x9, x7, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5293,7 +5293,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umax_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, lo +; -O1: csel x9, x7, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5321,7 +5321,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 
-; -O1: csel x9, x5, x3, lo +; -O1: csel x9, x7, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5349,7 +5349,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, lo +; -O1: csel x9, x7, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5877,7 +5877,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val ; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, hs +; -O1: csel x9, x7, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5905,7 +5905,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, hs +; -O1: csel x9, x7, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5933,7 +5933,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umin_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, hs +; -O1: csel x9, x7, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5961,7 +5961,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, hs +; -O1: csel x9, x7, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5989,7 +5989,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel 
x9, x5, x3, hs +; -O1: csel x9, x7, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll index 62af028defde56..ee5fbe39b4492c 100644 --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll @@ -1616,7 +1616,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_monotonic(ptr %ptr, i128 %valu ; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x5, x3 +; -O1: and x9, x7, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -1637,7 +1637,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_and_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x5, x3 +; -O1: and x9, x7, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -1658,7 +1658,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_release(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_and_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x5, x3 +; -O1: and x9, x7, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -1679,7 +1679,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x5, x3 +; -O1: and x9, x7, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -1700,7 +1700,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x5, x3 +; -O1: and x9, x7, x3 ; -O1: caspal x4, x5, 
x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -2343,10 +2343,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x5, x3 -; -O1: mvn x8, x8 -; -O1: mvn x9, x9 -; -O1: casp x4, x5, x8, x9, [x0] +; -O1: and x9, x7, x3 +; -O1: mvn x10, x8 +; -O1: mvn x11, x9 +; -O1: casp x4, x5, x10, x11, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value monotonic, align 16 @@ -2368,10 +2368,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x5, x3 -; -O1: mvn x8, x8 -; -O1: mvn x9, x9 -; -O1: caspa x4, x5, x8, x9, [x0] +; -O1: and x9, x7, x3 +; -O1: mvn x10, x8 +; -O1: mvn x11, x9 +; -O1: caspa x4, x5, x10, x11, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value acquire, align 16 @@ -2393,10 +2393,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x5, x3 -; -O1: mvn x8, x8 -; -O1: mvn x9, x9 -; -O1: caspl x4, x5, x8, x9, [x0] +; -O1: and x9, x7, x3 +; -O1: mvn x10, x8 +; -O1: mvn x11, x9 +; -O1: caspl x4, x5, x10, x11, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value release, align 16 @@ -2418,10 +2418,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x5, x3 -; -O1: mvn x8, x8 -; -O1: mvn x9, x9 -; -O1: caspal x4, x5, x8, x9, [x0] +; -O1: and x9, x7, x3 +; -O1: mvn x10, x8 +; -O1: mvn x11, x9 +; -O1: caspal x4, x5, x10, x11, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, 
x6, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value acq_rel, align 16 @@ -2443,10 +2443,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x5, x3 -; -O1: mvn x8, x8 -; -O1: mvn x9, x9 -; -O1: caspal x4, x5, x8, x9, [x0] +; -O1: and x9, x7, x3 +; -O1: mvn x10, x8 +; -O1: mvn x11, x9 +; -O1: caspal x4, x5, x10, x11, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value seq_cst, align 16 @@ -2996,7 +2996,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_monotonic(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: orr x8, x4, x2 -; -O1: orr x9, x5, x3 +; -O1: orr x9, x7, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3017,7 +3017,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_or_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: orr x8, x4, x2 -; -O1: orr x9, x5, x3 +; -O1: orr x9, x7, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3038,7 +3038,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_release(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_or_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: orr x8, x4, x2 -; -O1: orr x9, x5, x3 +; -O1: orr x9, x7, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3059,7 +3059,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: orr x8, x4, x2 -; -O1: orr x9, x5, x3 +; -O1: orr x9, x7, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3080,7 +3080,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: 
atomicrmw_or_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: orr x8, x4, x2 -; -O1: orr x9, x5, x3 +; -O1: orr x9, x7, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3531,7 +3531,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x5, x3 +; -O1: eor x9, x7, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3552,7 +3552,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x5, x3 +; -O1: eor x9, x7, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3573,7 +3573,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x5, x3 +; -O1: eor x9, x7, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3594,7 +3594,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x5, x3 +; -O1: eor x9, x7, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -3615,7 +3615,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x5, x3 +; -O1: eor x9, x7, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 ; -O1: ccmp x4, x6, #0, eq @@ -4072,7 +4072,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu ; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic: ; -O1: ldp x4, x5, 
[x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, lt +; -O1: csel x9, x7, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4100,7 +4100,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_max_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, lt +; -O1: csel x9, x7, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4128,7 +4128,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_max_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, lt +; -O1: csel x9, x7, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4156,7 +4156,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, lt +; -O1: csel x9, x7, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4184,7 +4184,7 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, lt +; -O1: csel x9, x7, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4717,7 +4717,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu ; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, ge +; -O1: csel x9, x7, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4745,7 +4745,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_min_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 
-; -O1: csel x9, x5, x3, ge +; -O1: csel x9, x7, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4773,7 +4773,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_min_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, ge +; -O1: csel x9, x7, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4801,7 +4801,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, ge +; -O1: csel x9, x7, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -4829,7 +4829,7 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, ge +; -O1: csel x9, x7, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5362,7 +5362,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val ; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, lo +; -O1: csel x9, x7, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5390,7 +5390,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, lo +; -O1: csel x9, x7, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5418,7 +5418,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umax_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, 
x5, x3, lo +; -O1: csel x9, x7, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5446,7 +5446,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, lo +; -O1: csel x9, x7, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -5474,7 +5474,7 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, lo +; -O1: csel x9, x7, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -6002,7 +6002,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val ; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, hs +; -O1: csel x9, x7, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -6030,7 +6030,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, hs +; -O1: csel x9, x7, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -6058,7 +6058,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umin_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, hs +; -O1: csel x9, x7, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -6086,7 +6086,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, hs +; 
-O1: csel x9, x7, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 @@ -6114,7 +6114,7 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: cmp x2, x4 -; -O1: csel x9, x5, x3, hs +; -O1: csel x9, x7, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x5, x7 diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll index f043f99327308b..83e383f335637c 100644 --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll @@ -517,7 +517,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu ; ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] -; -O1: adds x9, x5, x3 +; -O1: adds x9, x7, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -534,7 +534,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: adds x9, x5, x3 +; -O1: adds x9, x7, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -551,7 +551,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_add_i128_aligned_release: ; -O1: ldp x4, x5, [x0] -; -O1: adds x9, x5, x3 +; -O1: adds x9, x7, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -568,7 +568,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] -; -O1: adds x9, x5, x3 +; -O1: adds x9, x7, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, 
eq @@ -585,7 +585,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: adds x9, x5, x3 +; -O1: adds x9, x7, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1102,7 +1102,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] -; -O1: subs x9, x5, x3 +; -O1: subs x9, x7, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1119,7 +1119,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: subs x9, x5, x3 +; -O1: subs x9, x7, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1136,7 +1136,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_release: ; -O1: ldp x4, x5, [x0] -; -O1: subs x9, x5, x3 +; -O1: subs x9, x7, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1153,7 +1153,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] -; -O1: subs x9, x5, x3 +; -O1: subs x9, x7, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1170,7 +1170,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: subs x9, x5, x3 +; -O1: subs x9, x7, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -2356,10 +2356,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic: ; -O1: 
ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x5, x3 -; -O1: mvn x8, x8 -; -O1: mvn x9, x9 -; -O1: casp x4, x5, x8, x9, [x0] +; -O1: and x9, x7, x3 +; -O1: mvn x10, x8 +; -O1: mvn x11, x9 +; -O1: casp x4, x5, x10, x11, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value monotonic, align 16 @@ -2379,10 +2379,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x5, x3 -; -O1: mvn x8, x8 -; -O1: mvn x9, x9 -; -O1: caspa x4, x5, x8, x9, [x0] +; -O1: and x9, x7, x3 +; -O1: mvn x10, x8 +; -O1: mvn x11, x9 +; -O1: caspa x4, x5, x10, x11, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value acquire, align 16 @@ -2402,10 +2402,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x5, x3 -; -O1: mvn x8, x8 -; -O1: mvn x9, x9 -; -O1: caspl x4, x5, x8, x9, [x0] +; -O1: and x9, x7, x3 +; -O1: mvn x10, x8 +; -O1: mvn x11, x9 +; -O1: caspl x4, x5, x10, x11, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value release, align 16 @@ -2425,10 +2425,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x5, x3 -; -O1: mvn x8, x8 -; -O1: mvn x9, x9 -; -O1: caspal x4, x5, x8, x9, [x0] +; -O1: and x9, x7, x3 +; -O1: mvn x10, x8 +; -O1: mvn x11, x9 +; -O1: caspal x4, x5, x10, x11, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value acq_rel, align 16 @@ -2448,10 +2448,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value ; -O1-LABEL: 
atomicrmw_nand_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x5, x3 -; -O1: mvn x8, x8 -; -O1: mvn x9, x9 -; -O1: caspal x4, x5, x8, x9, [x0] +; -O1: and x9, x7, x3 +; -O1: mvn x10, x8 +; -O1: mvn x11, x9 +; -O1: caspal x4, x5, x10, x11, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value seq_cst, align 16 @@ -3479,7 +3479,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x5, x3 +; -O1: eor x9, x7, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3498,7 +3498,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x5, x3 +; -O1: eor x9, x7, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3517,7 +3517,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x5, x3 +; -O1: eor x9, x7, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3536,7 +3536,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x5, x3 +; -O1: eor x9, x7, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3555,7 +3555,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x5, x3 +; -O1: eor x9, x7, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, 
#0, eq @@ -4004,8 +4004,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu ; ; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, lt +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4025,8 +4025,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_max_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, lt +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4046,8 +4046,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_max_i128_aligned_release: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, lt +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4067,8 +4067,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, lt +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4088,8 +4088,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, lt +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4589,8 +4589,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu ; ; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, ge +; -O1: 
cmp x3, x7 +; -O1: csel x9, x7, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4610,8 +4610,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_min_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, ge +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4631,8 +4631,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_min_i128_aligned_release: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, ge +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4652,8 +4652,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, ge +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4673,8 +4673,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, ge +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5174,8 +5174,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val ; ; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, lo +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5195,8 +5195,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value ; ; -O1-LABEL: 
atomicrmw_umax_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, lo +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5216,8 +5216,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umax_i128_aligned_release: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, lo +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5237,8 +5237,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, lo +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5258,8 +5258,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, lo +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5759,8 +5759,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val ; ; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, hs +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5780,8 +5780,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, hs +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ 
-5801,8 +5801,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umin_i128_aligned_release: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, hs +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5822,8 +5822,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, hs +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5843,8 +5843,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, hs +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll index df7b57e7e18f46..0c3ed9b0f1de0f 100644 --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll @@ -542,7 +542,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu ; ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] -; -O1: adds x9, x5, x3 +; -O1: adds x9, x7, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -559,7 +559,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: adds x9, x5, x3 +; -O1: adds x9, x7, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -576,7 
+576,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_add_i128_aligned_release: ; -O1: ldp x4, x5, [x0] -; -O1: adds x9, x5, x3 +; -O1: adds x9, x7, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -593,7 +593,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] -; -O1: adds x9, x5, x3 +; -O1: adds x9, x7, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -610,7 +610,7 @@ define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: adds x9, x5, x3 +; -O1: adds x9, x7, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1127,7 +1127,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] -; -O1: subs x9, x5, x3 +; -O1: subs x9, x7, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1144,7 +1144,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: subs x9, x5, x3 +; -O1: subs x9, x7, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1161,7 +1161,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_release: ; -O1: ldp x4, x5, [x0] -; -O1: subs x9, x5, x3 +; -O1: subs x9, x7, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1178,7 +1178,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] -; 
-O1: subs x9, x5, x3 +; -O1: subs x9, x7, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1195,7 +1195,7 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: subs x9, x5, x3 +; -O1: subs x9, x7, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1674,7 +1674,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_monotonic(ptr %ptr, i128 %valu ; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x5, x3 +; -O1: and x9, x7, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1693,7 +1693,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_and_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x5, x3 +; -O1: and x9, x7, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1712,7 +1712,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_release(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_and_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x5, x3 +; -O1: and x9, x7, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1731,7 +1731,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x5, x3 +; -O1: and x9, x7, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -1750,7 +1750,7 @@ define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x5, x3 +; -O1: and x9, x7, x3 ; -O1: caspal x4, x5, x8, x9, 
[x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -2406,10 +2406,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_monotonic(ptr %ptr, i128 %val ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x5, x3 -; -O1: mvn x8, x8 -; -O1: mvn x9, x9 -; -O1: casp x4, x5, x8, x9, [x0] +; -O1: and x9, x7, x3 +; -O1: mvn x10, x8 +; -O1: mvn x11, x9 +; -O1: casp x4, x5, x10, x11, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value monotonic, align 16 @@ -2429,10 +2429,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acquire(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x5, x3 -; -O1: mvn x8, x8 -; -O1: mvn x9, x9 -; -O1: caspa x4, x5, x8, x9, [x0] +; -O1: and x9, x7, x3 +; -O1: mvn x10, x8 +; -O1: mvn x11, x9 +; -O1: caspa x4, x5, x10, x11, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value acquire, align 16 @@ -2452,10 +2452,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_release(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x5, x3 -; -O1: mvn x8, x8 -; -O1: mvn x9, x9 -; -O1: caspl x4, x5, x8, x9, [x0] +; -O1: and x9, x7, x3 +; -O1: mvn x10, x8 +; -O1: mvn x11, x9 +; -O1: caspl x4, x5, x10, x11, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value release, align 16 @@ -2475,10 +2475,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_acq_rel(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x5, x3 -; -O1: mvn x8, x8 -; -O1: mvn x9, x9 -; -O1: caspal x4, x5, x8, x9, [x0] +; -O1: and x9, x7, x3 +; -O1: mvn x10, x8 +; -O1: mvn x11, x9 +; -O1: caspal x4, x5, x10, x11, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, 
eq %r = atomicrmw nand ptr %ptr, i128 %value acq_rel, align 16 @@ -2498,10 +2498,10 @@ define dso_local i128 @atomicrmw_nand_i128_aligned_seq_cst(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: and x8, x4, x2 -; -O1: and x9, x5, x3 -; -O1: mvn x8, x8 -; -O1: mvn x9, x9 -; -O1: caspal x4, x5, x8, x9, [x0] +; -O1: and x9, x7, x3 +; -O1: mvn x10, x8 +; -O1: mvn x11, x9 +; -O1: caspal x4, x5, x10, x11, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq %r = atomicrmw nand ptr %ptr, i128 %value seq_cst, align 16 @@ -3049,7 +3049,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_monotonic(ptr %ptr, i128 %value ; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: orr x8, x4, x2 -; -O1: orr x9, x5, x3 +; -O1: orr x9, x7, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3068,7 +3068,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_or_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: orr x8, x4, x2 -; -O1: orr x9, x5, x3 +; -O1: orr x9, x7, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3087,7 +3087,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_release(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_or_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: orr x8, x4, x2 -; -O1: orr x9, x5, x3 +; -O1: orr x9, x7, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3106,7 +3106,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: orr x8, x4, x2 -; -O1: orr x9, x5, x3 +; -O1: orr x9, x7, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3125,7 +3125,7 @@ define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: 
atomicrmw_or_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: orr x8, x4, x2 -; -O1: orr x9, x5, x3 +; -O1: orr x9, x7, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3574,7 +3574,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_monotonic(ptr %ptr, i128 %valu ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x5, x3 +; -O1: eor x9, x7, x3 ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3593,7 +3593,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acquire(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x5, x3 +; -O1: eor x9, x7, x3 ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3612,7 +3612,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_release(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_release: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x5, x3 +; -O1: eor x9, x7, x3 ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3631,7 +3631,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x5, x3 +; -O1: eor x9, x7, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -3650,7 +3650,7 @@ define dso_local i128 @atomicrmw_xor_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] ; -O1: eor x8, x4, x2 -; -O1: eor x9, x5, x3 +; -O1: eor x9, x7, x3 ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 ; -O1: ccmp x5, x7, #0, eq @@ -4099,8 +4099,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_monotonic(ptr %ptr, i128 %valu ; ; -O1-LABEL: atomicrmw_max_i128_aligned_monotonic: ; -O1: ldp x4, x5, 
[x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, lt +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4120,8 +4120,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acquire(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_max_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, lt +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4141,8 +4141,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_release(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_max_i128_aligned_release: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, lt +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4162,8 +4162,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_max_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, lt +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4183,8 +4183,8 @@ define dso_local i128 @atomicrmw_max_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_max_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, lt +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, lt ; -O1: csel x8, x4, x2, lt ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4684,8 +4684,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_monotonic(ptr %ptr, i128 %valu ; ; -O1-LABEL: atomicrmw_min_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, ge +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4705,8 +4705,8 @@ define dso_local i128 
@atomicrmw_min_i128_aligned_acquire(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_min_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, ge +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4726,8 +4726,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_release(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_min_i128_aligned_release: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, ge +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4747,8 +4747,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_acq_rel(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_min_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, ge +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -4768,8 +4768,8 @@ define dso_local i128 @atomicrmw_min_i128_aligned_seq_cst(ptr %ptr, i128 %value) ; ; -O1-LABEL: atomicrmw_min_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, ge +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, ge ; -O1: csel x8, x4, x2, ge ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5269,8 +5269,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_monotonic(ptr %ptr, i128 %val ; ; -O1-LABEL: atomicrmw_umax_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, lo +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5290,8 +5290,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acquire(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umax_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, lo +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, lo ; -O1: csel 
x8, x4, x2, lo ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5311,8 +5311,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_release(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umax_i128_aligned_release: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, lo +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5332,8 +5332,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_acq_rel(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umax_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, lo +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5353,8 +5353,8 @@ define dso_local i128 @atomicrmw_umax_i128_aligned_seq_cst(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umax_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, lo +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, lo ; -O1: csel x8, x4, x2, lo ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5854,8 +5854,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_monotonic(ptr %ptr, i128 %val ; ; -O1-LABEL: atomicrmw_umin_i128_aligned_monotonic: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, hs +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: casp x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5875,8 +5875,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acquire(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umin_i128_aligned_acquire: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, hs +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspa x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5896,8 +5896,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_release(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umin_i128_aligned_release: ; -O1: ldp x4, 
x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, hs +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspl x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5917,8 +5917,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_acq_rel(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umin_i128_aligned_acq_rel: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, hs +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 @@ -5938,8 +5938,8 @@ define dso_local i128 @atomicrmw_umin_i128_aligned_seq_cst(ptr %ptr, i128 %value ; ; -O1-LABEL: atomicrmw_umin_i128_aligned_seq_cst: ; -O1: ldp x4, x5, [x0] -; -O1: cmp x3, x5 -; -O1: csel x9, x5, x3, hs +; -O1: cmp x3, x7 +; -O1: csel x9, x7, x3, hs ; -O1: csel x8, x4, x2, hs ; -O1: caspal x4, x5, x8, x9, [x0] ; -O1: cmp x4, x6 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll index 80310a11add697..1fe63c9be8c629 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll @@ -53,6 +53,10 @@ define void @val_compare_and_swap(ptr %p, i128 %oldval, i128 %newval) { ; ; CHECK-CAS-O1-LABEL: val_compare_and_swap: ; CHECK-CAS-O1: // %bb.0: +; CHECK-CAS-O1-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 +; CHECK-CAS-O1-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5 +; CHECK-CAS-O1-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 +; CHECK-CAS-O1-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5 ; CHECK-CAS-O1-NEXT: caspa x2, x3, x4, x5, [x0] ; CHECK-CAS-O1-NEXT: mov v0.d[0], x2 ; CHECK-CAS-O1-NEXT: mov v0.d[1], x3 @@ -176,6 +180,10 @@ define void @val_compare_and_swap_monotonic_seqcst(ptr %p, i128 %oldval, i128 %n ; ; CHECK-CAS-O1-LABEL: val_compare_and_swap_monotonic_seqcst: ; CHECK-CAS-O1: // %bb.0: +; CHECK-CAS-O1-NEXT: // kill: def 
$x2 killed $x2 killed $x2_x3 def $x2_x3 +; CHECK-CAS-O1-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5 +; CHECK-CAS-O1-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 +; CHECK-CAS-O1-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5 ; CHECK-CAS-O1-NEXT: caspal x2, x3, x4, x5, [x0] ; CHECK-CAS-O1-NEXT: mov v0.d[0], x2 ; CHECK-CAS-O1-NEXT: mov v0.d[1], x3 @@ -299,6 +307,10 @@ define void @val_compare_and_swap_release_acquire(ptr %p, i128 %oldval, i128 %ne ; ; CHECK-CAS-O1-LABEL: val_compare_and_swap_release_acquire: ; CHECK-CAS-O1: // %bb.0: +; CHECK-CAS-O1-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 +; CHECK-CAS-O1-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5 +; CHECK-CAS-O1-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 +; CHECK-CAS-O1-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5 ; CHECK-CAS-O1-NEXT: caspal x2, x3, x4, x5, [x0] ; CHECK-CAS-O1-NEXT: mov v0.d[0], x2 ; CHECK-CAS-O1-NEXT: mov v0.d[1], x3 @@ -422,6 +434,10 @@ define void @val_compare_and_swap_monotonic(ptr %p, i128 %oldval, i128 %newval) ; ; CHECK-CAS-O1-LABEL: val_compare_and_swap_monotonic: ; CHECK-CAS-O1: // %bb.0: +; CHECK-CAS-O1-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 +; CHECK-CAS-O1-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5 +; CHECK-CAS-O1-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 +; CHECK-CAS-O1-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5 ; CHECK-CAS-O1-NEXT: caspal x2, x3, x4, x5, [x0] ; CHECK-CAS-O1-NEXT: mov v0.d[0], x2 ; CHECK-CAS-O1-NEXT: mov v0.d[1], x3 @@ -642,6 +658,10 @@ define i128 @val_compare_and_swap_return(ptr %p, i128 %oldval, i128 %newval) { ; ; CHECK-CAS-O1-LABEL: val_compare_and_swap_return: ; CHECK-CAS-O1: // %bb.0: +; CHECK-CAS-O1-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 +; CHECK-CAS-O1-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5 +; CHECK-CAS-O1-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 
def $x2_x3 +; CHECK-CAS-O1-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5 ; CHECK-CAS-O1-NEXT: caspa x2, x3, x4, x5, [x0] ; CHECK-CAS-O1-NEXT: mov x0, x2 ; CHECK-CAS-O1-NEXT: mov x1, x3 diff --git a/llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll b/llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll index 7141f53802bff7..07fbe5d7310f60 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll @@ -27,8 +27,9 @@ BB: define void @f_undef_15(<8 x i64> %a, ptr %dst) { ; CHECK-LABEL: f_undef_15: ; CHECK: // %bb.0: // %BB -; CHECK-NEXT: mov v1.16b, v0.16b +; CHECK-NEXT: // kill: def $q0 killed $q0 def $q0_q1 ; CHECK-NEXT: mov x8, x0 +; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: st2 { v0.2d, v1.2d }, [x8], #32 ; CHECK-NEXT: st2 { v0.2d, v1.2d }, [x8] ; CHECK-NEXT: add x8, x0, #64 @@ -45,17 +46,19 @@ BB: define void @f_undef_1(<8 x i64> %a, ptr %dst) { ; CHECK-LABEL: f_undef_1: ; CHECK: // %bb.0: // %BB -; CHECK-NEXT: mov v4.16b, v2.16b -; CHECK-NEXT: mov v5.16b, v0.16b +; CHECK-NEXT: mov v16.16b, v0.16b +; CHECK-NEXT: mov v5.16b, v2.16b +; CHECK-NEXT: // kill: def $q1 killed $q1 def $q1_q2 +; CHECK-NEXT: // kill: def $q3 killed $q3 def $q3_q4 ; CHECK-NEXT: mov x8, x0 -; CHECK-NEXT: mov v6.16b, v0.16b ; CHECK-NEXT: mov v2.16b, v1.16b -; CHECK-NEXT: st2 { v5.2d, v6.2d }, [x8], #32 -; CHECK-NEXT: mov v5.16b, v4.16b +; CHECK-NEXT: mov v4.16b, v3.16b +; CHECK-NEXT: mov v17.16b, v16.16b +; CHECK-NEXT: mov v6.16b, v5.16b +; CHECK-NEXT: st2 { v16.2d, v17.2d }, [x8], #32 ; CHECK-NEXT: st2 { v1.2d, v2.2d }, [x8] ; CHECK-NEXT: add x8, x0, #64 -; CHECK-NEXT: st2 { v4.2d, v5.2d }, [x8] -; CHECK-NEXT: mov v4.16b, v3.16b +; CHECK-NEXT: st2 { v5.2d, v6.2d }, [x8] ; CHECK-NEXT: add x8, x0, #96 ; CHECK-NEXT: st2 { v3.2d, v4.2d }, [x8] ; CHECK-NEXT: ret @@ -70,10 +73,11 @@ define void @noundefs(<8 x i32> %a, <8 x i32> %b, ptr %dst) { ; CHECK-LABEL: noundefs: ; 
CHECK: // %bb.0: // %BB ; CHECK-NEXT: mov v5.16b, v2.16b +; CHECK-NEXT: // kill: def $q3 killed $q3 def $q2_q3 ; CHECK-NEXT: mov v4.16b, v0.16b -; CHECK-NEXT: mov v2.16b, v3.16b +; CHECK-NEXT: mov v2.16b, v1.16b ; CHECK-NEXT: st2 { v4.4s, v5.4s }, [x0], #32 -; CHECK-NEXT: st2 { v1.4s, v2.4s }, [x0] +; CHECK-NEXT: st2 { v2.4s, v3.4s }, [x0] ; CHECK-NEXT: ret BB: %S = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> @@ -85,10 +89,11 @@ define void @undefs(<8 x i32> %a, <8 x i32> %b, ptr %dst) { ; CHECK-LABEL: undefs: ; CHECK: // %bb.0: // %BB ; CHECK-NEXT: mov v5.16b, v2.16b +; CHECK-NEXT: // kill: def $q3 killed $q3 def $q2_q3 ; CHECK-NEXT: mov v4.16b, v0.16b -; CHECK-NEXT: mov v2.16b, v3.16b +; CHECK-NEXT: mov v2.16b, v1.16b ; CHECK-NEXT: st2 { v4.4s, v5.4s }, [x0], #32 -; CHECK-NEXT: st2 { v1.4s, v2.4s }, [x0] +; CHECK-NEXT: st2 { v2.4s, v3.4s }, [x0] ; CHECK-NEXT: ret BB: %S = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> diff --git a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll index f0fcafa5302e6d..3c8aca5145261d 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll @@ -146,11 +146,11 @@ define void @insert_vec_v6i64_uaddlv_from_v4i32(ptr %0) { ; CHECK-LABEL: insert_vec_v6i64_uaddlv_from_v4i32: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.2d v0, #0000000000000000 +; CHECK-NEXT: movi.2d v2, #0000000000000000 ; CHECK-NEXT: uaddlv.4s d1, v0 +; CHECK-NEXT: str d2, [x0, #16] ; CHECK-NEXT: mov.d v0[0], v1[0] -; CHECK-NEXT: movi.2d v1, #0000000000000000 ; CHECK-NEXT: ucvtf.2d v0, v0 -; CHECK-NEXT: str d1, [x0, #16] ; CHECK-NEXT: fcvtn v0.2s, v0.2d ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret @@ -210,9 +210,9 @@ define void @insert_vec_v8i16_uaddlv_from_v8i16(ptr %0) { ; CHECK-NEXT: stp xzr, xzr, [x0, #16] ; CHECK-NEXT: uaddlv.8h s0, v0 ; CHECK-NEXT: mov.h v1[0], v0[0] -; CHECK-NEXT: 
ushll.4s v0, v1, #0 -; CHECK-NEXT: ucvtf.4s v0, v0 -; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: ushll.4s v1, v1, #0 +; CHECK-NEXT: ucvtf.4s v1, v1 +; CHECK-NEXT: str q1, [x0] ; CHECK-NEXT: ret entry: @@ -232,10 +232,10 @@ define void @insert_vec_v3i16_uaddlv_from_v8i16(ptr %0) { ; CHECK-NEXT: add x8, x0, #8 ; CHECK-NEXT: uaddlv.8h s0, v0 ; CHECK-NEXT: mov.h v1[0], v0[0] -; CHECK-NEXT: ushll.4s v0, v1, #0 -; CHECK-NEXT: ucvtf.4s v0, v0 -; CHECK-NEXT: st1.s { v0 }[2], [x8] -; CHECK-NEXT: str d0, [x0] +; CHECK-NEXT: ushll.4s v1, v1, #0 +; CHECK-NEXT: ucvtf.4s v1, v1 +; CHECK-NEXT: st1.s { v1 }[2], [x8] +; CHECK-NEXT: str d1, [x0] ; CHECK-NEXT: ret entry: @@ -278,9 +278,9 @@ define void @insert_vec_v16i8_uaddlv_from_v8i8(ptr %0) { ; CHECK-NEXT: stp q0, q0, [x0, #32] ; CHECK-NEXT: mov.h v2[0], v1[0] ; CHECK-NEXT: bic.4h v2, #255, lsl #8 -; CHECK-NEXT: ushll.4s v1, v2, #0 -; CHECK-NEXT: ucvtf.4s v1, v1 -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: ushll.4s v2, v2, #0 +; CHECK-NEXT: ucvtf.4s v2, v2 +; CHECK-NEXT: stp q2, q0, [x0] ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/aarch64-sysreg128.ll b/llvm/test/CodeGen/AArch64/aarch64-sysreg128.ll index 75a96be9b435e2..7f20b5e5ee4df5 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-sysreg128.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-sysreg128.ll @@ -24,6 +24,8 @@ declare i128 @llvm.read_volatile_register.i128(metadata) #1 define void @test_wsr128(i128 noundef %v) #0 { ; CHECK-LE-LABEL: test_wsr128: ; CHECK-LE: // %bb.0: // %entry +; CHECK-LE-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1 +; CHECK-LE-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1 ; CHECK-LE-NEXT: msrr S1_2_C3_C4_5, x0, x1 ; CHECK-LE-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll b/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll index 4a84c673af8cfc..37c61d0a4a0fb6 100644 --- a/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll +++ b/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll @@ -43,6 +43,10 @@ define 
i128 @val_compare_and_swap(ptr %p, i128 %oldval, i128 %newval) { ; ; LSE-LABEL: val_compare_and_swap: ; LSE: // %bb.0: +; LSE-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5 +; LSE-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 +; LSE-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5 +; LSE-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 ; LSE-NEXT: caspa x2, x3, x4, x5, [x0] ; LSE-NEXT: mov x0, x2 ; LSE-NEXT: mov x1, x3 @@ -90,6 +94,10 @@ define i128 @val_compare_and_swap_seqcst(ptr %p, i128 %oldval, i128 %newval) { ; ; LSE-LABEL: val_compare_and_swap_seqcst: ; LSE: // %bb.0: +; LSE-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5 +; LSE-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 +; LSE-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5 +; LSE-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 ; LSE-NEXT: caspal x2, x3, x4, x5, [x0] ; LSE-NEXT: mov x0, x2 ; LSE-NEXT: mov x1, x3 @@ -137,6 +145,10 @@ define i128 @val_compare_and_swap_release(ptr %p, i128 %oldval, i128 %newval) { ; ; LSE-LABEL: val_compare_and_swap_release: ; LSE: // %bb.0: +; LSE-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5 +; LSE-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 +; LSE-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5 +; LSE-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 ; LSE-NEXT: caspl x2, x3, x4, x5, [x0] ; LSE-NEXT: mov x0, x2 ; LSE-NEXT: mov x1, x3 @@ -184,6 +196,10 @@ define i128 @val_compare_and_swap_monotonic(ptr %p, i128 %oldval, i128 %newval) ; ; LSE-LABEL: val_compare_and_swap_monotonic: ; LSE: // %bb.0: +; LSE-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5 +; LSE-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 +; LSE-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5 +; LSE-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 ; LSE-NEXT: casp x2, x3, x4, x5, [x0] ; LSE-NEXT: mov x0, x2 ; LSE-NEXT: mov x1, 
x3 @@ -235,7 +251,7 @@ define void @fetch_and_nand(ptr %p, i128 %bits) { ; LSE-NEXT: // =>This Inner Loop Header: Depth=1 ; LSE-NEXT: mov x7, x5 ; LSE-NEXT: mov x6, x4 -; LSE-NEXT: and x8, x5, x3 +; LSE-NEXT: and x8, x7, x3 ; LSE-NEXT: and x9, x4, x2 ; LSE-NEXT: mvn x10, x9 ; LSE-NEXT: mvn x11, x8 @@ -295,7 +311,7 @@ define void @fetch_and_or(ptr %p, i128 %bits) { ; LSE-NEXT: mov x7, x5 ; LSE-NEXT: mov x6, x4 ; LSE-NEXT: orr x8, x4, x2 -; LSE-NEXT: orr x9, x5, x3 +; LSE-NEXT: orr x9, x7, x3 ; LSE-NEXT: mov x4, x6 ; LSE-NEXT: mov x5, x7 ; LSE-NEXT: caspal x4, x5, x8, x9, [x0] @@ -352,7 +368,7 @@ define void @fetch_and_add(ptr %p, i128 %bits) { ; LSE-NEXT: mov x7, x5 ; LSE-NEXT: mov x6, x4 ; LSE-NEXT: adds x8, x4, x2 -; LSE-NEXT: adc x9, x5, x3 +; LSE-NEXT: adc x9, x7, x3 ; LSE-NEXT: mov x4, x6 ; LSE-NEXT: mov x5, x7 ; LSE-NEXT: caspal x4, x5, x8, x9, [x0] @@ -408,7 +424,7 @@ define void @fetch_and_sub(ptr %p, i128 %bits) { ; LSE-NEXT: mov x7, x5 ; LSE-NEXT: mov x6, x4 ; LSE-NEXT: subs x8, x4, x2 -; LSE-NEXT: sbc x9, x5, x3 +; LSE-NEXT: sbc x9, x7, x3 ; LSE-NEXT: mov x4, x6 ; LSE-NEXT: mov x5, x7 ; LSE-NEXT: caspal x4, x5, x8, x9, [x0] @@ -468,8 +484,8 @@ define void @fetch_and_min(ptr %p, i128 %bits) { ; LSE-NEXT: mov x7, x5 ; LSE-NEXT: mov x6, x4 ; LSE-NEXT: cmp x2, x4 -; LSE-NEXT: sbcs xzr, x3, x5 -; LSE-NEXT: csel x9, x5, x3, ge +; LSE-NEXT: sbcs xzr, x3, x7 +; LSE-NEXT: csel x9, x7, x3, ge ; LSE-NEXT: csel x8, x4, x2, ge ; LSE-NEXT: mov x4, x6 ; LSE-NEXT: mov x5, x7 @@ -530,8 +546,8 @@ define void @fetch_and_max(ptr %p, i128 %bits) { ; LSE-NEXT: mov x7, x5 ; LSE-NEXT: mov x6, x4 ; LSE-NEXT: cmp x2, x4 -; LSE-NEXT: sbcs xzr, x3, x5 -; LSE-NEXT: csel x9, x5, x3, lt +; LSE-NEXT: sbcs xzr, x3, x7 +; LSE-NEXT: csel x9, x7, x3, lt ; LSE-NEXT: csel x8, x4, x2, lt ; LSE-NEXT: mov x4, x6 ; LSE-NEXT: mov x5, x7 @@ -592,8 +608,8 @@ define void @fetch_and_umin(ptr %p, i128 %bits) { ; LSE-NEXT: mov x7, x5 ; LSE-NEXT: mov x6, x4 ; LSE-NEXT: cmp x2, x4 -; LSE-NEXT: sbcs xzr, 
x3, x5 -; LSE-NEXT: csel x9, x5, x3, hs +; LSE-NEXT: sbcs xzr, x3, x7 +; LSE-NEXT: csel x9, x7, x3, hs ; LSE-NEXT: csel x8, x4, x2, hs ; LSE-NEXT: mov x4, x6 ; LSE-NEXT: mov x5, x7 @@ -654,8 +670,8 @@ define void @fetch_and_umax(ptr %p, i128 %bits) { ; LSE-NEXT: mov x7, x5 ; LSE-NEXT: mov x6, x4 ; LSE-NEXT: cmp x2, x4 -; LSE-NEXT: sbcs xzr, x3, x5 -; LSE-NEXT: csel x9, x5, x3, lo +; LSE-NEXT: sbcs xzr, x3, x7 +; LSE-NEXT: csel x9, x7, x3, lo ; LSE-NEXT: csel x8, x4, x2, lo ; LSE-NEXT: mov x4, x6 ; LSE-NEXT: mov x5, x7 @@ -698,8 +714,8 @@ define i128 @atomic_load_seq_cst(ptr %p) { ; ; LSE-LABEL: atomic_load_seq_cst: ; LSE: // %bb.0: -; LSE-NEXT: mov x2, #0 // =0x0 -; LSE-NEXT: mov x3, #0 // =0x0 +; LSE-NEXT: mov x2, #0 +; LSE-NEXT: mov x3, #0 ; LSE-NEXT: caspal x2, x3, x2, x3, [x0] ; LSE-NEXT: mov x0, x2 ; LSE-NEXT: mov x1, x3 @@ -731,8 +747,8 @@ define i128 @atomic_load_relaxed(i64, i64, ptr %p) { ; ; LSE-LABEL: atomic_load_relaxed: ; LSE: // %bb.0: -; LSE-NEXT: mov x0, #0 // =0x0 -; LSE-NEXT: mov x1, #0 // =0x0 +; LSE-NEXT: mov x0, #0 +; LSE-NEXT: mov x1, #0 ; LSE-NEXT: casp x0, x1, x0, x1, [x2] ; LSE-NEXT: ret %r = load atomic i128, ptr %p monotonic, align 16 @@ -763,7 +779,9 @@ define void @atomic_store_seq_cst(i128 %in, ptr %p) { ; ; LSE-LABEL: atomic_store_seq_cst: ; LSE: // %bb.0: +; LSE-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1 ; LSE-NEXT: ldp x4, x5, [x2] +; LSE-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1 ; LSE-NEXT: .LBB14_1: // %atomicrmw.start ; LSE-NEXT: // =>This Inner Loop Header: Depth=1 ; LSE-NEXT: mov x6, x4 @@ -803,7 +821,9 @@ define void @atomic_store_release(i128 %in, ptr %p) { ; ; LSE-LABEL: atomic_store_release: ; LSE: // %bb.0: +; LSE-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1 ; LSE-NEXT: ldp x4, x5, [x2] +; LSE-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1 ; LSE-NEXT: .LBB15_1: // %atomicrmw.start ; LSE-NEXT: // =>This Inner Loop Header: Depth=1 ; LSE-NEXT: mov x6, x4 @@ -843,7 
+863,9 @@ define void @atomic_store_relaxed(i128 %in, ptr %p) { ; ; LSE-LABEL: atomic_store_relaxed: ; LSE: // %bb.0: +; LSE-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1 ; LSE-NEXT: ldp x4, x5, [x2] +; LSE-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1 ; LSE-NEXT: .LBB16_1: // %atomicrmw.start ; LSE-NEXT: // =>This Inner Loop Header: Depth=1 ; LSE-NEXT: mov x6, x4 @@ -899,6 +921,10 @@ define void @cmpxchg_dead(ptr %ptr, i128 %desired, i128 %new) { ; ; LSE-LABEL: cmpxchg_dead: ; LSE: // %bb.0: +; LSE-NEXT: // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5 +; LSE-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 +; LSE-NEXT: // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5 +; LSE-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 ; LSE-NEXT: casp x2, x3, x4, x5, [x0] ; LSE-NEXT: ret cmpxchg ptr %ptr, i128 %desired, i128 %new monotonic monotonic diff --git a/llvm/test/CodeGen/AArch64/arm64-dup.ll b/llvm/test/CodeGen/AArch64/arm64-dup.ll index 979a8b16f4217b..2bf5419e54830b 100644 --- a/llvm/test/CodeGen/AArch64/arm64-dup.ll +++ b/llvm/test/CodeGen/AArch64/arm64-dup.ll @@ -463,7 +463,9 @@ define <4 x i32> @test_perfectshuffle_dupext_v4i32(<4 x i32> %a, <4 x i32> %b) n ; CHECK-GI-LABEL: test_perfectshuffle_dupext_v4i32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI35_0 +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI35_0] +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2 ; CHECK-GI-NEXT: ret %r = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> @@ -480,7 +482,9 @@ define <4 x float> @test_perfectshuffle_dupext_v4f32(<4 x float> %a, <4 x float> ; CHECK-GI-LABEL: test_perfectshuffle_dupext_v4f32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI36_0 +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI36_0] +; 
CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2 ; CHECK-GI-NEXT: ret %r = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> @@ -500,13 +504,14 @@ define void @disguised_dup(<4 x float> %x, ptr %p1, ptr %p2) { ; CHECK-GI-LABEL: disguised_dup: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI37_1 -; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI37_1] +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1 +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI37_1] ; CHECK-GI-NEXT: adrp x8, .LCPI37_0 -; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v1 -; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI37_0] -; CHECK-GI-NEXT: tbl.16b v1, { v0, v1 }, v1 +; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2 +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI37_0] +; CHECK-GI-NEXT: tbl.16b v2, { v0, v1 }, v2 ; CHECK-GI-NEXT: str q0, [x0] -; CHECK-GI-NEXT: str q1, [x1] +; CHECK-GI-NEXT: str q2, [x1] ; CHECK-GI-NEXT: ret %shuf = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %dup = shufflevector <4 x float> %shuf, <4 x float> undef, <4 x i32> diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll index fc469a3169deb2..628fb550a0532b 100644 --- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll +++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll @@ -5490,14 +5490,18 @@ declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64 define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.b { v0, v1 }[0], [x0], #2 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld2.b { 
v0, v1 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x8, x0, #2 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ld2.b { v0, v1 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, i64 0, ptr %A) @@ -5509,14 +5513,18 @@ define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2lane(ptr %A, ptr %ptr, < define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.b { v0, v1 }[0], [x0], x2 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld2.b { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x8, x0, x2 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ld2.b { v0, v1 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, i64 0, ptr %A) @@ -5531,14 +5539,18 @@ declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8>, define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.b { v0, v1 }[0], [x0], #2 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: 
test_v8i8_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld2.b { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x8, x0, #2 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ld2.b { v0, v1 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, i64 0, ptr %A) @@ -5550,14 +5562,18 @@ define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2lane(ptr %A, ptr %ptr, <8 x define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.b { v0, v1 }[0], [x0], x2 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld2.b { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x8, x0, x2 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ld2.b { v0, v1 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, i64 0, ptr %A) @@ -5572,14 +5588,18 @@ declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8>, <8 x define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.h { v0, v1 }[0], [x0], #4 ; CHECK-SD-NEXT: str x0, 
[x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld2.h { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x8, x0, #4 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ld2.h { v0, v1 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, i64 0, ptr %A) @@ -5591,15 +5611,19 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2lane(ptr %A, ptr %ptr, < define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: lsl x8, x2, #1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.h { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_reg_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld2.h { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x8, x0, x2, lsl #1 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ld2.h { v0, v1 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, i64 0, ptr %A) @@ -5614,14 +5638,18 @@ declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16>, define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $d0 killed 
$d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.h { v0, v1 }[0], [x0], #4 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld2.h { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x8, x0, #4 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ld2.h { v0, v1 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, i64 0, ptr %A) @@ -5633,15 +5661,19 @@ define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2lane(ptr %A, ptr %ptr, < define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: lsl x8, x2, #1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.h { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_reg_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld2.h { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x8, x0, x2, lsl #1 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ld2.h { v0, v1 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, i64 0, ptr %A) @@ -5656,14 +5688,18 @@ declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0(<4 x i16>, define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: +; 
CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.s { v0, v1 }[0], [x0], #8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x8, x0, #8 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, i64 0, ptr %A) @@ -5675,15 +5711,19 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2lane(ptr %A, ptr %ptr, < define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.s { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_reg_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, i64 0, ptr %A) @@ -5698,14 +5738,18 @@ declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32>, define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x 
i32> %B, <2 x i32> %C) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.s { v0, v1 }[0], [x0], #8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x8, x0, #8 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, i64 0, ptr %A) @@ -5717,15 +5761,19 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2lane(ptr %A, ptr %ptr, < define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.s { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_reg_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, i64 0, ptr %A) @@ -5740,14 +5788,18 @@ declare { <2 x i32>, <2 x i32> } 
@llvm.aarch64.neon.ld2lane.v2i32.p0(<2 x i32>, define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.d { v0, v1 }[0], [x0], #16 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x8, x0, #16 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, i64 0, ptr %A) @@ -5759,15 +5811,19 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2lane(ptr %A, ptr %ptr, < define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.d { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_reg_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <2 x i64>, <2 x i64> } 
@llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, i64 0, ptr %A) @@ -5782,14 +5838,18 @@ declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64>, define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.d { v0, v1 }[0], [x0], #16 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x8, x0, #16 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, i64 0, ptr %A) @@ -5801,15 +5861,19 @@ define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2lane(ptr %A, ptr %ptr, < define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.d { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_reg_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ld2.d { 
v0, v1 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, i64 0, ptr %A) @@ -5824,14 +5888,18 @@ declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0(<1 x i64>, define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.s { v0, v1 }[0], [x0], #8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x8, x0, #8 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0(<4 x float> %B, <4 x float> %C, i64 0, ptr %A) @@ -5843,15 +5911,19 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2lane(ptr %A, ptr %pt define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <4 x float> %B, <4 x float> %C) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.s { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_reg_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; 
CHECK-GI-NEXT: add x8, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0(<4 x float> %B, <4 x float> %C, i64 0, ptr %A) @@ -5866,14 +5938,18 @@ declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0(<4 x fl define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.s { v0, v1 }[0], [x0], #8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x8, x0, #8 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0(<2 x float> %B, <2 x float> %C, i64 0, ptr %A) @@ -5885,15 +5961,19 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2lane(ptr %A, ptr %pt define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x float> %B, <2 x float> %C) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.s { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_reg_ld2lane: ; 
CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ld2.s { v0, v1 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0(<2 x float> %B, <2 x float> %C, i64 0, ptr %A) @@ -5908,14 +5988,18 @@ declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0(<2 x fl define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.d { v0, v1 }[0], [x0], #16 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x8, x0, #16 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0(<2 x double> %B, <2 x double> %C, i64 0, ptr %A) @@ -5927,15 +6011,19 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2lane(ptr %A, ptr % define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x double> %B, <2 x double> %C) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 
; CHECK-SD-NEXT: ld2.d { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_reg_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0(<2 x double> %B, <2 x double> %C, i64 0, ptr %A) @@ -5950,14 +6038,18 @@ declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0(<2 x define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_imm_ld2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.d { v0, v1 }[0], [x0], #16 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x8, x0, #16 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0(<1 x double> %B, <1 x double> %C, i64 0, ptr %A) @@ -5969,15 +6061,19 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2lane(ptr %A, ptr % define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <1 x double> %B, <1 x double> %C) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_reg_ld2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; 
kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ld2.d { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_reg_ld2lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ld2.d { v0, v1 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0(<1 x double> %B, <1 x double> %C, i64 0, ptr %A) @@ -5992,14 +6088,20 @@ declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0(<1 x define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.b { v0, v1, v2 }[0], [x0], #3 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld3.b { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x8, x0, #3 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ld3.b { v0, v1, v2 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> %B, <16 x i8> 
%C, <16 x i8> %D, i64 0, ptr %A) @@ -6011,14 +6113,20 @@ define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3lane(ptr %A, define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_ld3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.b { v0, v1, v2 }[0], [x0], x2 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld3.b { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x8, x0, x2 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ld3.b { v0, v1, v2 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, ptr %A) @@ -6033,14 +6141,20 @@ declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0( define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.b { v0, v1, v2 }[0], [x0], #3 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_imm_ld3lane: ; 
CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld3.b { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x8, x0, #3 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ld3.b { v0, v1, v2 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, ptr %A) @@ -6052,14 +6166,20 @@ define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3lane(ptr %A, ptr define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_ld3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.b { v0, v1, v2 }[0], [x0], x2 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld3.b { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x8, x0, x2 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ld3.b { v0, v1, v2 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, ptr %A) @@ -6074,14 +6194,20 @@ declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3lane(ptr 
%A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.h { v0, v1, v2 }[0], [x0], #6 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_imm_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld3.h { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x8, x0, #6 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ld3.h { v0, v1, v2 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, ptr %A) @@ -6093,15 +6219,21 @@ define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3lane(ptr %A, define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_reg_ld3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #1 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.h { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_reg_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld3.h { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x8, x0, x2, 
lsl #1 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ld3.h { v0, v1, v2 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, ptr %A) @@ -6116,14 +6248,20 @@ declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0( define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.h { v0, v1, v2 }[0], [x0], #6 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld3.h { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x8, x0, #6 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ld3.h { v0, v1, v2 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, ptr %A) @@ -6135,15 +6273,21 @@ define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3lane(ptr %A, define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_reg_ld3lane: ; CHECK-SD: ; %bb.0: +; 
CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #1 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.h { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_reg_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld3.h { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x8, x0, x2, lsl #1 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ld3.h { v0, v1, v2 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, ptr %A) @@ -6158,14 +6302,20 @@ declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0( define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[0], [x0], #12 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_imm_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x8, x0, #12 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 
def $q0_q1_q2 +; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, ptr %A) @@ -6177,15 +6327,21 @@ define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3lane(ptr %A, define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_reg_ld3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_reg_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, ptr %A) @@ -6200,14 +6356,20 @@ declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0( define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d1 killed 
$d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[0], [x0], #12 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x8, x0, #12 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, ptr %A) @@ -6219,15 +6381,21 @@ define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3lane(ptr %A, define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_reg_ld3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_reg_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; 
CHECK-GI-NEXT: ret %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, ptr %A) @@ -6242,14 +6410,20 @@ declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0( define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[0], [x0], #24 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_imm_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x8, x0, #24 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, ptr %A) @@ -6261,15 +6435,21 @@ define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3lane(ptr %A, define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_reg_ld3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 
def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_reg_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, ptr %A) @@ -6284,14 +6464,20 @@ declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0( define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[0], [x0], #24 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x8, x0, #24 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, 
ptr %A) @@ -6303,15 +6489,21 @@ define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3lane(ptr %A, define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_reg_ld3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_reg_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, ptr %A) @@ -6326,14 +6518,20 @@ declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0( define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[0], [x0], #12 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: 
test_v4f32_post_imm_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x8, x0, #12 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, ptr %A) @@ -6345,15 +6543,21 @@ define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3lane(pt define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_reg_ld3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_reg_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, ptr %A) @@ -6368,14 +6572,20 @@ declare { <4 x float>, <4 
x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[0], [x0], #12 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x8, x0, #12 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, ptr %A) @@ -6387,15 +6597,21 @@ define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3lane(pt define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_reg_ld3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_reg_ld3lane: ; 
CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, ptr %A) @@ -6410,14 +6626,20 @@ declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[0], [x0], #24 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x8, x0, #24 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, ptr %A) @@ -6429,15 +6651,21 @@ define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3lane 
define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_reg_ld3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_reg_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, ptr %A) @@ -6452,14 +6680,20 @@ declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane. 
define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_imm_ld3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[0], [x0], #24 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x8, x0, #24 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, ptr %A) @@ -6471,15 +6705,21 @@ define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3lane define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3lane(ptr %A, ptr %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_reg_ld3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_reg_ld3lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld3.d { 
v0, v1, v2 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, ptr %A) @@ -6494,14 +6734,22 @@ declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane. define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_ld4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0], #4 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: add x8, x0, #4 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } 
@llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, ptr %A) @@ -6513,14 +6761,22 @@ define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4la define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0], x2 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: add x8, x0, x2 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, ptr %A) @@ -6535,14 +6791,22 @@ declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lan define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_imm_ld4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: 
def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0], #4 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: add x8, x0, #4 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, ptr %A) @@ -6554,14 +6818,22 @@ define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4lane(pt define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0], x2 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; 
CHECK-GI-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: add x8, x0, x2 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ld4.b { v0, v1, v2, v3 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, ptr %A) @@ -6576,14 +6848,22 @@ declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_ld4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0], #8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: add x8, x0, #8 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0] ; CHECK-GI-NEXT: str 
x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, ptr %A) @@ -6595,15 +6875,23 @@ define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4la define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #1 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: add x8, x0, x2, lsl #1 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, ptr %A) @@ -6618,14 +6906,22 @@ declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lan define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x 
i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_imm_ld4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0], #8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: add x8, x0, #8 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, ptr %A) @@ -6637,15 +6933,23 @@ define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4la define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #1 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def 
$q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: add x8, x0, x2, lsl #1 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ld4.h { v0, v1, v2, v3 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, ptr %A) @@ -6660,14 +6964,22 @@ declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lan define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_ld4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: add x8, x0, #16 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: 
def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, ptr %A) @@ -6679,15 +6991,23 @@ define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4la define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, ptr %A) @@ -6702,14 
+7022,22 @@ declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lan define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_ld4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: add x8, x0, #16 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, ptr %A) @@ -6721,15 +7049,23 @@ define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4la define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: 
; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, ptr %A) @@ -6744,14 +7080,22 @@ declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lan define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_ld4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0] +; 
CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: add x8, x0, #32 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, ptr %A) @@ -6763,15 +7107,23 @@ define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4la define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0] ; 
CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, ptr %A) @@ -6786,14 +7138,22 @@ declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lan define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_imm_ld4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: add x8, x0, #32 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, ptr %A) @@ -6805,15 +7165,23 @@ define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4la define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> 
%D, <1 x i64> %E) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, ptr %A) @@ -6828,14 +7196,22 @@ declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lan define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_ld4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def 
$q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: add x8, x0, #16 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, ptr %A) @@ -6847,15 +7223,23 @@ define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_i define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $q1 
killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, ptr %A) @@ -6870,14 +7254,22 @@ declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neo define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_imm_ld4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], #16 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: add x8, x0, #16 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0(<2 x float> %B, <2 x 
float> %C, <2 x float> %D, <2 x float> %E, i64 0, ptr %A) @@ -6889,15 +7281,23 @@ define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_i define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: add x8, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, ptr %A) @@ -6912,14 +7312,22 @@ declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neo define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { ; CHECK-SD-LABEL: 
test_v2f64_post_imm_ld4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: add x8, x0, #32 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, ptr %A) @@ -6931,15 +7339,23 @@ define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_po define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; 
CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, ptr %A) @@ -6954,14 +7370,22 @@ declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_imm_ld4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], #32 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: add x8, x0, #32 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def 
$q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, ptr %A) @@ -6973,15 +7397,23 @@ define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_po define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4lane(ptr %A, ptr %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_reg_ld4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: str x0, [x1] ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_reg_ld4lane: ; CHECK-GI: ; %bb.0: -; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0] +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: add x8, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[0], [x0] ; CHECK-GI-NEXT: str x8, [x1] ; CHECK-GI-NEXT: ret %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } 
@llvm.aarch64.neon.ld4lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, ptr %A) @@ -6996,13 +7428,17 @@ declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64 define ptr @test_v16i8_post_imm_st2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_st2: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.16b { v0, v1 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.16b { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> %B, <16 x i8> %C, ptr %A) @@ -7013,13 +7449,17 @@ define ptr @test_v16i8_post_imm_st2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C define ptr @test_v16i8_post_reg_st2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_st2: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.16b { v0, v1 }, [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, x2 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.16b { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> %B, <16 x i8> %C, ptr %A) @@ -7033,13 +7473,17 @@ declare void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8>, <16 x i8>, ptr) define ptr 
@test_v8i8_post_imm_st2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_imm_st2: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st2.8b { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, #16 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st2.8b { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> %B, <8 x i8> %C, ptr %A) @@ -7050,13 +7494,17 @@ define ptr @test_v8i8_post_imm_st2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) n define ptr @test_v8i8_post_reg_st2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_st2: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st2.8b { v0, v1 }, [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, x2 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st2.8b { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> %B, <8 x i8> %C, ptr %A) @@ -7070,13 +7518,17 @@ declare void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8>, <8 x i8>, ptr) define ptr @test_v8i16_post_imm_st2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_st2: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed 
$q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.8h { v0, v1 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.8h { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> %B, <8 x i16> %C, ptr %A) @@ -7088,6 +7540,8 @@ define ptr @test_v8i16_post_reg_st2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C ; CHECK-SD-LABEL: test_v8i16_post_reg_st2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #1 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.8h { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -7095,6 +7549,8 @@ define ptr @test_v8i16_post_reg_st2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.8h { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> %B, <8 x i16> %C, ptr %A) @@ -7108,13 +7564,17 @@ declare void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16>, <8 x i16>, ptr) define ptr @test_v4i16_post_imm_st2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_imm_st2: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st2.4h { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; 
CHECK-GI-NEXT: add x0, x0, #16 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st2.4h { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> %B, <4 x i16> %C, ptr %A) @@ -7126,6 +7586,8 @@ define ptr @test_v4i16_post_reg_st2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C ; CHECK-SD-LABEL: test_v4i16_post_reg_st2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #1 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st2.4h { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -7133,6 +7595,8 @@ define ptr @test_v4i16_post_reg_st2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st2.4h { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> %B, <4 x i16> %C, ptr %A) @@ -7146,13 +7610,17 @@ declare void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16>, <4 x i16>, ptr) define ptr @test_v4i32_post_imm_st2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_st2: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.4s { v0, v1 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.4s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> %B, <4 x i32> %C, ptr %A) @@ -7164,6 
+7632,8 @@ define ptr @test_v4i32_post_reg_st2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C ; CHECK-SD-LABEL: test_v4i32_post_reg_st2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.4s { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -7171,6 +7641,8 @@ define ptr @test_v4i32_post_reg_st2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.4s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> %B, <4 x i32> %C, ptr %A) @@ -7184,13 +7656,17 @@ declare void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32>, <4 x i32>, ptr) define ptr @test_v2i32_post_imm_st2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_st2: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st2.2s { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, #16 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st2.2s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> %B, <2 x i32> %C, ptr %A) @@ -7202,6 +7678,8 @@ define ptr @test_v2i32_post_reg_st2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C ; CHECK-SD-LABEL: test_v2i32_post_reg_st2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def 
$d0_d1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st2.2s { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -7209,6 +7687,8 @@ define ptr @test_v2i32_post_reg_st2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st2.2s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> %B, <2 x i32> %C, ptr %A) @@ -7222,13 +7702,17 @@ declare void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32>, <2 x i32>, ptr) define ptr @test_v2i64_post_imm_st2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_st2: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.2d { v0, v1 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.2d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> %B, <2 x i64> %C, ptr %A) @@ -7240,6 +7724,8 @@ define ptr @test_v2i64_post_reg_st2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C ; CHECK-SD-LABEL: test_v2i64_post_reg_st2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.2d { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -7247,6 +7733,8 @@ define ptr @test_v2i64_post_reg_st2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C ; 
CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.2d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> %B, <2 x i64> %C, ptr %A) @@ -7260,13 +7748,17 @@ declare void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64>, <2 x i64>, ptr) define ptr @test_v1i64_post_imm_st2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_imm_st2: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.1d { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, #16 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.1d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> %B, <1 x i64> %C, ptr %A) @@ -7278,6 +7770,8 @@ define ptr @test_v1i64_post_reg_st2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C ; CHECK-SD-LABEL: test_v1i64_post_reg_st2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.1d { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -7285,6 +7779,8 @@ define ptr @test_v1i64_post_reg_st2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.1d { 
v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> %B, <1 x i64> %C, ptr %A) @@ -7298,13 +7794,17 @@ declare void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64>, <1 x i64>, ptr) define ptr @test_v4f32_post_imm_st2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_st2: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.4s { v0, v1 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.4s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float> %B, <4 x float> %C, ptr %A) @@ -7316,6 +7816,8 @@ define ptr @test_v4f32_post_reg_st2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float ; CHECK-SD-LABEL: test_v4f32_post_reg_st2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.4s { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -7323,6 +7825,8 @@ define ptr @test_v4f32_post_reg_st2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.4s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float> %B, <4 x float> %C, ptr %A) @@ -7336,13 +7840,17 @@ declare void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float>, <4 x float>, ptr) define ptr 
@test_v2f32_post_imm_st2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_imm_st2: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st2.2s { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, #16 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st2.2s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v2f32.p0(<2 x float> %B, <2 x float> %C, ptr %A) @@ -7354,6 +7862,8 @@ define ptr @test_v2f32_post_reg_st2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float ; CHECK-SD-LABEL: test_v2f32_post_reg_st2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st2.2s { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -7361,6 +7871,8 @@ define ptr @test_v2f32_post_reg_st2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st2.2s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v2f32.p0(<2 x float> %B, <2 x float> %C, ptr %A) @@ -7374,13 +7886,17 @@ declare void @llvm.aarch64.neon.st2.v2f32.p0(<2 x float>, <2 x float>, ptr) define ptr @test_v2f64_post_imm_st2(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_imm_st2: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; 
CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.2d { v0, v1 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.2d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v2f64.p0(<2 x double> %B, <2 x double> %C, ptr %A) @@ -7392,6 +7908,8 @@ define ptr @test_v2f64_post_reg_st2(ptr %A, ptr %ptr, <2 x double> %B, <2 x doub ; CHECK-SD-LABEL: test_v2f64_post_reg_st2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.2d { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -7399,6 +7917,8 @@ define ptr @test_v2f64_post_reg_st2(ptr %A, ptr %ptr, <2 x double> %B, <2 x doub ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.2d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v2f64.p0(<2 x double> %B, <2 x double> %C, ptr %A) @@ -7412,13 +7932,17 @@ declare void @llvm.aarch64.neon.st2.v2f64.p0(<2 x double>, <2 x double>, ptr) define ptr @test_v1f64_post_imm_st2(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_imm_st2: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.1d { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_st2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 
+; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, #16 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.1d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v1f64.p0(<1 x double> %B, <1 x double> %C, ptr %A) @@ -7430,6 +7954,8 @@ define ptr @test_v1f64_post_reg_st2(ptr %A, ptr %ptr, <1 x double> %B, <1 x doub ; CHECK-SD-LABEL: test_v1f64_post_reg_st2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.1d { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -7437,6 +7963,8 @@ define ptr @test_v1f64_post_reg_st2(ptr %A, ptr %ptr, <1 x double> %B, <1 x doub ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.1d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2.v1f64.p0(<1 x double> %B, <1 x double> %C, ptr %A) @@ -7450,13 +7978,19 @@ declare void @llvm.aarch64.neon.st2.v1f64.p0(<1 x double>, <1 x double>, ptr) define ptr @test_v16i8_post_imm_st3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_st3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.16b { v0, v1, v2 }, [x0], #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_st3: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 +; CHECK-GI-NEXT: ; 
kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.16b { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %A) @@ -7467,13 +8001,19 @@ define ptr @test_v16i8_post_imm_st3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C define ptr @test_v16i8_post_reg_st3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_st3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.16b { v0, v1, v2 }, [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_st3: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.16b { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %A) @@ -7487,13 +8027,19 @@ declare void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, pt define ptr @test_v8i8_post_imm_st3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_imm_st3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st3.8b { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: 
test_v8i8_post_imm_st3: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st3.8b { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %A) @@ -7504,13 +8050,19 @@ define ptr @test_v8i8_post_imm_st3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, < define ptr @test_v8i8_post_reg_st3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_st3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st3.8b { v0, v1, v2 }, [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_st3: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st3.8b { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %A) @@ -7524,13 +8076,19 @@ declare void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, ptr) define ptr @test_v8i16_post_imm_st3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_st3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.8h { v0, v1, v2 }, [x0], #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_imm_st3: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.8h { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %A) @@ -7541,14 +8099,20 @@ define ptr @test_v8i16_post_imm_st3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C define ptr @test_v8i16_post_reg_st3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_reg_st3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #1 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.8h { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_reg_st3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.8h { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %A) @@ -7562,13 +8126,19 @@ declare void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, pt define ptr @test_v4i16_post_imm_st3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) 
nounwind { ; CHECK-SD-LABEL: test_v4i16_post_imm_st3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st3.4h { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_st3: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st3.4h { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %A) @@ -7579,14 +8149,20 @@ define ptr @test_v4i16_post_imm_st3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C define ptr @test_v4i16_post_reg_st3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_reg_st3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: lsl x8, x2, #1 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st3.4h { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_reg_st3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st3.4h { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> %B, <4 x i16> 
%C, <4 x i16> %D, ptr %A) @@ -7600,13 +8176,19 @@ declare void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>, pt define ptr @test_v4i32_post_imm_st3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_st3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.4s { v0, v1, v2 }, [x0], #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_imm_st3: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.4s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %A) @@ -7617,14 +8199,20 @@ define ptr @test_v4i32_post_imm_st3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C define ptr @test_v4i32_post_reg_st3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_reg_st3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.4s { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_reg_st3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed 
$q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.4s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %A) @@ -7638,13 +8226,19 @@ declare void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, pt define ptr @test_v2i32_post_imm_st3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_st3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st3.2s { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_st3: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st3.2s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %A) @@ -7655,14 +8249,20 @@ define ptr @test_v2i32_post_imm_st3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C define ptr @test_v2i32_post_reg_st3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_reg_st3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st3.2s { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: 
test_v2i32_post_reg_st3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st3.2s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %A) @@ -7676,13 +8276,19 @@ declare void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, pt define ptr @test_v2i64_post_imm_st3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_st3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.2d { v0, v1, v2 }, [x0], #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_imm_st3: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.2d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %A) @@ -7693,14 +8299,20 @@ define ptr @test_v2i64_post_imm_st3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C define ptr @test_v2i64_post_reg_st3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_reg_st3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: 
; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.2d { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_reg_st3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.2d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %A) @@ -7714,13 +8326,19 @@ declare void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, pt define ptr @test_v1i64_post_imm_st3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_imm_st3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_st3: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %A) @@ -7731,14 +8349,20 @@ define ptr @test_v1i64_post_imm_st3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C define ptr @test_v1i64_post_reg_st3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, 
<1 x i64> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_reg_st3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_reg_st3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %A) @@ -7752,13 +8376,19 @@ declare void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>, pt define ptr @test_v4f32_post_imm_st3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_st3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.4s { v0, v1, v2 }, [x0], #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_imm_st3: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.4s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v4f32.p0(<4 x 
float> %B, <4 x float> %C, <4 x float> %D, ptr %A) @@ -7769,14 +8399,20 @@ define ptr @test_v4f32_post_imm_st3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float define ptr @test_v4f32_post_reg_st3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_reg_st3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.4s { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_reg_st3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.4s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %A) @@ -7790,13 +8426,19 @@ declare void @llvm.aarch64.neon.st3.v4f32.p0(<4 x float>, <4 x float>, <4 x floa define ptr @test_v2f32_post_imm_st3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_imm_st3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st3.2s { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_st3: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 +; 
CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st3.2s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %A) @@ -7807,14 +8449,20 @@ define ptr @test_v2f32_post_imm_st3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float define ptr @test_v2f32_post_reg_st3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_reg_st3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st3.2s { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_reg_st3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st3.2s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %A) @@ -7828,13 +8476,19 @@ declare void @llvm.aarch64.neon.st3.v2f32.p0(<2 x float>, <2 x float>, <2 x floa define ptr @test_v2f64_post_imm_st3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_imm_st3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.2d { 
v0, v1, v2 }, [x0], #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_st3: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.2d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %A) @@ -7845,14 +8499,20 @@ define ptr @test_v2f64_post_imm_st3(ptr %A, ptr %ptr, <2 x double> %B, <2 x doub define ptr @test_v2f64_post_reg_st3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_reg_st3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.2d { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_reg_st3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.2d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %A) @@ -7866,13 +8526,19 @@ declare void @llvm.aarch64.neon.st3.v2f64.p0(<2 x double>, <2 x double>, <2 x do define ptr @test_v1f64_post_imm_st3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_imm_st3: ; CHECK-SD: ; 
%bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_st3: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %A) @@ -7883,14 +8549,20 @@ define ptr @test_v1f64_post_imm_st3(ptr %A, ptr %ptr, <1 x double> %B, <1 x doub define ptr @test_v1f64_post_reg_st3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_reg_st3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_reg_st3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %A) @@ -7904,13 
+8576,21 @@ declare void @llvm.aarch64.neon.st3.v1f64.p0(<1 x double>, <1 x double>, <1 x do define ptr @test_v16i8_post_imm_st4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_st4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.16b { v0, v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_st4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.16b { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, ptr %A) @@ -7921,13 +8601,21 @@ define ptr @test_v16i8_post_imm_st4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C define ptr @test_v16i8_post_reg_st4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_st4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.16b { v0, v1, v2, v3 }, [x0], x2 ; 
CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_st4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.16b { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, ptr %A) @@ -7941,13 +8629,21 @@ declare void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, <1 define ptr @test_v8i8_post_imm_st4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_imm_st4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st4.8b { v0, v1, v2, v3 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_imm_st4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st4.8b { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, ptr %A) @@ -7958,13 +8654,21 @@ define ptr 
@test_v8i8_post_imm_st4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, < define ptr @test_v8i8_post_reg_st4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_st4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st4.8b { v0, v1, v2, v3 }, [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_st4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st4.8b { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, ptr %A) @@ -7978,13 +8682,21 @@ declare void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i define ptr @test_v8i16_post_imm_st4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_st4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.8h { v0, v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: 
test_v8i16_post_imm_st4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.8h { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, ptr %A) @@ -7995,14 +8707,22 @@ define ptr @test_v8i16_post_imm_st4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C define ptr @test_v8i16_post_reg_st4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_reg_st4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #1 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.8h { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_reg_st4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.8h { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, ptr %A) @@ -8016,13 
+8736,21 @@ declare void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, <8 define ptr @test_v4i16_post_imm_st4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_imm_st4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st4.4h { v0, v1, v2, v3 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_st4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st4.4h { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, ptr %A) @@ -8033,14 +8761,22 @@ define ptr @test_v4i16_post_imm_st4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C define ptr @test_v4i16_post_reg_st4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_reg_st4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: lsl x8, x2, #1 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st4.4h { 
v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_reg_st4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st4.4h { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, ptr %A) @@ -8054,13 +8790,21 @@ declare void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>,<4 define ptr @test_v4i32_post_imm_st4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_st4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.4s { v0, v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_imm_st4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.4s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> 
%E, ptr %A) @@ -8071,14 +8815,22 @@ define ptr @test_v4i32_post_imm_st4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C define ptr @test_v4i32_post_reg_st4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_reg_st4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.4s { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_reg_st4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.4s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, ptr %A) @@ -8092,13 +8844,21 @@ declare void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>,<4 define ptr @test_v2i32_post_imm_st4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_st4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def 
$d0_d1_d2_d3 ; CHECK-SD-NEXT: st4.2s { v0, v1, v2, v3 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_st4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st4.2s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, ptr %A) @@ -8109,14 +8869,22 @@ define ptr @test_v2i32_post_imm_st4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C define ptr @test_v2i32_post_reg_st4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_reg_st4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st4.2s { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_reg_st4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st4.2s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void 
@llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, ptr %A) @@ -8130,13 +8898,21 @@ declare void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, <2 define ptr @test_v2i64_post_imm_st4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_st4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.2d { v0, v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_imm_st4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.2d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, ptr %A) @@ -8147,14 +8923,22 @@ define ptr @test_v2i64_post_imm_st4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C define ptr @test_v2i64_post_reg_st4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_reg_st4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 
+; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.2d { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_reg_st4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.2d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, ptr %A) @@ -8168,13 +8952,21 @@ declare void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>,<2 define ptr @test_v1i64_post_imm_st4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_imm_st4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_st4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2, v3 }, [x8] ; 
CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, ptr %A) @@ -8185,14 +8977,22 @@ define ptr @test_v1i64_post_imm_st4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C define ptr @test_v1i64_post_reg_st4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_reg_st4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_reg_st4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, ptr %A) @@ -8206,13 +9006,21 @@ declare void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>,<1 define ptr @test_v4f32_post_imm_st4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_st4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed 
$q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.4s { v0, v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_imm_st4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.4s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, ptr %A) @@ -8223,14 +9031,22 @@ define ptr @test_v4f32_post_imm_st4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float define ptr @test_v4f32_post_reg_st4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_reg_st4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.4s { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_reg_st4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed 
$q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.4s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, ptr %A) @@ -8244,13 +9060,21 @@ declare void @llvm.aarch64.neon.st4.v4f32.p0(<4 x float>, <4 x float>, <4 x floa define ptr @test_v2f32_post_imm_st4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_imm_st4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st4.2s { v0, v1, v2, v3 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_st4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st4.2s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, ptr %A) @@ -8261,14 +9085,22 @@ define ptr @test_v2f32_post_imm_st4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float define ptr @test_v2f32_post_reg_st4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_reg_st4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; 
CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st4.2s { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_reg_st4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st4.2s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, ptr %A) @@ -8282,13 +9114,21 @@ declare void @llvm.aarch64.neon.st4.v2f32.p0(<2 x float>, <2 x float>, <2 x floa define ptr @test_v2f64_post_imm_st4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_imm_st4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.2d { v0, v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_st4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed 
$q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.2d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, ptr %A) @@ -8299,14 +9139,22 @@ define ptr @test_v2f64_post_imm_st4(ptr %A, ptr %ptr, <2 x double> %B, <2 x doub define ptr @test_v2f64_post_reg_st4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_reg_st4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.2d { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_reg_st4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.2d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, ptr %A) @@ -8320,13 +9168,21 @@ declare void @llvm.aarch64.neon.st4.v2f64.p0(<2 x double>, <2 x double>, <2 x do define ptr @test_v1f64_post_imm_st4(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_imm_st4: ; CHECK-SD: 
; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_st4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, ptr %A) @@ -8337,14 +9193,22 @@ define ptr @test_v1f64_post_imm_st4(ptr %A, ptr %ptr, <1 x double> %B, <1 x doub define ptr @test_v1f64_post_reg_st4(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_reg_st4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_reg_st4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add 
x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, ptr %A) @@ -8358,13 +9222,17 @@ declare void @llvm.aarch64.neon.st4.v1f64.p0(<1 x double>, <1 x double>, <1 x do define ptr @test_v16i8_post_imm_st1x2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_st1x2: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.16b { v0, v1 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.16b { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8> %B, <16 x i8> %C, ptr %A) @@ -8375,13 +9243,17 @@ define ptr @test_v16i8_post_imm_st1x2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> define ptr @test_v16i8_post_reg_st1x2(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_st1x2: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.16b { v0, v1 }, [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; 
CHECK-GI-NEXT: add x0, x0, x2 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.16b { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8> %B, <16 x i8> %C, ptr %A) @@ -8395,13 +9267,17 @@ declare void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8>, <16 x i8>, ptr) define ptr @test_v8i8_post_imm_st1x2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_imm_st1x2: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.8b { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, #16 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.8b { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8> %B, <8 x i8> %C, ptr %A) @@ -8412,13 +9288,17 @@ define ptr @test_v8i8_post_imm_st1x2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) define ptr @test_v8i8_post_reg_st1x2(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_st1x2: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.8b { v0, v1 }, [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, x2 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.8b { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8> %B, <8 x i8> %C, ptr %A) @@ 
-8432,13 +9312,17 @@ declare void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8>, <8 x i8>, ptr) define ptr @test_v8i16_post_imm_st1x2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_st1x2: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.8h { v0, v1 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.8h { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16> %B, <8 x i16> %C, ptr %A) @@ -8450,6 +9334,8 @@ define ptr @test_v8i16_post_reg_st1x2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> ; CHECK-SD-LABEL: test_v8i16_post_reg_st1x2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #1 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.8h { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -8457,6 +9343,8 @@ define ptr @test_v8i16_post_reg_st1x2(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.8h { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16> %B, <8 x i16> %C, ptr %A) @@ -8470,13 +9358,17 @@ declare void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16>, <8 x i16>, ptr) define ptr @test_v4i16_post_imm_st1x2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_imm_st1x2: ; 
CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.4h { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, #16 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.4h { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16> %B, <4 x i16> %C, ptr %A) @@ -8488,6 +9380,8 @@ define ptr @test_v4i16_post_reg_st1x2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> ; CHECK-SD-LABEL: test_v4i16_post_reg_st1x2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #1 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.4h { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -8495,6 +9389,8 @@ define ptr @test_v4i16_post_reg_st1x2(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.4h { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16> %B, <4 x i16> %C, ptr %A) @@ -8508,13 +9404,17 @@ declare void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16>, <4 x i16>, ptr) define ptr @test_v4i32_post_imm_st1x2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_st1x2: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.4s { v0, v1 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; 
CHECK-GI-LABEL: test_v4i32_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.4s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32> %B, <4 x i32> %C, ptr %A) @@ -8526,6 +9426,8 @@ define ptr @test_v4i32_post_reg_st1x2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> ; CHECK-SD-LABEL: test_v4i32_post_reg_st1x2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.4s { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -8533,6 +9435,8 @@ define ptr @test_v4i32_post_reg_st1x2(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.4s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32> %B, <4 x i32> %C, ptr %A) @@ -8546,13 +9450,17 @@ declare void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32>, <4 x i32>, ptr) define ptr @test_v2i32_post_imm_st1x2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_st1x2: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.2s { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, #16 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 
killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.2s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32> %B, <2 x i32> %C, ptr %A) @@ -8564,6 +9472,8 @@ define ptr @test_v2i32_post_reg_st1x2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> ; CHECK-SD-LABEL: test_v2i32_post_reg_st1x2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.2s { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -8571,6 +9481,8 @@ define ptr @test_v2i32_post_reg_st1x2(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.2s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32> %B, <2 x i32> %C, ptr %A) @@ -8584,13 +9496,17 @@ declare void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32>, <2 x i32>, ptr) define ptr @test_v2i64_post_imm_st1x2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_st1x2: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.2d { v0, v1 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.2d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64> %B, <2 x i64> %C, ptr %A) @@ -8602,6 +9518,8 @@ define ptr @test_v2i64_post_reg_st1x2(ptr %A, ptr 
%ptr, <2 x i64> %B, <2 x i64> ; CHECK-SD-LABEL: test_v2i64_post_reg_st1x2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.2d { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -8609,6 +9527,8 @@ define ptr @test_v2i64_post_reg_st1x2(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.2d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64> %B, <2 x i64> %C, ptr %A) @@ -8622,13 +9542,17 @@ declare void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64>, <2 x i64>, ptr) define ptr @test_v1i64_post_imm_st1x2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_imm_st1x2: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.1d { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, #16 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.1d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64> %B, <1 x i64> %C, ptr %A) @@ -8640,6 +9564,8 @@ define ptr @test_v1i64_post_reg_st1x2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> ; CHECK-SD-LABEL: test_v1i64_post_reg_st1x2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-SD-NEXT: ; kill: def $d0 killed 
$d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.1d { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -8647,6 +9573,8 @@ define ptr @test_v1i64_post_reg_st1x2(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.1d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64> %B, <1 x i64> %C, ptr %A) @@ -8660,13 +9588,17 @@ declare void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64>, <1 x i64>, ptr) define ptr @test_v4f32_post_imm_st1x2(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_st1x2: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.4s { v0, v1 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.4s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float> %B, <4 x float> %C, ptr %A) @@ -8678,6 +9610,8 @@ define ptr @test_v4f32_post_reg_st1x2(ptr %A, ptr %ptr, <4 x float> %B, <4 x flo ; CHECK-SD-LABEL: test_v4f32_post_reg_st1x2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.4s { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -8685,6 +9619,8 @@ define ptr @test_v4f32_post_reg_st1x2(ptr %A, ptr %ptr, <4 x float> %B, <4 x flo ; CHECK-GI: ; %bb.0: ; 
CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.4s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float> %B, <4 x float> %C, ptr %A) @@ -8698,13 +9634,17 @@ declare void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float>, <4 x float>, ptr) define ptr @test_v2f32_post_imm_st1x2(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_imm_st1x2: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.2s { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, #16 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.2s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v2f32.p0(<2 x float> %B, <2 x float> %C, ptr %A) @@ -8716,6 +9656,8 @@ define ptr @test_v2f32_post_reg_st1x2(ptr %A, ptr %ptr, <2 x float> %B, <2 x flo ; CHECK-SD-LABEL: test_v2f32_post_reg_st1x2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.2s { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -8723,6 +9665,8 @@ define ptr @test_v2f32_post_reg_st1x2(ptr %A, ptr %ptr, <2 x float> %B, <2 x flo ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: 
st1.2s { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v2f32.p0(<2 x float> %B, <2 x float> %C, ptr %A) @@ -8736,13 +9680,17 @@ declare void @llvm.aarch64.neon.st1x2.v2f32.p0(<2 x float>, <2 x float>, ptr) define ptr @test_v2f64_post_imm_st1x2(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_imm_st1x2: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.2d { v0, v1 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #32 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.2d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double> %B, <2 x double> %C, ptr %A) @@ -8754,6 +9702,8 @@ define ptr @test_v2f64_post_reg_st1x2(ptr %A, ptr %ptr, <2 x double> %B, <2 x do ; CHECK-SD-LABEL: test_v2f64_post_reg_st1x2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st1.2d { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -8761,6 +9711,8 @@ define ptr @test_v2f64_post_reg_st1x2(ptr %A, ptr %ptr, <2 x double> %B, <2 x do ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st1.2d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double> %B, <2 x double> %C, ptr %A) @@ -8774,13 +9726,17 @@ declare void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double>, <2 
x double>, ptr) define ptr @test_v1f64_post_imm_st1x2(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_imm_st1x2: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.1d { v0, v1 }, [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_st1x2: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: add x0, x0, #16 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.1d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double> %B, <1 x double> %C, ptr %A) @@ -8792,6 +9748,8 @@ define ptr @test_v1f64_post_reg_st1x2(ptr %A, ptr %ptr, <1 x double> %B, <1 x do ; CHECK-SD-LABEL: test_v1f64_post_reg_st1x2: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 ; CHECK-SD-NEXT: st1.1d { v0, v1 }, [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -8799,6 +9757,8 @@ define ptr @test_v1f64_post_reg_st1x2(ptr %A, ptr %ptr, <1 x double> %B, <1 x do ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1 def $d0_d1 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1 def $d0_d1 ; CHECK-GI-NEXT: st1.1d { v0, v1 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double> %B, <1 x double> %C, ptr %A) @@ -8812,13 +9772,19 @@ declare void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double>, <1 x double>, ptr) define ptr @test_v16i8_post_imm_st1x3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_st1x3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; 
kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.16b { v0, v1, v2 }, [x0], #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_st1x3: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st1.16b { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %A) @@ -8829,13 +9795,19 @@ define ptr @test_v16i8_post_imm_st1x3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> define ptr @test_v16i8_post_reg_st1x3(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_st1x3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.16b { v0, v1, v2 }, [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_st1x3: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st1.16b { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %A) @@ -8849,13 +9821,19 @@ declare void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8>, <16 x i8>, <16 
x i8>, define ptr @test_v8i8_post_imm_st1x3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_imm_st1x3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.8b { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_imm_st1x3: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.8b { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %A) @@ -8866,13 +9844,19 @@ define ptr @test_v8i8_post_imm_st1x3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, define ptr @test_v8i8_post_reg_st1x3(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_st1x3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.8b { v0, v1, v2 }, [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_st1x3: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.8b { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call 
void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %A) @@ -8886,13 +9870,19 @@ declare void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, ptr) define ptr @test_v8i16_post_imm_st1x3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_st1x3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.8h { v0, v1, v2 }, [x0], #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_imm_st1x3: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st1.8h { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %A) @@ -8903,14 +9893,20 @@ define ptr @test_v8i16_post_imm_st1x3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> define ptr @test_v8i16_post_reg_st1x3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_reg_st1x3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #1 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.8h { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_reg_st1x3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, 
x0, x2, lsl #1 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st1.8h { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %A) @@ -8924,13 +9920,19 @@ declare void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, define ptr @test_v4i16_post_imm_st1x3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_imm_st1x3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.4h { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_st1x3: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.4h { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %A) @@ -8941,14 +9943,20 @@ define ptr @test_v4i16_post_imm_st1x3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> define ptr @test_v4i16_post_reg_st1x3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_reg_st1x3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: lsl x8, x2, #1 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.4h { 
v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_reg_st1x3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.4h { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %A) @@ -8962,13 +9970,19 @@ declare void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>, define ptr @test_v4i32_post_imm_st1x3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_st1x3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.4s { v0, v1, v2 }, [x0], #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_imm_st1x3: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st1.4s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %A) @@ -8979,14 +9993,20 @@ define ptr @test_v4i32_post_imm_st1x3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> define ptr @test_v4i32_post_reg_st1x3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_reg_st1x3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 
killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.4s { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_reg_st1x3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st1.4s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %A) @@ -9000,13 +10020,19 @@ declare void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, define ptr @test_v2i32_post_imm_st1x3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_st1x3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.2s { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_st1x3: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.2s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %A) @@ -9017,14 +10043,20 @@ define ptr @test_v2i32_post_imm_st1x3(ptr %A, ptr %ptr, <2 x i32> %B, 
<2 x i32> define ptr @test_v2i32_post_reg_st1x3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_reg_st1x3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.2s { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_reg_st1x3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.2s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %A) @@ -9038,13 +10070,19 @@ declare void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, define ptr @test_v2i64_post_imm_st1x3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_st1x3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.2d { v0, v1, v2 }, [x0], #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_imm_st1x3: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; 
CHECK-GI-NEXT: st1.2d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %A) @@ -9055,14 +10093,20 @@ define ptr @test_v2i64_post_imm_st1x3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> define ptr @test_v2i64_post_reg_st1x3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_reg_st1x3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.2d { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_reg_st1x3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st1.2d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %A) @@ -9076,13 +10120,19 @@ declare void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, define ptr @test_v1i64_post_imm_st1x3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_imm_st1x3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_st1x3: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 
killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %A) @@ -9093,14 +10143,20 @@ define ptr @test_v1i64_post_imm_st1x3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> define ptr @test_v1i64_post_reg_st1x3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_reg_st1x3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_reg_st1x3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %A) @@ -9114,13 +10170,19 @@ declare void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>, define ptr @test_v4f32_post_imm_st1x3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_st1x3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; 
CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.4s { v0, v1, v2 }, [x0], #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_imm_st1x3: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st1.4s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %A) @@ -9131,14 +10193,20 @@ define ptr @test_v4f32_post_imm_st1x3(ptr %A, ptr %ptr, <4 x float> %B, <4 x flo define ptr @test_v4f32_post_reg_st1x3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_reg_st1x3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.4s { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_reg_st1x3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st1.4s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %A) @@ -9152,13 +10220,19 @@ declare void @llvm.aarch64.neon.st1x3.v4f32.p0(<4 x float>, <4 x float>, <4 x fl define ptr @test_v2f32_post_imm_st1x3(ptr %A, ptr %ptr, <2 x float> 
%B, <2 x float> %C, <2 x float> %D) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_imm_st1x3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.2s { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_st1x3: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.2s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %A) @@ -9169,14 +10243,20 @@ define ptr @test_v2f32_post_imm_st1x3(ptr %A, ptr %ptr, <2 x float> %B, <2 x flo define ptr @test_v2f32_post_reg_st1x3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_reg_st1x3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.2s { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_reg_st1x3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.2s { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call 
void @llvm.aarch64.neon.st1x3.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %A) @@ -9190,13 +10270,19 @@ declare void @llvm.aarch64.neon.st1x3.v2f32.p0(<2 x float>, <2 x float>, <2 x fl define ptr @test_v2f64_post_imm_st1x3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_imm_st1x3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.2d { v0, v1, v2 }, [x0], #48 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_st1x3: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #48 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st1.2d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %A) @@ -9207,14 +10293,20 @@ define ptr @test_v2f64_post_imm_st1x3(ptr %A, ptr %ptr, <2 x double> %B, <2 x do define ptr @test_v2f64_post_reg_st1x3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_reg_st1x3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st1.2d { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_reg_st1x3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 
def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st1.2d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %A) @@ -9228,13 +10320,19 @@ declare void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double>, <2 x double>, <2 x define ptr @test_v1f64_post_imm_st1x3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_imm_st1x3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2 }, [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_st1x3: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %A) @@ -9245,14 +10343,20 @@ define ptr @test_v1f64_post_imm_st1x3(ptr %A, ptr %ptr, <1 x double> %B, <1 x do define ptr @test_v1f64_post_reg_st1x3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_reg_st1x3: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-SD-NEXT: ; kill: 
def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_reg_st1x3: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %A) @@ -9266,13 +10370,21 @@ declare void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double>, <1 x double>, <1 x define ptr @test_v16i8_post_imm_st1x4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_st1x4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st1.16b { v0, v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_st1x4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.16b { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, ptr %A) @@ -9283,13 
+10395,21 @@ define ptr @test_v16i8_post_imm_st1x4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> define ptr @test_v16i8_post_reg_st1x4(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_st1x4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st1.16b { v0, v1, v2, v3 }, [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_st1x4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.16b { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, ptr %A) @@ -9303,13 +10423,21 @@ declare void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, define ptr @test_v8i8_post_imm_st1x4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_imm_st1x4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.8b { v0, v1, v2, v3 }, [x0], #32 
; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_imm_st1x4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.8b { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, ptr %A) @@ -9320,13 +10448,21 @@ define ptr @test_v8i8_post_imm_st1x4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, define ptr @test_v8i8_post_reg_st1x4(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_st1x4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.8b { v0, v1, v2, v3 }, [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_st1x4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.8b { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, ptr %A) @@ -9340,13 
+10476,21 @@ declare void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, <8 x define ptr @test_v8i16_post_imm_st1x4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_st1x4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st1.8h { v0, v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_imm_st1x4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.8h { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, ptr %A) @@ -9357,14 +10501,22 @@ define ptr @test_v8i16_post_imm_st1x4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> define ptr @test_v8i16_post_reg_st1x4(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_reg_st1x4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #1 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; 
CHECK-SD-NEXT: st1.8h { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_reg_st1x4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.8h { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, ptr %A) @@ -9378,13 +10530,21 @@ declare void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, define ptr @test_v4i16_post_imm_st1x4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_imm_st1x4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.4h { v0, v1, v2, v3 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_st1x4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.4h { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16> %B, <4 
x i16> %C, <4 x i16> %D, <4 x i16> %E, ptr %A) @@ -9395,14 +10555,22 @@ define ptr @test_v4i16_post_imm_st1x4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> define ptr @test_v4i16_post_reg_st1x4(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_reg_st1x4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: lsl x8, x2, #1 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.4h { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_reg_st1x4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.4h { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, ptr %A) @@ -9416,13 +10584,21 @@ declare void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>,< define ptr @test_v4i32_post_imm_st1x4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_st1x4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def 
$q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st1.4s { v0, v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_imm_st1x4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.4s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, ptr %A) @@ -9433,14 +10609,22 @@ define ptr @test_v4i32_post_imm_st1x4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> define ptr @test_v4i32_post_reg_st1x4(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_reg_st1x4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st1.4s { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_reg_st1x4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.4s { v0, v1, 
v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, ptr %A) @@ -9454,13 +10638,21 @@ declare void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>,< define ptr @test_v2i32_post_imm_st1x4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_st1x4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.2s { v0, v1, v2, v3 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_st1x4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.2s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, ptr %A) @@ -9471,14 +10663,22 @@ define ptr @test_v2i32_post_imm_st1x4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> define ptr @test_v2i32_post_reg_st1x4(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_reg_st1x4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; 
kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.2s { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_reg_st1x4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.2s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, ptr %A) @@ -9492,13 +10692,21 @@ declare void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, define ptr @test_v2i64_post_imm_st1x4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_st1x4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st1.2d { v0, v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_imm_st1x4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 
def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.2d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, ptr %A) @@ -9509,14 +10717,22 @@ define ptr @test_v2i64_post_imm_st1x4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> define ptr @test_v2i64_post_reg_st1x4(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_reg_st1x4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st1.2d { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_reg_st1x4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.2d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, ptr %A) @@ -9530,13 +10746,21 @@ declare void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>,< define ptr @test_v1i64_post_imm_st1x4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_imm_st1x4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 
killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_st1x4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, ptr %A) @@ -9547,14 +10771,22 @@ define ptr @test_v1i64_post_imm_st1x4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> define ptr @test_v1i64_post_reg_st1x4(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_reg_st1x4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_reg_st1x4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 
killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, ptr %A) @@ -9568,13 +10800,21 @@ declare void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>,< define ptr @test_v4f32_post_imm_st1x4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_st1x4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st1.4s { v0, v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_imm_st1x4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.4s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, ptr %A) @@ -9585,14 +10825,22 @@ define ptr @test_v4f32_post_imm_st1x4(ptr %A, ptr %ptr, <4 x float> %B, <4 x flo define ptr @test_v4f32_post_reg_st1x4(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_reg_st1x4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed 
$q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st1.4s { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_reg_st1x4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.4s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, ptr %A) @@ -9606,13 +10854,21 @@ declare void @llvm.aarch64.neon.st1x4.v4f32.p0(<4 x float>, <4 x float>, <4 x fl define ptr @test_v2f32_post_imm_st1x4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_imm_st1x4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.2s { v0, v1, v2, v3 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_st1x4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 +; CHECK-GI-NEXT: ; kill: def $d1 
killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.2s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, ptr %A) @@ -9623,14 +10879,22 @@ define ptr @test_v2f32_post_imm_st1x4(ptr %A, ptr %ptr, <2 x float> %B, <2 x flo define ptr @test_v2f32_post_reg_st1x4(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_reg_st1x4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.2s { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_reg_st1x4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.2s { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, ptr %A) @@ -9644,13 +10908,21 @@ declare void @llvm.aarch64.neon.st1x4.v2f32.p0(<2 x float>, <2 x float>, <2 x fl define ptr @test_v2f64_post_imm_st1x4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x 
double> %D, <2 x double> %E) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_imm_st1x4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st1.2d { v0, v1, v2, v3 }, [x0], #64 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_st1x4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #64 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.2d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, ptr %A) @@ -9661,14 +10933,22 @@ define ptr @test_v2f64_post_imm_st1x4(ptr %A, ptr %ptr, <2 x double> %B, <2 x do define ptr @test_v2f64_post_reg_st1x4(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_reg_st1x4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st1.2d { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_reg_st1x4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; 
kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st1.2d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, ptr %A) @@ -9682,13 +10962,21 @@ declare void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double>, <2 x double>, <2 x define ptr @test_v1f64_post_imm_st1x4(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_imm_st1x4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_st1x4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, ptr %A) @@ -9699,14 +10987,22 @@ define ptr @test_v1f64_post_imm_st1x4(ptr %A, ptr 
%ptr, <1 x double> %B, <1 x do define ptr @test_v1f64_post_reg_st1x4(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_reg_st1x4: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-SD-NEXT: st1.1d { v0, v1, v2, v3 }, [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_reg_st1x4: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $d0_d1_d2_d3 def $d0_d1_d2_d3 ; CHECK-GI-NEXT: st1.1d { v0, v1, v2, v3 }, [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, ptr %A) @@ -9719,13 +11015,17 @@ declare void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double>, <1 x double>, <1 x define ptr @test_v16i8_post_imm_st2lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_st2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.b { v0, v1 }[0], [x0], #2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add 
x0, x0, #2 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.b { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, i64 0, ptr %A) @@ -9736,13 +11036,17 @@ define ptr @test_v16i8_post_imm_st2lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8 define ptr @test_v16i8_post_reg_st2lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_st2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.b { v0, v1 }[0], [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, x2 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.b { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, i64 0, ptr %A) @@ -9756,13 +11060,17 @@ declare void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8>, <16 x i8>, i64, ptr) define ptr @test_v8i8_post_imm_st2lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_imm_st2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.b { v0, v1 }[0], [x0], #2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_imm_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #2 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.b { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void 
@llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, i64 0, ptr %A) @@ -9773,13 +11081,17 @@ define ptr @test_v8i8_post_imm_st2lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> % define ptr @test_v8i8_post_reg_st2lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_st2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.b { v0, v1 }[0], [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, x2 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.b { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, i64 0, ptr %A) @@ -9793,13 +11105,17 @@ declare void @llvm.aarch64.neon.st2lane.v8i8.p0(<8 x i8>, <8 x i8>, i64, ptr) define ptr @test_v8i16_post_imm_st2lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_st2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.h { v0, v1 }[0], [x0], #4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_imm_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #4 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.h { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, i64 0, ptr %A) @@ -9811,6 +11127,8 @@ define ptr @test_v8i16_post_reg_st2lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16 ; 
CHECK-SD-LABEL: test_v8i16_post_reg_st2lane: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #1 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.h { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -9818,6 +11136,8 @@ define ptr @test_v8i16_post_reg_st2lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16 ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.h { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, i64 0, ptr %A) @@ -9831,13 +11151,17 @@ declare void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16>, <8 x i16>, i64, ptr) define ptr @test_v4i16_post_imm_st2lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_imm_st2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.h { v0, v1 }[0], [x0], #4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #4 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.h { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, i64 0, ptr %A) @@ -9849,6 +11173,8 @@ define ptr @test_v4i16_post_reg_st2lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16 ; CHECK-SD-LABEL: test_v4i16_post_reg_st2lane: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #1 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def 
$d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.h { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -9856,6 +11182,8 @@ define ptr @test_v4i16_post_reg_st2lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16 ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.h { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, i64 0, ptr %A) @@ -9869,13 +11197,17 @@ declare void @llvm.aarch64.neon.st2lane.v4i16.p0(<4 x i16>, <4 x i16>, i64, ptr) define ptr @test_v4i32_post_imm_st2lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_st2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.s { v0, v1 }[0], [x0], #8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_imm_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #8 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.s { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, i64 0, ptr %A) @@ -9887,6 +11219,8 @@ define ptr @test_v4i32_post_reg_st2lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32 ; CHECK-SD-LABEL: test_v4i32_post_reg_st2lane: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.s { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -9894,6 +11228,8 @@ define ptr @test_v4i32_post_reg_st2lane(ptr %A, ptr %ptr, <4 x 
i32> %B, <4 x i32 ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.s { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, i64 0, ptr %A) @@ -9907,13 +11243,17 @@ declare void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32>, <4 x i32>, i64, ptr) define ptr @test_v2i32_post_imm_st2lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_st2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.s { v0, v1 }[0], [x0], #8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #8 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.s { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, i64 0, ptr %A) @@ -9925,6 +11265,8 @@ define ptr @test_v2i32_post_reg_st2lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32 ; CHECK-SD-LABEL: test_v2i32_post_reg_st2lane: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.s { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -9932,6 +11274,8 @@ define ptr @test_v2i32_post_reg_st2lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32 ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ; 
kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.s { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, i64 0, ptr %A) @@ -9945,13 +11289,17 @@ declare void @llvm.aarch64.neon.st2lane.v2i32.p0(<2 x i32>, <2 x i32>, i64, ptr) define ptr @test_v2i64_post_imm_st2lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_st2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.d { v0, v1 }[0], [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_imm_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #16 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.d { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, i64 0, ptr %A) @@ -9963,6 +11311,8 @@ define ptr @test_v2i64_post_reg_st2lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64 ; CHECK-SD-LABEL: test_v2i64_post_reg_st2lane: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.d { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -9970,6 +11320,8 @@ define ptr @test_v2i64_post_reg_st2lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64 ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.d { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, i64 0, 
ptr %A) @@ -9983,13 +11335,17 @@ declare void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64>, <2 x i64>, i64, ptr) define ptr @test_v1i64_post_imm_st2lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_imm_st2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.d { v0, v1 }[0], [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #16 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.d { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, i64 0, ptr %A) @@ -10001,6 +11357,8 @@ define ptr @test_v1i64_post_reg_st2lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64 ; CHECK-SD-LABEL: test_v1i64_post_reg_st2lane: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.d { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -10008,6 +11366,8 @@ define ptr @test_v1i64_post_reg_st2lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64 ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.d { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, i64 0, ptr %A) @@ -10021,13 +11381,17 @@ declare void @llvm.aarch64.neon.st2lane.v1i64.p0(<1 x i64>, <1 x i64>, i64, ptr) define ptr @test_v4f32_post_imm_st2lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x 
float> %C) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_st2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.s { v0, v1 }[0], [x0], #8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_imm_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #8 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.s { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v4f32.p0(<4 x float> %B, <4 x float> %C, i64 0, ptr %A) @@ -10039,6 +11403,8 @@ define ptr @test_v4f32_post_reg_st2lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x f ; CHECK-SD-LABEL: test_v4f32_post_reg_st2lane: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.s { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -10046,6 +11412,8 @@ define ptr @test_v4f32_post_reg_st2lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x f ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.s { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v4f32.p0(<4 x float> %B, <4 x float> %C, i64 0, ptr %A) @@ -10059,13 +11427,17 @@ declare void @llvm.aarch64.neon.st2lane.v4f32.p0(<4 x float>, <4 x float>, i64, define ptr @test_v2f32_post_imm_st2lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_imm_st2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; 
kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.s { v0, v1 }[0], [x0], #8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #8 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.s { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v2f32.p0(<2 x float> %B, <2 x float> %C, i64 0, ptr %A) @@ -10077,6 +11449,8 @@ define ptr @test_v2f32_post_reg_st2lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x f ; CHECK-SD-LABEL: test_v2f32_post_reg_st2lane: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.s { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -10084,6 +11458,8 @@ define ptr @test_v2f32_post_reg_st2lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x f ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.s { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v2f32.p0(<2 x float> %B, <2 x float> %C, i64 0, ptr %A) @@ -10097,13 +11473,17 @@ declare void @llvm.aarch64.neon.st2lane.v2f32.p0(<2 x float>, <2 x float>, i64, define ptr @test_v2f64_post_imm_st2lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_imm_st2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.d { v0, v1 }[0], [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_st2lane: ; CHECK-GI: ; 
%bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #16 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.d { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v2f64.p0(<2 x double> %B, <2 x double> %C, i64 0, ptr %A) @@ -10115,6 +11495,8 @@ define ptr @test_v2f64_post_reg_st2lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x ; CHECK-SD-LABEL: test_v2f64_post_reg_st2lane: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.d { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -10122,6 +11504,8 @@ define ptr @test_v2f64_post_reg_st2lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.d { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v2f64.p0(<2 x double> %B, <2 x double> %C, i64 0, ptr %A) @@ -10135,13 +11519,17 @@ declare void @llvm.aarch64.neon.st2lane.v2f64.p0(<2 x double>, <2 x double>, i64 define ptr @test_v1f64_post_imm_st2lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_imm_st2lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.d { v0, v1 }[0], [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_st2lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: add x0, x0, #16 +; CHECK-GI-NEXT: ; kill: def $d1 
killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.d { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v1f64.p0(<1 x double> %B, <1 x double> %C, i64 0, ptr %A) @@ -10153,6 +11541,8 @@ define ptr @test_v1f64_post_reg_st2lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x ; CHECK-SD-LABEL: test_v1f64_post_reg_st2lane: ; CHECK-SD: ; %bb.0: ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: st2.d { v0, v1 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; @@ -10160,6 +11550,8 @@ define ptr @test_v1f64_post_reg_st2lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: st2.d { v0, v1 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st2lane.v1f64.p0(<1 x double> %B, <1 x double> %C, i64 0, ptr %A) @@ -10173,13 +11565,19 @@ declare void @llvm.aarch64.neon.st2lane.v1f64.p0(<1 x double>, <1 x double>, i64 define ptr @test_v16i8_post_imm_st3lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_st3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.b { v0, v1, v2 }[0], [x0], #3 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_st3lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #3 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def 
$q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.b { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, ptr %A) @@ -10190,13 +11588,19 @@ define ptr @test_v16i8_post_imm_st3lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8 define ptr @test_v16i8_post_reg_st3lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_st3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.b { v0, v1, v2 }[0], [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_st3lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.b { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, ptr %A) @@ -10210,13 +11614,19 @@ declare void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8> define ptr @test_v8i8_post_imm_st3lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_imm_st3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.b { v0, v1, v2 }[0], [x0], #3 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_imm_st3lane: ; CHECK-GI: ; %bb.0: +; 
CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #3 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.b { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, ptr %A) @@ -10227,13 +11637,19 @@ define ptr @test_v8i8_post_imm_st3lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> % define ptr @test_v8i8_post_reg_st3lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_st3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.b { v0, v1, v2 }[0], [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_st3lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.b { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, ptr %A) @@ -10247,13 +11663,19 @@ declare void @llvm.aarch64.neon.st3lane.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, i6 define ptr @test_v8i16_post_imm_st3lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_st3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def 
$q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.h { v0, v1, v2 }[0], [x0], #6 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_imm_st3lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #6 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.h { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, ptr %A) @@ -10264,14 +11686,20 @@ define ptr @test_v8i16_post_imm_st3lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16 define ptr @test_v8i16_post_reg_st3lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_reg_st3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #1 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.h { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_reg_st3lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.h { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, ptr %A) @@ -10285,13 +11713,19 @@ declare void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16> define ptr 
@test_v4i16_post_imm_st3lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_imm_st3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.h { v0, v1, v2 }[0], [x0], #6 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_st3lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #6 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.h { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, ptr %A) @@ -10302,14 +11736,20 @@ define ptr @test_v4i16_post_imm_st3lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16 define ptr @test_v4i16_post_reg_st3lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_reg_st3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #1 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.h { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_reg_st3lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; 
CHECK-GI-NEXT: st3.h { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, ptr %A) @@ -10323,13 +11763,19 @@ declare void @llvm.aarch64.neon.st3lane.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16> define ptr @test_v4i32_post_imm_st3lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_st3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.s { v0, v1, v2 }[0], [x0], #12 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_imm_st3lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #12 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.s { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, ptr %A) @@ -10340,14 +11786,20 @@ define ptr @test_v4i32_post_imm_st3lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32 define ptr @test_v4i32_post_reg_st3lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_reg_st3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_reg_st3lane: ; CHECK-GI: ; %bb.0: ; 
CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.s { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, ptr %A) @@ -10361,13 +11813,19 @@ declare void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32> define ptr @test_v2i32_post_imm_st3lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_st3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.s { v0, v1, v2 }[0], [x0], #12 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_st3lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #12 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.s { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, ptr %A) @@ -10378,14 +11836,20 @@ define ptr @test_v2i32_post_imm_st3lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32 define ptr @test_v2i32_post_reg_st3lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_reg_st3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; 
CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_reg_st3lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.s { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, ptr %A) @@ -10399,13 +11863,19 @@ declare void @llvm.aarch64.neon.st3lane.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32> define ptr @test_v2i64_post_imm_st3lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_st3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.d { v0, v1, v2 }[0], [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_imm_st3lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.d { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, ptr %A) @@ -10416,14 +11886,20 @@ define ptr @test_v2i64_post_imm_st3lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64 define ptr 
@test_v2i64_post_reg_st3lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_reg_st3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.d { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_reg_st3lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.d { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, ptr %A) @@ -10437,13 +11913,19 @@ declare void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64> define ptr @test_v1i64_post_imm_st3lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_imm_st3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.d { v0, v1, v2 }[0], [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_st3lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; 
CHECK-GI-NEXT: st3.d { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, ptr %A) @@ -10454,14 +11936,20 @@ define ptr @test_v1i64_post_imm_st3lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64 define ptr @test_v1i64_post_reg_st3lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_reg_st3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.d { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_reg_st3lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.d { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, ptr %A) @@ -10475,13 +11963,19 @@ declare void @llvm.aarch64.neon.st3lane.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64> define ptr @test_v4f32_post_imm_st3lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_st3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.s { v0, v1, v2 }[0], [x0], #12 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_imm_st3lane: ; CHECK-GI: ; %bb.0: +; 
CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #12 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.s { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, ptr %A) @@ -10492,14 +11986,20 @@ define ptr @test_v4f32_post_imm_st3lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x f define ptr @test_v4f32_post_reg_st3lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_reg_st3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_reg_st3lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.s { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, ptr %A) @@ -10513,13 +12013,19 @@ declare void @llvm.aarch64.neon.st3lane.v4f32.p0(<4 x float>, <4 x float>, <4 x define ptr @test_v2f32_post_imm_st3lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_imm_st3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def 
$q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.s { v0, v1, v2 }[0], [x0], #12 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_st3lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #12 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.s { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, ptr %A) @@ -10530,14 +12036,20 @@ define ptr @test_v2f32_post_imm_st3lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x f define ptr @test_v2f32_post_reg_st3lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_reg_st3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_reg_st3lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.s { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, ptr %A) @@ -10551,13 +12063,19 @@ declare void 
@llvm.aarch64.neon.st3lane.v2f32.p0(<2 x float>, <2 x float>, <2 x define ptr @test_v2f64_post_imm_st3lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_imm_st3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.d { v0, v1, v2 }[0], [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_st3lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.d { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, ptr %A) @@ -10568,14 +12086,20 @@ define ptr @test_v2f64_post_imm_st3lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x define ptr @test_v2f64_post_reg_st3lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_reg_st3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.d { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_reg_st3lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed 
$q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.d { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, ptr %A) @@ -10589,13 +12113,19 @@ declare void @llvm.aarch64.neon.st3lane.v2f64.p0(<2 x double>, <2 x double>, <2 define ptr @test_v1f64_post_imm_st3lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_imm_st3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: st3.d { v0, v1, v2 }[0], [x0], #24 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_st3lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #24 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.d { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, ptr %A) @@ -10606,14 +12136,20 @@ define ptr @test_v1f64_post_imm_st3lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x define ptr @test_v1f64_post_reg_st3lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_reg_st3lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-SD-NEXT: 
st3.d { v0, v1, v2 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_reg_st3lane: ; CHECK-GI: ; %bb.0: ; CHECK-GI-NEXT: mov x8, x0 +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-GI-NEXT: st3.d { v0, v1, v2 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st3lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, ptr %A) @@ -10627,13 +12163,21 @@ declare void @llvm.aarch64.neon.st3lane.v1f64.p0(<1 x double>, <1 x double>, <1 define ptr @test_v16i8_post_imm_st4lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_imm_st4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.b { v0, v1, v2, v3 }[0], [x0], #4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_imm_st4lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #4 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.b { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, ptr %A) @@ -10644,13 +12188,21 @@ define ptr 
@test_v16i8_post_imm_st4lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8 define ptr @test_v16i8_post_reg_st4lane(ptr %A, ptr %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v16i8_post_reg_st4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.b { v0, v1, v2, v3 }[0], [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v16i8_post_reg_st4lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.b { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, ptr %A) @@ -10664,13 +12216,21 @@ declare void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8> define ptr @test_v8i8_post_imm_st4lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_imm_st4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.b { v0, v1, v2, v3 }[0], [x0], 
#4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_imm_st4lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #4 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.b { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, ptr %A) @@ -10681,13 +12241,21 @@ define ptr @test_v8i8_post_imm_st4lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> % define ptr @test_v8i8_post_reg_st4lane(ptr %A, ptr %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i8_post_reg_st4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.b { v0, v1, v2, v3 }[0], [x0], x2 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i8_post_reg_st4lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.b { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v8i8.p0(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, 
i64 0, ptr %A) @@ -10701,13 +12269,21 @@ declare void @llvm.aarch64.neon.st4lane.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, <8 define ptr @test_v8i16_post_imm_st4lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_imm_st4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.h { v0, v1, v2, v3 }[0], [x0], #8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_imm_st4lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #8 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.h { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, ptr %A) @@ -10718,14 +12294,22 @@ define ptr @test_v8i16_post_imm_st4lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16 define ptr @test_v8i16_post_reg_st4lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v8i16_post_reg_st4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #1 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 
killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.h { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v8i16_post_reg_st4lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.h { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, ptr %A) @@ -10739,13 +12323,21 @@ declare void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16> define ptr @test_v4i16_post_imm_st4lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_imm_st4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.h { v0, v1, v2, v3 }[0], [x0], #8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_imm_st4lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #8 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.h { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: 
ret call void @llvm.aarch64.neon.st4lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, ptr %A) @@ -10756,14 +12348,22 @@ define ptr @test_v4i16_post_imm_st4lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16 define ptr @test_v4i16_post_reg_st4lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i16_post_reg_st4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #1 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.h { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i16_post_reg_st4lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #1 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.h { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v4i16.p0(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, ptr %A) @@ -10777,13 +12377,21 @@ declare void @llvm.aarch64.neon.st4lane.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16> define ptr @test_v4i32_post_imm_st4lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_imm_st4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; 
CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_imm_st4lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #16 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.s { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, ptr %A) @@ -10794,14 +12402,22 @@ define ptr @test_v4i32_post_imm_st4lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32 define ptr @test_v4i32_post_reg_st4lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4i32_post_reg_st4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4i32_post_reg_st4lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 
def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.s { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, ptr %A) @@ -10815,13 +12431,21 @@ declare void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32> define ptr @test_v2i32_post_imm_st4lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_imm_st4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_imm_st4lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #16 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.s { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, ptr %A) @@ -10832,14 +12456,22 @@ define ptr @test_v2i32_post_imm_st4lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32 define ptr @test_v2i32_post_reg_st4lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2i32_post_reg_st4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 
killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i32_post_reg_st4lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.s { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v2i32.p0(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, ptr %A) @@ -10853,13 +12485,21 @@ declare void @llvm.aarch64.neon.st4lane.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32> define ptr @test_v2i64_post_imm_st4lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_imm_st4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_imm_st4lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 +; CHECK-GI-NEXT: ; kill: def $q1 
killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.d { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, ptr %A) @@ -10870,14 +12510,22 @@ define ptr @test_v2i64_post_imm_st4lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64 define ptr @test_v2i64_post_reg_st4lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2i64_post_reg_st4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2i64_post_reg_st4lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.d { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, ptr %A) @@ -10891,13 +12539,21 @@ declare void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64> define ptr @test_v1i64_post_imm_st4lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x 
i64> %D, <1 x i64> %E) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_imm_st4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_imm_st4lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.d { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, ptr %A) @@ -10908,14 +12564,22 @@ define ptr @test_v1i64_post_imm_st4lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64 define ptr @test_v1i64_post_reg_st4lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1i64_post_reg_st4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1i64_post_reg_st4lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: 
def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.d { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v1i64.p0(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, ptr %A) @@ -10929,13 +12593,21 @@ declare void @llvm.aarch64.neon.st4lane.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64> define ptr @test_v4f32_post_imm_st4lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_imm_st4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_imm_st4lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #16 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.s { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, ptr %A) @@ -10946,14 +12618,22 @@ define ptr @test_v4f32_post_imm_st4lane(ptr 
%A, ptr %ptr, <4 x float> %B, <4 x f define ptr @test_v4f32_post_reg_st4lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v4f32_post_reg_st4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v4f32_post_reg_st4lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.s { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v4f32.p0(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, ptr %A) @@ -10967,13 +12647,21 @@ declare void @llvm.aarch64.neon.st4lane.v4f32.p0(<4 x float>, <4 x float>, <4 x define ptr @test_v2f32_post_imm_st4lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_imm_st4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: 
st4.s { v0, v1, v2, v3 }[0], [x0], #16 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_imm_st4lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #16 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.s { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, ptr %A) @@ -10984,14 +12672,22 @@ define ptr @test_v2f32_post_imm_st4lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x f define ptr @test_v2f32_post_reg_st4lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f32_post_reg_st4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #2 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.s { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f32_post_reg_st4lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #2 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.s { v0, v1, v2, v3 }[0], [x8] ; 
CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v2f32.p0(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, ptr %A) @@ -11005,13 +12701,21 @@ declare void @llvm.aarch64.neon.st4lane.v2f32.p0(<2 x float>, <2 x float>, <2 x define ptr @test_v2f64_post_imm_st4lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_imm_st4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_imm_st4lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.d { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, ptr %A) @@ -11022,14 +12726,22 @@ define ptr @test_v2f64_post_imm_st4lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x define ptr @test_v2f64_post_reg_st4lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v2f64_post_reg_st4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $q2 killed $q2 
killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v2f64_post_reg_st4lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.d { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v2f64.p0(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, ptr %A) @@ -11043,13 +12755,21 @@ declare void @llvm.aarch64.neon.st4lane.v2f64.p0(<2 x double>, <2 x double>, <2 define ptr @test_v1f64_post_imm_st4lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_imm_st4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], #32 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_imm_st4lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, #32 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 
killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.d { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, ptr %A) @@ -11060,14 +12780,22 @@ define ptr @test_v1f64_post_imm_st4lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x define ptr @test_v1f64_post_reg_st4lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind { ; CHECK-SD-LABEL: test_v1f64_post_reg_st4lane: ; CHECK-SD: ; %bb.0: +; CHECK-SD-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: lsl x8, x2, #3 +; CHECK-SD-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: st4.d { v0, v1, v2, v3 }[0], [x0], x8 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: test_v1f64_post_reg_st4lane: ; CHECK-GI: ; %bb.0: +; CHECK-GI-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: mov x8, x0 ; CHECK-GI-NEXT: add x0, x0, x2, lsl #3 +; CHECK-GI-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ; kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-GI-NEXT: st4.d { v0, v1, v2, v3 }[0], [x8] ; CHECK-GI-NEXT: ret call void @llvm.aarch64.neon.st4lane.v1f64.p0(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, ptr %A) diff --git a/llvm/test/CodeGen/AArch64/arm64-ld1.ll b/llvm/test/CodeGen/AArch64/arm64-ld1.ll index c9d94f945f7af2..54b96520dce41d 100644 --- a/llvm/test/CodeGen/AArch64/arm64-ld1.ll +++ b/llvm/test/CodeGen/AArch64/arm64-ld1.ll @@ -351,30 +351,63 @@ 
declare %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld4.v1f64.p0(ptr) nounwi define %struct.__neon_int8x16x2_t @ld2lane_16b(<16 x i8> %L1, <16 x i8> %L2, ptr %A) nounwind { ; Make sure we are using the operands defined by the ABI -; CHECK-LABEL: ld2lane_16b: -; CHECK: // %bb.0: -; CHECK-NEXT: ld2.b { v0, v1 }[1], [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ld2lane_16b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ld2.b { v0, v1 }[1], [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ld2lane_16b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ld2.b { v0, v1 }[1], [x0] +; CHECK-GI-NEXT: ret %tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> %L1, <16 x i8> %L2, i64 1, ptr %A) ret %struct.__neon_int8x16x2_t %tmp2 } define %struct.__neon_int8x16x3_t @ld3lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, ptr %A) nounwind { ; Make sure we are using the operands defined by the ABI -; CHECK-LABEL: ld3lane_16b: -; CHECK: // %bb.0: -; CHECK-NEXT: ld3.b { v0, v1, v2 }[1], [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ld3lane_16b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ld3.b { v0, v1, v2 }[1], [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ld3lane_16b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ld3.b { v0, v1, v2 
}[1], [x0] +; CHECK-GI-NEXT: ret %tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, i64 1, ptr %A) ret %struct.__neon_int8x16x3_t %tmp2 } define %struct.__neon_int8x16x4_t @ld4lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, ptr %A) nounwind { ; Make sure we are using the operands defined by the ABI -; CHECK-LABEL: ld4lane_16b: -; CHECK: // %bb.0: -; CHECK-NEXT: ld4.b { v0, v1, v2, v3 }[1], [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ld4lane_16b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ld4.b { v0, v1, v2, v3 }[1], [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ld4lane_16b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ld4.b { v0, v1, v2, v3 }[1], [x0] +; CHECK-GI-NEXT: ret %tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, i64 1, ptr %A) ret %struct.__neon_int8x16x4_t %tmp2 } @@ -385,30 +418,63 @@ declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> define %struct.__neon_int16x8x2_t @ld2lane_8h(<8 x i16> %L1, <8 x i16> %L2, ptr %A) nounwind { ; Make sure we are using the operands defined by the ABI -; CHECK-LABEL: ld2lane_8h: -; CHECK: // %bb.0: -; CHECK-NEXT: ld2.h { v0, v1 }[1], [x0] -; CHECK-NEXT: ret +; 
CHECK-SD-LABEL: ld2lane_8h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ld2.h { v0, v1 }[1], [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ld2lane_8h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ld2.h { v0, v1 }[1], [x0] +; CHECK-GI-NEXT: ret %tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, i64 1, ptr %A) ret %struct.__neon_int16x8x2_t %tmp2 } define %struct.__neon_int16x8x3_t @ld3lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, ptr %A) nounwind { ; Make sure we are using the operands defined by the ABI -; CHECK-LABEL: ld3lane_8h: -; CHECK: // %bb.0: -; CHECK-NEXT: ld3.h { v0, v1, v2 }[1], [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ld3lane_8h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ld3.h { v0, v1, v2 }[1], [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ld3lane_8h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ld3.h { v0, v1, v2 }[1], [x0] +; CHECK-GI-NEXT: ret %tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, i64 1, ptr %A) ret %struct.__neon_int16x8x3_t %tmp2 } define %struct.__neon_int16x8x4_t @ld4lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, ptr %A) nounwind { ; Make sure we are 
using the operands defined by the ABI -; CHECK-LABEL: ld4lane_8h: -; CHECK: // %bb.0: -; CHECK-NEXT: ld4.h { v0, v1, v2, v3 }[1], [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ld4lane_8h: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ld4.h { v0, v1, v2, v3 }[1], [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ld4lane_8h: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ld4.h { v0, v1, v2, v3 }[1], [x0] +; CHECK-GI-NEXT: ret %tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, i64 1, ptr %A) ret %struct.__neon_int16x8x4_t %tmp2 } @@ -419,30 +485,63 @@ declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> define %struct.__neon_int32x4x2_t @ld2lane_4s(<4 x i32> %L1, <4 x i32> %L2, ptr %A) nounwind { ; Make sure we are using the operands defined by the ABI -; CHECK-LABEL: ld2lane_4s: -; CHECK: // %bb.0: -; CHECK-NEXT: ld2.s { v0, v1 }[1], [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ld2lane_4s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ld2.s { v0, v1 }[1], [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ld2lane_4s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 
killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ld2.s { v0, v1 }[1], [x0] +; CHECK-GI-NEXT: ret %tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, i64 1, ptr %A) ret %struct.__neon_int32x4x2_t %tmp2 } define %struct.__neon_int32x4x3_t @ld3lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, ptr %A) nounwind { ; Make sure we are using the operands defined by the ABI -; CHECK-LABEL: ld3lane_4s: -; CHECK: // %bb.0: -; CHECK-NEXT: ld3.s { v0, v1, v2 }[1], [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ld3lane_4s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ld3.s { v0, v1, v2 }[1], [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ld3lane_4s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ld3.s { v0, v1, v2 }[1], [x0] +; CHECK-GI-NEXT: ret %tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i64 1, ptr %A) ret %struct.__neon_int32x4x3_t %tmp2 } define %struct.__neon_int32x4x4_t @ld4lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, ptr %A) nounwind { ; Make sure we are using the operands defined by the ABI -; CHECK-LABEL: ld4lane_4s: -; CHECK: // %bb.0: -; CHECK-NEXT: ld4.s { v0, v1, v2, v3 }[1], [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ld4lane_4s: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def 
$q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ld4.s { v0, v1, v2, v3 }[1], [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ld4lane_4s: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ld4.s { v0, v1, v2, v3 }[1], [x0] +; CHECK-GI-NEXT: ret %tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, i64 1, ptr %A) ret %struct.__neon_int32x4x4_t %tmp2 } @@ -453,30 +552,63 @@ declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> define %struct.__neon_int64x2x2_t @ld2lane_2d(<2 x i64> %L1, <2 x i64> %L2, ptr %A) nounwind { ; Make sure we are using the operands defined by the ABI -; CHECK-LABEL: ld2lane_2d: -; CHECK: // %bb.0: -; CHECK-NEXT: ld2.d { v0, v1 }[1], [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ld2lane_2d: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ld2.d { v0, v1 }[1], [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ld2lane_2d: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ld2.d { v0, v1 }[1], [x0] +; CHECK-GI-NEXT: ret %tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, i64 1, ptr %A) ret %struct.__neon_int64x2x2_t %tmp2 } define %struct.__neon_int64x2x3_t 
@ld3lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, ptr %A) nounwind { ; Make sure we are using the operands defined by the ABI -; CHECK-LABEL: ld3lane_2d: -; CHECK: // %bb.0: -; CHECK-NEXT: ld3.d { v0, v1, v2 }[1], [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ld3lane_2d: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: ld3.d { v0, v1, v2 }[1], [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ld3lane_2d: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: ld3.d { v0, v1, v2 }[1], [x0] +; CHECK-GI-NEXT: ret %tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, i64 1, ptr %A) ret %struct.__neon_int64x2x3_t %tmp2 } define %struct.__neon_int64x2x4_t @ld4lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, ptr %A) nounwind { ; Make sure we are using the operands defined by the ABI -; CHECK-LABEL: ld4lane_2d: -; CHECK: // %bb.0: -; CHECK-NEXT: ld4.d { v0, v1, v2, v3 }[1], [x0] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: ld4lane_2d: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: ld4.d { v0, v1, v2, v3 }[1], [x0] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: ld4lane_2d: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 
killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: ld4.d { v0, v1, v2, v3 }[1], [x0] +; CHECK-GI-NEXT: ret %tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, i64 1, ptr %A) ret %struct.__neon_int64x2x4_t %tmp2 } diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll index ad4b0f377627d5..43d5ab5ab54e10 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll @@ -1780,7 +1780,9 @@ define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 { ; CHECK-GI-LABEL: test_concat_v16i8_v16i8_v16i8: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: adrp x8, .LCPI126_0 +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI126_0] +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-GI-NEXT: ret entry: @@ -1797,9 +1799,11 @@ define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 { ; ; CHECK-GI-LABEL: test_concat_v16i8_v8i8_v16i8: ; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: adrp x8, .LCPI127_0 +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: mov b2, v0.b[1] ; CHECK-GI-NEXT: mov b3, v0.b[2] -; CHECK-GI-NEXT: adrp x8, .LCPI127_0 ; CHECK-GI-NEXT: mov b4, v0.b[3] ; CHECK-GI-NEXT: mov b5, v0.b[4] ; CHECK-GI-NEXT: mov b6, v0.b[5] @@ -1999,7 +2003,9 @@ define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 { ; CHECK-GI-LABEL: 
test_concat_v8i16_v8i16_v8i16: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: adrp x8, .LCPI130_0 +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI130_0] +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-GI-NEXT: ret entry: @@ -2016,9 +2022,11 @@ define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 { ; ; CHECK-GI-LABEL: test_concat_v8i16_v4i16_v8i16: ; CHECK-GI: // %bb.0: // %entry +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: adrp x8, .LCPI131_0 +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: mov h2, v0.h[1] ; CHECK-GI-NEXT: mov h3, v0.h[2] -; CHECK-GI-NEXT: adrp x8, .LCPI131_0 ; CHECK-GI-NEXT: mov h4, v0.h[3] ; CHECK-GI-NEXT: mov v0.h[1], v2.h[0] ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI131_0] @@ -2138,7 +2146,9 @@ define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 { ; CHECK-GI-LABEL: test_concat_v4i32_v4i32_v4i32: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: adrp x8, .LCPI134_0 +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI134_0] +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-GI-NEXT: ret entry: @@ -2155,8 +2165,10 @@ define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 { ; ; CHECK-GI-LABEL: test_concat_v4i32_v2i32_v4i32: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov s2, v0.s[1] +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: adrp x8, .LCPI135_0 +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: mov s2, v0.s[1] ; CHECK-GI-NEXT: mov v0.s[1], v2.s[0] ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI135_0] ; CHECK-GI-NEXT: tbl 
v0.16b, { v0.16b, v1.16b }, v2.16b diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll index d04bac78377bfb..6327679756739a 100644 --- a/llvm/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-copyPhysReg-tuple.ll @@ -7,11 +7,12 @@ define <4 x i32> @copyTuple.QPair(ptr %a, ptr %b) { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v3.4s, #2 ; CHECK-NEXT: movi v2.2d, #0xffffffffffffffff -; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: mov v1.16b, v3.16b +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ld2 { v0.s, v1.s }[1], [x0] ; CHECK-NEXT: mov v1.16b, v2.16b ; CHECK-NEXT: ld2 { v0.s, v1.s }[1], [x1] +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 ; CHECK-NEXT: ret entry: %vld = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> , <4 x i32> , i64 1, ptr %a) @@ -24,16 +25,17 @@ entry: define <4 x i32> @copyTuple.QTriple(ptr %a, ptr %b, <4 x i32> %c) { ; CHECK-LABEL: copyTuple.QTriple: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $q0 killed $q0 def $q31_q0_q1 ; CHECK-NEXT: movi v31.2d, #0xffffffffffffffff ; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: mov v2.16b, v0.16b -; CHECK-NEXT: mov v3.16b, v1.16b -; CHECK-NEXT: mov v1.16b, v31.16b -; CHECK-NEXT: ld3 { v1.s, v2.s, v3.s }[1], [x0] ; CHECK-NEXT: mov v2.16b, v31.16b ; CHECK-NEXT: mov v3.16b, v0.16b -; CHECK-NEXT: ld3 { v1.s, v2.s, v3.s }[1], [x1] -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: mov v4.16b, v1.16b +; CHECK-NEXT: ld3 { v2.s, v3.s, v4.s }[1], [x0] +; CHECK-NEXT: mov v3.16b, v31.16b +; CHECK-NEXT: mov v4.16b, v0.16b +; CHECK-NEXT: ld3 { v2.s, v3.s, v4.s }[1], [x1] +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret entry: %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> , <4 x i32> %c, <4 x i32> %c, i64 1, ptr %a) @@ -46,19 +48,20 @@ entry: define <4 x i32> @copyTuple.QQuad(ptr %a, 
ptr %b, <4 x i32> %c) { ; CHECK-LABEL: copyTuple.QQuad: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $q0 killed $q0 def $q31_q0_q1_q2 ; CHECK-NEXT: movi v31.2d, #0xffffffffffffffff ; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: mov v2.16b, v0.16b -; CHECK-NEXT: mov v4.16b, v2.16b -; CHECK-NEXT: mov v3.16b, v1.16b -; CHECK-NEXT: mov v2.16b, v0.16b -; CHECK-NEXT: mov v1.16b, v31.16b -; CHECK-NEXT: ld4 { v1.s, v2.s, v3.s, v4.s }[1], [x0] -; CHECK-NEXT: mov v2.16b, v31.16b -; CHECK-NEXT: mov v3.16b, v0.16b +; CHECK-NEXT: mov v3.16b, v31.16b ; CHECK-NEXT: mov v4.16b, v0.16b -; CHECK-NEXT: ld4 { v1.s, v2.s, v3.s, v4.s }[1], [x1] -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: mov v5.16b, v1.16b +; CHECK-NEXT: mov v6.16b, v2.16b +; CHECK-NEXT: ld4 { v3.s, v4.s, v5.s, v6.s }[1], [x0] +; CHECK-NEXT: mov v4.16b, v31.16b +; CHECK-NEXT: mov v5.16b, v0.16b +; CHECK-NEXT: mov v6.16b, v0.16b +; CHECK-NEXT: ld4 { v3.s, v4.s, v5.s, v6.s }[1], [x1] +; CHECK-NEXT: mov v0.16b, v3.16b ; CHECK-NEXT: ret entry: %vld = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> , <4 x i32> %c, <4 x i32> %c, <4 x i32> %c, i64 1, ptr %a) diff --git a/llvm/test/CodeGen/AArch64/arm64-tbl.ll b/llvm/test/CodeGen/AArch64/arm64-tbl.ll index 2044a866b830aa..44b92e6ccd088f 100644 --- a/llvm/test/CodeGen/AArch64/arm64-tbl.ll +++ b/llvm/test/CodeGen/AArch64/arm64-tbl.ll @@ -21,55 +21,121 @@ define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind { } define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) { -; CHECK-LABEL: tbl2_8b: -; CHECK: // %bb.0: -; CHECK-NEXT: tbl.8b v0, { v0, v1 }, v2 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: tbl2_8b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: tbl.8b v0, { v0, v1 }, v2 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: tbl2_8b: +; CHECK-GI: // %bb.0: +; 
CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: tbl.8b v0, { v0, v1 }, v2 +; CHECK-GI-NEXT: ret %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) ret <8 x i8> %tmp3 } define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) { -; CHECK-LABEL: tbl2_16b: -; CHECK: // %bb.0: -; CHECK-NEXT: tbl.16b v0, { v0, v1 }, v2 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: tbl2_16b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: tbl.16b v0, { v0, v1 }, v2 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: tbl2_16b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v2 +; CHECK-GI-NEXT: ret %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) ret <16 x i8> %tmp3 } define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) { -; CHECK-LABEL: tbl3_8b: -; CHECK: // %bb.0: -; CHECK-NEXT: tbl.8b v0, { v0, v1, v2 }, v3 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: tbl3_8b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: tbl.8b v0, { v0, v1, v2 }, v3 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: tbl3_8b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: tbl.8b v0, { 
v0, v1, v2 }, v3 +; CHECK-GI-NEXT: ret %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) ret <8 x i8> %tmp3 } define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) { -; CHECK-LABEL: tbl3_16b: -; CHECK: // %bb.0: -; CHECK-NEXT: tbl.16b v0, { v0, v1, v2 }, v3 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: tbl3_16b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2 }, v3 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: tbl3_16b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-GI-NEXT: tbl.16b v0, { v0, v1, v2 }, v3 +; CHECK-GI-NEXT: ret %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) ret <16 x i8> %tmp3 } define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) { -; CHECK-LABEL: tbl4_8b: -; CHECK: // %bb.0: -; CHECK-NEXT: tbl.8b v0, { v0, v1, v2, v3 }, v4 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: tbl4_8b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: tbl.8b v0, { v0, v1, v2, v3 }, v4 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: tbl4_8b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def 
$q0_q1_q2_q3 +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: tbl.8b v0, { v0, v1, v2, v3 }, v4 +; CHECK-GI-NEXT: ret %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) ret <8 x i8> %tmp3 } define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) { -; CHECK-LABEL: tbl4_16b: -; CHECK: // %bb.0: -; CHECK-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: tbl4_16b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: tbl4_16b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-GI-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 +; CHECK-GI-NEXT: ret %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) ret <16 x i8> %tmp3 } @@ -107,7 +173,11 @@ define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8 ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_v8i8: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: adrp x8, .LCPI8_0 +; CHECK-SD-NEXT: // kill: def $q1 
killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-SD-NEXT: ldr d4, [x8, :lo12:.LCPI8_0] +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-SD-NEXT: tbl.8b v0, { v0, v1 }, v4 ; CHECK-SD-NEXT: tbl.8b v1, { v2, v3 }, v4 ; CHECK-SD-NEXT: mov.s v0[1], v1[1] @@ -117,7 +187,11 @@ define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8 ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_v8i8: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI8_1 +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: ldr d4, [x8, :lo12:.LCPI8_1] +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: adrp x8, .LCPI8_0 ; CHECK-GI-NEXT: tbl.8b v0, { v0, v1 }, v4 ; CHECK-GI-NEXT: tbl.8b v1, { v2, v3 }, v4 @@ -188,15 +262,23 @@ define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8 define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4: ; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: adrp x8, .LCPI9_0 +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI9_0] +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI9_1 +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; 
CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI9_1] +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: adrp x8, .LCPI9_0 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4 ; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v4 @@ -249,7 +331,11 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: fmov s4, w0 ; CHECK-SD-NEXT: mov w8, #32 // =0x20 +; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: mov.b v4[1], w0 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: mov.b v4[2], w0 ; CHECK-SD-NEXT: mov.b v4[3], w0 ; CHECK-SD-NEXT: mov.b v4[4], w0 @@ -278,6 +364,10 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: fmov s4, w0 ; CHECK-GI-NEXT: mov w8, #255 // =0xff +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: mov.16b v5, v4 ; CHECK-GI-NEXT: mov.b v5[1], v4[0] ; CHECK-GI-NEXT: mov.b v5[2], v4[0] @@ -364,7 +454,11 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask2: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: mov w8, #1 // =0x1 +; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: fmov s4, w8 +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed 
$q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: mov.b v4[1], w8 ; CHECK-SD-NEXT: mov.b v4[2], w8 ; CHECK-SD-NEXT: mov.b v4[3], w8 @@ -395,8 +489,12 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: mov w8, #1 // =0x1 ; CHECK-GI-NEXT: fmov s6, w0 +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: fmov s4, w8 ; CHECK-GI-NEXT: mov w8, #255 // =0xff +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: mov.16b v5, v4 ; CHECK-GI-NEXT: mov.b v5[1], v4[0] ; CHECK-GI-NEXT: mov.b v5[2], v4[0] @@ -502,7 +600,11 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: movi.2d v4, #0xffffffffffffffff ; CHECK-SD-NEXT: adrp x8, .LCPI12_0 +; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ldr q5, [x8, :lo12:.LCPI12_0] +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: tbl.16b v2, { v2, v3 }, v5 ; CHECK-SD-NEXT: mov.b v4[0], w0 ; CHECK-SD-NEXT: mov.b v4[1], w0 @@ -521,6 +623,10 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: fmov s4, w0 ; CHECK-GI-NEXT: mov w8, #255 // =0xff +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 +; 
CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: mov.16b v5, v4 ; CHECK-GI-NEXT: mov.b v5[1], v4[0] ; CHECK-GI-NEXT: mov.b v5[2], v4[0] @@ -643,6 +749,10 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: dup.16b v4, w0 ; CHECK-SD-NEXT: mov w8, #255 // =0xff +; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: mov.b v4[8], w8 ; CHECK-SD-NEXT: mov.b v4[9], w8 ; CHECK-SD-NEXT: mov.b v4[10], w8 @@ -662,8 +772,12 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: fmov s4, w0 ; CHECK-GI-NEXT: mov w8, #255 // =0xff +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: fmov s6, w8 ; CHECK-GI-NEXT: adrp x8, .LCPI13_1 +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: mov.16b v5, v4 ; CHECK-GI-NEXT: mov.b v5[1], v4[0] ; CHECK-GI-NEXT: mov.b v5[2], v4[0] @@ -765,15 +879,23 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_shuffle: ; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: adrp x8, .LCPI14_0 +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI14_0] +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 
def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_shuffle: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI14_1 +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI14_1] +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: adrp x8, .LCPI14_0 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4 ; CHECK-GI-NEXT: tbl.16b v1, { v2, v3 }, v4 @@ -859,16 +981,24 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle(<16 x i8> %a, <16 x i8> %b define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask1: ; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: adrp x8, .LCPI15_0 +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI15_0] +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask1: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI15_2 +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI15_2] ; CHECK-GI-NEXT: adrp x8, .LCPI15_1 +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed 
$q2_q3 def $q2_q3 ; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI15_1] ; CHECK-GI-NEXT: adrp x8, .LCPI15_0 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4 @@ -955,16 +1085,24 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1(<16 x i8> %a, <16 x i8> define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { ; CHECK-SD-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask2: ; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: adrp x8, .LCPI16_0 +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI16_0] +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-SD-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask2: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI16_2 +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI16_2] ; CHECK-GI-NEXT: adrp x8, .LCPI16_1 +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: ldr q5, [x8, :lo12:.LCPI16_1] ; CHECK-GI-NEXT: adrp x8, .LCPI16_0 ; CHECK-GI-NEXT: tbl.16b v0, { v0, v1 }, v4 @@ -1006,55 +1144,121 @@ define <16 x i8> @tbx1_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) nounwind { } define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) { -; CHECK-LABEL: tbx2_8b: -; CHECK: // %bb.0: -; CHECK-NEXT: tbx.8b v0, { v1, v2 }, v3 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: tbx2_8b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2 +; CHECK-SD-NEXT: // kill: def $q1 
killed $q1 killed $q1_q2 def $q1_q2 +; CHECK-SD-NEXT: tbx.8b v0, { v1, v2 }, v3 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: tbx2_8b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2 +; CHECK-GI-NEXT: tbx.8b v0, { v1, v2 }, v3 +; CHECK-GI-NEXT: ret %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) ret <8 x i8> %tmp3 } define <16 x i8> @tbx2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) { -; CHECK-LABEL: tbx2_16b: -; CHECK: // %bb.0: -; CHECK-NEXT: tbx.16b v0, { v1, v2 }, v3 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: tbx2_16b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2 +; CHECK-SD-NEXT: tbx.16b v0, { v1, v2 }, v3 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: tbx2_16b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2 def $q1_q2 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2 def $q1_q2 +; CHECK-GI-NEXT: tbx.16b v0, { v1, v2 }, v3 +; CHECK-GI-NEXT: ret %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) ret <16 x i8> %tmp3 } define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) { -; CHECK-LABEL: tbx3_8b: -; CHECK: // %bb.0: -; CHECK-NEXT: tbx.8b v0, { v1, v2, v3 }, v4 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: tbx3_8b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3 +; CHECK-SD-NEXT: tbx.8b v0, { v1, v2, v3 }, v4 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: tbx3_8b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 
killed $q1_q2_q3 def $q1_q2_q3 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3 +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3 +; CHECK-GI-NEXT: tbx.8b v0, { v1, v2, v3 }, v4 +; CHECK-GI-NEXT: ret %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(< 8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) ret <8 x i8> %tmp3 } define <16 x i8> @tbx3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) { -; CHECK-LABEL: tbx3_16b: -; CHECK: // %bb.0: -; CHECK-NEXT: tbx.16b v0, { v1, v2, v3 }, v4 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: tbx3_16b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3 +; CHECK-SD-NEXT: tbx.16b v0, { v1, v2, v3 }, v4 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: tbx3_16b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3 def $q1_q2_q3 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3 def $q1_q2_q3 +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3 def $q1_q2_q3 +; CHECK-GI-NEXT: tbx.16b v0, { v1, v2, v3 }, v4 +; CHECK-GI-NEXT: ret %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) ret <16 x i8> %tmp3 } define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) { -; CHECK-LABEL: tbx4_8b: -; CHECK: // %bb.0: -; CHECK-NEXT: tbx.8b v0, { v1, v2, v3, v4 }, v5 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: tbx4_8b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 +; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 +; 
CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 +; CHECK-SD-NEXT: tbx.8b v0, { v1, v2, v3, v4 }, v5 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: tbx4_8b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 +; CHECK-GI-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 +; CHECK-GI-NEXT: tbx.8b v0, { v1, v2, v3, v4 }, v5 +; CHECK-GI-NEXT: ret %tmp3 = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) ret <8 x i8> %tmp3 } define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) { -; CHECK-LABEL: tbx4_16b: -; CHECK: // %bb.0: -; CHECK-NEXT: tbx.16b v0, { v1, v2, v3, v4 }, v5 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: tbx4_16b: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 +; CHECK-SD-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 +; CHECK-SD-NEXT: tbx.16b v0, { v1, v2, v3, v4 }, v5 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: tbx4_16b: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 +; CHECK-GI-NEXT: // kill: def $q4 killed $q4 killed $q1_q2_q3_q4 def $q1_q2_q3_q4 +; CHECK-GI-NEXT: tbx.16b v0, { v1, v2, v3, v4 }, v5 +; CHECK-GI-NEXT: ret %tmp3 = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %A, <16 x i8> %B, 
<16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) ret <16 x i8> %tmp3 } diff --git a/llvm/test/CodeGen/AArch64/arm64-zip.ll b/llvm/test/CodeGen/AArch64/arm64-zip.ll index fd862dfcbd693a..9955b253f563e9 100644 --- a/llvm/test/CodeGen/AArch64/arm64-zip.ll +++ b/llvm/test/CodeGen/AArch64/arm64-zip.ll @@ -359,18 +359,20 @@ define <8 x i16> @combine_v8i16_undef(<4 x i16> %0, <4 x i16> %1) { define <16 x i8> @combine_v8i16_8first(<8 x i8> %0, <8 x i8> %1) { ; CHECK-SD-LABEL: combine_v8i16_8first: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fmov d31, d1 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1_q2 ; CHECK-SD-NEXT: adrp x8, .LCPI25_0 -; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI25_0] -; CHECK-SD-NEXT: tbl.16b v0, { v31, v0 }, v1 +; CHECK-SD-NEXT: fmov d2, d0 +; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI25_0] +; CHECK-SD-NEXT: tbl.16b v0, { v1, v2 }, v3 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: combine_v8i16_8first: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: fmov d2, d0 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q31_q0 ; CHECK-GI-NEXT: adrp x8, .LCPI25_0 -; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI25_0] -; CHECK-GI-NEXT: tbl.16b v0, { v1, v2 }, v0 +; CHECK-GI-NEXT: fmov d31, d1 +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI25_0] +; CHECK-GI-NEXT: tbl.16b v0, { v31, v0 }, v2 ; CHECK-GI-NEXT: ret %3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> ret <16 x i8> %3 @@ -381,18 +383,20 @@ define <16 x i8> @combine_v8i16_8first(<8 x i8> %0, <8 x i8> %1) { define <16 x i8> @combine_v8i16_8firstundef(<8 x i8> %0, <8 x i8> %1) { ; CHECK-SD-LABEL: combine_v8i16_8firstundef: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fmov d31, d1 +; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1_q2 ; CHECK-SD-NEXT: adrp x8, .LCPI26_0 -; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI26_0] -; CHECK-SD-NEXT: tbl.16b v0, { v31, v0 }, v1 +; CHECK-SD-NEXT: fmov d2, d0 +; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI26_0] +; CHECK-SD-NEXT: tbl.16b v0, { v1, v2 }, v3 ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: 
combine_v8i16_8firstundef: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: fmov d2, d0 +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q31_q0 ; CHECK-GI-NEXT: adrp x8, .LCPI26_0 -; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI26_0] -; CHECK-GI-NEXT: tbl.16b v0, { v1, v2 }, v0 +; CHECK-GI-NEXT: fmov d31, d1 +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI26_0] +; CHECK-GI-NEXT: tbl.16b v0, { v31, v0 }, v2 ; CHECK-GI-NEXT: ret %3 = shufflevector <8 x i8> %1, <8 x i8> %0, <16 x i32> ret <16 x i8> %3 diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll b/llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll index 66f3c5c93fcbf1..98033a8e449ffb 100644 --- a/llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll +++ b/llvm/test/CodeGen/AArch64/atomicrmw-xchg-fp.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --force-update ; RUN: llc -verify-machineinstrs -mtriple=aarch64-- -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s -check-prefix=NOLSE ; RUN: llc -verify-machineinstrs -mtriple=aarch64-- -mattr=+lse -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s -check-prefix=LSE @@ -103,8 +103,8 @@ define fp128 @test_rmw_xchg_f128(ptr %dst, fp128 %new) { ; LSE-NEXT: // =>This Inner Loop Header: Depth=1 ; LSE-NEXT: mov x7, x5 ; LSE-NEXT: mov x6, x4 -; LSE-NEXT: mov x4, x6 ; LSE-NEXT: mov x5, x7 +; LSE-NEXT: mov x4, x6 ; LSE-NEXT: caspal x4, x5, x2, x3, [x0] ; LSE-NEXT: cmp x5, x7 ; LSE-NEXT: ccmp x4, x6, #0, eq diff --git a/llvm/test/CodeGen/AArch64/bf16-shuffle.ll b/llvm/test/CodeGen/AArch64/bf16-shuffle.ll index f10b7282669ae6..d59de3c56f4ee2 100644 --- a/llvm/test/CodeGen/AArch64/bf16-shuffle.ll +++ b/llvm/test/CodeGen/AArch64/bf16-shuffle.ll @@ -224,11 +224,14 @@ define <8 x bfloat> @shuffle3step0_bf16(<32 x bfloat> %src) { ; CHECK-LABEL: shuffle3step0_bf16: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, .LCPI16_0 -; CHECK-NEXT: ldr q3, 
[x8, :lo12:.LCPI16_0] +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: mov v3.16b, v2.16b +; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI16_0] +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: adrp x8, .LCPI16_1 -; CHECK-NEXT: tbl v1.16b, { v0.16b, v1.16b }, v3.16b +; CHECK-NEXT: tbl v2.16b, { v0.16b, v1.16b }, v4.16b ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI16_1] -; CHECK-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b +; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b ; CHECK-NEXT: ret entry: %s1 = shufflevector <32 x bfloat> %src, <32 x bfloat> undef, <8 x i32> @@ -239,11 +242,14 @@ define <8 x bfloat> @shuffle3step1_bf16(<32 x bfloat> %src) { ; CHECK-LABEL: shuffle3step1_bf16: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, .LCPI17_0 -; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI17_0] +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: mov v3.16b, v2.16b +; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI17_0] +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: adrp x8, .LCPI17_1 -; CHECK-NEXT: tbl v1.16b, { v0.16b, v1.16b }, v3.16b +; CHECK-NEXT: tbl v2.16b, { v0.16b, v1.16b }, v4.16b ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI17_1] -; CHECK-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b +; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b ; CHECK-NEXT: ret entry: %s1 = shufflevector <32 x bfloat> %src, <32 x bfloat> undef, <8 x i32> @@ -254,11 +260,14 @@ define <8 x bfloat> @shuffle3step2_bf16(<32 x bfloat> %src) { ; CHECK-LABEL: shuffle3step2_bf16: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, .LCPI18_0 -; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI18_0] +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: mov v3.16b, v2.16b +; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI18_0] +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: adrp x8, .LCPI18_1 -; CHECK-NEXT: tbl v1.16b, { v0.16b, v1.16b }, v3.16b +; CHECK-NEXT: 
tbl v2.16b, { v0.16b, v1.16b }, v4.16b ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI18_1] -; CHECK-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b +; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b ; CHECK-NEXT: ret entry: %s1 = shufflevector <32 x bfloat> %src, <32 x bfloat> undef, <8 x i32> diff --git a/llvm/test/CodeGen/AArch64/build-vector-two-dup.ll b/llvm/test/CodeGen/AArch64/build-vector-two-dup.ll index dbbfbea9176f6e..5cfa59a3022394 100644 --- a/llvm/test/CodeGen/AArch64/build-vector-two-dup.ll +++ b/llvm/test/CodeGen/AArch64/build-vector-two-dup.ll @@ -78,9 +78,9 @@ entry: define <16 x i8> @test5(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) { ; CHECK-LABEL: test5: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr b0, [x0] ; CHECK-NEXT: adrp x8, .LCPI4_0 ; CHECK-NEXT: ld1r { v1.16b }, [x1] -; CHECK-NEXT: ldr b0, [x0] ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_0] ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll index eae724870fb9dd..039025dafa0d6e 100644 --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll @@ -111,8 +111,8 @@ define <4 x float> @multiple_muls_shuffle_external(<4 x float> %a, <4 x float> % ; CHECK-NEXT: fmul v17.2s, v6.2s, v5.2s ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: fmul v5.2s, v4.2s, v5.2s -; CHECK-NEXT: fcmla v0.4s, v2.4s, v3.4s, #0 ; CHECK-NEXT: fmla v17.2s, v1.2s, v4.2s +; CHECK-NEXT: fcmla v0.4s, v2.4s, v3.4s, #0 ; CHECK-NEXT: str d1, [x0] ; CHECK-NEXT: fneg v16.2s, v5.2s ; CHECK-NEXT: fcmla v0.4s, v2.4s, v3.4s, #90 @@ -162,19 +162,19 @@ define <4 x float> @multiple_muls_shuffle_external_with_loads(ptr %ptr_a, ptr %p ; CHECK-NEXT: ld2 { v0.2s, v1.2s }, [x0] ; CHECK-NEXT: ld2 { v2.2s, v3.2s }, [x1] ; CHECK-NEXT: fmul v4.2s, v3.2s, v1.2s -; CHECK-NEXT: fmul v1.2s, 
v2.2s, v1.2s +; CHECK-NEXT: fmul v6.2s, v2.2s, v1.2s ; CHECK-NEXT: fneg v4.2s, v4.2s -; CHECK-NEXT: fmla v1.2s, v0.2s, v3.2s +; CHECK-NEXT: fmla v6.2s, v0.2s, v3.2s ; CHECK-NEXT: fmla v4.2s, v0.2s, v2.2s ; CHECK-NEXT: str d4, [x4] ; CHECK-NEXT: ldr q5, [x2] -; CHECK-NEXT: ext v2.16b, v5.16b, v5.16b, #8 -; CHECK-NEXT: zip1 v0.2s, v5.2s, v2.2s -; CHECK-NEXT: zip2 v2.2s, v5.2s, v2.2s -; CHECK-NEXT: fmul v3.2s, v0.2s, v1.2s -; CHECK-NEXT: fmul v1.2s, v2.2s, v1.2s -; CHECK-NEXT: fmla v3.2s, v4.2s, v2.2s -; CHECK-NEXT: fneg v2.2s, v1.2s +; CHECK-NEXT: ext v7.16b, v5.16b, v5.16b, #8 +; CHECK-NEXT: zip1 v0.2s, v5.2s, v7.2s +; CHECK-NEXT: zip2 v1.2s, v5.2s, v7.2s +; CHECK-NEXT: fmul v3.2s, v0.2s, v6.2s +; CHECK-NEXT: fmul v6.2s, v1.2s, v6.2s +; CHECK-NEXT: fmla v3.2s, v4.2s, v1.2s +; CHECK-NEXT: fneg v2.2s, v6.2s ; CHECK-NEXT: fmla v2.2s, v4.2s, v0.2s ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: st2 { v2.2s, v3.2s }, [x5] @@ -241,20 +241,20 @@ define <4 x float> @multiple_muls_mul_external(<4 x float> %a, <4 x float> %b, < ; CHECK-NEXT: zip1 v3.2s, v3.2s, v17.2s ; CHECK-NEXT: fmul v18.2s, v6.2s, v7.2s ; CHECK-NEXT: fmul v5.2s, v19.2s, v16.2s -; CHECK-NEXT: fmul v7.2s, v0.2s, v7.2s ; CHECK-NEXT: fmul v16.2s, v2.2s, v16.2s +; CHECK-NEXT: fmul v7.2s, v0.2s, v7.2s ; CHECK-NEXT: fneg v4.2s, v18.2s ; CHECK-NEXT: fmla v5.2s, v3.2s, v2.2s -; CHECK-NEXT: fmla v7.2s, v1.2s, v6.2s ; CHECK-NEXT: fneg v2.2s, v16.2s +; CHECK-NEXT: fmla v7.2s, v1.2s, v6.2s ; CHECK-NEXT: fmla v4.2s, v1.2s, v0.2s -; CHECK-NEXT: fmul v0.2s, v7.2s, v5.2s ; CHECK-NEXT: fmla v2.2s, v3.2s, v19.2s +; CHECK-NEXT: fmul v0.2s, v7.2s, v5.2s ; CHECK-NEXT: fmul v17.2s, v4.2s, v5.2s ; CHECK-NEXT: str d4, [x0] +; CHECK-NEXT: fmla v17.2s, v2.2s, v7.2s ; CHECK-NEXT: fneg v16.2s, v0.2s ; CHECK-NEXT: zip1 v0.4s, v2.4s, v5.4s -; CHECK-NEXT: fmla v17.2s, v2.2s, v7.2s ; CHECK-NEXT: fmla v16.2s, v2.2s, v4.2s ; CHECK-NEXT: st2 { v16.2s, v17.2s }, [x1] ; CHECK-NEXT: ret diff --git 
a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll index c8dc092bb05e43..0481d997d24faf 100644 --- a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll +++ b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll @@ -920,8 +920,10 @@ define i32 @extract_v4i32_shuffle(<4 x i32> %a, <4 x i32> %b, i32 %c) { ; CHECK-GI-NEXT: sub sp, sp, #16 ; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 ; CHECK-GI-NEXT: adrp x8, .LCPI35_0 +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: mov x9, sp ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI35_0] +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: mov w8, w0 ; CHECK-GI-NEXT: and x8, x8, #0x3 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b diff --git a/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll b/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll index 4253b06e1f1aca..0a3b9a070c2b32 100644 --- a/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll +++ b/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll @@ -39,9 +39,9 @@ define void @fptoui_v8f32_to_v8i8_in_loop(ptr %A, ptr %dst) { ; CHECK-NEXT: add x8, x8, #1 ; CHECK-NEXT: cmp x8, #1000 ; CHECK-NEXT: ldp q2, q1, [x9] -; CHECK-NEXT: fcvtzu.4s v3, v1 -; CHECK-NEXT: fcvtzu.4s v2, v2 -; CHECK-NEXT: tbl.16b v1, { v2, v3 }, v0 +; CHECK-NEXT: fcvtzu.4s v4, v1 +; CHECK-NEXT: fcvtzu.4s v3, v2 +; CHECK-NEXT: tbl.16b v1, { v3, v4 }, v0 ; CHECK-NEXT: str d1, [x1], #16 ; CHECK-NEXT: b.eq LBB0_1 ; CHECK-NEXT: ; %bb.2: ; %exit @@ -252,12 +252,12 @@ define void @fptoui_v16f32_to_v16i8_in_loop(ptr %A, ptr %dst) { ; CHECK-NEXT: add x8, x8, #1 ; CHECK-NEXT: cmp x8, #1000 ; CHECK-NEXT: ldp q2, q1, [x9, #32] -; CHECK-NEXT: fcvtzu.4s v5, v1 +; CHECK-NEXT: fcvtzu.4s v7, v1 ; CHECK-NEXT: ldp q1, q3, [x9] -; CHECK-NEXT: fcvtzu.4s v4, v2 -; CHECK-NEXT: fcvtzu.4s v3, v3 -; CHECK-NEXT: fcvtzu.4s v2, v1 -; CHECK-NEXT: tbl.16b v1, { v2, v3, v4, v5 }, v0 +; CHECK-NEXT: fcvtzu.4s v6, v2 +; CHECK-NEXT: 
fcvtzu.4s v5, v3 +; CHECK-NEXT: fcvtzu.4s v4, v1 +; CHECK-NEXT: tbl.16b v1, { v4, v5, v6, v7 }, v0 ; CHECK-NEXT: str q1, [x1], #32 ; CHECK-NEXT: b.eq LBB4_1 ; CHECK-NEXT: ; %bb.2: ; %exit @@ -316,20 +316,20 @@ define void @fptoui_2x_v16f32_to_v16i8_in_loop(ptr %A, ptr %B, ptr %dst) { ; CHECK-NEXT: ldp q3, q4, [x9, #32] ; CHECK-NEXT: ldp q5, q6, [x10] ; CHECK-NEXT: fcvtzu.4s v19, v1 -; CHECK-NEXT: ldp q7, q1, [x9] -; CHECK-NEXT: fcvtzu.4s v4, v4 ; CHECK-NEXT: fcvtzu.4s v18, v2 -; CHECK-NEXT: fcvtzu.4s v3, v3 +; CHECK-NEXT: ldp q2, q1, [x9] +; CHECK-NEXT: fcvtzu.4s v23, v4 ; CHECK-NEXT: fcvtzu.4s v17, v6 -; CHECK-NEXT: fcvtzu.4s v16, v5 ; CHECK-NEXT: add x9, x2, x8, lsl #5 -; CHECK-NEXT: fcvtzu.4s v2, v1 -; CHECK-NEXT: fcvtzu.4s v1, v7 +; CHECK-NEXT: fcvtzu.4s v22, v3 +; CHECK-NEXT: fcvtzu.4s v16, v5 ; CHECK-NEXT: add x8, x8, #1 +; CHECK-NEXT: fcvtzu.4s v21, v1 ; CHECK-NEXT: cmp x8, #1000 -; CHECK-NEXT: tbl.16b v5, { v16, v17, v18, v19 }, v0 -; CHECK-NEXT: tbl.16b v1, { v1, v2, v3, v4 }, v0 -; CHECK-NEXT: stp q1, q5, [x9] +; CHECK-NEXT: fcvtzu.4s v20, v2 +; CHECK-NEXT: tbl.16b v1, { v16, v17, v18, v19 }, v0 +; CHECK-NEXT: tbl.16b v2, { v20, v21, v22, v23 }, v0 +; CHECK-NEXT: stp q2, q1, [x9] ; CHECK-NEXT: b.eq LBB5_1 ; CHECK-NEXT: ; %bb.2: ; %exit ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll index e38394f2b05338..3b8054a635bcda 100644 --- a/llvm/test/CodeGen/AArch64/fptoi.ll +++ b/llvm/test/CodeGen/AArch64/fptoi.ll @@ -1483,12 +1483,12 @@ define <8 x i16> @fptos_v8f64_v8i16(<8 x double> %a) { ; CHECK-SD-NEXT: adrp x8, .LCPI70_0 ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI70_0] -; CHECK-SD-NEXT: xtn v3.2s, v3.2d -; CHECK-SD-NEXT: xtn v2.2s, v2.2d -; CHECK-SD-NEXT: xtn v1.2s, v1.2d -; CHECK-SD-NEXT: xtn v0.2s, v0.2d -; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b +; CHECK-SD-NEXT: xtn v6.2s, v3.2d +; CHECK-SD-NEXT: 
xtn v5.2s, v2.2d +; CHECK-SD-NEXT: xtn v4.2s, v1.2d +; CHECK-SD-NEXT: xtn v3.2s, v0.2d +; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI70_0] +; CHECK-SD-NEXT: tbl v0.16b, { v3.16b, v4.16b, v5.16b, v6.16b }, v0.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptos_v8f64_v8i16: @@ -1514,12 +1514,12 @@ define <8 x i16> @fptou_v8f64_v8i16(<8 x double> %a) { ; CHECK-SD-NEXT: adrp x8, .LCPI71_0 ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI71_0] -; CHECK-SD-NEXT: xtn v3.2s, v3.2d -; CHECK-SD-NEXT: xtn v2.2s, v2.2d -; CHECK-SD-NEXT: xtn v1.2s, v1.2d -; CHECK-SD-NEXT: xtn v0.2s, v0.2d -; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b +; CHECK-SD-NEXT: xtn v6.2s, v3.2d +; CHECK-SD-NEXT: xtn v5.2s, v2.2d +; CHECK-SD-NEXT: xtn v4.2s, v1.2d +; CHECK-SD-NEXT: xtn v3.2s, v0.2d +; CHECK-SD-NEXT: ldr q0, [x8, :lo12:.LCPI71_0] +; CHECK-SD-NEXT: tbl v0.16b, { v3.16b, v4.16b, v5.16b, v6.16b }, v0.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptou_v8f64_v8i16: @@ -1545,21 +1545,21 @@ define <16 x i16> @fptos_v16f64_v16i16(<16 x double> %a) { ; CHECK-SD-NEXT: adrp x8, .LCPI72_0 ; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d ; CHECK-SD-NEXT: fcvtzs v6.2d, v6.2d -; CHECK-SD-NEXT: ldr q16, [x8, :lo12:.LCPI72_0] ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-SD-NEXT: fcvtzs v5.2d, v5.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d ; CHECK-SD-NEXT: fcvtzs v4.2d, v4.2d -; CHECK-SD-NEXT: xtn v3.2s, v3.2d -; CHECK-SD-NEXT: xtn v7.2s, v7.2d -; CHECK-SD-NEXT: xtn v2.2s, v2.2d -; CHECK-SD-NEXT: xtn v6.2s, v6.2d -; CHECK-SD-NEXT: xtn v1.2s, v1.2d -; CHECK-SD-NEXT: xtn v5.2s, v5.2d -; CHECK-SD-NEXT: xtn v0.2s, v0.2d -; CHECK-SD-NEXT: xtn v4.2s, v4.2d -; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b -; CHECK-SD-NEXT: tbl v1.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b +; CHECK-SD-NEXT: xtn v19.2s, v3.2d +; CHECK-SD-NEXT: xtn v23.2s, v7.2d +; CHECK-SD-NEXT: xtn v18.2s, v2.2d +; CHECK-SD-NEXT: xtn v22.2s, 
v6.2d +; CHECK-SD-NEXT: xtn v17.2s, v1.2d +; CHECK-SD-NEXT: xtn v21.2s, v5.2d +; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI72_0] +; CHECK-SD-NEXT: xtn v16.2s, v0.2d +; CHECK-SD-NEXT: xtn v20.2s, v4.2d +; CHECK-SD-NEXT: tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b +; CHECK-SD-NEXT: tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptos_v16f64_v16i16: @@ -1592,21 +1592,21 @@ define <16 x i16> @fptou_v16f64_v16i16(<16 x double> %a) { ; CHECK-SD-NEXT: adrp x8, .LCPI73_0 ; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d ; CHECK-SD-NEXT: fcvtzs v6.2d, v6.2d -; CHECK-SD-NEXT: ldr q16, [x8, :lo12:.LCPI73_0] ; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d ; CHECK-SD-NEXT: fcvtzs v5.2d, v5.2d ; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d ; CHECK-SD-NEXT: fcvtzs v4.2d, v4.2d -; CHECK-SD-NEXT: xtn v3.2s, v3.2d -; CHECK-SD-NEXT: xtn v7.2s, v7.2d -; CHECK-SD-NEXT: xtn v2.2s, v2.2d -; CHECK-SD-NEXT: xtn v6.2s, v6.2d -; CHECK-SD-NEXT: xtn v1.2s, v1.2d -; CHECK-SD-NEXT: xtn v5.2s, v5.2d -; CHECK-SD-NEXT: xtn v0.2s, v0.2d -; CHECK-SD-NEXT: xtn v4.2s, v4.2d -; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b -; CHECK-SD-NEXT: tbl v1.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b +; CHECK-SD-NEXT: xtn v19.2s, v3.2d +; CHECK-SD-NEXT: xtn v23.2s, v7.2d +; CHECK-SD-NEXT: xtn v18.2s, v2.2d +; CHECK-SD-NEXT: xtn v22.2s, v6.2d +; CHECK-SD-NEXT: xtn v17.2s, v1.2d +; CHECK-SD-NEXT: xtn v21.2s, v5.2d +; CHECK-SD-NEXT: ldr q1, [x8, :lo12:.LCPI73_0] +; CHECK-SD-NEXT: xtn v16.2s, v0.2d +; CHECK-SD-NEXT: xtn v20.2s, v4.2d +; CHECK-SD-NEXT: tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b +; CHECK-SD-NEXT: tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptou_v16f64_v16i16: @@ -1634,48 +1634,65 @@ entry: define <32 x i16> @fptos_v32f64_v32i16(<32 x double> %a) { ; CHECK-SD-LABEL: fptos_v32f64_v32i16: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ldp q16, q17, [sp, #64] +; 
CHECK-SD-NEXT: stp d15, d14, [sp, #-64]! // 16-byte Folded Spill +; CHECK-SD-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 +; CHECK-SD-NEXT: .cfi_offset b8, -8 +; CHECK-SD-NEXT: .cfi_offset b9, -16 +; CHECK-SD-NEXT: .cfi_offset b10, -24 +; CHECK-SD-NEXT: .cfi_offset b11, -32 +; CHECK-SD-NEXT: .cfi_offset b12, -40 +; CHECK-SD-NEXT: .cfi_offset b13, -48 +; CHECK-SD-NEXT: .cfi_offset b14, -56 +; CHECK-SD-NEXT: .cfi_offset b15, -64 ; CHECK-SD-NEXT: fcvtzs v3.2d, v3.2d -; CHECK-SD-NEXT: ldp q18, q19, [sp, #96] -; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d -; CHECK-SD-NEXT: ldp q20, q21, [sp, #32] -; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d -; CHECK-SD-NEXT: ldp q22, q23, [sp] -; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-SD-NEXT: fcvtzs v18.2d, v2.2d +; CHECK-SD-NEXT: adrp x8, .LCPI74_0 +; CHECK-SD-NEXT: fcvtzs v19.2d, v1.2d +; CHECK-SD-NEXT: ldp q20, q21, [sp, #160] +; CHECK-SD-NEXT: fcvtzs v22.2d, v0.2d +; CHECK-SD-NEXT: ldp q23, q24, [sp, #96] ; CHECK-SD-NEXT: fcvtzs v7.2d, v7.2d -; CHECK-SD-NEXT: fcvtzs v6.2d, v6.2d -; CHECK-SD-NEXT: fcvtzs v19.2d, v19.2d +; CHECK-SD-NEXT: ldp q16, q17, [sp, #128] +; CHECK-SD-NEXT: xtn v3.2s, v3.2d ; CHECK-SD-NEXT: fcvtzs v21.2d, v21.2d -; CHECK-SD-NEXT: fcvtzs v5.2d, v5.2d ; CHECK-SD-NEXT: fcvtzs v20.2d, v20.2d -; CHECK-SD-NEXT: fcvtzs v18.2d, v18.2d -; CHECK-SD-NEXT: fcvtzs v23.2d, v23.2d +; CHECK-SD-NEXT: xtn v2.2s, v18.2d +; CHECK-SD-NEXT: ldp q18, q25, [sp, #64] +; CHECK-SD-NEXT: xtn v1.2s, v19.2d +; CHECK-SD-NEXT: fcvtzs v19.2d, v24.2d ; CHECK-SD-NEXT: fcvtzs v17.2d, v17.2d -; CHECK-SD-NEXT: fcvtzs v4.2d, v4.2d -; CHECK-SD-NEXT: fcvtzs v22.2d, v22.2d +; CHECK-SD-NEXT: xtn v0.2s, v22.2d +; CHECK-SD-NEXT: fcvtzs v22.2d, v23.2d +; CHECK-SD-NEXT: xtn v29.2s, v7.2d +; CHECK-SD-NEXT: fcvtzs v7.2d, v25.2d +; CHECK-SD-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-SD-NEXT: fcvtzs 
v18.2d, v18.2d ; CHECK-SD-NEXT: fcvtzs v16.2d, v16.2d -; CHECK-SD-NEXT: xtn v3.2s, v3.2d -; CHECK-SD-NEXT: xtn v2.2s, v2.2d -; CHECK-SD-NEXT: adrp x8, .LCPI74_0 -; CHECK-SD-NEXT: xtn v1.2s, v1.2d -; CHECK-SD-NEXT: xtn v0.2s, v0.2d -; CHECK-SD-NEXT: xtn v7.2s, v7.2d -; CHECK-SD-NEXT: xtn v6.2s, v6.2d -; CHECK-SD-NEXT: xtn v21.2s, v21.2d -; CHECK-SD-NEXT: xtn v25.2s, v19.2d -; CHECK-SD-NEXT: xtn v5.2s, v5.2d -; CHECK-SD-NEXT: xtn v20.2s, v20.2d -; CHECK-SD-NEXT: xtn v24.2s, v18.2d -; CHECK-SD-NEXT: xtn v19.2s, v23.2d -; CHECK-SD-NEXT: xtn v23.2s, v17.2d -; CHECK-SD-NEXT: xtn v4.2s, v4.2d -; CHECK-SD-NEXT: xtn v18.2s, v22.2d -; CHECK-SD-NEXT: xtn v22.2s, v16.2d -; CHECK-SD-NEXT: ldr q16, [x8, :lo12:.LCPI74_0] -; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b -; CHECK-SD-NEXT: tbl v1.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b -; CHECK-SD-NEXT: tbl v2.16b, { v18.16b, v19.16b, v20.16b, v21.16b }, v16.16b -; CHECK-SD-NEXT: tbl v3.16b, { v22.16b, v23.16b, v24.16b, v25.16b }, v16.16b +; CHECK-SD-NEXT: fcvtzs v5.2d, v5.2d +; CHECK-SD-NEXT: xtn v15.2s, v21.2d +; CHECK-SD-NEXT: xtn v11.2s, v19.2d +; CHECK-SD-NEXT: fcvtzs v4.2d, v4.2d +; CHECK-SD-NEXT: xtn v14.2s, v20.2d +; CHECK-SD-NEXT: xtn v10.2s, v22.2d +; CHECK-SD-NEXT: xtn v13.2s, v17.2d +; CHECK-SD-NEXT: xtn v9.2s, v7.2d +; CHECK-SD-NEXT: xtn v28.2s, v6.2d +; CHECK-SD-NEXT: xtn v8.2s, v18.2d +; CHECK-SD-NEXT: xtn v12.2s, v16.2d +; CHECK-SD-NEXT: xtn v27.2s, v5.2d +; CHECK-SD-NEXT: xtn v26.2s, v4.2d +; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI74_0] +; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b +; CHECK-SD-NEXT: tbl v2.16b, { v8.16b, v9.16b, v10.16b, v11.16b }, v4.16b +; CHECK-SD-NEXT: tbl v3.16b, { v12.16b, v13.16b, v14.16b, v15.16b }, v4.16b +; CHECK-SD-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: tbl v1.16b, { v26.16b, v27.16b, v28.16b, v29.16b }, v4.16b +; CHECK-SD-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: 
ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptos_v32f64_v32i16: @@ -1721,48 +1738,65 @@ entry: define <32 x i16> @fptou_v32f64_v32i16(<32 x double> %a) { ; CHECK-SD-LABEL: fptou_v32f64_v32i16: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ldp q16, q17, [sp, #64] +; CHECK-SD-NEXT: stp d15, d14, [sp, #-64]! // 16-byte Folded Spill +; CHECK-SD-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-SD-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-SD-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 +; CHECK-SD-NEXT: .cfi_offset b8, -8 +; CHECK-SD-NEXT: .cfi_offset b9, -16 +; CHECK-SD-NEXT: .cfi_offset b10, -24 +; CHECK-SD-NEXT: .cfi_offset b11, -32 +; CHECK-SD-NEXT: .cfi_offset b12, -40 +; CHECK-SD-NEXT: .cfi_offset b13, -48 +; CHECK-SD-NEXT: .cfi_offset b14, -56 +; CHECK-SD-NEXT: .cfi_offset b15, -64 ; CHECK-SD-NEXT: fcvtzs v3.2d, v3.2d -; CHECK-SD-NEXT: ldp q18, q19, [sp, #96] -; CHECK-SD-NEXT: fcvtzs v2.2d, v2.2d -; CHECK-SD-NEXT: ldp q20, q21, [sp, #32] -; CHECK-SD-NEXT: fcvtzs v1.2d, v1.2d -; CHECK-SD-NEXT: ldp q22, q23, [sp] -; CHECK-SD-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-SD-NEXT: fcvtzs v18.2d, v2.2d +; CHECK-SD-NEXT: adrp x8, .LCPI75_0 +; CHECK-SD-NEXT: fcvtzs v19.2d, v1.2d +; CHECK-SD-NEXT: ldp q20, q21, [sp, #160] +; CHECK-SD-NEXT: fcvtzs v22.2d, v0.2d +; CHECK-SD-NEXT: ldp q23, q24, [sp, #96] ; CHECK-SD-NEXT: fcvtzs v7.2d, v7.2d -; CHECK-SD-NEXT: fcvtzs v6.2d, v6.2d -; CHECK-SD-NEXT: fcvtzs v19.2d, v19.2d +; CHECK-SD-NEXT: ldp q16, q17, [sp, #128] +; CHECK-SD-NEXT: xtn v3.2s, v3.2d ; CHECK-SD-NEXT: fcvtzs v21.2d, v21.2d -; CHECK-SD-NEXT: fcvtzs v5.2d, v5.2d ; CHECK-SD-NEXT: fcvtzs v20.2d, v20.2d -; CHECK-SD-NEXT: fcvtzs v18.2d, v18.2d -; CHECK-SD-NEXT: fcvtzs v23.2d, v23.2d +; CHECK-SD-NEXT: xtn v2.2s, v18.2d +; CHECK-SD-NEXT: ldp q18, q25, [sp, #64] +; CHECK-SD-NEXT: xtn 
v1.2s, v19.2d +; CHECK-SD-NEXT: fcvtzs v19.2d, v24.2d ; CHECK-SD-NEXT: fcvtzs v17.2d, v17.2d -; CHECK-SD-NEXT: fcvtzs v4.2d, v4.2d -; CHECK-SD-NEXT: fcvtzs v22.2d, v22.2d +; CHECK-SD-NEXT: xtn v0.2s, v22.2d +; CHECK-SD-NEXT: fcvtzs v22.2d, v23.2d +; CHECK-SD-NEXT: xtn v29.2s, v7.2d +; CHECK-SD-NEXT: fcvtzs v7.2d, v25.2d +; CHECK-SD-NEXT: fcvtzs v6.2d, v6.2d +; CHECK-SD-NEXT: fcvtzs v18.2d, v18.2d ; CHECK-SD-NEXT: fcvtzs v16.2d, v16.2d -; CHECK-SD-NEXT: xtn v3.2s, v3.2d -; CHECK-SD-NEXT: xtn v2.2s, v2.2d -; CHECK-SD-NEXT: adrp x8, .LCPI75_0 -; CHECK-SD-NEXT: xtn v1.2s, v1.2d -; CHECK-SD-NEXT: xtn v0.2s, v0.2d -; CHECK-SD-NEXT: xtn v7.2s, v7.2d -; CHECK-SD-NEXT: xtn v6.2s, v6.2d -; CHECK-SD-NEXT: xtn v21.2s, v21.2d -; CHECK-SD-NEXT: xtn v25.2s, v19.2d -; CHECK-SD-NEXT: xtn v5.2s, v5.2d -; CHECK-SD-NEXT: xtn v20.2s, v20.2d -; CHECK-SD-NEXT: xtn v24.2s, v18.2d -; CHECK-SD-NEXT: xtn v19.2s, v23.2d -; CHECK-SD-NEXT: xtn v23.2s, v17.2d -; CHECK-SD-NEXT: xtn v4.2s, v4.2d -; CHECK-SD-NEXT: xtn v18.2s, v22.2d -; CHECK-SD-NEXT: xtn v22.2s, v16.2d -; CHECK-SD-NEXT: ldr q16, [x8, :lo12:.LCPI75_0] -; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b -; CHECK-SD-NEXT: tbl v1.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b -; CHECK-SD-NEXT: tbl v2.16b, { v18.16b, v19.16b, v20.16b, v21.16b }, v16.16b -; CHECK-SD-NEXT: tbl v3.16b, { v22.16b, v23.16b, v24.16b, v25.16b }, v16.16b +; CHECK-SD-NEXT: fcvtzs v5.2d, v5.2d +; CHECK-SD-NEXT: xtn v15.2s, v21.2d +; CHECK-SD-NEXT: xtn v11.2s, v19.2d +; CHECK-SD-NEXT: fcvtzs v4.2d, v4.2d +; CHECK-SD-NEXT: xtn v14.2s, v20.2d +; CHECK-SD-NEXT: xtn v10.2s, v22.2d +; CHECK-SD-NEXT: xtn v13.2s, v17.2d +; CHECK-SD-NEXT: xtn v9.2s, v7.2d +; CHECK-SD-NEXT: xtn v28.2s, v6.2d +; CHECK-SD-NEXT: xtn v8.2s, v18.2d +; CHECK-SD-NEXT: xtn v12.2s, v16.2d +; CHECK-SD-NEXT: xtn v27.2s, v5.2d +; CHECK-SD-NEXT: xtn v26.2s, v4.2d +; CHECK-SD-NEXT: ldr q4, [x8, :lo12:.LCPI75_0] +; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, 
v4.16b +; CHECK-SD-NEXT: tbl v2.16b, { v8.16b, v9.16b, v10.16b, v11.16b }, v4.16b +; CHECK-SD-NEXT: tbl v3.16b, { v12.16b, v13.16b, v14.16b, v15.16b }, v4.16b +; CHECK-SD-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-SD-NEXT: tbl v1.16b, { v26.16b, v27.16b, v28.16b, v29.16b }, v4.16b +; CHECK-SD-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-SD-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: fptou_v32f64_v32i16: diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll index 2d0931fb4f5257..d620a8851ee449 100644 --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -3365,111 +3365,111 @@ define <16 x i8> @test_signed_v16f64_v16i8(<16 x double> %f) { ; CHECK-NEXT: cmn w10, #128 ; CHECK-NEXT: csel w10, w10, w9, gt ; CHECK-NEXT: cmp w13, #127 -; CHECK-NEXT: mov v0.s[1], w11 ; CHECK-NEXT: csel w12, w13, w8, lt +; CHECK-NEXT: mov v0.s[1], w11 ; CHECK-NEXT: fcvtzs w11, d1 ; CHECK-NEXT: cmn w12, #128 ; CHECK-NEXT: csel w12, w12, w9, gt ; CHECK-NEXT: fmov s1, w12 ; CHECK-NEXT: fcvtzs w12, d2 ; CHECK-NEXT: mov d2, v3.d[1] -; CHECK-NEXT: mov w13, v0.s[1] ; CHECK-NEXT: cmp w11, #127 +; CHECK-NEXT: mov w13, v0.s[1] ; CHECK-NEXT: mov v1.s[1], w10 ; CHECK-NEXT: csel w10, w11, w8, lt ; CHECK-NEXT: cmn w10, #128 ; CHECK-NEXT: fcvtzs w11, d2 -; CHECK-NEXT: mov d2, v4.d[1] -; CHECK-NEXT: mov v0.b[1], w13 ; CHECK-NEXT: csel w10, w10, w9, gt ; CHECK-NEXT: cmp w12, #127 +; CHECK-NEXT: mov v0.b[1], w13 ; CHECK-NEXT: csel w12, w12, w8, lt ; CHECK-NEXT: cmn w12, #128 ; CHECK-NEXT: mov w13, v1.s[1] ; CHECK-NEXT: csel w12, w12, w9, gt ; CHECK-NEXT: cmp w11, #127 -; CHECK-NEXT: mov v0.b[2], v1.b[0] -; CHECK-NEXT: fmov s1, w12 +; CHECK-NEXT: fmov s2, w12 ; CHECK-NEXT: fcvtzs w12, d3 -; CHECK-NEXT: mov v1.s[1], w10 +; CHECK-NEXT: mov d3, 
v4.d[1] +; CHECK-NEXT: mov v0.b[2], v1.b[0] +; CHECK-NEXT: mov v2.s[1], w10 ; CHECK-NEXT: csel w10, w11, w8, lt -; CHECK-NEXT: fcvtzs w11, d2 ; CHECK-NEXT: cmn w10, #128 -; CHECK-NEXT: mov v0.b[3], w13 -; CHECK-NEXT: mov d2, v5.d[1] +; CHECK-NEXT: fcvtzs w11, d3 ; CHECK-NEXT: csel w10, w10, w9, gt ; CHECK-NEXT: cmp w12, #127 +; CHECK-NEXT: mov v0.b[3], w13 ; CHECK-NEXT: csel w12, w12, w8, lt ; CHECK-NEXT: cmn w12, #128 -; CHECK-NEXT: mov w13, v1.s[1] +; CHECK-NEXT: mov w13, v2.s[1] ; CHECK-NEXT: csel w12, w12, w9, gt -; CHECK-NEXT: mov v0.b[4], v1.b[0] ; CHECK-NEXT: cmp w11, #127 -; CHECK-NEXT: fmov s1, w12 +; CHECK-NEXT: fmov s3, w12 ; CHECK-NEXT: fcvtzs w12, d4 -; CHECK-NEXT: mov v1.s[1], w10 +; CHECK-NEXT: mov v0.b[4], v2.b[0] +; CHECK-NEXT: mov d4, v5.d[1] +; CHECK-NEXT: mov v3.s[1], w10 ; CHECK-NEXT: csel w10, w11, w8, lt -; CHECK-NEXT: mov v0.b[5], w13 ; CHECK-NEXT: cmn w10, #128 -; CHECK-NEXT: fcvtzs w11, d2 -; CHECK-NEXT: mov d2, v6.d[1] +; CHECK-NEXT: mov v0.b[5], w13 ; CHECK-NEXT: csel w10, w10, w9, gt ; CHECK-NEXT: cmp w12, #127 +; CHECK-NEXT: fcvtzs w11, d4 ; CHECK-NEXT: csel w12, w12, w8, lt ; CHECK-NEXT: cmn w12, #128 -; CHECK-NEXT: mov w13, v1.s[1] -; CHECK-NEXT: mov v0.b[6], v1.b[0] +; CHECK-NEXT: mov w13, v3.s[1] ; CHECK-NEXT: csel w12, w12, w9, gt -; CHECK-NEXT: cmp w11, #127 -; CHECK-NEXT: fmov s1, w12 +; CHECK-NEXT: mov v0.b[6], v3.b[0] +; CHECK-NEXT: fmov s4, w12 ; CHECK-NEXT: fcvtzs w12, d5 -; CHECK-NEXT: mov v0.b[7], w13 -; CHECK-NEXT: fcvtzs w13, d2 -; CHECK-NEXT: mov d2, v7.d[1] -; CHECK-NEXT: mov v1.s[1], w10 +; CHECK-NEXT: cmp w11, #127 +; CHECK-NEXT: mov d5, v6.d[1] +; CHECK-NEXT: mov v4.s[1], w10 ; CHECK-NEXT: csel w10, w11, w8, lt +; CHECK-NEXT: mov v0.b[7], w13 ; CHECK-NEXT: cmn w10, #128 ; CHECK-NEXT: csel w10, w10, w9, gt ; CHECK-NEXT: cmp w12, #127 +; CHECK-NEXT: fcvtzs w13, d5 ; CHECK-NEXT: csel w11, w12, w8, lt ; CHECK-NEXT: cmn w11, #128 -; CHECK-NEXT: mov w12, v1.s[1] -; CHECK-NEXT: mov v0.b[8], v1.b[0] +; CHECK-NEXT: mov w12, 
v4.s[1] +; CHECK-NEXT: mov v0.b[8], v4.b[0] ; CHECK-NEXT: csel w11, w11, w9, gt -; CHECK-NEXT: cmp w13, #127 -; CHECK-NEXT: fmov s1, w11 +; CHECK-NEXT: fmov s5, w11 ; CHECK-NEXT: fcvtzs w11, d6 +; CHECK-NEXT: cmp w13, #127 +; CHECK-NEXT: mov d6, v7.d[1] ; CHECK-NEXT: mov v0.b[9], w12 -; CHECK-NEXT: mov v1.s[1], w10 +; CHECK-NEXT: mov v5.s[1], w10 ; CHECK-NEXT: csel w10, w13, w8, lt -; CHECK-NEXT: fcvtzs w13, d2 ; CHECK-NEXT: cmn w10, #128 ; CHECK-NEXT: csel w10, w10, w9, gt ; CHECK-NEXT: cmp w11, #127 +; CHECK-NEXT: fcvtzs w13, d6 ; CHECK-NEXT: csel w11, w11, w8, lt ; CHECK-NEXT: cmn w11, #128 -; CHECK-NEXT: mov v0.b[10], v1.b[0] -; CHECK-NEXT: mov w12, v1.s[1] +; CHECK-NEXT: mov v0.b[10], v5.b[0] +; CHECK-NEXT: mov w12, v5.s[1] ; CHECK-NEXT: csel w11, w11, w9, gt -; CHECK-NEXT: cmp w13, #127 -; CHECK-NEXT: fmov s1, w11 +; CHECK-NEXT: fmov s6, w11 ; CHECK-NEXT: fcvtzs w11, d7 +; CHECK-NEXT: cmp w13, #127 ; CHECK-NEXT: mov v0.b[11], w12 -; CHECK-NEXT: mov v1.s[1], w10 +; CHECK-NEXT: mov v6.s[1], w10 ; CHECK-NEXT: csel w10, w13, w8, lt ; CHECK-NEXT: cmn w10, #128 ; CHECK-NEXT: csel w10, w10, w9, gt ; CHECK-NEXT: cmp w11, #127 ; CHECK-NEXT: csel w8, w11, w8, lt ; CHECK-NEXT: cmn w8, #128 -; CHECK-NEXT: mov v0.b[12], v1.b[0] -; CHECK-NEXT: mov w11, v1.s[1] +; CHECK-NEXT: mov v0.b[12], v6.b[0] +; CHECK-NEXT: mov w11, v6.s[1] ; CHECK-NEXT: csel w8, w8, w9, gt -; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: fmov s7, w8 ; CHECK-NEXT: mov v0.b[13], w11 -; CHECK-NEXT: mov v1.s[1], w10 -; CHECK-NEXT: mov v0.b[14], v1.b[0] -; CHECK-NEXT: mov w8, v1.s[1] +; CHECK-NEXT: mov v7.s[1], w10 +; CHECK-NEXT: mov v0.b[14], v7.b[0] +; CHECK-NEXT: mov w8, v7.s[1] ; CHECK-NEXT: mov v0.b[15], w8 ; CHECK-NEXT: ret %x = call <16 x i8> @llvm.fptosi.sat.v16f64.v16i8(<16 x double> %f) @@ -3575,32 +3575,26 @@ define <16 x i16> @test_signed_v16f64_v16i16(<16 x double> %f) { ; CHECK-NEXT: cmp w13, w9 ; CHECK-NEXT: csel w11, w13, w9, lt ; CHECK-NEXT: fcvtzs w13, d3 -; CHECK-NEXT: fmov s3, w12 ; 
CHECK-NEXT: cmn w11, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w11, w11, w8, gt ; CHECK-NEXT: cmp w14, w9 ; CHECK-NEXT: csel w14, w14, w9, lt -; CHECK-NEXT: mov v3.s[1], w10 ; CHECK-NEXT: cmn w14, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w14, w14, w8, gt ; CHECK-NEXT: cmp w13, w9 ; CHECK-NEXT: csel w13, w13, w9, lt -; CHECK-NEXT: fmov s2, w14 ; CHECK-NEXT: cmn w13, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w13, w13, w8, gt ; CHECK-NEXT: cmp w15, w9 ; CHECK-NEXT: csel w15, w15, w9, lt -; CHECK-NEXT: mov v2.s[1], w11 ; CHECK-NEXT: cmn w15, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w16, w15, w8, gt ; CHECK-NEXT: cmp w17, w9 ; CHECK-NEXT: csel w15, w17, w9, lt -; CHECK-NEXT: fmov s1, w16 ; CHECK-NEXT: cmn w15, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w15, w15, w8, gt ; CHECK-NEXT: cmp w18, w9 ; CHECK-NEXT: csel w17, w18, w9, lt -; CHECK-NEXT: mov v1.s[1], w13 ; CHECK-NEXT: cmn w17, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w17, w17, w8, gt ; CHECK-NEXT: cmp w0, w9 @@ -3623,32 +3617,38 @@ define <16 x i16> @test_signed_v16f64_v16i16(<16 x double> %f) { ; CHECK-NEXT: cmp w2, w9 ; CHECK-NEXT: fcvtzs w5, d0 ; CHECK-NEXT: csel w2, w2, w9, lt -; CHECK-NEXT: fmov s0, w17 +; CHECK-NEXT: fmov s3, w12 ; CHECK-NEXT: mov v7.s[1], w18 ; CHECK-NEXT: cmn w2, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w2, w2, w8, gt ; CHECK-NEXT: cmp w3, w9 ; CHECK-NEXT: csel w3, w3, w9, lt +; CHECK-NEXT: mov v3.s[1], w10 ; CHECK-NEXT: fmov s6, w2 -; CHECK-NEXT: mov v0.s[1], w15 ; CHECK-NEXT: cmn w3, #8, lsl #12 // =32768 +; CHECK-NEXT: fmov s2, w14 ; CHECK-NEXT: csel w3, w3, w8, gt ; CHECK-NEXT: cmp w4, w9 ; CHECK-NEXT: csel w4, w4, w9, lt ; CHECK-NEXT: mov v6.s[1], w0 ; CHECK-NEXT: cmn w4, #8, lsl #12 // =32768 +; CHECK-NEXT: mov v2.s[1], w11 ; CHECK-NEXT: csel w12, w4, w8, gt ; CHECK-NEXT: cmp w5, w9 +; CHECK-NEXT: fmov s1, w16 ; CHECK-NEXT: csel w10, w5, w9, lt ; CHECK-NEXT: fmov s5, w12 ; CHECK-NEXT: cmn w10, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w10, w10, w8, gt ; CHECK-NEXT: cmp w6, w9 +; 
CHECK-NEXT: mov v1.s[1], w13 ; CHECK-NEXT: csel w9, w6, w9, lt ; CHECK-NEXT: mov v5.s[1], w3 +; CHECK-NEXT: fmov s0, w17 ; CHECK-NEXT: cmn w9, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w8, w9, w8, gt ; CHECK-NEXT: fmov s4, w8 +; CHECK-NEXT: mov v0.s[1], w15 ; CHECK-NEXT: adrp x8, .LCPI85_0 ; CHECK-NEXT: ldr q16, [x8, :lo12:.LCPI85_0] ; CHECK-NEXT: mov v4.s[1], w10 diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll index 62f5e0fe2dcaa5..16e04070b65439 100644 --- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -2751,8 +2751,8 @@ define <8 x i8> @test_unsigned_v8f64_v8i8(<8 x double> %f) { ; CHECK-NEXT: fcvtzu w12, d2 ; CHECK-NEXT: fcvtzu w14, d1 ; CHECK-NEXT: fcvtzu w8, d4 -; CHECK-NEXT: fcvtzu w10, d5 ; CHECK-NEXT: mov d4, v0.d[1] +; CHECK-NEXT: fcvtzu w10, d5 ; CHECK-NEXT: fcvtzu w13, d3 ; CHECK-NEXT: cmp w8, #255 ; CHECK-NEXT: fcvtzu w15, d4 @@ -2760,29 +2760,29 @@ define <8 x i8> @test_unsigned_v8f64_v8i8(<8 x double> %f) { ; CHECK-NEXT: cmp w9, #255 ; CHECK-NEXT: csel w9, w9, w11, lo ; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: fmov s3, w9 +; CHECK-NEXT: fmov s4, w9 ; CHECK-NEXT: csel w9, w10, w11, lo ; CHECK-NEXT: cmp w12, #255 ; CHECK-NEXT: fcvtzu w10, d0 -; CHECK-NEXT: mov v3.s[1], w8 +; CHECK-NEXT: mov v4.s[1], w8 ; CHECK-NEXT: csel w8, w12, w11, lo ; CHECK-NEXT: cmp w13, #255 -; CHECK-NEXT: fmov s2, w8 +; CHECK-NEXT: fmov s3, w8 ; CHECK-NEXT: csel w8, w13, w11, lo ; CHECK-NEXT: cmp w14, #255 -; CHECK-NEXT: mov v2.s[1], w9 +; CHECK-NEXT: mov v3.s[1], w9 ; CHECK-NEXT: csel w9, w14, w11, lo ; CHECK-NEXT: cmp w15, #255 -; CHECK-NEXT: fmov s1, w9 +; CHECK-NEXT: fmov s2, w9 ; CHECK-NEXT: csel w9, w15, w11, lo ; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: mov v1.s[1], w8 +; CHECK-NEXT: mov v2.s[1], w8 ; CHECK-NEXT: csel w8, w10, w11, lo -; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: adrp x8, .LCPI82_0 -; CHECK-NEXT: ldr d4, [x8, 
:lo12:.LCPI82_0] -; CHECK-NEXT: mov v0.s[1], w9 -; CHECK-NEXT: tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.8b +; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI82_0] +; CHECK-NEXT: mov v1.s[1], w9 +; CHECK-NEXT: tbl v0.8b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.8b ; CHECK-NEXT: ret %x = call <8 x i8> @llvm.fptoui.sat.v8f64.v8i8(<8 x double> %f) ret <8 x i8> %x @@ -2802,29 +2802,29 @@ define <16 x i8> @test_unsigned_v16f64_v16i8(<16 x double> %f) { ; CHECK-NEXT: csel w10, w10, w8, lo ; CHECK-NEXT: fmov s0, w10 ; CHECK-NEXT: fcvtzu w10, d16 +; CHECK-NEXT: mov d16, v2.d[1] ; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: fcvtzu w9, d1 -; CHECK-NEXT: mov d1, v2.d[1] ; CHECK-NEXT: cmp w10, #255 ; CHECK-NEXT: csel w10, w10, w8, lo ; CHECK-NEXT: cmp w9, #255 ; CHECK-NEXT: mov w11, v0.s[1] ; CHECK-NEXT: csel w9, w9, w8, lo -; CHECK-NEXT: fmov s16, w9 -; CHECK-NEXT: fcvtzu w9, d1 -; CHECK-NEXT: mov d1, v3.d[1] +; CHECK-NEXT: fmov s1, w9 +; CHECK-NEXT: fcvtzu w9, d16 +; CHECK-NEXT: mov d16, v3.d[1] ; CHECK-NEXT: mov v0.b[1], w11 -; CHECK-NEXT: mov v16.s[1], w10 +; CHECK-NEXT: mov v1.s[1], w10 ; CHECK-NEXT: fcvtzu w10, d2 ; CHECK-NEXT: cmp w9, #255 ; CHECK-NEXT: csel w9, w9, w8, lo ; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: mov w11, v16.s[1] -; CHECK-NEXT: mov v0.b[2], v16.b[0] +; CHECK-NEXT: mov w11, v1.s[1] +; CHECK-NEXT: mov v0.b[2], v1.b[0] ; CHECK-NEXT: csel w10, w10, w8, lo ; CHECK-NEXT: fmov s2, w10 -; CHECK-NEXT: fcvtzu w10, d1 -; CHECK-NEXT: mov d1, v4.d[1] +; CHECK-NEXT: fcvtzu w10, d16 +; CHECK-NEXT: mov d16, v4.d[1] ; CHECK-NEXT: mov v0.b[3], w11 ; CHECK-NEXT: mov v2.s[1], w9 ; CHECK-NEXT: fcvtzu w9, d3 @@ -2834,58 +2834,58 @@ define <16 x i8> @test_unsigned_v16f64_v16i8(<16 x double> %f) { ; CHECK-NEXT: mov w11, v2.s[1] ; CHECK-NEXT: mov v0.b[4], v2.b[0] ; CHECK-NEXT: csel w9, w9, w8, lo -; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: fcvtzu w9, d1 -; CHECK-NEXT: mov d1, v5.d[1] +; CHECK-NEXT: fmov s3, w9 +; CHECK-NEXT: fcvtzu w9, d16 +; CHECK-NEXT: mov d16, v5.d[1] ; 
CHECK-NEXT: mov v0.b[5], w11 -; CHECK-NEXT: mov v2.s[1], w10 +; CHECK-NEXT: mov v3.s[1], w10 ; CHECK-NEXT: fcvtzu w10, d4 ; CHECK-NEXT: cmp w9, #255 ; CHECK-NEXT: csel w9, w9, w8, lo ; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: mov w11, v2.s[1] -; CHECK-NEXT: mov v0.b[6], v2.b[0] +; CHECK-NEXT: mov w11, v3.s[1] +; CHECK-NEXT: mov v0.b[6], v3.b[0] ; CHECK-NEXT: csel w10, w10, w8, lo -; CHECK-NEXT: fmov s2, w10 -; CHECK-NEXT: fcvtzu w10, d1 -; CHECK-NEXT: mov d1, v6.d[1] +; CHECK-NEXT: fmov s4, w10 +; CHECK-NEXT: fcvtzu w10, d16 ; CHECK-NEXT: mov v0.b[7], w11 -; CHECK-NEXT: mov v2.s[1], w9 +; CHECK-NEXT: mov v4.s[1], w9 ; CHECK-NEXT: fcvtzu w9, d5 +; CHECK-NEXT: mov d5, v6.d[1] ; CHECK-NEXT: cmp w10, #255 ; CHECK-NEXT: csel w10, w10, w8, lo ; CHECK-NEXT: cmp w9, #255 -; CHECK-NEXT: mov w11, v2.s[1] -; CHECK-NEXT: mov v0.b[8], v2.b[0] +; CHECK-NEXT: mov w11, v4.s[1] +; CHECK-NEXT: mov v0.b[8], v4.b[0] ; CHECK-NEXT: csel w9, w9, w8, lo -; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: fcvtzu w9, d1 -; CHECK-NEXT: mov d1, v7.d[1] +; CHECK-NEXT: fmov s16, w9 +; CHECK-NEXT: fcvtzu w9, d5 +; CHECK-NEXT: mov d5, v7.d[1] ; CHECK-NEXT: mov v0.b[9], w11 -; CHECK-NEXT: mov v2.s[1], w10 +; CHECK-NEXT: mov v16.s[1], w10 ; CHECK-NEXT: fcvtzu w10, d6 ; CHECK-NEXT: cmp w9, #255 ; CHECK-NEXT: csel w9, w9, w8, lo ; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: mov v0.b[10], v2.b[0] -; CHECK-NEXT: mov w11, v2.s[1] +; CHECK-NEXT: mov v0.b[10], v16.b[0] +; CHECK-NEXT: mov w11, v16.s[1] ; CHECK-NEXT: csel w10, w10, w8, lo -; CHECK-NEXT: fmov s2, w10 +; CHECK-NEXT: fmov s6, w10 ; CHECK-NEXT: fcvtzu w10, d7 ; CHECK-NEXT: mov v0.b[11], w11 -; CHECK-NEXT: mov v2.s[1], w9 -; CHECK-NEXT: fcvtzu w9, d1 +; CHECK-NEXT: mov v6.s[1], w9 +; CHECK-NEXT: fcvtzu w9, d5 ; CHECK-NEXT: cmp w9, #255 -; CHECK-NEXT: mov v0.b[12], v2.b[0] -; CHECK-NEXT: mov w11, v2.s[1] +; CHECK-NEXT: mov v0.b[12], v6.b[0] +; CHECK-NEXT: mov w11, v6.s[1] ; CHECK-NEXT: csel w9, w9, w8, lo ; CHECK-NEXT: cmp w10, #255 ; CHECK-NEXT: csel w8, 
w10, w8, lo -; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: fmov s5, w8 ; CHECK-NEXT: mov v0.b[13], w11 -; CHECK-NEXT: mov v1.s[1], w9 -; CHECK-NEXT: mov v0.b[14], v1.b[0] -; CHECK-NEXT: mov w8, v1.s[1] +; CHECK-NEXT: mov v5.s[1], w9 +; CHECK-NEXT: mov v0.b[14], v5.b[0] +; CHECK-NEXT: mov w8, v5.s[1] ; CHECK-NEXT: mov v0.b[15], w8 ; CHECK-NEXT: ret %x = call <16 x i8> @llvm.fptoui.sat.v16f64.v16i8(<16 x double> %f) @@ -2903,8 +2903,8 @@ define <8 x i16> @test_unsigned_v8f64_v8i16(<8 x double> %f) { ; CHECK-NEXT: fcvtzu w12, d2 ; CHECK-NEXT: fcvtzu w14, d1 ; CHECK-NEXT: fcvtzu w8, d4 -; CHECK-NEXT: fcvtzu w11, d5 ; CHECK-NEXT: mov d4, v0.d[1] +; CHECK-NEXT: fcvtzu w11, d5 ; CHECK-NEXT: fcvtzu w13, d3 ; CHECK-NEXT: cmp w8, w10 ; CHECK-NEXT: fcvtzu w15, d4 @@ -2912,29 +2912,29 @@ define <8 x i16> @test_unsigned_v8f64_v8i16(<8 x double> %f) { ; CHECK-NEXT: cmp w9, w10 ; CHECK-NEXT: csel w9, w9, w10, lo ; CHECK-NEXT: cmp w11, w10 -; CHECK-NEXT: fmov s3, w9 +; CHECK-NEXT: fmov s4, w9 ; CHECK-NEXT: csel w9, w11, w10, lo ; CHECK-NEXT: cmp w12, w10 ; CHECK-NEXT: fcvtzu w11, d0 -; CHECK-NEXT: mov v3.s[1], w8 +; CHECK-NEXT: mov v4.s[1], w8 ; CHECK-NEXT: csel w8, w12, w10, lo ; CHECK-NEXT: cmp w13, w10 -; CHECK-NEXT: fmov s2, w8 +; CHECK-NEXT: fmov s3, w8 ; CHECK-NEXT: csel w8, w13, w10, lo ; CHECK-NEXT: cmp w14, w10 -; CHECK-NEXT: mov v2.s[1], w9 +; CHECK-NEXT: mov v3.s[1], w9 ; CHECK-NEXT: csel w9, w14, w10, lo ; CHECK-NEXT: cmp w15, w10 -; CHECK-NEXT: fmov s1, w9 +; CHECK-NEXT: fmov s2, w9 ; CHECK-NEXT: csel w9, w15, w10, lo ; CHECK-NEXT: cmp w11, w10 -; CHECK-NEXT: mov v1.s[1], w8 +; CHECK-NEXT: mov v2.s[1], w8 ; CHECK-NEXT: csel w8, w11, w10, lo -; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: adrp x8, .LCPI84_0 -; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI84_0] -; CHECK-NEXT: mov v0.s[1], w9 -; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b +; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI84_0] +; CHECK-NEXT: mov v1.s[1], w9 +; CHECK-NEXT: tbl v0.16b, { 
v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b ; CHECK-NEXT: ret %x = call <8 x i16> @llvm.fptoui.sat.v8f64.v8i16(<8 x double> %f) ret <8 x i16> %x @@ -2973,53 +2973,53 @@ define <16 x i16> @test_unsigned_v16f64_v16i16(<16 x double> %f) { ; CHECK-NEXT: fcvtzu w16, d0 ; CHECK-NEXT: csel w11, w11, w8, lo ; CHECK-NEXT: cmp w17, w8 -; CHECK-NEXT: fmov s18, w11 ; CHECK-NEXT: mov v19.s[1], w13 ; CHECK-NEXT: csel w13, w17, w8, lo ; CHECK-NEXT: cmp w10, w8 ; CHECK-NEXT: csel w10, w10, w8, lo ; CHECK-NEXT: cmp w18, w8 -; CHECK-NEXT: fcvtzu w17, d2 +; CHECK-NEXT: fmov s18, w11 ; CHECK-NEXT: csel w11, w18, w8, lo ; CHECK-NEXT: cmp w12, w8 -; CHECK-NEXT: mov v18.s[1], w9 +; CHECK-NEXT: fcvtzu w17, d2 ; CHECK-NEXT: csel w12, w12, w8, lo ; CHECK-NEXT: cmp w16, w8 -; CHECK-NEXT: fmov s17, w10 +; CHECK-NEXT: fcvtzu w18, d6 +; CHECK-NEXT: mov v18.s[1], w9 ; CHECK-NEXT: csel w9, w16, w8, lo ; CHECK-NEXT: cmp w14, w8 -; CHECK-NEXT: fcvtzu w16, d5 +; CHECK-NEXT: fmov s17, w10 ; CHECK-NEXT: csel w10, w14, w8, lo -; CHECK-NEXT: fcvtzu w18, d6 +; CHECK-NEXT: fcvtzu w16, d5 +; CHECK-NEXT: fmov s23, w10 ; CHECK-NEXT: cmp w17, w8 -; CHECK-NEXT: fmov s5, w10 +; CHECK-NEXT: fcvtzu w14, d3 ; CHECK-NEXT: csel w10, w17, w8, lo ; CHECK-NEXT: cmp w15, w8 -; CHECK-NEXT: fcvtzu w14, d3 ; CHECK-NEXT: fcvtzu w17, d4 -; CHECK-NEXT: fmov s16, w12 ; CHECK-NEXT: mov v17.s[1], w13 -; CHECK-NEXT: mov v5.s[1], w9 +; CHECK-NEXT: mov v23.s[1], w9 ; CHECK-NEXT: csel w9, w15, w8, lo ; CHECK-NEXT: cmp w18, w8 -; CHECK-NEXT: fmov s4, w9 +; CHECK-NEXT: fmov s22, w9 ; CHECK-NEXT: csel w9, w18, w8, lo ; CHECK-NEXT: cmp w16, w8 -; CHECK-NEXT: mov v16.s[1], w11 -; CHECK-NEXT: mov v4.s[1], w10 +; CHECK-NEXT: fmov s16, w12 +; CHECK-NEXT: mov v22.s[1], w10 ; CHECK-NEXT: csel w10, w16, w8, lo ; CHECK-NEXT: cmp w14, w8 -; CHECK-NEXT: fmov s3, w10 +; CHECK-NEXT: fmov s21, w10 ; CHECK-NEXT: csel w10, w14, w8, lo ; CHECK-NEXT: cmp w17, w8 ; CHECK-NEXT: csel w8, w17, w8, lo -; CHECK-NEXT: fmov s2, w8 +; CHECK-NEXT: mov v16.s[1], w11 
+; CHECK-NEXT: mov v21.s[1], w9 +; CHECK-NEXT: fmov s20, w8 ; CHECK-NEXT: adrp x8, .LCPI85_0 -; CHECK-NEXT: mov v3.s[1], w9 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI85_0] -; CHECK-NEXT: mov v2.s[1], w10 +; CHECK-NEXT: mov v20.s[1], w10 ; CHECK-NEXT: tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b -; CHECK-NEXT: tbl v1.16b, { v2.16b, v3.16b, v4.16b, v5.16b }, v1.16b +; CHECK-NEXT: tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b ; CHECK-NEXT: ret %x = call <16 x i16> @llvm.fptoui.sat.v16f64.v16i16(<16 x double> %f) ret <16 x i16> %x diff --git a/llvm/test/CodeGen/AArch64/insert-subvector.ll b/llvm/test/CodeGen/AArch64/insert-subvector.ll index d664421086fef0..6828fa9f1508c8 100644 --- a/llvm/test/CodeGen/AArch64/insert-subvector.ll +++ b/llvm/test/CodeGen/AArch64/insert-subvector.ll @@ -47,10 +47,11 @@ define <16 x i8> @insert_v16i8_4_1(float %tmp, <16 x i8> %b, <16 x i8> %a) { define <16 x i8> @insert_v16i8_4_15(float %tmp, <16 x i8> %b, <16 x i8> %a) { ; CHECK-LABEL: insert_v16i8_4_15: ; CHECK: // %bb.0: -; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: // kill: def $q2 killed $q2 def $q2_q3 ; CHECK-NEXT: adrp x8, .LCPI4_0 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_0] -; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-NEXT: mov v3.16b, v1.16b +; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI4_0] +; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b ; CHECK-NEXT: ret %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %s2 @@ -145,10 +146,11 @@ define <8 x i16> @insert_v8i16_2_1(float %tmp, <8 x i16> %b, <8 x i16> %a) { define <8 x i16> @insert_v8i16_2_15(float %tmp, <8 x i16> %b, <8 x i16> %a) { ; CHECK-LABEL: insert_v8i16_2_15: ; CHECK: // %bb.0: -; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: // kill: def $q2 killed $q2 def $q2_q3 ; CHECK-NEXT: adrp x8, .LCPI13_0 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_0] -; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-NEXT: mov v3.16b, v1.16b +; CHECK-NEXT: ldr q0, [x8, 
:lo12:.LCPI13_0] +; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b ; CHECK-NEXT: ret %s2 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %s2 @@ -270,6 +272,7 @@ define <16 x i8> @load_v16i8_4_1(float %tmp, <16 x i8> %b, ptr %a) { define <16 x i8> @load_v16i8_4_15(float %tmp, <16 x i8> %b, ptr %a) { ; CHECK-LABEL: load_v16i8_4_15: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $q1 killed $q1 def $q0_q1 ; CHECK-NEXT: adrp x8, .LCPI24_0 ; CHECK-NEXT: ldr s0, [x0] ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI24_0] @@ -490,6 +493,7 @@ define <8 x i16> @load_v8i16_2_1(float %tmp, <8 x i16> %b, ptr %a) { define <8 x i16> @load_v8i16_2_15(float %tmp, <8 x i16> %b, ptr %a) { ; CHECK-LABEL: load_v8i16_2_15: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $q1 killed $q1 def $q0_q1 ; CHECK-NEXT: adrp x8, .LCPI40_0 ; CHECK-NEXT: ldr s0, [x0] ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI40_0] diff --git a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll index 4907abc6e946ec..50c0c8b11e7517 100644 --- a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll +++ b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll @@ -1349,14 +1349,18 @@ define <8 x i16> @vselect_equivalent_shuffle_v8i16(<8 x i16> %a, <8 x i16> %b) { ; CHECK-SD-LABEL: vselect_equivalent_shuffle_v8i16: ; CHECK-SD: // %bb.0: ; CHECK-SD-NEXT: adrp x8, .LCPI92_0 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI92_0] +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: vselect_equivalent_shuffle_v8i16: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI92_0 +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI92_0] +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl 
v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-GI-NEXT: ret %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> @@ -1382,8 +1386,9 @@ define <8 x i16> @vselect_equivalent_shuffle_v8i16_zero(<8 x i16> %a) { ; ; CHECK-GI-LABEL: vselect_equivalent_shuffle_v8i16_zero: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: movi v1.2d, #0000000000000000 +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q0_q1 ; CHECK-GI-NEXT: adrp x8, .LCPI93_0 +; CHECK-GI-NEXT: movi v1.2d, #0000000000000000 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI93_0] ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-GI-NEXT: ret @@ -1417,8 +1422,9 @@ define <8 x i16> @vselect_equivalent_shuffle_v8i16_zeroswap(<8 x i16> %a) { ; ; CHECK-GI-LABEL: vselect_equivalent_shuffle_v8i16_zeroswap: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: movi v31.2d, #0000000000000000 +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 def $q31_q0 ; CHECK-GI-NEXT: adrp x8, .LCPI94_0 +; CHECK-GI-NEXT: movi v31.2d, #0000000000000000 ; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI94_0] ; CHECK-GI-NEXT: tbl v0.16b, { v31.16b, v0.16b }, v1.16b ; CHECK-GI-NEXT: ret @@ -1460,7 +1466,9 @@ define <4 x i32> @vselect_equivalent_shuffle_v4i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-GI-LABEL: vselect_equivalent_shuffle_v4i32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI96_0 +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI96_0] +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-GI-NEXT: ret %c = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> diff --git a/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll b/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll index 8c88d3c33e07ce..3f590226c47150 100644 --- a/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll +++ b/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll @@ -267,8 +267,12 @@ entry: define <16 x i8> @extract_4_v4i32_badindex(<4 x i32> %a, <4 x 
i32> %b, <4 x i32> %c, <4 x i32> %d) { ; CHECK-LABEL: extract_4_v4i32_badindex: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: adrp x8, .LCPI5_0 +; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI5_0] +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll b/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll index 1ed9e7cc5254d3..de90024a4a2571 100644 --- a/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll +++ b/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll @@ -47,7 +47,9 @@ define <8 x i16> @v8i16_2(<4 x i16> %a, <4 x i16> %b) { ; CHECK-LABEL: v8i16_2: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, .LCPI4_0 +; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_0] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-NEXT: ret entry: @@ -80,7 +82,9 @@ define <16 x i8> @v16i8_2(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: v16i8_2: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, .LCPI7_0 +; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI7_0] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll b/llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll index d315c306aa37a0..afcced5dcb9ab5 100644 --- a/llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll +++ b/llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll @@ -137,7 +137,9 @@ define <8 x i16> 
@shuffle_widen_faili1(<4 x i16> %a, <4 x i16> %b) { ; CHECK-LABEL: shuffle_widen_faili1: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, .LCPI12_0 +; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI12_0] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-NEXT: ret entry: @@ -150,7 +152,9 @@ define <8 x i16> @shuffle_widen_fail2(<4 x i16> %a, <4 x i16> %b) { ; CHECK-LABEL: shuffle_widen_fail2: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, .LCPI13_0 +; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_0] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-NEXT: ret entry: @@ -163,7 +167,9 @@ define <8 x i16> @shuffle_widen_fail3(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: shuffle_widen_fail3: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, .LCPI14_0 +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_0] +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/seqpairspill.mir b/llvm/test/CodeGen/AArch64/seqpairspill.mir index b29ab7727f65d6..0e6c94c44712c7 100644 --- a/llvm/test/CodeGen/AArch64/seqpairspill.mir +++ b/llvm/test/CodeGen/AArch64/seqpairspill.mir @@ -7,11 +7,11 @@ body: | bb.0: ; Check the spill/reload sequence for the %0 register ; CHECK: renamable $[[REG0:[a-z0-9]+]]_[[REG1:[a-z0-9]+]] = CASPALX - ; CHECK-NEXT: STPXi killed renamable $[[REG0]], renamable $[[REG1]], %stack.0, 0 :: (store (s128) into %stack.0, align 8) + ; CHECK-NEXT: STPXi renamable $[[REG0]], renamable $[[REG1]], %stack.0, 0, implicit killed $[[REG0]]_[[REG1]] :: (store (s128) into %stack.0, align 8) ; CHECK: INLINEASM - ; 
CHECK: renamable $[[REG2:[a-z0-9]+]], renamable $[[REG3:[a-z0-9]+]] = LDPXi %stack.0, 0 :: (load (s128) from %stack.0, align 8) + ; CHECK: renamable $[[REG2:[a-z0-9]+]], renamable $[[REG3:[a-z0-9]+]] = LDPXi %stack.0, 0, implicit-def $[[REG2]]_[[REG3]] :: (load (s128) from %stack.0, align 8) ; CHECK-NEXT: $xzr = COPY renamable $[[REG2]] - ; CHECK-NEXT: $xzr = COPY killed renamable $[[REG3]] + ; CHECK-NEXT: $xzr = COPY renamable $[[REG3]] %0 : xseqpairsclass = IMPLICIT_DEF %1 : xseqpairsclass = IMPLICIT_DEF %2 : gpr64common = IMPLICIT_DEF @@ -27,11 +27,11 @@ body: | bb.0: ; Check the spill/reload sequence for the %0 register ; CHECK: $[[REG0:[a-z0-9]+]]_[[REG1:[a-z0-9]+]] = CASPALW - ; CHECK-NEXT: STPWi killed renamable $[[REG0]], renamable $[[REG1]], %stack.0, 0 :: (store (s64) into %stack.0, align 4) + ; CHECK-NEXT: STPWi renamable $[[REG0]], renamable $[[REG1]], %stack.0, 0, implicit killed $[[REG0]]_[[REG1]] :: (store (s64) into %stack.0, align 4) ; CHECK: INLINEASM - ; CHECK: renamable $[[REG2:[a-z0-9]+]], renamable $[[REG3:[a-z0-9]+]] = LDPWi %stack.0, 0 :: (load (s64) from %stack.0, align 4) + ; CHECK: renamable $[[REG2:[a-z0-9]+]], renamable $[[REG3:[a-z0-9]+]] = LDPWi %stack.0, 0, implicit-def $[[REG2]]_[[REG3]] :: (load (s64) from %stack.0, align 4) ; CHECK-NEXT: $xzr = COPY renamable $[[REG2]] - ; CHECK-NEXT: $xzr = COPY killed renamable $[[REG3]] + ; CHECK-NEXT: $xzr = COPY renamable $[[REG3]] %0 : wseqpairsclass = IMPLICIT_DEF %1 : wseqpairsclass = IMPLICIT_DEF %2 : gpr64common = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll b/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll index 4e49a055067847..fb571eff39fe50 100644 --- a/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll +++ b/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll @@ -21,8 +21,12 @@ define <16 x i8> @shuffle4_v4i8_16(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> %d) { ; CHECK-LABEL: shuffle4_v4i8_16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def 
$q0_q1_q2_q3 ; CHECK-NEXT: adrp x8, .LCPI0_0 +; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI0_0] +; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b ; CHECK-NEXT: ret %x = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> @@ -43,8 +47,12 @@ define <16 x i8> @shuffle4_v4i8_16(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i define <8 x i8> @shuffle4_v4i8_8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> %d) { ; CHECK-LABEL: shuffle4_v4i8_8: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: adrp x8, .LCPI1_0 +; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: ldr d4, [x8, :lo12:.LCPI1_0] +; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.8b ; CHECK-NEXT: ret %x = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> @@ -93,10 +101,10 @@ define <8 x i8> @shuffle4_v4i8_8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> define <16 x i8> @shuffle4_v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) { ; CHECK-LABEL: shuffle4_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 ; CHECK-NEXT: adrp x8, .LCPI2_0 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: mov v2.d[1], v3.d[0] @@ -206,10 +214,10 @@ define <8 x i16> @shuffle4_v8i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x ; 
CHECK-LABEL: shuffle4_v8i16: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov d5, d2 -; CHECK-NEXT: fmov d4, d0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 ; CHECK-NEXT: adrp x8, .LCPI4_0 +; CHECK-NEXT: fmov d4, d0 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI4_0] ; CHECK-NEXT: mov v4.d[1], v1.d[0] ; CHECK-NEXT: mov v5.d[1], v3.d[0] @@ -274,10 +282,10 @@ define <16 x i8> @shuffle4_v8i8_v16i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 ; CHECK-NEXT: mov v0.d[1], v0.d[0] ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI6_0] ; CHECK-NEXT: adrp x8, .LCPI6_1 -; CHECK-NEXT: tbl v2.8b, { v2.16b }, v1.8b -; CHECK-NEXT: tbl v1.8b, { v0.16b }, v1.8b +; CHECK-NEXT: tbl v3.8b, { v2.16b }, v1.8b +; CHECK-NEXT: tbl v2.8b, { v0.16b }, v1.8b ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI6_1] -; CHECK-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b +; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b ; CHECK-NEXT: ret %x = shufflevector <8 x i8> %a, <8 x i8> %b, <4 x i32> %y = shufflevector <8 x i8> %c, <8 x i8> %d, <4 x i32> @@ -346,10 +354,10 @@ define <8 x i16> @shuffle4_v4i8_zext(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x ; CHECK-LABEL: shuffle4_v4i8_zext: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov d5, d2 -; CHECK-NEXT: fmov d4, d0 -; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 ; CHECK-NEXT: adrp x8, .LCPI8_0 +; CHECK-NEXT: fmov d4, d0 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI8_0] ; CHECK-NEXT: mov v4.d[1], v1.d[0] ; CHECK-NEXT: mov v5.d[1], v3.d[0] @@ -385,8 +393,12 @@ define <8 x i16> @shuffle4_v4i8_zext(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x define <16 x i8> @shuffle4_v4i16_trunc(<4 x i16> %ae, <4 x i16> %be, <4 x i16> %ce, <4 x i16> %de) { ; CHECK-LABEL: shuffle4_v4i16_trunc: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: adrp x8, .LCPI9_0 +; CHECK-NEXT: // kill: def $d2 killed $d2 killed 
$q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI9_0] +; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b ; CHECK-NEXT: ret %a = trunc <4 x i16> %ae to <4 x i8> @@ -420,13 +432,13 @@ define <16 x i8> @shuffle4_v4i16_trunc(<4 x i16> %ae, <4 x i16> %be, <4 x i16> % define <16 x i8> @shuffle4_v4i32_trunc(<4 x i32> %ae, <4 x i32> %be, <4 x i32> %ce, <4 x i32> %de) { ; CHECK-LABEL: shuffle4_v4i32_trunc: ; CHECK: // %bb.0: -; CHECK-NEXT: xtn v0.4h, v0.4s -; CHECK-NEXT: xtn v1.4h, v1.4s +; CHECK-NEXT: xtn v4.4h, v0.4s ; CHECK-NEXT: adrp x8, .LCPI10_0 -; CHECK-NEXT: xtn v2.4h, v2.4s -; CHECK-NEXT: xtn v3.4h, v3.4s -; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI10_0] -; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b +; CHECK-NEXT: xtn v5.4h, v1.4s +; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI10_0] +; CHECK-NEXT: xtn v6.4h, v2.4s +; CHECK-NEXT: xtn v7.4h, v3.4s +; CHECK-NEXT: tbl v0.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v0.16b ; CHECK-NEXT: ret %a = trunc <4 x i32> %ae to <4 x i8> %b = trunc <4 x i32> %be to <4 x i8> @@ -458,8 +470,11 @@ define <16 x i8> @shuffle4_v4i32_trunc(<4 x i32> %ae, <4 x i32> %be, <4 x i32> % define <12 x i8> @shuffle3_v4i8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c) { ; CHECK-LABEL: shuffle3_v4i8: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: adrp x8, .LCPI11_0 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI11_0] +; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v3.16b ; CHECK-NEXT: ret %x = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32> @@ -489,9 +504,9 @@ define <8 x i16> @shuffle3_v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) { ; CHECK-LABEL: 
shuffle3_v4i16: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov d3, d2 -; CHECK-NEXT: fmov d2, d0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: adrp x8, .LCPI12_0 +; CHECK-NEXT: fmov d2, d0 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI12_0] ; CHECK-NEXT: mov v2.d[1], v1.d[0] ; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b @@ -548,12 +563,12 @@ define <8 x i8> @insert4_v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c, <16 x i8> ; CHECK-LABEL: insert4_v8i8: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 ; CHECK-NEXT: mov v4.16b, v3.16b -; CHECK-NEXT: mov v3.16b, v1.16b +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 ; CHECK-NEXT: adrp x8, .LCPI14_0 -; CHECK-NEXT: mov v0.d[1], v2.d[0] ; CHECK-NEXT: adrp x9, .LCPI14_1 +; CHECK-NEXT: mov v0.d[1], v2.d[0] +; CHECK-NEXT: mov v3.16b, v1.16b ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI14_0] ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI14_1] ; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b @@ -617,14 +632,16 @@ define <8 x i8> @insert4_v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c, <16 x i8> define <16 x i8> @insert4_v16i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c, <16 x i8> %d) { ; CHECK-LABEL: insert4_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mov v4.16b, v3.16b ; CHECK-NEXT: adrp x8, .LCPI15_0 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q31_q0 +; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-NEXT: mov v3.16b, v1.16b +; CHECK-NEXT: ldr q5, [x8, :lo12:.LCPI15_0] ; CHECK-NEXT: mov v0.d[1], v2.d[0] -; CHECK-NEXT: mov v2.16b, v1.16b -; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI15_0] ; CHECK-NEXT: adrp x8, .LCPI15_1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_1] -; CHECK-NEXT: tbl v31.16b, { v2.16b, v3.16b }, v4.16b +; CHECK-NEXT: tbl v31.16b, { v3.16b, v4.16b }, v5.16b ; CHECK-NEXT: tbl v0.16b, { v31.16b, v0.16b }, v1.16b ; CHECK-NEXT: ret %e1 = extractelement <8 x i8> %a, i32 4 @@ -688,7 +705,6 @@ define <16 x i16> 
@test(<2 x double> %l213, <2 x double> %l231, <2 x double> %l2 ; CHECK-NEXT: adrp x8, .LCPI16_0 ; CHECK-NEXT: frintm v1.2d, v1.2d ; CHECK-NEXT: frintm v5.2d, v5.2d -; CHECK-NEXT: ldr q16, [x8, :lo12:.LCPI16_0] ; CHECK-NEXT: frintm v2.2d, v2.2d ; CHECK-NEXT: frintm v6.2d, v6.2d ; CHECK-NEXT: frintm v3.2d, v3.2d @@ -701,16 +717,17 @@ define <16 x i16> @test(<2 x double> %l213, <2 x double> %l231, <2 x double> %l2 ; CHECK-NEXT: fcvtzs v6.2d, v6.2d ; CHECK-NEXT: fcvtzs v3.2d, v3.2d ; CHECK-NEXT: fcvtzs v7.2d, v7.2d -; CHECK-NEXT: xtn v0.2s, v0.2d -; CHECK-NEXT: xtn v4.2s, v4.2d -; CHECK-NEXT: xtn v1.2s, v1.2d -; CHECK-NEXT: xtn v5.2s, v5.2d -; CHECK-NEXT: xtn v2.2s, v2.2d -; CHECK-NEXT: xtn v6.2s, v6.2d -; CHECK-NEXT: xtn v3.2s, v3.2d -; CHECK-NEXT: xtn v7.2s, v7.2d -; CHECK-NEXT: tbl v1.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b -; CHECK-NEXT: tbl v2.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b +; CHECK-NEXT: xtn v16.2s, v0.2d +; CHECK-NEXT: xtn v20.2s, v4.2d +; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI16_0] +; CHECK-NEXT: xtn v17.2s, v1.2d +; CHECK-NEXT: xtn v21.2s, v5.2d +; CHECK-NEXT: xtn v18.2s, v2.2d +; CHECK-NEXT: xtn v22.2s, v6.2d +; CHECK-NEXT: xtn v19.2s, v3.2d +; CHECK-NEXT: xtn v23.2s, v7.2d +; CHECK-NEXT: tbl v1.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v0.16b +; CHECK-NEXT: tbl v2.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v0.16b ; CHECK-NEXT: uzp1 v0.8h, v1.8h, v2.8h ; CHECK-NEXT: uzp2 v1.8h, v1.8h, v2.8h ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/shuffles.ll b/llvm/test/CodeGen/AArch64/shuffles.ll index 00057ea3359b74..41dd7f06712d24 100644 --- a/llvm/test/CodeGen/AArch64/shuffles.ll +++ b/llvm/test/CodeGen/AArch64/shuffles.ll @@ -366,7 +366,9 @@ define <8 x i16> @test_shuf9(<8 x i16> %a, <8 x i16> %b) ; CHECKLE-LABEL: test_shuf9: ; CHECKLE: // %bb.0: ; CHECKLE-NEXT: adrp x8, .LCPI13_0 +; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI13_0] +; CHECKLE-NEXT: // kill: def 
$q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECKLE-NEXT: ret ; @@ -376,10 +378,10 @@ define <8 x i16> @test_shuf9(<8 x i16> %a, <8 x i16> %b) ; CHECKBE-NEXT: rev64 v0.16b, v0.16b ; CHECKBE-NEXT: adrp x8, .LCPI13_0 ; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI13_0 -; CHECKBE-NEXT: ld1 { v2.16b }, [x8] -; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 -; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECKBE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8 +; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECKBE-NEXT: ld1 { v0.16b }, [x8] +; CHECKBE-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b ; CHECKBE-NEXT: rev64 v0.16b, v0.16b ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECKBE-NEXT: ret @@ -416,7 +418,9 @@ define <8 x half> @test_shuf11(<8 x half> %a, <8 x half> %b) ; CHECKLE-LABEL: test_shuf11: ; CHECKLE: // %bb.0: ; CHECKLE-NEXT: adrp x8, .LCPI15_0 +; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI15_0] +; CHECKLE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECKLE-NEXT: ret ; @@ -426,10 +430,10 @@ define <8 x half> @test_shuf11(<8 x half> %a, <8 x half> %b) ; CHECKBE-NEXT: rev64 v0.16b, v0.16b ; CHECKBE-NEXT: adrp x8, .LCPI15_0 ; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI15_0 -; CHECKBE-NEXT: ld1 { v2.16b }, [x8] -; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 -; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECKBE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8 +; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECKBE-NEXT: ld1 { v0.16b }, [x8] +; CHECKBE-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b ; CHECKBE-NEXT: rev64 v0.16b, v0.16b ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECKBE-NEXT: ret @@ -442,7 +446,9 @@ define <8 x half> @test_shuf12(<8 x half> %a, <8 x half> %b) ; 
CHECKLE-LABEL: test_shuf12: ; CHECKLE: // %bb.0: ; CHECKLE-NEXT: adrp x8, .LCPI16_0 +; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI16_0] +; CHECKLE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECKLE-NEXT: ret ; @@ -452,10 +458,10 @@ define <8 x half> @test_shuf12(<8 x half> %a, <8 x half> %b) ; CHECKBE-NEXT: rev64 v0.16b, v0.16b ; CHECKBE-NEXT: adrp x8, .LCPI16_0 ; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI16_0 -; CHECKBE-NEXT: ld1 { v2.16b }, [x8] -; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 -; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECKBE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8 +; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECKBE-NEXT: ld1 { v0.16b }, [x8] +; CHECKBE-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b ; CHECKBE-NEXT: rev64 v0.16b, v0.16b ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECKBE-NEXT: ret @@ -468,7 +474,9 @@ define <8 x half> @test_shuf13(<8 x half> %a, <8 x half> %b) ; CHECKLE-LABEL: test_shuf13: ; CHECKLE: // %bb.0: ; CHECKLE-NEXT: adrp x8, .LCPI17_0 +; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI17_0] +; CHECKLE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECKLE-NEXT: ret ; @@ -478,10 +486,10 @@ define <8 x half> @test_shuf13(<8 x half> %a, <8 x half> %b) ; CHECKBE-NEXT: rev64 v0.16b, v0.16b ; CHECKBE-NEXT: adrp x8, .LCPI17_0 ; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI17_0 -; CHECKBE-NEXT: ld1 { v2.16b }, [x8] -; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 -; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECKBE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8 +; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECKBE-NEXT: ld1 { v0.16b }, [x8] +; CHECKBE-NEXT: 
tbl v0.16b, { v1.16b, v2.16b }, v0.16b ; CHECKBE-NEXT: rev64 v0.16b, v0.16b ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECKBE-NEXT: ret @@ -494,7 +502,9 @@ define <8 x half> @test_shuf14(<8 x half> %a, <8 x half> %b) ; CHECKLE-LABEL: test_shuf14: ; CHECKLE: // %bb.0: ; CHECKLE-NEXT: adrp x8, .LCPI18_0 +; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI18_0] +; CHECKLE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECKLE-NEXT: ret ; @@ -504,10 +514,10 @@ define <8 x half> @test_shuf14(<8 x half> %a, <8 x half> %b) ; CHECKBE-NEXT: rev64 v0.16b, v0.16b ; CHECKBE-NEXT: adrp x8, .LCPI18_0 ; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI18_0 -; CHECKBE-NEXT: ld1 { v2.16b }, [x8] -; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 -; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECKBE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8 +; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECKBE-NEXT: ld1 { v0.16b }, [x8] +; CHECKBE-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b ; CHECKBE-NEXT: rev64 v0.16b, v0.16b ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECKBE-NEXT: ret @@ -520,7 +530,9 @@ define <8 x half> @test_shuf15(<8 x half> %a, <8 x half> %b) ; CHECKLE-LABEL: test_shuf15: ; CHECKLE: // %bb.0: ; CHECKLE-NEXT: adrp x8, .LCPI19_0 +; CHECKLE-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: ldr q2, [x8, :lo12:.LCPI19_0] +; CHECKLE-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECKLE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECKLE-NEXT: ret ; @@ -530,10 +542,10 @@ define <8 x half> @test_shuf15(<8 x half> %a, <8 x half> %b) ; CHECKBE-NEXT: rev64 v0.16b, v0.16b ; CHECKBE-NEXT: adrp x8, .LCPI19_0 ; CHECKBE-NEXT: add x8, x8, :lo12:.LCPI19_0 -; CHECKBE-NEXT: ld1 { v2.16b }, [x8] -; CHECKBE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 -; CHECKBE-NEXT: ext v0.16b, 
v0.16b, v0.16b, #8 -; CHECKBE-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECKBE-NEXT: ext v2.16b, v1.16b, v1.16b, #8 +; CHECKBE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECKBE-NEXT: ld1 { v0.16b }, [x8] +; CHECKBE-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b ; CHECKBE-NEXT: rev64 v0.16b, v0.16b ; CHECKBE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECKBE-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/shufflevector.ll b/llvm/test/CodeGen/AArch64/shufflevector.ll index 1f81a2e4bbb824..b1131f287fe9a9 100644 --- a/llvm/test/CodeGen/AArch64/shufflevector.ll +++ b/llvm/test/CodeGen/AArch64/shufflevector.ll @@ -33,12 +33,23 @@ define <8 x i8> @shufflevector_v8i8(<8 x i8> %a, <8 x i8> %b) { } define <16 x i8> @shufflevector_v16i8(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: shufflevector_v16i8: -; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI1_0 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI1_0] -; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shufflevector_v16i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: adrp x8, .LCPI1_0 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI1_0] +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shufflevector_v16i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI1_0 +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI1_0] +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-GI-NEXT: ret %c = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %c } @@ -53,12 +64,23 @@ define <4 x i16> @shufflevector_v4i16(<4 x i16> %a, <4 x i16> %b) { } define <8 x i16> @shufflevector_v8i16(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: shufflevector_v8i16: -; CHECK: // 
%bb.0: -; CHECK-NEXT: adrp x8, .LCPI3_0 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_0] -; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shufflevector_v8i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: adrp x8, .LCPI3_0 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI3_0] +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shufflevector_v8i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI3_0 +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI3_0] +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-GI-NEXT: ret %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %c } @@ -215,25 +237,26 @@ define i32 @shufflevector_v4i8(<4 x i8> %a, <4 x i8> %b){ define <32 x i8> @shufflevector_v32i8(<32 x i8> %a, <32 x i8> %b){ ; CHECK-SD-LABEL: shufflevector_v32i8: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: mov v1.16b, v2.16b +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 def $q1_q2 ; CHECK-SD-NEXT: adrp x8, .LCPI16_0 ; CHECK-SD-NEXT: adrp x9, .LCPI16_1 -; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI16_0] -; CHECK-SD-NEXT: ldr q3, [x9, :lo12:.LCPI16_1] -; CHECK-SD-NEXT: tbl v2.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-SD-NEXT: tbl v1.16b, { v0.16b, v1.16b }, v3.16b -; CHECK-SD-NEXT: mov v0.16b, v2.16b +; CHECK-SD-NEXT: mov v1.16b, v0.16b +; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI16_0] +; CHECK-SD-NEXT: ldr q4, [x9, :lo12:.LCPI16_1] +; CHECK-SD-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v3.16b +; CHECK-SD-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v4.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shufflevector_v32i8: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: mov v1.16b, v0.16b +; CHECK-GI-NEXT: mov v3.16b, v0.16b ; 
CHECK-GI-NEXT: adrp x8, .LCPI16_1 ; CHECK-GI-NEXT: adrp x9, .LCPI16_0 +; CHECK-GI-NEXT: mov v4.16b, v2.16b ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI16_1] -; CHECK-GI-NEXT: ldr q3, [x9, :lo12:.LCPI16_0] -; CHECK-GI-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b -; CHECK-GI-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v3.16b +; CHECK-GI-NEXT: ldr q1, [x9, :lo12:.LCPI16_0] +; CHECK-GI-NEXT: tbl v0.16b, { v3.16b, v4.16b }, v0.16b +; CHECK-GI-NEXT: tbl v1.16b, { v3.16b, v4.16b }, v1.16b ; CHECK-GI-NEXT: ret %c = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> ret <32 x i8> %c @@ -275,25 +298,26 @@ define i32 @shufflevector_v2i16(<2 x i16> %a, <2 x i16> %b){ define <16 x i16> @shufflevector_v16i16(<16 x i16> %a, <16 x i16> %b){ ; CHECK-SD-LABEL: shufflevector_v16i16: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: mov v1.16b, v2.16b +; CHECK-SD-NEXT: // kill: def $q2 killed $q2 def $q1_q2 ; CHECK-SD-NEXT: adrp x8, .LCPI18_0 ; CHECK-SD-NEXT: adrp x9, .LCPI18_1 -; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI18_0] -; CHECK-SD-NEXT: ldr q3, [x9, :lo12:.LCPI18_1] -; CHECK-SD-NEXT: tbl v2.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-SD-NEXT: tbl v1.16b, { v0.16b, v1.16b }, v3.16b -; CHECK-SD-NEXT: mov v0.16b, v2.16b +; CHECK-SD-NEXT: mov v1.16b, v0.16b +; CHECK-SD-NEXT: ldr q3, [x8, :lo12:.LCPI18_0] +; CHECK-SD-NEXT: ldr q4, [x9, :lo12:.LCPI18_1] +; CHECK-SD-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v3.16b +; CHECK-SD-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v4.16b ; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: shufflevector_v16i16: ; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: mov v1.16b, v0.16b +; CHECK-GI-NEXT: mov v3.16b, v0.16b ; CHECK-GI-NEXT: adrp x8, .LCPI18_1 ; CHECK-GI-NEXT: adrp x9, .LCPI18_0 +; CHECK-GI-NEXT: mov v4.16b, v2.16b ; CHECK-GI-NEXT: ldr q0, [x8, :lo12:.LCPI18_1] -; CHECK-GI-NEXT: ldr q3, [x9, :lo12:.LCPI18_0] -; CHECK-GI-NEXT: tbl v0.16b, { v1.16b, v2.16b }, v0.16b -; CHECK-GI-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v3.16b +; CHECK-GI-NEXT: ldr q1, [x9, :lo12:.LCPI18_0] +; CHECK-GI-NEXT: tbl 
v0.16b, { v3.16b, v4.16b }, v0.16b +; CHECK-GI-NEXT: tbl v1.16b, { v3.16b, v4.16b }, v1.16b ; CHECK-GI-NEXT: ret %c = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> ret <16 x i16> %c @@ -320,8 +344,10 @@ define <8 x i32> @shufflevector_v8i32(<8 x i32> %a, <8 x i32> %b) { ; CHECK-GI-LABEL: shufflevector_v8i32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI20_0 +; CHECK-GI-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: uzp2 v0.4s, v0.4s, v1.4s ; CHECK-GI-NEXT: ldr q4, [x8, :lo12:.LCPI20_0] +; CHECK-GI-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-GI-NEXT: tbl v1.16b, { v2.16b, v3.16b }, v4.16b ; CHECK-GI-NEXT: ret %c = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> @@ -537,12 +563,23 @@ define <3 x i16> @shufflevector_v3i16(<3 x i16> %a, <3 x i16> %b) { } define <7 x i16> @shufflevector_v7i16(<7 x i16> %a, <7 x i16> %b) { -; CHECK-LABEL: shufflevector_v7i16: -; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI33_0 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI33_0] -; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: shufflevector_v7i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: adrp x8, .LCPI33_0 +; CHECK-SD-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI33_0] +; CHECK-SD-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-SD-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: shufflevector_v7i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: adrp x8, .LCPI33_0 +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI33_0] +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b +; CHECK-GI-NEXT: ret %c = shufflevector <7 x i16> %a, <7 x i16> %b, <7 x i32> ret <7 x i16> %c } @@ -557,7 +594,9 @@ define <3 x i32> @shufflevector_v3i32(<3 x i32> %a, 
<3 x i32> %b) { ; CHECK-GI-LABEL: shufflevector_v3i32: ; CHECK-GI: // %bb.0: ; CHECK-GI-NEXT: adrp x8, .LCPI34_0 +; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI34_0] +; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-GI-NEXT: ret %c = shufflevector <3 x i32> %a, <3 x i32> %b, <3 x i32> diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll index 52a161ba78525a..e7a6c0d6c549be 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll @@ -6,7 +6,9 @@ target triple = "aarch64-linux" define void @add_f16_vg1x2(i32 %slice, %zn0, %zn1) #0 { ; CHECK-LABEL: add_f16_vg1x2: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fadd za.h[w8, 0, vgx2], { z0.h, z1.h } ; CHECK-NEXT: fadd za.h[w8, 7, vgx2], { z0.h, z1.h } ; CHECK-NEXT: ret @@ -19,7 +21,11 @@ define void @add_f16_vg1x2(i32 %slice, %zn0, %zn0, %zn1, ; CHECK-LABEL: add_f16_vg1x4: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fadd za.h[w8, 0, vgx4], { z0.h - z3.h } ; CHECK-NEXT: fadd za.h[w8, 7, vgx4], { z0.h - z3.h } ; CHECK-NEXT: ret @@ -35,7 +41,9 @@ define void @add_f16_vg1x4(i32 %slice, %zn0, %zn0, %zn1) #1 { ; CHECK-LABEL: sub_f16_vg1x2: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; 
CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fsub za.h[w8, 0, vgx2], { z0.h, z1.h } ; CHECK-NEXT: fsub za.h[w8, 7, vgx2], { z0.h, z1.h } ; CHECK-NEXT: ret @@ -48,7 +56,11 @@ define void @sub_f16_vg1x2(i32 %slice, %zn0, %zn0, %zn1, ; CHECK-LABEL: sub_f16_vg1x4: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fsub za.h[w8, 0, vgx4], { z0.h - z3.h } ; CHECK-NEXT: fsub za.h[w8, 7, vgx4], { z0.h - z3.h } ; CHECK-NEXT: ret @@ -64,7 +76,9 @@ define void @sub_f16_vg1x4(i32 %slice, %zn0, %zn0, %zn1) #2 { ; CHECK-LABEL: add_bf16_vg1x2: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfadd za.h[w8, 0, vgx2], { z0.h, z1.h } ; CHECK-NEXT: bfadd za.h[w8, 7, vgx2], { z0.h, z1.h } ; CHECK-NEXT: ret @@ -77,7 +91,11 @@ define void @add_bf16_vg1x2(i32 %slice, %zn0, %zn0, %zn1, ; CHECK-LABEL: add_bf16_vg1x4: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfadd za.h[w8, 0, vgx4], { z0.h - z3.h } ; CHECK-NEXT: bfadd za.h[w8, 7, vgx4], { z0.h - z3.h } ; CHECK-NEXT: ret @@ -93,7 +111,9 @@ define void @add_bf16_vg1x4(i32 %slice, %zn0, %zn0, %zn1) #2 { ; CHECK-LABEL: sub_bf16_vg1x2: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def 
$z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfsub za.h[w8, 0, vgx2], { z0.h, z1.h } ; CHECK-NEXT: bfsub za.h[w8, 7, vgx2], { z0.h, z1.h } ; CHECK-NEXT: ret @@ -106,7 +126,11 @@ define void @sub_bf16_vg1x2(i32 %slice, %zn0, %zn0, %zn1, ; CHECK-LABEL: sub_bf16_vg1x4: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfsub za.h[w8, 0, vgx4], { z0.h - z3.h } ; CHECK-NEXT: bfsub za.h[w8, 7, vgx4], { z0.h - z3.h } ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll index 402183ab123728..ecaf8bccb71fb6 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll @@ -8,7 +8,9 @@ define void @multi_vector_add_write_single_za_vg1x2_i32(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_write_single_za_vg1x2_i32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: add za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s ; CHECK-NEXT: add za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s ; CHECK-NEXT: ret @@ -25,7 +27,9 @@ define void @multi_vector_add_write_single_za_vg1x2_i32(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_write_single_za_vg1x2_i64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: add za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d ; CHECK-NEXT: add za.d[w8, 
7, vgx2], { z0.d, z1.d }, z2.d ; CHECK-NEXT: ret @@ -46,7 +50,11 @@ define void @multi_vector_add_write_single_za_vg1x2_i64(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_add_write_single_za_vg1x4_i32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: add za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s ; CHECK-NEXT: add za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s ; CHECK-NEXT: ret @@ -67,7 +75,11 @@ define void @multi_vector_add_write_single_za_vg1x4_i32(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_add_write_za_vg1x2_i32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: add za.s[w8, 0, vgx2], { z0.s, z1.s }, { z2.s, z3.s } ; CHECK-NEXT: add za.s[w8, 7, vgx2], { z0.s, z1.s }, { z2.s, z3.s } ; CHECK-NEXT: ret @@ -112,7 +128,11 @@ define void @multi_vector_add_write_za_vg1x2_i32(i32 %slice, define void @multi_vector_add_write_za_vg1x2_i64(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_add_write_za_vg1x2_i64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: add za.d[w8, 0, vgx2], { z0.d, z1.d }, { z2.d, z3.d } ; CHECK-NEXT: add za.d[w8, 7, vgx2], { z0.d, z1.d }, { z2.d, z3.d } ; 
CHECK-NEXT: ret @@ -135,7 +155,15 @@ define void @multi_vector_add_write_za_vg1x2_i64(i32 %slice, define void @multi_vector_add_write_za_vg1x4_i32(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_add_write_za_vg1x4_i32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: add za.s[w8, 0, vgx4], { z0.s - z3.s }, { z4.s - z7.s } ; CHECK-NEXT: add za.s[w8, 7, vgx4], { z0.s - z3.s }, { z4.s - z7.s } ; CHECK-NEXT: ret @@ -159,7 +187,15 @@ define void @multi_vector_add_write_za_vg1x4_i32(i32 %slice, define void @multi_vector_add_write_za_vg1x4_i64(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_add_write_za_vg1x4_i64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: add za.d[w8, 0, vgx4], { z0.d 
- z3.d }, { z4.d - z7.d } ; CHECK-NEXT: add za.d[w8, 7, vgx4], { z0.d - z3.d }, { z4.d - z7.d } ; CHECK-NEXT: ret @@ -187,7 +223,9 @@ define void @multi_vector_add_write_za_vg1x4_i64(i32 %slice, define void @multi_vector_add_za_vg1x2_i32(i32 %slice, %zn0, %zn1) { ; CHECK-LABEL: multi_vector_add_za_vg1x2_i32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: add za.s[w8, 0, vgx2], { z0.s, z1.s } ; CHECK-NEXT: add za.s[w8, 7, vgx2], { z0.s, z1.s } ; CHECK-NEXT: ret @@ -200,7 +238,9 @@ define void @multi_vector_add_za_vg1x2_i32(i32 %slice, %zn0, define void @multi_vector_add_za_vg1x2_i64(i32 %slice, %zn0, %zn1) { ; CHECK-LABEL: multi_vector_add_za_vg1x2_i64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: add za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: add za.d[w8, 7, vgx2], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -213,7 +253,9 @@ define void @multi_vector_add_za_vg1x2_i64(i32 %slice, %zn0, define void @multi_vector_add_za_vg1x2_f32(i32 %slice, %zn0, %zn1) { ; CHECK-LABEL: multi_vector_add_za_vg1x2_f32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fadd za.s[w8, 0, vgx2], { z0.s, z1.s } ; CHECK-NEXT: fadd za.s[w8, 7, vgx2], { z0.s, z1.s } ; CHECK-NEXT: ret @@ -228,7 +270,9 @@ define void @multi_vector_add_za_vg1x2_f32(i32 %slice, %zn0 define void @multi_vector_add_za_vg1x2_f64(i32 %slice, %zn0, %zn1) { ; CHECK-LABEL: multi_vector_add_za_vg1x2_f64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: 
fadd za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: fadd za.d[w8, 7, vgx2], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -245,7 +289,11 @@ define void @multi_vector_add_za_vg1x2_f64(i32 %slice, %zn define void @multi_vector_add_za_vg1x4_i32(i32 %slice, %zn0, %zn1, %zn2, %zn3) { ; CHECK-LABEL: multi_vector_add_za_vg1x4_i32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: add za.s[w8, 0, vgx4], { z0.s - z3.s } ; CHECK-NEXT: add za.s[w8, 7, vgx4], { z0.s - z3.s } ; CHECK-NEXT: ret @@ -262,7 +310,11 @@ define void @multi_vector_add_za_vg1x4_i32(i32 %slice, %zn0, define void @multi_vector_add_za_vg1x4_i64(i32 %slice, %zn0, %zn1, %zn2, %zn3) { ; CHECK-LABEL: multi_vector_add_za_vg1x4_i64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: add za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: add za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret @@ -279,7 +331,11 @@ define void @multi_vector_add_za_vg1x4_i64(i32 %slice, %zn0, define void @multi_vector_add_za_vg1x4_f32(i32 %slice, %zn0, %zn1, %zn2, %zn3) { ; CHECK-LABEL: multi_vector_add_za_vg1x4_f32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; 
CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fadd za.s[w8, 0, vgx4], { z0.s - z3.s } ; CHECK-NEXT: fadd za.s[w8, 7, vgx4], { z0.s - z3.s } ; CHECK-NEXT: ret @@ -296,7 +352,11 @@ define void @multi_vector_add_za_vg1x4_f32(i32 %slice, %zn0 define void @multi_vector_add_za_vg1x4_f64(i32 %slice, %zn0, %zn1, %zn2, %zn3) { ; CHECK-LABEL: multi_vector_add_za_vg1x4_f64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fadd za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: fadd za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll index 613fba4a73838a..3a73ff7cdc29ce 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvtn.ll @@ -7,6 +7,8 @@ define @multi_vector_cvtn_x2_f16( %zn1, %zn2) { ; CHECK-LABEL: multi_vector_cvtn_x2_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fcvtn z0.h, { z0.s, z1.s } ; CHECK-NEXT: ret %res = call @llvm.aarch64.sve.fcvtn.x2.nxv4f32( %zn1, %zn2) @@ -20,6 +22,8 @@ define @multi_vector_cvtn_x2_f16( %zn1, define @multi_vector_bfcvtn_x2( %zn1, %zn2) { ; CHECK-LABEL: multi_vector_bfcvtn_x2: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfcvtn z0.h, { z0.s, z1.s } ; CHECK-NEXT: ret %res = call @llvm.aarch64.sve.bfcvtn.x2( %zn1, %zn2) diff --git 
a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll index 07b10fdc8eeb21..401cdd0b9dfb79 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll @@ -6,7 +6,9 @@ define void @multi_vector_add_single_vg1x2_s(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_single_vg1x2_s: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmla za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s ; CHECK-NEXT: fmla za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s ; CHECK-NEXT: ret @@ -23,7 +25,9 @@ define void @multi_vector_add_single_vg1x2_s(i32 %slice, %z define void @multi_vector_add_single_vg1x2_d(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_single_vg1x2_d: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmla za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d ; CHECK-NEXT: fmla za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d ; CHECK-NEXT: ret @@ -40,7 +44,11 @@ define void @multi_vector_add_single_vg1x2_d(i32 %slice, % define void @multi_vector_add_single_vg1x4_s(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_add_single_vg1x4_s: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmla za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s ; CHECK-NEXT: fmla za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s ; CHECK-NEXT: ret @@ -60,7 +68,11 @@ define void 
@multi_vector_add_single_vg1x4_s(i32 %slice, %z define void @multi_vector_add_single_vg1x4_d(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_add_single_vg1x4_d: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmla za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d ; CHECK-NEXT: fmla za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d ; CHECK-NEXT: ret @@ -82,7 +94,9 @@ define void @multi_vector_add_single_vg1x4_d(i32 %slice, % define void @multi_vector_sub_single_vg1x2_s(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_single_vg1x2_s: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmls za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s ; CHECK-NEXT: fmls za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s ; CHECK-NEXT: ret @@ -99,7 +113,9 @@ define void @multi_vector_sub_single_vg1x2_s(i32 %slice, %z define void @multi_vector_sub_single_vg1x2_d(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_single_vg1x2_d: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmls za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d ; CHECK-NEXT: fmls za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d ; CHECK-NEXT: ret @@ -116,7 +132,11 @@ define void @multi_vector_sub_single_vg1x2_d(i32 %slice, % define void @multi_vector_sub_single_vg1x4_s(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_sub_single_vg1x4_s: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed 
$z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmls za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s ; CHECK-NEXT: fmls za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s ; CHECK-NEXT: ret @@ -136,7 +156,11 @@ define void @multi_vector_sub_single_vg1x4_s(i32 %slice, %z define void @multi_vector_sub_single_vg1x4_d(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_sub_single_vg1x4_d: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmls za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d ; CHECK-NEXT: fmls za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d ; CHECK-NEXT: ret @@ -158,7 +182,11 @@ define void @multi_vector_sub_single_vg1x4_d(i32 %slice, % define void @multi_vector_add_vg1x2_s(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_add_vg1x2_s: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmla za.s[w8, 0, vgx2], { z0.s, z1.s }, { z2.s, z3.s } ; CHECK-NEXT: fmla za.s[w8, 7, vgx2], { z0.s, z1.s }, { z2.s, z3.s } ; CHECK-NEXT: ret @@ -176,7 +204,11 @@ define void @multi_vector_add_vg1x2_s(i32 %slice, %zn0, %zn0, %zn1, ; CHECK-LABEL: multi_vector_add_vg1x2_d: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed 
$z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmla za.d[w8, 0, vgx2], { z0.d, z1.d }, { z2.d, z3.d } ; CHECK-NEXT: fmla za.d[w8, 7, vgx2], { z0.d, z1.d }, { z2.d, z3.d } ; CHECK-NEXT: ret @@ -212,7 +244,15 @@ define void @multi_vector_add_vg1x2_s_regclass(i32 %slice, define void @multi_vector_add_vg1x4_s(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_add_vg1x4_s: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmla za.s[w8, 0, vgx4], { z0.s - z3.s }, { z4.s - z7.s } ; CHECK-NEXT: fmla za.s[w8, 7, vgx4], { z0.s - z3.s }, { z4.s - z7.s } ; CHECK-NEXT: ret @@ -230,7 +270,15 @@ define void @multi_vector_add_vg1x4_s(i32 %slice, %zn0, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_add_vg1x4_d: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed 
$z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmla za.d[w8, 0, vgx4], { z0.d - z3.d }, { z4.d - z7.d } ; CHECK-NEXT: fmla za.d[w8, 7, vgx4], { z0.d - z3.d }, { z4.d - z7.d } ; CHECK-NEXT: ret @@ -272,7 +320,11 @@ define void @multi_vector_add_vg1x4_s_regclass(i32 %slice, define void @multi_vector_sub_vg1x2_s(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_sub_vg1x2_s: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmls za.s[w8, 0, vgx2], { z0.s, z1.s }, { z2.s, z3.s } ; CHECK-NEXT: fmls za.s[w8, 7, vgx2], { z0.s, z1.s }, { z2.s, z3.s } ; CHECK-NEXT: ret @@ -290,7 +342,11 @@ define void @multi_vector_sub_vg1x2_s(i32 %slice, %zn0, %zn0, %zn1, ; CHECK-LABEL: multi_vector_sub_vg1x2_d: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmls za.d[w8, 0, vgx2], { z0.d, z1.d }, { z2.d, z3.d } ; CHECK-NEXT: fmls za.d[w8, 7, vgx2], { z0.d, z1.d }, { z2.d, z3.d } ; CHECK-NEXT: ret @@ -308,7 +364,15 @@ define void @multi_vector_sub_vg1x2_d(i32 %slice, %zn0, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_sub_vg1x4_s: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def 
$z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmls za.s[w8, 0, vgx4], { z0.s - z3.s }, { z4.s - z7.s } ; CHECK-NEXT: fmls za.s[w8, 7, vgx4], { z0.s - z3.s }, { z4.s - z7.s } ; CHECK-NEXT: ret @@ -326,7 +390,15 @@ define void @multi_vector_sub_vg1x4_s(i32 %slice, %zn0, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_sub_vg1x4_d: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmls za.d[w8, 0, vgx4], { z0.d - z3.d }, { z4.d - z7.d } ; CHECK-NEXT: fmls za.d[w8, 7, vgx4], { z0.d - z3.d }, { z4.d - z7.d } ; CHECK-NEXT: ret @@ -346,7 +418,9 @@ define void @multi_vector_sub_vg1x4_d(i32 %slice, %zn0, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_lane_vg1x2_s: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: 
fmla za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s[3] ; CHECK-NEXT: fmla za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s[3] ; CHECK-NEXT: ret @@ -363,7 +437,9 @@ define void @multi_vector_add_lane_vg1x2_s(i32 %slice, %zn0 define void @multi_vector_add_lane_vg1x2_d(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_lane_vg1x2_d: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmla za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d[1] ; CHECK-NEXT: fmla za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d[1] ; CHECK-NEXT: ret @@ -382,8 +458,8 @@ define void @multi_vector_add_lane_vg1x2_s_regclass(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_add_lane_vg1x4_s: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmla za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s[3] ; CHECK-NEXT: fmla za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s[3] ; CHECK-NEXT: ret @@ -415,7 +495,11 @@ define void @multi_vector_add_lane_vg1x4_s(i32 %slice, %zn0 define void @multi_vector_add_lane_vg1x4_d(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_add_lane_vg1x4_d: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmla za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d[1] ; CHECK-NEXT: fmla za.d[w8, 7, vgx4], { z0.d - z3.d }, 
z4.d[1] ; CHECK-NEXT: ret @@ -437,8 +521,8 @@ define void @multi_vector_add_lane_vg1x4_s_regclass(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_lane_vg1x2_s: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmls za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s[3] ; CHECK-NEXT: fmls za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s[3] ; CHECK-NEXT: ret @@ -473,7 +559,9 @@ define void @multi_vector_sub_lane_vg1x2_s(i32 %slice, %zn0 define void @multi_vector_sub_lane_vg1x2_d(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_lane_vg1x2_d: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmls za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d[1] ; CHECK-NEXT: fmls za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d[1] ; CHECK-NEXT: ret @@ -490,7 +578,11 @@ define void @multi_vector_sub_lane_vg1x2_d(i32 %slice, %zn define void @multi_vector_sub_lane_vg1x4_s(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_sub_lane_vg1x4_s: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmls za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s[3] ; CHECK-NEXT: fmls za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s[3] ; CHECK-NEXT: ret @@ -510,7 +602,11 @@ define void @multi_vector_sub_lane_vg1x4_s(i32 %slice, %zn0 define void @multi_vector_sub_lane_vg1x4_d(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_sub_lane_vg1x4_d: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 
killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmls za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d[1] ; CHECK-NEXT: fmls za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d[1] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp-dots.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp-dots.ll index cd8d22441eaa23..b4fd5a2272e7ea 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp-dots.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp-dots.ll @@ -26,18 +26,18 @@ define void @fdot_multi_za32_f16_vg1x2(i32 %slice, %unused, < define void @fdot_multi_za32_f16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: fdot_multi_za32_f16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z31.d, z4.d ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z30.d, z3.d ; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: fdot za.s[w8, 0, vgx4], { z4.h - z7.h }, { z24.h - z27.h } -; CHECK-NEXT: fdot za.s[w8, 7, vgx4], { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: fdot za.s[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h } +; CHECK-NEXT: fdot za.s[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } ; CHECK-NEXT: ret %zn4, %zn5, %zn6, %zn7) #0 { call void @llvm.aarch64.sme.fdot.za32.vg1x4.nxv8f16(i32 %slice, %zn0, %zn1, %zn2, %zn3, @@ -71,18 +71,18 @@ define void @bfdot_multi_za32_bf16_vg1x2(i32 %slice, %unused, define void 
@fdot_multi_za32_bf16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: fdot_multi_za32_bf16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z31.d, z4.d ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z30.d, z3.d ; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: bfdot za.s[w8, 0, vgx4], { z4.h - z7.h }, { z24.h - z27.h } -; CHECK-NEXT: bfdot za.s[w8, 7, vgx4], { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: bfdot za.s[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h } +; CHECK-NEXT: bfdot za.s[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } ; CHECK-NEXT: ret %zn4, %zn5, %zn6, %zn7) #0 { call void @llvm.aarch64.sme.fdot.za32.vg1x4.nxv8bf16(i32 %slice, %zn0, %zn1, %zn2, %zn3, @@ -99,7 +99,9 @@ define void @fdot_multi_za32_bf16_vg1x4(i32 %slice, %unused, define void @fdot_single_za32_f16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: fdot_single_za32_f16_vg1x2: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: fdot za.s[w8, 0, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: fdot za.s[w8, 7, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: ret @@ -112,7 +114,11 @@ define void @fdot_single_za32_f16_vg1x2(i32 %slice, %unused, define void @fdot_single_za32_f16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, %zn4) #0 { ; CHECK-LABEL: fdot_single_za32_f16_vg1x4: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def 
$z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: fdot za.s[w8, 0, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: fdot za.s[w8, 7, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: ret @@ -128,7 +134,9 @@ define void @fdot_single_za32_f16_vg1x4(i32 %slice, %unused, define void @bfdot_single_za32_bf16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: bfdot_single_za32_bf16_vg1x2: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: bfdot za.s[w8, 0, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: bfdot za.s[w8, 7, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: ret @@ -141,7 +149,11 @@ define void @bfdot_single_za32_bf16_vg1x2(i32 %slice, %unused define void @bfdot_single_za32_bf16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, %zn4) #0 { ; CHECK-LABEL: bfdot_single_za32_bf16_vg1x4: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: bfdot za.s[w8, 0, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: bfdot za.s[w8, 7, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: ret @@ -158,8 +170,8 @@ define void @fdot_lane_za32_f16_vg1x2(i32 %slice, %unused, %unused, %unused, ; CHECK-LABEL: bfdot_lane_za32_bf16_vg1x2: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: bfdot za.s[w8, 0, vgx2], { z4.h, z5.h }, z3.h[3] ; CHECK-NEXT: bfdot za.s[w8, 7, vgx2], { z4.h, z5.h }, z3.h[3] ; CHECK-NEXT: ret @@ -210,8 
+222,8 @@ define void @bfdot_lane_za32_bf16_vg1x4(i32 %slice, %unused, ; CHECK-LABEL: bfdot_lane_za32_bf16_vg1x4: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z26.d, z3.d ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: mov z26.d, z3.d ; CHECK-NEXT: mov z25.d, z2.d ; CHECK-NEXT: mov z24.d, z1.d ; CHECK-NEXT: bfdot za.s[w8, 0, vgx4], { z24.h - z27.h }, z5.h[3] diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll index f144e33793fe80..99de6f832a3c91 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll @@ -10,7 +10,9 @@ define void @za_write_vg2_horiz_b(i32 %slice, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_horiz_b: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0h.b[w12, 0:1], { z0.b, z1.b } ; CHECK-NEXT: mov za0h.b[w12, 14:15], { z0.b, z1.b } ; CHECK-NEXT: ret @@ -23,7 +25,9 @@ define void @za_write_vg2_horiz_b(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_horiz_h: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0h.h[w12, 0:1], { z0.h, z1.h } ; CHECK-NEXT: mov za1h.h[w12, 6:7], { z0.h, z1.h } ; CHECK-NEXT: ret @@ -36,7 +40,9 @@ define void @za_write_vg2_horiz_h(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_horiz_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0h.h[w12, 0:1], { z0.h, z1.h } ; CHECK-NEXT: mov za1h.h[w12, 6:7], { z0.h, z1.h } ; CHECK-NEXT: ret @@ -49,7 +55,9 @@ define void @za_write_vg2_horiz_f16(i32 
%slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_horiz_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0h.h[w12, 0:1], { z0.h, z1.h } ; CHECK-NEXT: mov za1h.h[w12, 6:7], { z0.h, z1.h } ; CHECK-NEXT: ret @@ -62,7 +70,9 @@ define void @za_write_vg2_horiz_bf16(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_horiz_s: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0h.s[w12, 0:1], { z0.s, z1.s } ; CHECK-NEXT: mov za3h.s[w12, 2:3], { z0.s, z1.s } ; CHECK-NEXT: ret @@ -75,7 +85,9 @@ define void @za_write_vg2_horiz_s(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_horiz_f32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0h.s[w12, 0:1], { z0.s, z1.s } ; CHECK-NEXT: mov za3h.s[w12, 2:3], { z0.s, z1.s } ; CHECK-NEXT: ret @@ -88,7 +100,9 @@ define void @za_write_vg2_horiz_f32(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_horiz_d: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0h.d[w12, 0:1], { z0.d, z1.d } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.write.hor.vg2.nxv2i64(i32 0, i32 %slice, %zn1, %zn2) @@ -98,7 +112,9 @@ define void @za_write_vg2_horiz_d(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_horiz_f64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov 
za0h.d[w12, 0:1], { z0.d, z1.d } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.write.hor.vg2.nxv2f64(i32 0, i32 %slice, %zn1, %zn2) @@ -110,7 +126,9 @@ define void @za_write_vg2_horiz_f64(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_vert_b: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0v.b[w12, 0:1], { z0.b, z1.b } ; CHECK-NEXT: mov za0v.b[w12, 14:15], { z0.b, z1.b } ; CHECK-NEXT: ret @@ -123,7 +141,9 @@ define void @za_write_vg2_vert_b(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_vert_h: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0v.h[w12, 0:1], { z0.h, z1.h } ; CHECK-NEXT: mov za1v.h[w12, 6:7], { z0.h, z1.h } ; CHECK-NEXT: ret @@ -136,7 +156,9 @@ define void @za_write_vg2_vert_h(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_vert_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0v.h[w12, 0:1], { z0.h, z1.h } ; CHECK-NEXT: mov za1v.h[w12, 6:7], { z0.h, z1.h } ; CHECK-NEXT: ret @@ -149,7 +171,9 @@ define void @za_write_vg2_vert_f16(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_vert_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0v.h[w12, 0:1], { z0.h, z1.h } ; CHECK-NEXT: mov za1v.h[w12, 6:7], { z0.h, z1.h } ; CHECK-NEXT: ret @@ -162,7 +186,9 @@ define void @za_write_vg2_vert_bf16(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_vert_s: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: 
def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0v.s[w12, 0:1], { z0.s, z1.s } ; CHECK-NEXT: mov za3v.s[w12, 2:3], { z0.s, z1.s } ; CHECK-NEXT: ret @@ -175,7 +201,9 @@ define void @za_write_vg2_vert_s(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_vert_f32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0v.s[w12, 0:1], { z0.s, z1.s } ; CHECK-NEXT: mov za3v.s[w12, 2:3], { z0.s, z1.s } ; CHECK-NEXT: ret @@ -188,7 +216,9 @@ define void @za_write_vg2_vert_f32(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_vert_d: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0v.d[w12, 0:1], { z0.d, z1.d } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.write.ver.vg2.nxv2i64(i32 0, i32 %slice, %zn1, %zn2) @@ -198,7 +228,9 @@ define void @za_write_vg2_vert_d(i32 %slice, %zn1, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_vert_f64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0v.d[w12, 0:1], { z0.d, z1.d } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.write.ver.vg2.nxv2f64(i32 0, i32 %slice, %zn1, %zn2) @@ -214,7 +246,11 @@ define void @za_write_vg2_vert_f64(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_horiz_b: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def 
$z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0h.b[w12, 0:3], { z0.b - z3.b } ; CHECK-NEXT: mov za0h.b[w12, 12:15], { z0.b - z3.b } ; CHECK-NEXT: ret @@ -227,7 +263,11 @@ define void @za_write_vg4_horiz_b(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_horiz_h: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0h.h[w12, 0:3], { z0.h - z3.h } ; CHECK-NEXT: mov za1h.h[w12, 4:7], { z0.h - z3.h } ; CHECK-NEXT: ret @@ -240,7 +280,11 @@ define void @za_write_vg4_horiz_h(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_horiz_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0h.h[w12, 0:3], { z0.h - z3.h } ; CHECK-NEXT: mov za1h.h[w12, 4:7], { z0.h - z3.h } ; CHECK-NEXT: ret @@ -253,7 +297,11 @@ define void @za_write_vg4_horiz_f16(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_horiz_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov 
za0h.h[w12, 0:3], { z0.h - z3.h } ; CHECK-NEXT: mov za1h.h[w12, 4:7], { z0.h - z3.h } ; CHECK-NEXT: ret @@ -266,7 +314,11 @@ define void @za_write_vg4_horiz_bf16(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_horiz_s: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0h.s[w12, 0:3], { z0.s - z3.s } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.write.hor.vg4.nxv4i32(i32 0, i32 %slice, %zn1, %zn2, %zn3, %zn4) @@ -276,7 +328,11 @@ define void @za_write_vg4_horiz_s(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_horiz_f32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0h.s[w12, 0:3], { z0.s - z3.s } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.write.hor.vg4.nxv4f32(i32 0, i32 %slice, %zn1, %zn2, %zn3, %zn4) @@ -286,7 +342,11 @@ define void @za_write_vg4_horiz_f32(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_horiz_d: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0h.d[w12, 0:3], { z0.d - z3.d } ; 
CHECK-NEXT: ret call void @llvm.aarch64.sme.write.hor.vg4.nxv2i64(i32 0, i32 %slice, %zn1, %zn2, %zn3, %zn4) @@ -296,7 +356,11 @@ define void @za_write_vg4_horiz_d(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_horiz_f64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0h.d[w12, 0:3], { z0.d - z3.d } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.write.hor.vg4.nxv2f64(i32 0, i32 %slice, %zn1, %zn2, %zn3, %zn4) @@ -308,7 +372,11 @@ define void @za_write_vg4_horiz_f64(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_vert_b: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0v.b[w12, 0:3], { z0.b - z3.b } ; CHECK-NEXT: mov za0v.b[w12, 12:15], { z0.b - z3.b } ; CHECK-NEXT: ret @@ -321,7 +389,11 @@ define void @za_write_vg4_vert_b(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_vert_h: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0v.h[w12, 0:3], { z0.h - z3.h } ; CHECK-NEXT: mov za1v.h[w12, 4:7], { z0.h - 
z3.h } ; CHECK-NEXT: ret @@ -334,7 +406,11 @@ define void @za_write_vg4_vert_h(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_vert_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0v.h[w12, 0:3], { z0.h - z3.h } ; CHECK-NEXT: mov za1v.h[w12, 4:7], { z0.h - z3.h } ; CHECK-NEXT: ret @@ -347,7 +423,11 @@ define void @za_write_vg4_vert_f16(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_vert_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0v.h[w12, 0:3], { z0.h - z3.h } ; CHECK-NEXT: mov za1v.h[w12, 4:7], { z0.h - z3.h } ; CHECK-NEXT: ret @@ -360,7 +440,11 @@ define void @za_write_vg4_vert_bf16(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_vert_s: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0v.s[w12, 0:3], { z0.s - z3.s } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.write.ver.vg4.nxv4i32(i32 0, i32 %slice, %zn1, %zn2, %zn3, %zn4) @@ -370,7 +454,11 @@ define void @za_write_vg4_vert_s(i32 
%slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_vert_f32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0v.s[w12, 0:3], { z0.s - z3.s } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.write.ver.vg4.nxv4f32(i32 0, i32 %slice, %zn1, %zn2, %zn3, %zn4) @@ -380,7 +468,11 @@ define void @za_write_vg4_vert_f32(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_vert_d: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0v.d[w12, 0:3], { z0.d - z3.d } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.write.ver.vg4.nxv2i64(i32 0, i32 %slice, %zn1, %zn2, %zn3, %zn4) @@ -390,7 +482,11 @@ define void @za_write_vg4_vert_d(i32 %slice, %zn1, %zn1, %zn2, %zn3, %zn4) { ; CHECK-LABEL: za_write_vg4_vert_f64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za0v.d[w12, 0:3], { z0.d - z3.d } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.write.ver.vg4.nxv2f64(i32 0, i32 %slice, %zn1, %zn2, %zn3, %zn4) @@ -404,7 +500,9 @@ define void @za_write_vg4_vert_f64(i32 
%slice, %zn1, %za1, %za2) { ; CHECK-LABEL: za_write_vg1x2_b: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -417,7 +515,9 @@ define void @za_write_vg1x2_b(i32 %slice, %za1, %za1, %za2) { ; CHECK-LABEL: za_write_vg1x2_h: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -430,7 +530,9 @@ define void @za_write_vg1x2_h(i32 %slice, %za1, %za1, %za2) { ; CHECK-LABEL: za_write_vg1x2_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -443,7 +545,9 @@ define void @za_write_vg1x2_f16(i32 %slice, %za1, %za1, %za2) { ; CHECK-LABEL: za_write_vg1x2_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -456,7 +560,9 @@ define void @za_write_vg1x2_bf16(i32 %slice, %za1, %za1, %za2) { ; CHECK-LABEL: za_write_vg1x2_s: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: mov za.d[w8, 
7, vgx2], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -469,7 +575,9 @@ define void @za_write_vg1x2_s(i32 %slice, %za1, %za1, %za2) { ; CHECK-LABEL: za_write_vg1x2_f32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -482,7 +590,9 @@ define void @za_write_vg1x2_f32(i32 %slice, %za1, %za1, %za2) { ; CHECK-LABEL: za_write_vg1x2_d: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -495,7 +605,9 @@ define void @za_write_vg1x2_d(i32 %slice, %za1, %za1, %za2) { ; CHECK-LABEL: za_write_vg1x2_f64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -512,7 +624,11 @@ define void @za_write_vg1x2_f64(i32 %slice, %za1, %za1, %za2, %za3, %za4) { ; CHECK-LABEL: za_write_vg1x4_b: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret @@ -525,7 +641,11 @@ define void @za_write_vg1x4_b(i32 %slice, %za1, %za1, %za2, 
%za3, %za4) { ; CHECK-LABEL: za_write_vg1x4_h: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret @@ -538,7 +658,11 @@ define void @za_write_vg1x4_h(i32 %slice, %za1, %za1, %za2, %za3, %za4) { ; CHECK-LABEL: za_write_vg1x4_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret @@ -551,7 +675,11 @@ define void @za_write_vg1x4_f16(i32 %slice, %za1, %za1, %za2, %za3, %za4) { ; CHECK-LABEL: za_write_vg1x4_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret @@ -564,7 +692,11 @@ define void @za_write_vg1x4_bf16(i32 %slice, %za1, %za1, %za2, %za3, %za4) { ; CHECK-LABEL: za_write_vg1x4_s: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def 
$z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret @@ -577,7 +709,11 @@ define void @za_write_vg1x4_s(i32 %slice, %za1, %za1, %za2, %za3, %za4) { ; CHECK-LABEL: za_write_vg1x4_f32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret @@ -590,7 +726,11 @@ define void @za_write_vg1x4_f32(i32 %slice, %za1, %za1, %za2, %za3, %za4) { ; CHECK-LABEL: za_write_vg1x4_d: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret @@ -603,7 +743,11 @@ define void @za_write_vg1x4_d(i32 %slice, %za1, %za1, %za2, %za3, %za4) { ; CHECK-LABEL: za_write_vg1x4_f64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def 
$z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-int-dots.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-int-dots.ll index 3ce77cd8e03216..e154a4df86efe1 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-int-dots.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-int-dots.ll @@ -26,18 +26,18 @@ define void @udot_multi_za32_u16_vg1x2(i32 %slice, %unused, < define void @udot_multi_za32_u16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: udot_multi_za32_u16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z31.d, z4.d ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z30.d, z3.d ; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z4.h - z7.h }, { z24.h - z27.h } -; CHECK-NEXT: udot za.s[w8, 7, vgx4], { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h } +; CHECK-NEXT: udot za.s[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } ; CHECK-NEXT: ret %zn4, %zn5, %zn6, %zn7) #0 { call void @llvm.aarch64.sme.udot.za32.vg1x4.nxv8i16(i32 %slice, %zn0, %zn1, %zn2, %zn3, @@ -68,18 +68,18 @@ define void @udot_multi_za32_u8_vg1x2(i32 %slice, %unused, %unused, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: udot_multi_za32_u8_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov 
z31.d, z4.d ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z30.d, z3.d ; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1] -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z4.b - z7.b }, { z24.b - z27.b } -; CHECK-NEXT: udot za.s[w8, 7, vgx4], { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z28.b - z31.b }, { z24.b - z27.b } +; CHECK-NEXT: udot za.s[w8, 7, vgx4], { z28.b - z31.b }, { z24.b - z27.b } ; CHECK-NEXT: ret %zn4, %zn5, %zn6, %zn7) #0 { call void @llvm.aarch64.sme.udot.za32.vg1x4.nxv16i8(i32 %slice, %zn0, %zn1, %zn2, %zn3, @@ -110,18 +110,18 @@ define void @udot_multi_za64_u16_vg1x2(i32 %slice, %unused, < define void @udot_multi_za64_u16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: udot_multi_za64_u16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z31.d, z4.d ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z30.d, z3.d ; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: udot za.d[w8, 0, vgx4], { z4.h - z7.h }, { z24.h - z27.h } -; CHECK-NEXT: udot za.d[w8, 7, vgx4], { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: udot za.d[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h } +; CHECK-NEXT: udot za.d[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } ; CHECK-NEXT: ret %zn4, %zn5, %zn6, %zn7) #1 { call void @llvm.aarch64.sme.udot.za64.vg1x4.nxv8i16(i32 %slice, %zn0, %zn1, %zn2, %zn3, @@ -152,18 +152,18 @@ define void @usdot_multi_za32_u8_vg1x2(i32 
%slice, %unused, < define void @usdot_multi_za32_u8_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: usdot_multi_za32_u8_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z31.d, z4.d ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z30.d, z3.d ; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1] -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z4.b - z7.b }, { z24.b - z27.b } -; CHECK-NEXT: usdot za.s[w8, 7, vgx4], { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z28.b - z31.b }, { z24.b - z27.b } +; CHECK-NEXT: usdot za.s[w8, 7, vgx4], { z28.b - z31.b }, { z24.b - z27.b } ; CHECK-NEXT: ret %zn4, %zn5, %zn6, %zn7) #0 { call void @llvm.aarch64.sme.usdot.za32.vg1x4.nxv16i8(i32 %slice, %zn0, %zn1, %zn2, %zn3, @@ -197,18 +197,18 @@ define void @sdot_multi_za32_u16_vg1x2(i32 %slice, %unused, < define void @sdot_multi_za32_u16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: sdot_multi_za32_u16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z31.d, z4.d ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z30.d, z3.d ; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z4.h - z7.h }, { z24.h - z27.h } -; CHECK-NEXT: sdot za.s[w8, 7, vgx4], { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z28.h - 
z31.h }, { z24.h - z27.h } +; CHECK-NEXT: sdot za.s[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } ; CHECK-NEXT: ret %zn4, %zn5, %zn6, %zn7) #0 { call void @llvm.aarch64.sme.sdot.za32.vg1x4.nxv8i16(i32 %slice, %zn0, %zn1, %zn2, %zn3, @@ -239,18 +239,18 @@ define void @sdot_multi_za32_u8_vg1x2(i32 %slice, %unused, %unused, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: sdot_multi_za32_u8_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z31.d, z4.d ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z30.d, z3.d ; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1] -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z4.b - z7.b }, { z24.b - z27.b } -; CHECK-NEXT: sdot za.s[w8, 7, vgx4], { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z28.b - z31.b }, { z24.b - z27.b } +; CHECK-NEXT: sdot za.s[w8, 7, vgx4], { z28.b - z31.b }, { z24.b - z27.b } ; CHECK-NEXT: ret %zn4, %zn5, %zn6, %zn7) #0 { call void @llvm.aarch64.sme.sdot.za32.vg1x4.nxv16i8(i32 %slice, %zn0, %zn1, %zn2, %zn3, @@ -281,18 +281,18 @@ define void @sdot_multi_za64_u16_vg1x2(i32 %slice, %unused, < define void @sdot_multi_za64_u16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: sdot_multi_za64_u16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z31.d, z4.d ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z30.d, z3.d ; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: 
mov z4.d, z1.d -; CHECK-NEXT: sdot za.d[w8, 0, vgx4], { z4.h - z7.h }, { z24.h - z27.h } -; CHECK-NEXT: sdot za.d[w8, 7, vgx4], { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: sdot za.d[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h } +; CHECK-NEXT: sdot za.d[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } ; CHECK-NEXT: ret %zn4, %zn5, %zn6, %zn7) #1 { call void @llvm.aarch64.sme.sdot.za64.vg1x4.nxv8i16(i32 %slice, %zn0, %zn1, %zn2, %zn3, @@ -309,7 +309,9 @@ define void @sdot_multi_za64_u16_vg1x4(i32 %slice, %unused, < define void @udot_single_za32_u16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: udot_single_za32_u16_vg1x2: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: udot za.s[w8, 0, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: udot za.s[w8, 7, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: ret @@ -322,7 +324,11 @@ define void @udot_single_za32_u16_vg1x2(i32 %slice, %unused, define void @udot_single_za32_u16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, %zn4) #0 { ; CHECK-LABEL: udot_single_za32_u16_vg1x4: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: udot za.s[w8, 7, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: ret @@ -335,7 +341,9 @@ define void @udot_single_za32_u16_vg1x4(i32 %slice, %unused, define void @udot_single_za32_u8_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: udot_single_za32_u8_vg1x2: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 
killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: udot za.s[w8, 0, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: udot za.s[w8, 7, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: ret @@ -348,7 +356,11 @@ define void @udot_single_za32_u8_vg1x2(i32 %slice, %unused, < define void @udot_single_za32_u8_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, %zn4) #0 { ; CHECK-LABEL: udot_single_za32_u8_vg1x4: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: udot za.s[w8, 7, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: ret @@ -361,7 +373,9 @@ define void @udot_single_za32_u8_vg1x4(i32 %slice, %unused, < define void @udot_single_za64_u16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #1 { ; CHECK-LABEL: udot_single_za64_u16_vg1x2: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: udot za.d[w8, 0, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: udot za.d[w8, 7, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: ret @@ -374,7 +388,11 @@ define void @udot_single_za64_u16_vg1x2(i32 %slice, %unused, define void @udot_single_za64_u16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, %zn4) #1 { ; CHECK-LABEL: udot_single_za64_u16_vg1x4: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def 
$z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: udot za.d[w8, 0, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: udot za.d[w8, 7, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: ret @@ -387,7 +405,9 @@ define void @udot_single_za64_u16_vg1x4(i32 %slice, %unused, define void @usdot_single_za32_u8_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: usdot_single_za32_u8_vg1x2: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: usdot za.s[w8, 0, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: usdot za.s[w8, 7, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: ret @@ -400,7 +420,11 @@ define void @usdot_single_za32_u8_vg1x2(i32 %slice, %unused, define void @usdot_single_za32_u8_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, %zn4) #0 { ; CHECK-LABEL: usdot_single_za32_u8_vg1x4: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: usdot za.s[w8, 7, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: ret @@ -416,7 +440,9 @@ define void @usdot_single_za32_u8_vg1x4(i32 %slice, %unused, define void @sdot_single_za32_u16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: sdot_single_za32_u16_vg1x2: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: sdot za.s[w8, 0, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: sdot za.s[w8, 7, vgx2], { z1.h, z2.h }, z3.h ; 
CHECK-NEXT: ret @@ -429,7 +455,11 @@ define void @sdot_single_za32_u16_vg1x2(i32 %slice, %unused, define void @sdot_single_za32_u16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, %zn4) #0 { ; CHECK-LABEL: sdot_single_za32_u16_vg1x4: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: sdot za.s[w8, 7, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: ret @@ -442,7 +472,9 @@ define void @sdot_single_za32_u16_vg1x4(i32 %slice, %unused, define void @sdot_single_za32_u8_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: sdot_single_za32_u8_vg1x2: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: sdot za.s[w8, 0, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: sdot za.s[w8, 7, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: ret @@ -455,7 +487,11 @@ define void @sdot_single_za32_u8_vg1x2(i32 %slice, %unused, < define void @sdot_single_za32_u8_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, %zn4) #0 { ; CHECK-LABEL: sdot_single_za32_u8_vg1x4: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: sdot za.s[w8, 7, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: ret @@ -468,7 +504,9 
@@ define void @sdot_single_za32_u8_vg1x4(i32 %slice, %unused, < define void @sdot_single_za64_u16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #1 { ; CHECK-LABEL: sdot_single_za64_u16_vg1x2: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: sdot za.d[w8, 0, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: sdot za.d[w8, 7, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: ret @@ -481,7 +519,11 @@ define void @sdot_single_za64_u16_vg1x2(i32 %slice, %unused, define void @sdot_single_za64_u16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, %zn4) #1 { ; CHECK-LABEL: sdot_single_za64_u16_vg1x4: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: sdot za.d[w8, 0, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: sdot za.d[w8, 7, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: ret @@ -494,7 +536,9 @@ define void @sdot_single_za64_u16_vg1x4(i32 %slice, %unused, define void @sudot_single_za32_u8_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: sudot_single_za32_u8_vg1x2: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: sudot za.s[w8, 0, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: sudot za.s[w8, 7, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: ret @@ -507,7 +551,11 @@ define void @sudot_single_za32_u8_vg1x2(i32 %slice, %unused, define void @sudot_single_za32_u8_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, %zn4) #0 { ; CHECK-LABEL: sudot_single_za32_u8_vg1x4: ; CHECK: // %bb.0: +; 
CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: sudot za.s[w8, 0, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: sudot za.s[w8, 7, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: ret @@ -523,8 +571,8 @@ define void @udot_lane_za32_u16_vg1x2(i32 %slice, %unused, %unused, %zn0, %zn1, %zn2, %zn3, %zn4) #0 { ; CHECK-LABEL: udot_lane_za32_u16_vg1x4: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z0.h - z3.h }, z4.h[3] ; CHECK-NEXT: udot za.s[w8, 7, vgx4], { z0.h - z3.h }, z4.h[3] ; CHECK-NEXT: ret @@ -553,8 +605,8 @@ define void @udot_lane_za32_u8_vg1x2(i32 %slice, %unused, %unused, %unused, , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) - %2 = extractvalue { , } %1, 0 - %3 = extractvalue { , } %1, 1 - %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride - %4 = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) - %5 = extractvalue { , } %4, 0 - %6 = extractvalue { , } %4, 1 - tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x2.nxv16i8(i32 0, %2, %5, undef, i32 0) - tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x2.nxv16i8(i32 0, %3, %6, undef, i32 0) - ret void -} - -define void @udot_form_4x_tuple(ptr %ptr, i64 %stride) #0 { -; CHECK-LABEL: udot_form_4x_tuple: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str d14, [sp, #-48]! 
// 8-byte Folded Spill -; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: ptrue pn8.b -; CHECK-NEXT: lsl x9, x1, #1 -; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: ld1b { z17.b, z21.b, z25.b, z29.b }, pn8/z, [x0] -; CHECK-NEXT: ld1b { z16.b, z20.b, z24.b, z28.b }, pn8/z, [x0, x1] -; CHECK-NEXT: ld1b { z2.b, z6.b, z10.b, z14.b }, pn8/z, [x0, x9] -; CHECK-NEXT: add x9, x9, x1 -; CHECK-NEXT: mov z0.d, z17.d -; CHECK-NEXT: mov z1.d, z16.d -; CHECK-NEXT: ld1b { z16.b - z19.b }, pn8/z, [x0, x9] -; CHECK-NEXT: mov z4.d, z21.d -; CHECK-NEXT: mov z5.d, z20.d -; CHECK-NEXT: mov z8.d, z25.d -; CHECK-NEXT: mov z9.d, z24.d -; CHECK-NEXT: mov z3.d, z16.d -; CHECK-NEXT: mov z7.d, z17.d -; CHECK-NEXT: mov z11.d, z18.d -; CHECK-NEXT: mov z16.d, z29.d -; CHECK-NEXT: mov z17.d, z28.d -; CHECK-NEXT: mov z18.d, z14.d -; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] -; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z4.b - z7.b }, z0.b[0] -; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z8.b - z11.b }, z0.b[0] -; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0] -; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr d14, [sp], #48 // 8-byte Folded Reload -; CHECK-NEXT: ret -entry: - %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() - %1 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) - %2 = extractvalue { , , , } %1, 0 - %3 = extractvalue { , , , } %1, 1 - %4 = extractvalue { , , , } %1, 2 - %5 = extractvalue { , , , } %1, 3 - %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride - %6 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) - %7 = extractvalue { , , , } %6, 0 - %8 = extractvalue { , , , } %6, 1 - %9 = extractvalue { , , , } %6, 2 - %10 = extractvalue { , , , } %6, 3 - 
%mul3 = shl i64 %stride, 1 - %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3 - %11 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx4) - %12 = extractvalue { , , , } %11, 0 - %13 = extractvalue { , , , } %11, 1 - %14 = extractvalue { , , , } %11, 2 - %15 = extractvalue { , , , } %11, 3 - %mul5 = mul i64 %stride, 3 - %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5 - %16 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx6) - %17 = extractvalue { , , , } %16, 0 - %18 = extractvalue { , , , } %16, 1 - %19 = extractvalue { , , , } %16, 2 - %20 = extractvalue { , , , } %16, 3 - tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x4.nxv16i8(i32 0, %2, %7, %12, %17, undef, i32 0) - tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x4.nxv16i8(i32 0, %3, %8, %13, %18, undef, i32 0) - tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x4.nxv16i8(i32 0, %4, %9, %14, %19, undef, i32 0) - tail call void @llvm.aarch64.sme.udot.lane.za32.vg1x4.nxv16i8(i32 0, %5, %10, %15, %20, undef, i32 0) - ret void -} - define void @udot_lane_za64_u16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #1 { ; CHECK-LABEL: udot_lane_za64_u16_vg1x2: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: udot za.d[w8, 0, vgx2], { z4.h, z5.h }, z3.h[1] ; CHECK-NEXT: udot za.d[w8, 7, vgx2], { z4.h, z5.h }, z3.h[1] ; CHECK-NEXT: ret @@ -697,8 +654,8 @@ define void @udot_lane_za64_u16_vg1x4(i32 %slice, %unused, %unused, %unused, %unused, , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) - %2 = extractvalue { , } %1, 0 - %3 = extractvalue { , } %1, 1 - %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride - %4 = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) - %5 = extractvalue { , } %4, 0 - %6 = 
extractvalue { , } %4, 1 - tail call void @llvm.aarch64.sme.usdot.lane.za32.vg1x2.nxv16i8(i32 0, %2, %5, undef, i32 0) - tail call void @llvm.aarch64.sme.usdot.lane.za32.vg1x2.nxv16i8(i32 0, %3, %6, undef, i32 0) - ret void -} - -define void @usdot_form_4x_tuple(ptr %ptr, i64 %stride) #0 { -; CHECK-LABEL: usdot_form_4x_tuple: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str d14, [sp, #-48]! // 8-byte Folded Spill -; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: ptrue pn8.b -; CHECK-NEXT: lsl x9, x1, #1 -; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: ld1b { z17.b, z21.b, z25.b, z29.b }, pn8/z, [x0] -; CHECK-NEXT: ld1b { z16.b, z20.b, z24.b, z28.b }, pn8/z, [x0, x1] -; CHECK-NEXT: ld1b { z2.b, z6.b, z10.b, z14.b }, pn8/z, [x0, x9] -; CHECK-NEXT: add x9, x9, x1 -; CHECK-NEXT: mov z0.d, z17.d -; CHECK-NEXT: mov z1.d, z16.d -; CHECK-NEXT: ld1b { z16.b - z19.b }, pn8/z, [x0, x9] -; CHECK-NEXT: mov z4.d, z21.d -; CHECK-NEXT: mov z5.d, z20.d -; CHECK-NEXT: mov z8.d, z25.d -; CHECK-NEXT: mov z9.d, z24.d -; CHECK-NEXT: mov z3.d, z16.d -; CHECK-NEXT: mov z7.d, z17.d -; CHECK-NEXT: mov z11.d, z18.d -; CHECK-NEXT: mov z16.d, z29.d -; CHECK-NEXT: mov z17.d, z28.d -; CHECK-NEXT: mov z18.d, z14.d -; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] -; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z4.b - z7.b }, z0.b[0] -; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z8.b - z11.b }, z0.b[0] -; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0] -; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr d14, [sp], #48 // 8-byte Folded Reload -; CHECK-NEXT: ret -entry: - %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() - %1 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) - %2 = extractvalue { , , , } %1, 0 - %3 = extractvalue { , , , } 
%1, 1 - %4 = extractvalue { , , , } %1, 2 - %5 = extractvalue { , , , } %1, 3 - %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride - %6 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) - %7 = extractvalue { , , , } %6, 0 - %8 = extractvalue { , , , } %6, 1 - %9 = extractvalue { , , , } %6, 2 - %10 = extractvalue { , , , } %6, 3 - %mul3 = shl i64 %stride, 1 - %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3 - %11 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx4) - %12 = extractvalue { , , , } %11, 0 - %13 = extractvalue { , , , } %11, 1 - %14 = extractvalue { , , , } %11, 2 - %15 = extractvalue { , , , } %11, 3 - %mul5 = mul i64 %stride, 3 - %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5 - %16 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx6) - %17 = extractvalue { , , , } %16, 0 - %18 = extractvalue { , , , } %16, 1 - %19 = extractvalue { , , , } %16, 2 - %20 = extractvalue { , , , } %16, 3 - tail call void @llvm.aarch64.sme.usdot.lane.za32.vg1x4.nxv16i8(i32 0, %2, %7, %12, %17, undef, i32 0) - tail call void @llvm.aarch64.sme.usdot.lane.za32.vg1x4.nxv16i8(i32 0, %3, %8, %13, %18, undef, i32 0) - tail call void @llvm.aarch64.sme.usdot.lane.za32.vg1x4.nxv16i8(i32 0, %4, %9, %14, %19, undef, i32 0) - tail call void @llvm.aarch64.sme.usdot.lane.za32.vg1x4.nxv16i8(i32 0, %5, %10, %15, %20, undef, i32 0) - ret void -} ; == Multi, indexed (signed) == @@ -847,8 +710,8 @@ define void @sdot_lane_za32_u16_vg1x2(i32 %slice, %unused, %unused, %unused, %unused, %unused, , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) - %2 = extractvalue { , } %1, 0 - %3 = extractvalue { , } %1, 1 - %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride - %4 = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) - %5 = 
extractvalue { , } %4, 0 - %6 = extractvalue { , } %4, 1 - tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x2.nxv16i8(i32 0, %2, %5, undef, i32 0) - tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x2.nxv16i8(i32 0, %3, %6, undef, i32 0) - ret void -} - -define void @sdot_form_4x_tuple(ptr %ptr, i64 %stride) #0 { -; CHECK-LABEL: sdot_form_4x_tuple: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str d14, [sp, #-48]! // 8-byte Folded Spill -; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: ptrue pn8.b -; CHECK-NEXT: lsl x9, x1, #1 -; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: ld1b { z17.b, z21.b, z25.b, z29.b }, pn8/z, [x0] -; CHECK-NEXT: ld1b { z16.b, z20.b, z24.b, z28.b }, pn8/z, [x0, x1] -; CHECK-NEXT: ld1b { z2.b, z6.b, z10.b, z14.b }, pn8/z, [x0, x9] -; CHECK-NEXT: add x9, x9, x1 -; CHECK-NEXT: mov z0.d, z17.d -; CHECK-NEXT: mov z1.d, z16.d -; CHECK-NEXT: ld1b { z16.b - z19.b }, pn8/z, [x0, x9] -; CHECK-NEXT: mov z4.d, z21.d -; CHECK-NEXT: mov z5.d, z20.d -; CHECK-NEXT: mov z8.d, z25.d -; CHECK-NEXT: mov z9.d, z24.d -; CHECK-NEXT: mov z3.d, z16.d -; CHECK-NEXT: mov z7.d, z17.d -; CHECK-NEXT: mov z11.d, z18.d -; CHECK-NEXT: mov z16.d, z29.d -; CHECK-NEXT: mov z17.d, z28.d -; CHECK-NEXT: mov z18.d, z14.d -; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] -; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z4.b - z7.b }, z0.b[0] -; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z8.b - z11.b }, z0.b[0] -; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0] -; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr d14, [sp], #48 // 8-byte Folded Reload -; CHECK-NEXT: ret -entry: - %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() - %1 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) - %2 = extractvalue { , , , } %1, 0 - %3 = 
extractvalue { , , , } %1, 1 - %4 = extractvalue { , , , } %1, 2 - %5 = extractvalue { , , , } %1, 3 - %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride - %6 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) - %7 = extractvalue { , , , } %6, 0 - %8 = extractvalue { , , , } %6, 1 - %9 = extractvalue { , , , } %6, 2 - %10 = extractvalue { , , , } %6, 3 - %mul3 = shl i64 %stride, 1 - %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3 - %11 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx4) - %12 = extractvalue { , , , } %11, 0 - %13 = extractvalue { , , , } %11, 1 - %14 = extractvalue { , , , } %11, 2 - %15 = extractvalue { , , , } %11, 3 - %mul5 = mul i64 %stride, 3 - %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5 - %16 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx6) - %17 = extractvalue { , , , } %16, 0 - %18 = extractvalue { , , , } %16, 1 - %19 = extractvalue { , , , } %16, 2 - %20 = extractvalue { , , , } %16, 3 - tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x4.nxv16i8(i32 0, %2, %7, %12, %17, undef, i32 0) - tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x4.nxv16i8(i32 0, %3, %8, %13, %18, undef, i32 0) - tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x4.nxv16i8(i32 0, %4, %9, %14, %19, undef, i32 0) - tail call void @llvm.aarch64.sme.sdot.lane.za32.vg1x4.nxv16i8(i32 0, %5, %10, %15, %20, undef, i32 0) - ret void -} - define void @sdot_lane_za64_u16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #1 { ; CHECK-LABEL: sdot_lane_za64_u16_vg1x2: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: sdot za.d[w8, 0, vgx2], { z4.h, z5.h }, z3.h[1] ; CHECK-NEXT: sdot za.d[w8, 7, vgx2], { z4.h, z5.h }, z3.h[1] ; CHECK-NEXT: ret @@ -1025,8 +793,8 @@ define void 
@sdot_lane_za64_u16_vg1x4(i32 %slice, %unused, %unused, %unused, %unused, , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) - %2 = extractvalue { , } %1, 0 - %3 = extractvalue { , } %1, 1 - %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride - %4 = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) - %5 = extractvalue { , } %4, 0 - %6 = extractvalue { , } %4, 1 - tail call void @llvm.aarch64.sme.sudot.lane.za32.vg1x2.nxv16i8(i32 0, %2, %5, undef, i32 0) - tail call void @llvm.aarch64.sme.sudot.lane.za32.vg1x2.nxv16i8(i32 0, %3, %6, undef, i32 0) - ret void -} - -define void @sudot_form_4x_tuple(ptr %ptr, i64 %stride) #0 { -; CHECK-LABEL: sudot_form_4x_tuple: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str d14, [sp, #-48]! // 8-byte Folded Spill -; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: ptrue pn8.b -; CHECK-NEXT: lsl x9, x1, #1 -; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: ld1b { z17.b, z21.b, z25.b, z29.b }, pn8/z, [x0] -; CHECK-NEXT: ld1b { z16.b, z20.b, z24.b, z28.b }, pn8/z, [x0, x1] -; CHECK-NEXT: ld1b { z2.b, z6.b, z10.b, z14.b }, pn8/z, [x0, x9] -; CHECK-NEXT: add x9, x9, x1 -; CHECK-NEXT: mov z0.d, z17.d -; CHECK-NEXT: mov z1.d, z16.d -; CHECK-NEXT: ld1b { z16.b - z19.b }, pn8/z, [x0, x9] -; CHECK-NEXT: mov z4.d, z21.d -; CHECK-NEXT: mov z5.d, z20.d -; CHECK-NEXT: mov z8.d, z25.d -; CHECK-NEXT: mov z9.d, z24.d -; CHECK-NEXT: mov z3.d, z16.d -; CHECK-NEXT: mov z7.d, z17.d -; CHECK-NEXT: mov z11.d, z18.d -; CHECK-NEXT: mov z16.d, z29.d -; CHECK-NEXT: mov z17.d, z28.d -; CHECK-NEXT: mov z18.d, z14.d -; CHECK-NEXT: sudot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] -; CHECK-NEXT: sudot za.s[w8, 0, vgx4], { z4.b - z7.b }, z0.b[0] -; CHECK-NEXT: sudot za.s[w8, 0, vgx4], { z8.b - z11.b }, z0.b[0] -; CHECK-NEXT: sudot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0] -; CHECK-NEXT: ldp d9, d8, 
[sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr d14, [sp], #48 // 8-byte Folded Reload -; CHECK-NEXT: ret -entry: - %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() - %1 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) - %2 = extractvalue { , , , } %1, 0 - %3 = extractvalue { , , , } %1, 1 - %4 = extractvalue { , , , } %1, 2 - %5 = extractvalue { , , , } %1, 3 - %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride - %6 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) - %7 = extractvalue { , , , } %6, 0 - %8 = extractvalue { , , , } %6, 1 - %9 = extractvalue { , , , } %6, 2 - %10 = extractvalue { , , , } %6, 3 - %mul3 = shl i64 %stride, 1 - %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3 - %11 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx4) - %12 = extractvalue { , , , } %11, 0 - %13 = extractvalue { , , , } %11, 1 - %14 = extractvalue { , , , } %11, 2 - %15 = extractvalue { , , , } %11, 3 - %mul5 = mul i64 %stride, 3 - %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5 - %16 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx6) - %17 = extractvalue { , , , } %16, 0 - %18 = extractvalue { , , , } %16, 1 - %19 = extractvalue { , , , } %16, 2 - %20 = extractvalue { , , , } %16, 3 - tail call void @llvm.aarch64.sme.sudot.lane.za32.vg1x4.nxv16i8(i32 0, %2, %7, %12, %17, undef, i32 0) - tail call void @llvm.aarch64.sme.sudot.lane.za32.vg1x4.nxv16i8(i32 0, %3, %8, %13, %18, undef, i32 0) - tail call void @llvm.aarch64.sme.sudot.lane.za32.vg1x4.nxv16i8(i32 0, %4, %9, %14, %19, undef, i32 0) - tail call void @llvm.aarch64.sme.sudot.lane.za32.vg1x4.nxv16i8(i32 0, %5, %10, %15, %20, undef, i32 0) - ret void -} -attributes #0 = { nounwind 
"target-features"="+sme2" "aarch64_pstate_sm_enabled" } -attributes #1 = { nounwind "target-features"="+sme2,+sme-i16i64" "aarch64_pstate_sm_enabled" } +attributes #0 = { nounwind "target-features"="+sme2" } +attributes #1 = { nounwind "target-features"="+sme2,+sme-i16i64" } ; == Multi, multi (unsigned) diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll index 3616e074d408e1..79db677853cb58 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll @@ -114,6 +114,8 @@ define { , } @multi_vec_max_single_x2_u64(< define { , } @multi_vec_max_single_x2_bf16( %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_max_single_x2_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfmax { z0.h, z1.h }, { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.fmax.single.x2.nxv8bf16( %zdn1, %zdn2, %zm) @@ -314,6 +316,10 @@ define { , , , , , , } @multi_vec_max_single_x4_bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { ; CHECK-LABEL: multi_vec_max_single_x4_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfmax { z0.h - z3.h }, { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmax.single.x4.nxv8bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) @@ -505,6 +511,10 @@ define { , } @multi_vec_max_multi_x2_u64(, } @multi_vec_max_x2_bf16( %zdn1, %zdn2, %zm1, %zm2) { ; CHECK-LABEL: multi_vec_max_x2_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z3 
killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 ; CHECK-NEXT: bfmax { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.fmax.x2.nxv8bf16( %zdn1, %zdn2, %zm1, %zm2) @@ -563,20 +573,20 @@ define { , } @multi_vec_max_multi_x2_ define { , , , } @multi_vec_max_multi_x4_s8( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_max_multi_x4_s8: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1b { z27.b }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: smax { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: smax { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -588,20 +598,20 @@ define { , , , , , , } @multi_vec_max_multi_x4_s16( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_max_multi_x4_s16: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0] -; 
CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: smax { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: smax { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -613,20 +623,20 @@ define { , , , , , , } @multi_vec_max_multi_x4_s32( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_max_multi_x4_s32: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: smax { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: smax { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -638,20 +648,20 @@ define { , , , , , , } @multi_vec_max_multi_x4_s64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; 
CHECK-LABEL: multi_vec_max_multi_x4_s64: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: smax { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: smax { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -665,20 +675,20 @@ define { , , , , , , } @multi_vec_max_multi_x4_u8( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_max_multi_x4_u8: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1b { z27.b }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: umax { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: umax { z24.b - z27.b }, { 
z24.b - z27.b }, { z28.b - z31.b } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -690,20 +700,20 @@ define { , , , , , , } @multi_vec_max_multi_x4_u16( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_max_multi_x4_u16: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: umax { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: umax { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -715,20 +725,20 @@ define { , , , , , , } @multi_vec_max_multi_x4_u32( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_max_multi_x4_u32: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: umax { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s } -; CHECK-NEXT: 
mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: umax { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -740,20 +750,20 @@ define { , , , , , , } @multi_vec_max_multi_x4_u64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_max_multi_x4_u64: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: umax { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: umax { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -767,6 +777,14 @@ define { , , , , , , } @multi_vec_max_x4_bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_max_x4_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; 
CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 ; CHECK-NEXT: bfmax { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmax.x4.nxv8bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) @@ -778,20 +796,20 @@ define { , , , , , , } @multi_vec_max_multi_x4_f16( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_max_multi_x4_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: fmax { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmax { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -803,20 +821,20 @@ define { , , , , , , } @multi_vec_max_multi_x4_f32( 
%unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_max_multi_x4_f32: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: fmax { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmax { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -828,20 +846,20 @@ define { , , , , , , } @multi_vec_max_multi_x4_f64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_max_multi_x4_f64: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: fmax { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d 
+; CHECK-NEXT: fmax { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -855,6 +873,8 @@ define { , , , , } @multi_vec_maxnm_single_x2_bf16( %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_maxnm_single_x2_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfmaxnm { z0.h, z1.h }, { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8bf16( %zdn1, %zdn2, %zm) @@ -907,6 +927,10 @@ define { , } @multi_vec_maxnm_single define { , , , } @multi_vec_maxnm_single_x4_bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { ; CHECK-LABEL: multi_vec_maxnm_single_x4_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfmaxnm { z0.h - z3.h }, { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv8bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) @@ -974,6 +998,10 @@ define { , , , , } @multi_vec_maxnm_x2_bf16( %zdn1, %zdn2, %zm1, %zm2) { ; CHECK-LABEL: multi_vec_maxnm_x2_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 ; CHECK-NEXT: bfmaxnm { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv8bf16( 
%zdn1, %zdn2, %zm1, %zm2) @@ -1032,6 +1060,14 @@ define { , } @multi_vec_maxnm_x2_f64( define { , , , } @multi_vec_maxnm_x4_bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_maxnm_x4_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 ; CHECK-NEXT: bfmaxnm { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv8bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) @@ -1043,20 +1079,20 @@ define { , , , , , , } @multi_vec_maxnm_x4_f16( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_maxnm_x4_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: fmaxnm { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] +; CHECK-NEXT: mov 
z24.d, z1.d +; CHECK-NEXT: fmaxnm { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv8f16( %zdn1, %zdn2, %zdn3, %zdn4, @@ -1067,20 +1103,20 @@ define { , , , , , , } @multi_vec_maxnm_x4_f32( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_maxnm_x4_f32: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: fmaxnm { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmaxnm { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv4f32( %zdn1, %zdn2, %zdn3, %zdn4, @@ -1091,20 +1127,20 @@ define { , , , , , , } @multi_vec_maxnm_x4_f64( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_maxnm_x4_f64: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: 
ld1d { z27.d }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: fmaxnm { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmaxnm { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv2f64( %zdn1, %zdn2, %zdn3, %zdn4, diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll index 58a0989f25d82c..e5c36d42fb1351 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll @@ -114,6 +114,8 @@ define { , } @multi_vec_min_single_x2_u64(< define { , } @multi_vec_min_single_x2_bf16( %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_min_single_x2_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfmin { z0.h, z1.h }, { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.fmin.single.x2.nxv8bf16( %zdn1, %zdn2, %zm) @@ -314,6 +316,10 @@ define { , , , , , , } @multi_vec_min_single_x4_bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { ; CHECK-LABEL: multi_vec_min_single_x4_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed 
$z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfmin { z0.h - z3.h }, { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv8bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) @@ -505,6 +511,10 @@ define { , } @multi_vec_min_multi_x2_u64(, } @multi_vec_min_x2_bf16( %zdn1, %zdn2, %zm1, %zm2) { ; CHECK-LABEL: multi_vec_min_x2_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 ; CHECK-NEXT: bfmin { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.fmin.x2.nxv8bf16( %zdn1, %zdn2, %zm1, %zm2) @@ -563,20 +573,20 @@ define { , } @multi_vec_min_multi_x2_ define { , , , } @multi_vec_min_multi_x4_s8( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_min_multi_x4_s8: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1b { z27.b }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: smin { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: smin { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res 
= call { , , , } @@ -588,20 +598,20 @@ define { , , , , , , } @multi_vec_min_multi_x4_s16( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_min_multi_x4_s16: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: smin { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: smin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -613,20 +623,20 @@ define { , , , , , , } @multi_vec_min_multi_x4_s32( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_min_multi_x4_s32: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: smin { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; 
CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: smin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -638,20 +648,20 @@ define { , , , , , , } @multi_vec_min_multi_x4_s64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_min_multi_x4_s64: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: smin { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: smin { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -665,20 +675,20 @@ define { , , , , , , } @multi_vec_min_multi_x4_u8( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_min_multi_x4_u8: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1b { z27.b }, p0/z, [x0] -; 
CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: umin { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: umin { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -690,20 +700,20 @@ define { , , , , , , } @multi_vec_min_multi_x4_u16( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_min_multi_x4_u16: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: umin { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: umin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -715,20 +725,20 @@ define { , , , , , , } @multi_vec_min_multi_x4_u32( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; 
CHECK-LABEL: multi_vec_min_multi_x4_u32: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: umin { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: umin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -740,20 +750,20 @@ define { , , , , , , } @multi_vec_min_multi_x4_u64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_min_multi_x4_u64: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: umin { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: umin { z24.d - z27.d }, 
{ z24.d - z27.d }, { z28.d - z31.d } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -768,6 +778,14 @@ define { , , , , , , } @multi_vec_min_x4_bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_min_x4_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 ; CHECK-NEXT: bfmin { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmin.x4.nxv8bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) @@ -779,20 +797,20 @@ define { , , , , , , } @multi_vec_min_multi_x4_f16( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_min_multi_x4_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: fmin { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: 
mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -804,20 +822,20 @@ define { , , , , , , } @multi_vec_min_multi_x4_f32( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_min_multi_x4_f32: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: fmin { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -829,20 +847,20 @@ define { , , , , , , } @multi_vec_min_multi_x4_f64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_min_multi_x4_f64: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d 
-; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: fmin { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fmin { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -856,6 +874,8 @@ define { , , , , } @multi_vec_minnm_single_x2_bf16( %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_minnm_single_x2_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfminnm { z0.h, z1.h }, { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv8bf16( %zdn1, %zdn2, %zm) @@ -908,6 +928,10 @@ define { , } @multi_vec_minnm_single define { , , , } @multi_vec_minnm_single_x4_bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) { ; CHECK-LABEL: multi_vec_minnm_single_x4_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfminnm { z0.h - z3.h }, { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv8bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) @@ -975,6 +999,10 @@ define { , , , , } @multi_vec_minnm_x2_bf16( %zdn1, %zdn2, 
%zm1, %zm2) { ; CHECK-LABEL: multi_vec_minnm_x2_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 ; CHECK-NEXT: bfminnm { z0.h, z1.h }, { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.fminnm.x2.nxv8bf16( %zdn1, %zdn2, %zm1, %zm2) @@ -1033,6 +1061,14 @@ define { , } @multi_vec_minnm_x2_f64( define { , , , } @multi_vec_minnm_x4_bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_minnm_x4_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 ; CHECK-NEXT: bfminnm { z0.h - z3.h }, { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv8bf16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) @@ -1044,20 +1080,20 @@ define { , , , , , , } @multi_vec_minnm_x4_f16( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_minnm_x4_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, 
z3.d -; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: fminnm { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fminnm { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv8f16( %zdn1, %zdn2, %zdn3, %zdn4, @@ -1068,20 +1104,20 @@ define { , , , , , , } @multi_vec_minnm_x4_f32( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_minnm_x4_f32: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: fminnm { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fminnm { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %res = call { , , , } 
@llvm.aarch64.sve.fminnm.x4.nxv4f32( %zdn1, %zdn2, %zdn3, %zdn4, @@ -1092,20 +1128,20 @@ define { , , , , , , } @multi_vec_minnm_x4_f64( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_minnm_x4_f64: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: fminnm { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: fminnm { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv2f64( %zdn1, %zdn2, %zdn3, %zdn4, diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll index e5e3da05edced8..346afc611eb756 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll @@ -38,7 +38,9 @@ define void @multi_vector_mul_add_single_long_vg4x1_s16(i32 %slice, %dummy, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x2_s8: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: smlall za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: 
smlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: ret @@ -51,7 +53,9 @@ define void @multi_vector_mul_add_single_long_vg4x2_s8(i32 %slice, %dummy, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x2_s16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: smlall za.d[w8, 0:3, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: smlall za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: ret @@ -66,7 +70,11 @@ define void @multi_vector_mul_add_single_long_vg4x2_s16(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x4_s8: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: smlall za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: smlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: ret @@ -79,7 +87,11 @@ define void @multi_vector_mul_add_single_long_vg4x4_s8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x4_s16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: smlall za.d[w8, 0:3, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: smlall za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: ret @@ -130,18 +142,18 @@ define void 
@multi_vector_mul_add_multi_long_vg4x2_s16(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) { ; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_s8: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z31.d, z4.d ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z30.d, z3.d ; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1] -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: smlall za.s[w8, 0:3, vgx4], { z4.b - z7.b }, { z24.b - z27.b } -; CHECK-NEXT: smlall za.s[w8, 4:7, vgx4], { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: smlall za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b } +; CHECK-NEXT: smlall za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.smla.za32.vg4x4.nxv16i8(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) %slice.4 = add i32 %slice, 4 @@ -152,18 +164,18 @@ define void @multi_vector_mul_add_multi_long_vg4x4_s8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) { ; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z31.d, z4.d ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z30.d, z3.d ; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: smlall za.d[w8, 0:3, vgx4], { z4.h - z7.h }, { z24.h - z27.h } -; CHECK-NEXT: smlall za.d[w8, 4:7, vgx4], { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: mov 
z28.d, z1.d +; CHECK-NEXT: smlall za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h } +; CHECK-NEXT: smlall za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.smla.za64.vg4x4.nxv8i16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) %slice.4 = add i32 %slice, 4 @@ -205,8 +217,8 @@ define void @multi_vector_mul_add_lane_long_vg4x2_s8(i32 %slice, %dummy, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x2_u8: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: umlall za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: umlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: ret @@ -315,7 +329,9 @@ define void @multi_vector_mul_add_single_long_vg4x2_u8(i32 %slice, %dummy, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x2_u16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: umlall za.d[w8, 0:3, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: umlall za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: ret @@ -330,7 +346,11 @@ define void @multi_vector_mul_add_single_long_vg4x2_u16(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x4_u8: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: umlall za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: umlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b 
; CHECK-NEXT: ret @@ -343,7 +363,11 @@ define void @multi_vector_mul_add_single_long_vg4x4_u8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x4_u16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: umlall za.d[w8, 0:3, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: umlall za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: ret @@ -394,18 +418,18 @@ define void @multi_vector_mul_add_multi_long_vg4x2_u16(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) { ; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_u8: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z31.d, z4.d ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z30.d, z3.d ; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1] -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: umlall za.s[w8, 0:3, vgx4], { z4.b - z7.b }, { z24.b - z27.b } -; CHECK-NEXT: umlall za.s[w8, 4:7, vgx4], { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: umlall za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b } +; CHECK-NEXT: umlall za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.umla.za32.vg4x4.nxv16i8(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) %slice.4 = add i32 %slice, 4 @@ -416,18 +440,18 @@ define void @multi_vector_mul_add_multi_long_vg4x4_u8(i32 %slice, %dummy, %zn0, 
%zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) { ; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z31.d, z4.d ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z30.d, z3.d ; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: umlall za.d[w8, 0:3, vgx4], { z4.h - z7.h }, { z24.h - z27.h } -; CHECK-NEXT: umlall za.d[w8, 4:7, vgx4], { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: umlall za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h } +; CHECK-NEXT: umlall za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.umla.za64.vg4x4.nxv8i16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) %slice.4 = add i32 %slice, 4 @@ -469,8 +493,8 @@ define void @multi_vector_mul_add_lane_long_vg4x2_u8(i32 %slice, %dummy, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x2_s8: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: smlsll za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: smlsll za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: ret @@ -579,7 +605,9 @@ define void @multi_vector_mul_sub_single_long_vg4x2_s8(i32 %slice, %dummy, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x2_s16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: smlsll za.d[w8, 0:3, vgx2], { z1.h, z2.h }, z3.h ; 
CHECK-NEXT: smlsll za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: ret @@ -594,7 +622,11 @@ define void @multi_vector_mul_sub_single_long_vg4x2_s16(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x4_s8: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: smlsll za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: smlsll za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: ret @@ -607,7 +639,11 @@ define void @multi_vector_mul_sub_single_long_vg4x4_s8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x4_s16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: smlsll za.d[w8, 0:3, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: smlsll za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: ret @@ -658,18 +694,18 @@ define void @multi_vector_mul_sub_multi_long_vg4x2_s16(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) { ; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_s8: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z31.d, z4.d ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z30.d, z3.d ; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: 
mov z29.d, z2.d ; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1] -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: smlsll za.s[w8, 0:3, vgx4], { z4.b - z7.b }, { z24.b - z27.b } -; CHECK-NEXT: smlsll za.s[w8, 4:7, vgx4], { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: smlsll za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b } +; CHECK-NEXT: smlsll za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.smls.za32.vg4x4.nxv16i8(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) %slice.4 = add i32 %slice, 4 @@ -680,18 +716,18 @@ define void @multi_vector_mul_sub_multi_long_vg4x4_s8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) { ; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_s16: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z31.d, z4.d ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z30.d, z3.d ; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: smlsll za.d[w8, 0:3, vgx4], { z4.h - z7.h }, { z24.h - z27.h } -; CHECK-NEXT: smlsll za.d[w8, 4:7, vgx4], { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: smlsll za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h } +; CHECK-NEXT: smlsll za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.smls.za64.vg4x4.nxv8i16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) %slice.4 = add i32 %slice, 4 @@ -733,8 +769,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x2_s8(i32 %slice, %dummy, %zn0, %zn1, %zm) { ; CHECK-LABEL: 
multi_vector_mul_sub_single_long_vg4x2_u8: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: umlsll za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: umlsll za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: ret @@ -843,7 +881,9 @@ define void @multi_vector_mul_sub_single_long_vg4x2_u8(i32 %slice, %dummy, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x2_u16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: umlsll za.d[w8, 0:3, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: umlsll za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: ret @@ -858,7 +898,11 @@ define void @multi_vector_mul_sub_single_long_vg4x2_u16(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x4_u8: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: umlsll za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: umlsll za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: ret @@ -871,7 +915,11 @@ define void @multi_vector_mul_sub_single_long_vg4x4_u8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x4_u16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed 
$z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: umlsll za.d[w8, 0:3, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: umlsll za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z5.h ; CHECK-NEXT: ret @@ -922,18 +970,18 @@ define void @multi_vector_mul_sub_multi_long_vg4x2_u16(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) { ; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_u8: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z31.d, z4.d ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z30.d, z3.d ; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1] -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: umlsll za.s[w8, 0:3, vgx4], { z4.b - z7.b }, { z24.b - z27.b } -; CHECK-NEXT: umlsll za.s[w8, 4:7, vgx4], { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: umlsll za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b } +; CHECK-NEXT: umlsll za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.umls.za32.vg4x4.nxv16i8(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) %slice.4 = add i32 %slice, 4 @@ -944,18 +992,18 @@ define void @multi_vector_mul_sub_multi_long_vg4x4_u8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) { ; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_u16: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z31.d, z4.d ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z30.d, z3.d ; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: 
ld1h { z27.h }, p0/z, [x1] -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: umlsll za.d[w8, 0:3, vgx4], { z4.h - z7.h }, { z24.h - z27.h } -; CHECK-NEXT: umlsll za.d[w8, 4:7, vgx4], { z4.h - z7.h }, { z24.h - z27.h } +; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: umlsll za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h } +; CHECK-NEXT: umlsll za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.umls.za64.vg4x4.nxv8i16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) %slice.4 = add i32 %slice, 4 @@ -997,8 +1045,8 @@ define void @multi_vector_mul_sub_lane_long_vg4x2_u8(i32 %slice, %dummy, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_mul_add_single_signed_long_vg4x2_s8: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: sumlall za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: sumlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: ret @@ -1083,7 +1133,11 @@ define void @multi_vector_mul_add_single_signed_long_vg4x2_s8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_mul_add_single_signed_long_vg4x4_s8: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: sumlall za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: sumlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: ret @@ -1114,8 +1168,8 @@ define void @multi_vector_mul_add_lane_signed_long_vg4x2_s8(i32 %slice, %dummy, %zn0, %zn1, %zm) { ; CHECK-LABEL: 
multi_vector_mul_add_single_unsigned_long_vg4x2_s8: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: usmlall za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: usmlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: ret @@ -1181,7 +1237,11 @@ define void @multi_vector_mul_add_single_unsigned_long_vg4x2_s8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_mul_add_single_unsigned_long_vg4x4_s8: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2_z3_z4 def $z1_z2_z3_z4 ; CHECK-NEXT: usmlall za.s[w8, 0:3, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: usmlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z5.b ; CHECK-NEXT: ret @@ -1215,18 +1275,18 @@ define void @multi_vector_mul_add_multi_unsigned_long_vg4x2_u8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) { ; CHECK-LABEL: multi_vector_mul_add_multi_unsigned_long_vg4x4_u8: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d +; CHECK-NEXT: mov z31.d, z4.d ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z30.d, z3.d ; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1] -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: usmlall za.s[w8, 0:3, vgx4], { z4.b - z7.b }, { z24.b - z27.b } -; CHECK-NEXT: usmlall za.s[w8, 4:7, vgx4], { z4.b - z7.b }, { z24.b - z27.b } +; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: 
usmlall za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b } +; CHECK-NEXT: usmlall za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b } ; CHECK-NEXT: ret call void @llvm.aarch64.sme.usmla.za32.vg4x4.nxv16i8(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) %slice.4 = add i32 %slice, 4 @@ -1255,8 +1315,8 @@ define void @multi_vector_mul_add_lane_unsigned_long_vg4x2_s8(i32 %slice, %z define void @multi_vector_add_single_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_single_vg2x2_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: bfmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret @@ -133,7 +135,9 @@ define void @multi_vector_add_single_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_single_vg2x2_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: fmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret @@ -146,7 +150,9 @@ define void @multi_vector_add_single_vg2x2_f16(i32 %slice, % define void @multi_vector_add_single_vg2x2_s16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_single_vg2x2_s16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: smlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: smlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret @@ -159,7 +165,9 @@ define void @multi_vector_add_single_vg2x2_s16(i32 %slice, %z define void @multi_vector_add_single_vg2x2_u16(i32 
%slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_single_vg2x2_u16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: umlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: umlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret @@ -176,7 +184,9 @@ define void @multi_vector_add_single_vg2x2_u16(i32 %slice, %z define void @multi_vector_sub_single_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_single_vg2x2_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: bfmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret @@ -189,7 +199,9 @@ define void @multi_vector_sub_single_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_single_vg2x2_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: fmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret @@ -202,7 +214,9 @@ define void @multi_vector_sub_single_vg2x2_f16(i32 %slice, % define void @multi_vector_sub_single_vg2x2_s16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_single_vg2x2_s16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: smlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: smlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret @@ -215,7 +229,9 @@ define void 
@multi_vector_sub_single_vg2x2_s16(i32 %slice, %z define void @multi_vector_sub_single_vg2x2_u16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_single_vg2x2_u16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: umlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: umlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h ; CHECK-NEXT: ret @@ -232,7 +248,11 @@ define void @multi_vector_sub_single_vg2x2_u16(i32 %slice, %z define void @multi_vector_add_single_vg2x4_bf16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_add_single_vg2x4_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: bfmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret @@ -249,8 +269,11 @@ define void @multi_vector_add_single_vg2x4_bf16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_add_single_vg2x4_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: fmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: fmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret @@ -267,7 +290,11 @@ define void @multi_vector_add_single_vg2x4_f16(i32 %slice, % define void 
@multi_vector_add_single_vg2x4_s16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_add_single_vg2x4_s16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: smlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: smlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret @@ -284,7 +311,11 @@ define void @multi_vector_add_single_vg2x4_s16(i32 %slice, %z define void @multi_vector_add_single_vg2x4_u16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_add_single_vg2x4_u16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: umlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: umlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret @@ -305,7 +336,11 @@ define void @multi_vector_add_single_vg2x4_u16(i32 %slice, %z define void @multi_vector_sub_single_vg2x4_bf16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_sub_single_vg2x4_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfmlsl za.s[w8, 0:1, vgx4], { 
z0.h - z3.h }, z4.h ; CHECK-NEXT: bfmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret @@ -322,7 +357,11 @@ define void @multi_vector_sub_single_vg2x4_bf16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_sub_single_vg2x4_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: fmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret @@ -339,7 +378,11 @@ define void @multi_vector_sub_single_vg2x4_f16(i32 %slice, % define void @multi_vector_sub_single_vg2x4_s16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_sub_single_vg2x4_s16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: smlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: smlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret @@ -356,7 +399,11 @@ define void @multi_vector_sub_single_vg2x4_s16(i32 %slice, %z define void @multi_vector_sub_single_vg2x4_u16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_sub_single_vg2x4_u16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def 
$z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: umlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: umlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h ; CHECK-NEXT: ret @@ -377,7 +424,11 @@ define void @multi_vector_sub_single_vg2x4_u16(i32 %slice, %z define void @multi_vector_add_multi_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm0, %zm1) { ; CHECK-LABEL: multi_vector_add_multi_vg2x2_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: bfmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret @@ -392,7 +443,11 @@ define void @multi_vector_add_multi_vg2x2_bf16(i32 %slice, define void @multi_vector_add_multi_vg2x2_f16(i32 %slice, %zn0, %zn1, %zm0, %zm1) { ; CHECK-LABEL: multi_vector_add_multi_vg2x2_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: fmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret @@ -407,7 +462,11 @@ define void @multi_vector_add_multi_vg2x2_f16(i32 %slice, %z define void @multi_vector_add_multi_vg2x2_s16(i32 %slice, %zn0, %zn1, %zm0, %zm1) { ; CHECK-LABEL: multi_vector_add_multi_vg2x2_s16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; 
CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: smlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: smlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret @@ -422,7 +481,11 @@ define void @multi_vector_add_multi_vg2x2_s16(i32 %slice, %zn define void @multi_vector_add_multi_vg2x2_u16(i32 %slice, %zn0, %zn1, %zm0, %zm1) { ; CHECK-LABEL: multi_vector_add_multi_vg2x2_u16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: umlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: umlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret @@ -441,7 +504,11 @@ define void @multi_vector_add_multi_vg2x2_u16(i32 %slice, %zn define void @multi_vector_sub_multi_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm0, %zm1) { ; CHECK-LABEL: multi_vector_sub_multi_vg2x2_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: bfmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret @@ -456,7 +523,11 @@ define void @multi_vector_sub_multi_vg2x2_bf16(i32 %slice, define void @multi_vector_sub_multi_vg2x2_f16(i32 %slice, %zn0, %zn1, %zm0, %zm1) { ; CHECK-LABEL: multi_vector_sub_multi_vg2x2_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed 
$z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: fmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret @@ -471,7 +542,11 @@ define void @multi_vector_sub_multi_vg2x2_f16(i32 %slice, %z define void @multi_vector_sub_multi_vg2x2_s16(i32 %slice, %zn0, %zn1, %zm0, %zm1) { ; CHECK-LABEL: multi_vector_sub_multi_vg2x2_s16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: smlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: smlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret @@ -486,7 +561,11 @@ define void @multi_vector_sub_multi_vg2x2_s16(i32 %slice, %zn define void @multi_vector_sub_multi_vg2x2_u16(i32 %slice, %zn0, %zn1, %zm0, %zm1) { ; CHECK-LABEL: multi_vector_sub_multi_vg2x2_u16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: umlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: umlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h } ; CHECK-NEXT: ret @@ -505,7 +584,15 @@ define void @multi_vector_sub_multi_vg2x2_u16(i32 %slice, %zn define void @multi_vector_add_multi_vg2x4_bf16(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: 
multi_vector_add_multi_vg2x4_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: bfmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret @@ -523,7 +610,15 @@ define void @multi_vector_add_multi_vg2x4_bf16(i32 %slice, define void @multi_vector_add_multi_vg2x4_f16(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_add_multi_vg2x4_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: fmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret @@ -541,7 +636,15 @@ define void 
@multi_vector_add_multi_vg2x4_f16(i32 %slice, %z define void @multi_vector_add_multi_vg2x4_s16(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_add_multi_vg2x4_s16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: smlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: smlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret @@ -559,7 +662,15 @@ define void @multi_vector_add_multi_vg2x4_s16(i32 %slice, %zn define void @multi_vector_add_multi_vg2x4_u16(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_add_multi_vg2x4_u16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: umlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h 
} ; CHECK-NEXT: umlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret @@ -581,7 +692,15 @@ define void @multi_vector_add_multi_vg2x4_u16(i32 %slice, %zn define void @multi_vector_sub_multi_vg2x4_bf16(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_sub_multi_vg2x4_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: bfmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret @@ -599,7 +718,15 @@ define void @multi_vector_sub_multi_vg2x4_bf16(i32 %slice, define void @multi_vector_sub_multi_vg2x4_f16(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_sub_multi_vg2x4_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // 
kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: fmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret @@ -617,7 +744,15 @@ define void @multi_vector_sub_multi_vg2x4_f16(i32 %slice, %z define void @multi_vector_sub_multi_vg2x4_s16(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_sub_multi_vg2x4_s16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: smlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: smlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret @@ -635,7 +770,15 @@ define void @multi_vector_sub_multi_vg2x4_s16(i32 %slice, %zn define void @multi_vector_sub_multi_vg2x4_u16(i32 %slice, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: multi_vector_sub_multi_vg2x4_u16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed 
$z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: umlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: umlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h } ; CHECK-NEXT: ret @@ -769,7 +912,9 @@ define void @multi_vector_sub_lane_vg2x1_u16(i32 %slice, %zn, define void @multi_vector_add_lane_vg2x2_f16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_lane_vg2x2_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0] ; CHECK-NEXT: fmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7] ; CHECK-NEXT: ret @@ -784,7 +929,9 @@ define void @multi_vector_add_lane_vg2x2_f16(i32 %slice, %zn define void @multi_vector_add_lane_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_lane_vg2x2_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0] ; CHECK-NEXT: bfmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7] ; CHECK-NEXT: ret @@ -799,7 +946,9 @@ define void @multi_vector_add_lane_vg2x2_bf16(i32 %slice, define void @multi_vector_add_lane_vg2x2_s16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_lane_vg2x2_s16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: smlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0] ; CHECK-NEXT: smlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7] ; CHECK-NEXT: ret @@ -814,7 +963,9 @@ define void 
@multi_vector_add_lane_vg2x2_s16(i32 %slice, %zn0 define void @multi_vector_add_lane_vg2x2_u16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_lane_vg2x2_u16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: umlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0] ; CHECK-NEXT: umlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7] ; CHECK-NEXT: ret @@ -833,7 +984,9 @@ define void @multi_vector_add_lane_vg2x2_u16(i32 %slice, %zn0 define void @multi_vector_sub_lane_vg2x2_f16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_lane_vg2x2_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0] ; CHECK-NEXT: fmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7] ; CHECK-NEXT: ret @@ -848,7 +1001,9 @@ define void @multi_vector_sub_lane_vg2x2_f16(i32 %slice, %zn define void @multi_vector_sub_lane_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_lane_vg2x2_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0] ; CHECK-NEXT: bfmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7] ; CHECK-NEXT: ret @@ -863,7 +1018,9 @@ define void @multi_vector_sub_lane_vg2x2_bf16(i32 %slice, define void @multi_vector_sub_lane_vg2x2_s16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_lane_vg2x2_s16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: smlsl za.s[w8, 0:1, vgx2], { 
z0.h, z1.h }, z2.h[0] ; CHECK-NEXT: smlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7] ; CHECK-NEXT: ret @@ -878,7 +1035,9 @@ define void @multi_vector_sub_lane_vg2x2_s16(i32 %slice, %zn0 define void @multi_vector_sub_lane_vg2x2_u16(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_lane_vg2x2_u16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: umlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0] ; CHECK-NEXT: umlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7] ; CHECK-NEXT: ret @@ -897,7 +1056,11 @@ define void @multi_vector_sub_lane_vg2x2_u16(i32 %slice, %zn0 define void @multi_vector_add_lane_vg2x4_f16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_add_lane_vg2x4_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0] ; CHECK-NEXT: fmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7] ; CHECK-NEXT: ret @@ -914,7 +1077,11 @@ define void @multi_vector_add_lane_vg2x4_f16(i32 %slice, %zn define void @multi_vector_add_lane_vg2x4_bf16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_add_lane_vg2x4_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfmlal za.s[w8, 0:1, 
vgx4], { z0.h - z3.h }, z4.h[0] ; CHECK-NEXT: bfmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7] ; CHECK-NEXT: ret @@ -931,7 +1098,11 @@ define void @multi_vector_add_lane_vg2x4_bf16(i32 %slice, define void @multi_vector_add_lane_vg2x4_s16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_add_lane_vg2x4_s16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: smlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0] ; CHECK-NEXT: smlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7] ; CHECK-NEXT: ret @@ -948,7 +1119,11 @@ define void @multi_vector_add_lane_vg2x4_s16(i32 %slice, %zn0 define void @multi_vector_add_lane_vg2x4_u16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_add_lane_vg2x4_u16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: umlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0] ; CHECK-NEXT: umlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7] ; CHECK-NEXT: ret @@ -969,7 +1144,11 @@ define void @multi_vector_add_lane_vg2x4_u16(i32 %slice, %zn0 define void @multi_vector_sub_lane_vg2x4_f16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_sub_lane_vg2x4_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def 
$z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0] ; CHECK-NEXT: fmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7] ; CHECK-NEXT: ret @@ -986,7 +1165,11 @@ define void @multi_vector_sub_lane_vg2x4_f16(i32 %slice, %zn define void @multi_vector_sub_lane_vg2x4_bf16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_sub_lane_vg2x4_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0] ; CHECK-NEXT: bfmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7] ; CHECK-NEXT: ret @@ -1003,7 +1186,11 @@ define void @multi_vector_sub_lane_vg2x4_bf16(i32 %slice, define void @multi_vector_sub_lane_vg2x4_s16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: multi_vector_sub_lane_vg2x4_s16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: smlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0] ; CHECK-NEXT: smlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7] ; CHECK-NEXT: ret @@ -1020,7 +1207,11 @@ define void @multi_vector_sub_lane_vg2x4_s16(i32 %slice, %zn0 define void @multi_vector_sub_lane_vg2x4_u16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { ; CHECK-LABEL: 
multi_vector_sub_lane_vg2x4_u16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: umlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0] ; CHECK-NEXT: umlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll index b95a774e899c89..12a940ff03e29a 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll @@ -324,20 +324,20 @@ define { , } @multi_vec_rounding_shl_x2_s64 define { , , , } @multi_vec_rounding_shl_x4_s8( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_rounding_shl_x4_s8: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1b { z27.b }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: srshl { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: srshl { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; 
CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.srshl.x4.nxv16i8( %zdn1, %zdn2, %zdn3, %zdn4, @@ -348,20 +348,20 @@ define { , , , , , , } @multi_vec_rounding_shl_x4_s16( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_rounding_shl_x4_s16: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: srshl { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: srshl { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.srshl.x4.nxv8i16( %zdn1, %zdn2, %zdn3, %zdn4, @@ -372,20 +372,20 @@ define { , , , , , , } @multi_vec_rounding_shl_x4_s32( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_rounding_shl_x4_s32: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: srshl { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s } -; CHECK-NEXT: mov z0.d, z4.d -; 
CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: srshl { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.srshl.x4.nxv4i32( %zdn1, %zdn2, %zdn3, %zdn4, @@ -396,20 +396,20 @@ define { , , , , , , } @multi_vec_rounding_shl_x4_s64( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_rounding_shl_x4_s64: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: srshl { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: srshl { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.srshl.x4.nxv2i64( %zdn1, %zdn2, %zdn3, %zdn4, @@ -484,20 +484,20 @@ define { , } @multi_vec_rounding_uhl_x2_u64 define { , , , } @multi_vec_rounding_shl_x4_u8( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, 
%zm4) { ; CHECK-LABEL: multi_vec_rounding_shl_x4_u8: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1b { z27.b }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: urshl { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: urshl { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.urshl.x4.nxv16i8( %zdn1, %zdn2, %zdn3, %zdn4, @@ -508,20 +508,20 @@ define { , , , , , , } @multi_vec_rounding_shl_x4_u16( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_rounding_shl_x4_u16: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: urshl { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1h { z31.h 
}, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: urshl { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.urshl.x4.nxv8i16( %zdn1, %zdn2, %zdn3, %zdn4, @@ -532,20 +532,20 @@ define { , , , , , , } @multi_vec_rounding_shl_x4_u32( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_rounding_shl_x4_u32: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: urshl { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: urshl { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.urshl.x4.nxv4i32( %zdn1, %zdn2, %zdn3, %zdn4, @@ -556,20 +556,20 @@ define { , , , , , , } @multi_vec_rounding_shl_x4_u64( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { ; CHECK-LABEL: multi_vec_rounding_shl_x4_u64: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, 
z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: urshl { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: urshl { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.urshl.x4.nxv2i64( %zdn1, %zdn2, %zdn3, %zdn4, diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll index 07a5f7993a1cba..f41791e626f5f5 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-select-sme-tileslice.ll @@ -8,6 +8,7 @@ define @test_tileslice_no_add(i32 %idx) #0 { ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov { z0.d, z1.d }, za.d[w8, 0, vgx2] +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 ; CHECK-NEXT: ret entry: %read = call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 %idx) @@ -20,6 +21,7 @@ define @test_tileslice_add_nonconstant(i32 %idx1, i32 %idx2) ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: add w8, w0, w1 ; CHECK-NEXT: mov { z0.d, z1.d }, za.d[w8, 0, vgx2] +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 ; CHECK-NEXT: ret entry: %add = add i32 %idx1, %idx2 diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll index 68ae92bc68f4ba..e71afe213d8a59 100644 --- 
a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll @@ -196,20 +196,20 @@ define { , } @multi_vec_sat_double_mulh_mul define { , , , } @multi_vec_sat_double_mulh_multi_x4_s8( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s8: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1b { z27.b }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: sqdmulh { z4.b - z7.b }, { z4.b - z7.b }, { z24.b - z27.b } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: sqdmulh { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -221,20 +221,20 @@ define { , , , , , , } @multi_vec_sat_double_mulh_multi_x4_s16( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s16: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1h { z27.h }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: sqdmulh { z4.h - z7.h }, { z4.h - z7.h }, { z24.h - z27.h } -; CHECK-NEXT: mov z0.d, z4.d -; 
CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: sqdmulh { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -246,20 +246,20 @@ define { , , , , , , } @multi_vec_sat_double_mulh_multi_x4_s32( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s32: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov z27.d, z4.d ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1w { z27.s }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: sqdmulh { z4.s - z7.s }, { z4.s - z7.s }, { z24.s - z27.s } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: sqdmulh { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } @@ -271,20 +271,20 @@ define { , , , , , , } @multi_vec_sat_double_mulh_multi_x4_s64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, ; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s64: ; CHECK: // %bb.0: +; CHECK-NEXT: mov z30.d, z7.d +; CHECK-NEXT: mov 
z27.d, z4.d ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: mov z25.d, z6.d -; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: ld1d { z27.d }, p0/z, [x0] -; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: sqdmulh { z4.d - z7.d }, { z4.d - z7.d }, { z24.d - z27.d } -; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: mov z1.d, z5.d -; CHECK-NEXT: mov z2.d, z6.d -; CHECK-NEXT: mov z3.d, z7.d +; CHECK-NEXT: mov z29.d, z6.d +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov z28.d, z5.d +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: sqdmulh { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } +; CHECK-NEXT: mov z0.d, z24.d +; CHECK-NEXT: mov z1.d, z25.d +; CHECK-NEXT: mov z2.d, z26.d +; CHECK-NEXT: mov z3.d, z27.d ; CHECK-NEXT: ret %zm1, %zm2, %zm3, %zm4) { %res = call { , , , } diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll index 46409a0a80b780..da8c679d5a39a8 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll @@ -8,7 +8,9 @@ define void @multi_vector_sub_write_single_za_vg1x2_i32(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_write_single_za_vg1x2_i32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sub za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s ; CHECK-NEXT: sub za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s ; CHECK-NEXT: ret @@ -25,7 +27,9 @@ define void @multi_vector_sub_write_single_za_vg1x2_i32(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_write_single_za_vg1x2_i64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def 
$z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sub za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d ; CHECK-NEXT: sub za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d ; CHECK-NEXT: ret @@ -46,7 +50,11 @@ define void @multi_vector_sub_write_single_za_vg1x2_i64(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_sub_write_single_za_vg1x4_i32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: sub za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s ; CHECK-NEXT: sub za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s ; CHECK-NEXT: ret @@ -67,7 +75,11 @@ define void @multi_vector_sub_write_single_za_vg1x4_i32(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_sub_write_za_vg1x2_i32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sub za.s[w8, 0, vgx2], { z0.s, z1.s }, { z2.s, z3.s } ; CHECK-NEXT: sub za.s[w8, 7, vgx2], { z0.s, z1.s }, { z2.s, z3.s } ; CHECK-NEXT: ret @@ -112,7 +128,11 @@ define void @multi_vector_sub_write_za_vg1x2_i32(i32 %slice, define void @multi_vector_sub_write_za_vg1x2_i64(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_sub_write_za_vg1x2_i64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sub 
za.d[w8, 0, vgx2], { z0.d, z1.d }, { z2.d, z3.d } ; CHECK-NEXT: sub za.d[w8, 7, vgx2], { z0.d, z1.d }, { z2.d, z3.d } ; CHECK-NEXT: ret @@ -135,7 +155,15 @@ define void @multi_vector_sub_write_za_vg1x2_i64(i32 %slice, define void @multi_vector_sub_write_za_vg1x4_i32(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_sub_write_za_vg1x4_i32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: sub za.s[w8, 0, vgx4], { z0.s - z3.s }, { z4.s - z7.s } ; CHECK-NEXT: sub za.s[w8, 7, vgx4], { z0.s - z3.s }, { z4.s - z7.s } ; CHECK-NEXT: ret @@ -159,7 +187,15 @@ define void @multi_vector_sub_write_za_vg1x4_i32(i32 %slice, define void @multi_vector_sub_write_za_vg1x4_i64(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_sub_write_za_vg1x4_i64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; 
CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: sub za.d[w8, 0, vgx4], { z0.d - z3.d }, { z4.d - z7.d } ; CHECK-NEXT: sub za.d[w8, 7, vgx4], { z0.d - z3.d }, { z4.d - z7.d } ; CHECK-NEXT: ret @@ -189,7 +225,9 @@ define void @multi_vector_sub_write_za_vg1x4_i64(i32 %slice, define void @multi_vector_sub_za_vg1x2_i32(i32 %slice, %zn0, %zn1) { ; CHECK-LABEL: multi_vector_sub_za_vg1x2_i32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sub za.s[w8, 0, vgx2], { z0.s, z1.s } ; CHECK-NEXT: sub za.s[w8, 7, vgx2], { z0.s, z1.s } ; CHECK-NEXT: ret @@ -202,7 +240,9 @@ define void @multi_vector_sub_za_vg1x2_i32(i32 %slice, %zn0, define void @multi_vector_sub_za_vg1x2_i64(i32 %slice, %zn0, %zn1) { ; CHECK-LABEL: multi_vector_sub_za_vg1x2_i64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sub za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: sub za.d[w8, 7, vgx2], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -215,7 +255,9 @@ define void @multi_vector_sub_za_vg1x2_i64(i32 %slice, %zn0, define void @multi_vector_sub_za_vg1x2_f32(i32 %slice, %zn0, %zn1) { ; CHECK-LABEL: multi_vector_sub_za_vg1x2_f32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fsub za.s[w8, 0, vgx2], { z0.s, z1.s } ; CHECK-NEXT: fsub za.s[w8, 7, vgx2], { z0.s, z1.s } ; CHECK-NEXT: ret @@ -230,7 +272,9 @@ define void @multi_vector_sub_za_vg1x2_f32(i32 %slice, %zn0 define void @multi_vector_sub_za_vg1x2_f64(i32 %slice, %zn0, %zn1) { ; CHECK-LABEL: multi_vector_sub_za_vg1x2_f64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed 
$z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fsub za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: fsub za.d[w8, 7, vgx2], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -247,7 +291,11 @@ define void @multi_vector_sub_za_vg1x2_f64(i32 %slice, %zn define void @multi_vector_sub_za_vg1x4_i32(i32 %slice, ; CHECK-LABEL: multi_vector_sub_za_vg1x4_i32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: sub za.s[w8, 0, vgx4], { z0.s - z3.s } ; CHECK-NEXT: sub za.s[w8, 7, vgx4], { z0.s - z3.s } ; CHECK-NEXT: ret @@ -266,7 +314,11 @@ define void @multi_vector_sub_za_vg1x4_i32(i32 %slice, define void @multi_vector_sub_za_vg1x4_i64(i32 %slice, ; CHECK-LABEL: multi_vector_sub_za_vg1x4_i64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: sub za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: sub za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret @@ -285,7 +337,11 @@ define void @multi_vector_sub_za_vg1x4_i64(i32 %slice, define void @multi_vector_sub_za_vg1x4_f32(i32 %slice, ; CHECK-LABEL: multi_vector_sub_za_vg1x4_f32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed 
$z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fsub za.s[w8, 0, vgx4], { z0.s - z3.s } ; CHECK-NEXT: fsub za.s[w8, 7, vgx4], { z0.s - z3.s } ; CHECK-NEXT: ret @@ -304,7 +360,11 @@ define void @multi_vector_sub_za_vg1x4_f32(i32 %slice, define void @multi_vector_sub_za_vg1x4_f64(i32 %slice, ; CHECK-LABEL: multi_vector_sub_za_vg1x4_f64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fsub za.d[w8, 0, vgx4], { z0.d - z3.d } ; CHECK-NEXT: fsub za.d[w8, 7, vgx4], { z0.d - z3.d } ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-vdot.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-vdot.ll index f552c9e604bdd5..b698b60007eb9f 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-vdot.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-vdot.ll @@ -1,14 +1,15 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2,+sme-i16i64 -verify-machineinstrs < %s | FileCheck %s -target triple="aarch64-linux-gnu" ; == FVDOT == -define void @test_fvdot_lane_za32_vg1x2_nxv8f16(i32 %slice, %zn1, %zn2, %zm) #0 { +define void @test_fvdot_lane_za32_vg1x2_nxv8f16(i32 %slice, %zn1, %zn2, %zm) { ; CHECK-LABEL: test_fvdot_lane_za32_vg1x2_nxv8f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fvdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z2.h[3] ; CHECK-NEXT: fvdot za.s[w8, 7, vgx2], { z0.h, z1.h }, z2.h[3] 
; CHECK-NEXT: ret @@ -21,10 +22,12 @@ define void @test_fvdot_lane_za32_vg1x2_nxv8f16(i32 %slice, ; == BFVDOT == -define void @test_fvdot_lane_za32_vg1x2_nxv8bf16(i32 %slice, %zn1, %zn2, %zm) #0 { +define void @test_fvdot_lane_za32_vg1x2_nxv8bf16(i32 %slice, %zn1, %zn2, %zm) { ; CHECK-LABEL: test_fvdot_lane_za32_vg1x2_nxv8bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfvdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z2.h[3] ; CHECK-NEXT: bfvdot za.s[w8, 7, vgx2], { z0.h, z1.h }, z2.h[3] ; CHECK-NEXT: ret @@ -37,10 +40,12 @@ define void @test_fvdot_lane_za32_vg1x2_nxv8bf16(i32 %slice, %zn1, %zn2, %zm) #0 { +define void @test_svdot_lane_za32_vg1x2_nxv8i16(i32 %slice, %zn1, %zn2, %zm) { ; CHECK-LABEL: test_svdot_lane_za32_vg1x2_nxv8i16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: svdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z2.h[3] ; CHECK-NEXT: svdot za.s[w8, 7, vgx2], { z0.h, z1.h }, z2.h[3] ; CHECK-NEXT: ret @@ -50,10 +55,14 @@ define void @test_svdot_lane_za32_vg1x2_nxv8i16(i32 %slice, % ret void } -define void @test_svdot_lane_za32_vg1x4_nxv16i8(i32 %slice, %zn1, %zn2, %zn3, %zn4, %zm) #0 { +define void @test_svdot_lane_za32_vg1x4_nxv16i8(i32 %slice, %zn1, %zn2, %zn3, %zn4, %zm) { ; CHECK-LABEL: test_svdot_lane_za32_vg1x4_nxv16i8: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: svdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z4.b[3] ; CHECK-NEXT: svdot 
za.s[w8, 7, vgx4], { z0.b - z3.b }, z4.b[3] ; CHECK-NEXT: ret @@ -63,10 +72,14 @@ define void @test_svdot_lane_za32_vg1x4_nxv16i8(i32 %slice, % ret void } -define void @test_svdot_lane_za64_vg1x4_nxv8i16(i32 %slice, %zn1, %zn2, %zn3, %zn4, %zm) #1 { +define void @test_svdot_lane_za64_vg1x4_nxv8i16(i32 %slice, %zn1, %zn2, %zn3, %zn4, %zm) { ; CHECK-LABEL: test_svdot_lane_za64_vg1x4_nxv8i16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: svdot za.d[w8, 0, vgx4], { z0.h - z3.h }, z4.h[1] ; CHECK-NEXT: svdot za.d[w8, 7, vgx4], { z0.h - z3.h }, z4.h[1] ; CHECK-NEXT: ret @@ -76,108 +89,15 @@ define void @test_svdot_lane_za64_vg1x4_nxv8i16(i32 %slice, % ret void } -define void @svdot_form_2x_tuple(ptr %ptr, i64 %stride) #0 { -; CHECK-LABEL: svdot_form_2x_tuple: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ptrue pn8.b -; CHECK-NEXT: add x9, x0, x1 -; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: ld1h { z16.h, z24.h }, pn8/z, [x0] -; CHECK-NEXT: ld1h { z0.h, z1.h }, pn8/z, [x9] -; CHECK-NEXT: mov z2.d, z16.d -; CHECK-NEXT: mov z3.d, z0.d -; CHECK-NEXT: svdot za.s[w8, 0, vgx2], { z2.h, z3.h }, z0.h[0] -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: svdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0] -; CHECK-NEXT: ret -entry: - %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() - %1 = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") %0, ptr %ptr) - %2 = extractvalue { , } %1, 0 - %3 = extractvalue { , } %1, 1 - %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride - %4 = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") %0, ptr %arrayidx2) - %5 = extractvalue { , } %4, 0 - %6 = 
extractvalue { , } %4, 1 - tail call void @llvm.aarch64.sme.svdot.lane.za32.vg1x2.nxv8i16(i32 0, %2, %5, undef, i32 0) - tail call void @llvm.aarch64.sme.svdot.lane.za32.vg1x2.nxv8i16(i32 0, %3, %6, undef, i32 0) - ret void -} - -define void @svdot_form_4x_tuple(ptr %ptr, i64 %stride) #0 { -; CHECK-LABEL: svdot_form_4x_tuple: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str d14, [sp, #-48]! // 8-byte Folded Spill -; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: ptrue pn8.b -; CHECK-NEXT: lsl x9, x1, #1 -; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: ld1b { z17.b, z21.b, z25.b, z29.b }, pn8/z, [x0] -; CHECK-NEXT: ld1b { z16.b, z20.b, z24.b, z28.b }, pn8/z, [x0, x1] -; CHECK-NEXT: ld1b { z2.b, z6.b, z10.b, z14.b }, pn8/z, [x0, x9] -; CHECK-NEXT: add x9, x9, x1 -; CHECK-NEXT: mov z0.d, z17.d -; CHECK-NEXT: mov z1.d, z16.d -; CHECK-NEXT: ld1b { z16.b - z19.b }, pn8/z, [x0, x9] -; CHECK-NEXT: mov z4.d, z21.d -; CHECK-NEXT: mov z5.d, z20.d -; CHECK-NEXT: mov z8.d, z25.d -; CHECK-NEXT: mov z9.d, z24.d -; CHECK-NEXT: mov z3.d, z16.d -; CHECK-NEXT: mov z7.d, z17.d -; CHECK-NEXT: mov z11.d, z18.d -; CHECK-NEXT: mov z16.d, z29.d -; CHECK-NEXT: mov z17.d, z28.d -; CHECK-NEXT: mov z18.d, z14.d -; CHECK-NEXT: svdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] -; CHECK-NEXT: svdot za.s[w8, 0, vgx4], { z4.b - z7.b }, z0.b[0] -; CHECK-NEXT: svdot za.s[w8, 0, vgx4], { z8.b - z11.b }, z0.b[0] -; CHECK-NEXT: svdot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0] -; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr d14, [sp], #48 // 8-byte Folded Reload -; CHECK-NEXT: ret -entry: - %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() - %1 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) - %2 = extractvalue { , , , } %1, 0 - %3 = extractvalue { , , , } 
%1, 1 - %4 = extractvalue { , , , } %1, 2 - %5 = extractvalue { , , , } %1, 3 - %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride - %6 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) - %7 = extractvalue { , , , } %6, 0 - %8 = extractvalue { , , , } %6, 1 - %9 = extractvalue { , , , } %6, 2 - %10 = extractvalue { , , , } %6, 3 - %mul3 = shl i64 %stride, 1 - %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3 - %11 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx4) - %12 = extractvalue { , , , } %11, 0 - %13 = extractvalue { , , , } %11, 1 - %14 = extractvalue { , , , } %11, 2 - %15 = extractvalue { , , , } %11, 3 - %mul5 = mul i64 %stride, 3 - %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5 - %16 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx6) - %17 = extractvalue { , , , } %16, 0 - %18 = extractvalue { , , , } %16, 1 - %19 = extractvalue { , , , } %16, 2 - %20 = extractvalue { , , , } %16, 3 - tail call void @llvm.aarch64.sme.svdot.lane.za32.vg1x4.nxv16i8(i32 0, %2, %7, %12, %17, undef, i32 0) - tail call void @llvm.aarch64.sme.svdot.lane.za32.vg1x4.nxv16i8(i32 0, %3, %8, %13, %18, undef, i32 0) - tail call void @llvm.aarch64.sme.svdot.lane.za32.vg1x4.nxv16i8(i32 0, %4, %9, %14, %19, undef, i32 0) - tail call void @llvm.aarch64.sme.svdot.lane.za32.vg1x4.nxv16i8(i32 0, %5, %10, %15, %20, undef, i32 0) - ret void -} ; == UVDOT == -define void @test_uvdot_lane_za32_vg1x2_nxv8i16(i32 %slice, %zn1, %zn2, %zm) #0 { +define void @test_uvdot_lane_za32_vg1x2_nxv8i16(i32 %slice, %zn1, %zn2, %zm) { ; CHECK-LABEL: test_uvdot_lane_za32_vg1x2_nxv8i16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: uvdot za.s[w8, 0, vgx2], { z0.h, z1.h }, 
z2.h[3] ; CHECK-NEXT: uvdot za.s[w8, 7, vgx2], { z0.h, z1.h }, z2.h[3] ; CHECK-NEXT: ret @@ -187,10 +107,14 @@ define void @test_uvdot_lane_za32_vg1x2_nxv8i16(i32 %slice, % ret void } -define void @test_uvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, %zn1, %zn2, %zn3, %zn4, %zm) #0 { +define void @test_uvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, %zn1, %zn2, %zn3, %zn4, %zm) { ; CHECK-LABEL: test_uvdot_lane_za32_vg1x4_nxv16i8: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: uvdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z4.b[3] ; CHECK-NEXT: uvdot za.s[w8, 7, vgx4], { z0.b - z3.b }, z4.b[3] ; CHECK-NEXT: ret @@ -200,10 +124,14 @@ define void @test_uvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, % ret void } -define void @test_uvdot_lane_za64_vg1x4_nxv8i16(i32 %slice, %zn1, %zn2, %zn3, %zn4, %zm) #1 { +define void @test_uvdot_lane_za64_vg1x4_nxv8i16(i32 %slice, %zn1, %zn2, %zn3, %zn4, %zm) { ; CHECK-LABEL: test_uvdot_lane_za64_vg1x4_nxv8i16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: uvdot za.d[w8, 0, vgx4], { z0.h - z3.h }, z4.h[1] ; CHECK-NEXT: uvdot za.d[w8, 7, vgx4], { z0.h - z3.h }, z4.h[1] ; CHECK-NEXT: ret @@ -213,108 +141,17 @@ define void @test_uvdot_lane_za64_vg1x4_nxv8i16(i32 %slice, % ret void } -define void @uvdot_form_2x_tuple(ptr %ptr, i64 %stride) #0 { -; CHECK-LABEL: uvdot_form_2x_tuple: -; CHECK: // %bb.0: // %entry 
-; CHECK-NEXT: ptrue pn8.b -; CHECK-NEXT: add x9, x0, x1 -; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: ld1h { z16.h, z24.h }, pn8/z, [x0] -; CHECK-NEXT: ld1h { z0.h, z1.h }, pn8/z, [x9] -; CHECK-NEXT: mov z2.d, z16.d -; CHECK-NEXT: mov z3.d, z0.d -; CHECK-NEXT: uvdot za.s[w8, 0, vgx2], { z2.h, z3.h }, z0.h[0] -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: uvdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0] -; CHECK-NEXT: ret -entry: - %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() - %1 = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") %0, ptr %ptr) - %2 = extractvalue { , } %1, 0 - %3 = extractvalue { , } %1, 1 - %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride - %4 = tail call { , } @llvm.aarch64.sve.ld1.pn.x2.nxv8i16(target("aarch64.svcount") %0, ptr %arrayidx2) - %5 = extractvalue { , } %4, 0 - %6 = extractvalue { , } %4, 1 - tail call void @llvm.aarch64.sme.uvdot.lane.za32.vg1x2.nxv8i16(i32 0, %2, %5, undef, i32 0) - tail call void @llvm.aarch64.sme.uvdot.lane.za32.vg1x2.nxv8i16(i32 0, %3, %6, undef, i32 0) - ret void -} - -define void @uvdot_form_4x_tuple(ptr %ptr, i64 %stride) #0 { -; CHECK-LABEL: uvdot_form_4x_tuple: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str d14, [sp, #-48]! 
// 8-byte Folded Spill -; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: ptrue pn8.b -; CHECK-NEXT: lsl x9, x1, #1 -; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: ld1b { z17.b, z21.b, z25.b, z29.b }, pn8/z, [x0] -; CHECK-NEXT: ld1b { z16.b, z20.b, z24.b, z28.b }, pn8/z, [x0, x1] -; CHECK-NEXT: ld1b { z2.b, z6.b, z10.b, z14.b }, pn8/z, [x0, x9] -; CHECK-NEXT: add x9, x9, x1 -; CHECK-NEXT: mov z0.d, z17.d -; CHECK-NEXT: mov z1.d, z16.d -; CHECK-NEXT: ld1b { z16.b - z19.b }, pn8/z, [x0, x9] -; CHECK-NEXT: mov z4.d, z21.d -; CHECK-NEXT: mov z5.d, z20.d -; CHECK-NEXT: mov z8.d, z25.d -; CHECK-NEXT: mov z9.d, z24.d -; CHECK-NEXT: mov z3.d, z16.d -; CHECK-NEXT: mov z7.d, z17.d -; CHECK-NEXT: mov z11.d, z18.d -; CHECK-NEXT: mov z16.d, z29.d -; CHECK-NEXT: mov z17.d, z28.d -; CHECK-NEXT: mov z18.d, z14.d -; CHECK-NEXT: uvdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] -; CHECK-NEXT: uvdot za.s[w8, 0, vgx4], { z4.b - z7.b }, z0.b[0] -; CHECK-NEXT: uvdot za.s[w8, 0, vgx4], { z8.b - z11.b }, z0.b[0] -; CHECK-NEXT: uvdot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0] -; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr d14, [sp], #48 // 8-byte Folded Reload -; CHECK-NEXT: ret -entry: - %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() - %1 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) - %2 = extractvalue { , , , } %1, 0 - %3 = extractvalue { , , , } %1, 1 - %4 = extractvalue { , , , } %1, 2 - %5 = extractvalue { , , , } %1, 3 - %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride - %6 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) - %7 = extractvalue { , , , } %6, 0 - %8 = extractvalue { , , , } %6, 1 - %9 = extractvalue { , , , } %6, 2 - %10 = extractvalue { , , , } %6, 
3 - %mul3 = shl i64 %stride, 1 - %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3 - %11 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx4) - %12 = extractvalue { , , , } %11, 0 - %13 = extractvalue { , , , } %11, 1 - %14 = extractvalue { , , , } %11, 2 - %15 = extractvalue { , , , } %11, 3 - %mul5 = mul i64 %stride, 3 - %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5 - %16 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx6) - %17 = extractvalue { , , , } %16, 0 - %18 = extractvalue { , , , } %16, 1 - %19 = extractvalue { , , , } %16, 2 - %20 = extractvalue { , , , } %16, 3 - tail call void @llvm.aarch64.sme.uvdot.lane.za32.vg1x4.nxv16i8(i32 0, %2, %7, %12, %17, undef, i32 0) - tail call void @llvm.aarch64.sme.uvdot.lane.za32.vg1x4.nxv16i8(i32 0, %3, %8, %13, %18, undef, i32 0) - tail call void @llvm.aarch64.sme.uvdot.lane.za32.vg1x4.nxv16i8(i32 0, %4, %9, %14, %19, undef, i32 0) - tail call void @llvm.aarch64.sme.uvdot.lane.za32.vg1x4.nxv16i8(i32 0, %5, %10, %15, %20, undef, i32 0) - ret void -} ; == SUVDOT == -define void @test_suvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, %zn1, %zn2, %zn3, %zn4, %zm) #0 { +define void @test_suvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, %zn1, %zn2, %zn3, %zn4, %zm) { ; CHECK-LABEL: test_suvdot_lane_za32_vg1x4_nxv16i8: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: suvdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z4.b[3] ; CHECK-NEXT: suvdot za.s[w8, 7, vgx4], { z0.b - z3.b }, z4.b[3] ; CHECK-NEXT: ret @@ -324,80 +161,17 @@ define void @test_suvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, ret void 
} -define void @suvdot_form_4x_tuple(ptr %ptr, i64 %stride) #0 { -; CHECK-LABEL: suvdot_form_4x_tuple: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str d14, [sp, #-48]! // 8-byte Folded Spill -; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: ptrue pn8.b -; CHECK-NEXT: lsl x9, x1, #1 -; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: ld1b { z17.b, z21.b, z25.b, z29.b }, pn8/z, [x0] -; CHECK-NEXT: ld1b { z16.b, z20.b, z24.b, z28.b }, pn8/z, [x0, x1] -; CHECK-NEXT: ld1b { z2.b, z6.b, z10.b, z14.b }, pn8/z, [x0, x9] -; CHECK-NEXT: add x9, x9, x1 -; CHECK-NEXT: mov z0.d, z17.d -; CHECK-NEXT: mov z1.d, z16.d -; CHECK-NEXT: ld1b { z16.b - z19.b }, pn8/z, [x0, x9] -; CHECK-NEXT: mov z4.d, z21.d -; CHECK-NEXT: mov z5.d, z20.d -; CHECK-NEXT: mov z8.d, z25.d -; CHECK-NEXT: mov z9.d, z24.d -; CHECK-NEXT: mov z3.d, z16.d -; CHECK-NEXT: mov z7.d, z17.d -; CHECK-NEXT: mov z11.d, z18.d -; CHECK-NEXT: mov z16.d, z29.d -; CHECK-NEXT: mov z17.d, z28.d -; CHECK-NEXT: mov z18.d, z14.d -; CHECK-NEXT: suvdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] -; CHECK-NEXT: suvdot za.s[w8, 0, vgx4], { z4.b - z7.b }, z0.b[0] -; CHECK-NEXT: suvdot za.s[w8, 0, vgx4], { z8.b - z11.b }, z0.b[0] -; CHECK-NEXT: suvdot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0] -; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr d14, [sp], #48 // 8-byte Folded Reload -; CHECK-NEXT: ret -entry: - %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() - %1 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) - %2 = extractvalue { , , , } %1, 0 - %3 = extractvalue { , , , } %1, 1 - %4 = extractvalue { , , , } %1, 2 - %5 = extractvalue { , , , } %1, 3 - %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride - %6 = tail call { , , , } 
@llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) - %7 = extractvalue { , , , } %6, 0 - %8 = extractvalue { , , , } %6, 1 - %9 = extractvalue { , , , } %6, 2 - %10 = extractvalue { , , , } %6, 3 - %mul3 = shl i64 %stride, 1 - %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3 - %11 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx4) - %12 = extractvalue { , , , } %11, 0 - %13 = extractvalue { , , , } %11, 1 - %14 = extractvalue { , , , } %11, 2 - %15 = extractvalue { , , , } %11, 3 - %mul5 = mul i64 %stride, 3 - %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5 - %16 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx6) - %17 = extractvalue { , , , } %16, 0 - %18 = extractvalue { , , , } %16, 1 - %19 = extractvalue { , , , } %16, 2 - %20 = extractvalue { , , , } %16, 3 - tail call void @llvm.aarch64.sme.suvdot.lane.za32.vg1x4.nxv16i8(i32 0, %2, %7, %12, %17, undef, i32 0) - tail call void @llvm.aarch64.sme.suvdot.lane.za32.vg1x4.nxv16i8(i32 0, %3, %8, %13, %18, undef, i32 0) - tail call void @llvm.aarch64.sme.suvdot.lane.za32.vg1x4.nxv16i8(i32 0, %4, %9, %14, %19, undef, i32 0) - tail call void @llvm.aarch64.sme.suvdot.lane.za32.vg1x4.nxv16i8(i32 0, %5, %10, %15, %20, undef, i32 0) - ret void -} ; == USVDOT == -define void @test_usvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, %zn1, %zn2, %zn3, %zn4, %zm) #0 { +define void @test_usvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, %zn1, %zn2, %zn3, %zn4, %zm) { ; CHECK-LABEL: test_usvdot_lane_za32_vg1x4_nxv16i8: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; 
CHECK-NEXT: usvdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z4.b[3] ; CHECK-NEXT: usvdot za.s[w8, 7, vgx4], { z0.b - z3.b }, z4.b[3] ; CHECK-NEXT: ret @@ -407,76 +181,6 @@ define void @test_usvdot_lane_za32_vg1x4_nxv16i8(i32 %slice, ret void } -define void @usvdot_form_4x_tuple(ptr %ptr, i64 %stride) #0 { -; CHECK-LABEL: usvdot_form_4x_tuple: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str d14, [sp, #-48]! // 8-byte Folded Spill -; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: ptrue pn8.b -; CHECK-NEXT: lsl x9, x1, #1 -; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: ld1b { z17.b, z21.b, z25.b, z29.b }, pn8/z, [x0] -; CHECK-NEXT: ld1b { z16.b, z20.b, z24.b, z28.b }, pn8/z, [x0, x1] -; CHECK-NEXT: ld1b { z2.b, z6.b, z10.b, z14.b }, pn8/z, [x0, x9] -; CHECK-NEXT: add x9, x9, x1 -; CHECK-NEXT: mov z0.d, z17.d -; CHECK-NEXT: mov z1.d, z16.d -; CHECK-NEXT: ld1b { z16.b - z19.b }, pn8/z, [x0, x9] -; CHECK-NEXT: mov z4.d, z21.d -; CHECK-NEXT: mov z5.d, z20.d -; CHECK-NEXT: mov z8.d, z25.d -; CHECK-NEXT: mov z9.d, z24.d -; CHECK-NEXT: mov z3.d, z16.d -; CHECK-NEXT: mov z7.d, z17.d -; CHECK-NEXT: mov z11.d, z18.d -; CHECK-NEXT: mov z16.d, z29.d -; CHECK-NEXT: mov z17.d, z28.d -; CHECK-NEXT: mov z18.d, z14.d -; CHECK-NEXT: usvdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] -; CHECK-NEXT: usvdot za.s[w8, 0, vgx4], { z4.b - z7.b }, z0.b[0] -; CHECK-NEXT: usvdot za.s[w8, 0, vgx4], { z8.b - z11.b }, z0.b[0] -; CHECK-NEXT: usvdot za.s[w8, 0, vgx4], { z16.b - z19.b }, z0.b[0] -; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr d14, [sp], #48 // 8-byte Folded Reload -; CHECK-NEXT: ret -entry: - %0 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8() - %1 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %ptr) - %2 = extractvalue { , , , } %1, 0 - %3 = 
extractvalue { , , , } %1, 1 - %4 = extractvalue { , , , } %1, 2 - %5 = extractvalue { , , , } %1, 3 - %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 %stride - %6 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx2) - %7 = extractvalue { , , , } %6, 0 - %8 = extractvalue { , , , } %6, 1 - %9 = extractvalue { , , , } %6, 2 - %10 = extractvalue { , , , } %6, 3 - %mul3 = shl i64 %stride, 1 - %arrayidx4 = getelementptr inbounds i8, ptr %ptr, i64 %mul3 - %11 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx4) - %12 = extractvalue { , , , } %11, 0 - %13 = extractvalue { , , , } %11, 1 - %14 = extractvalue { , , , } %11, 2 - %15 = extractvalue { , , , } %11, 3 - %mul5 = mul i64 %stride, 3 - %arrayidx6 = getelementptr inbounds i8, ptr %ptr, i64 %mul5 - %16 = tail call { , , , } @llvm.aarch64.sve.ld1.pn.x4.nxv16i8(target("aarch64.svcount") %0, ptr %arrayidx6) - %17 = extractvalue { , , , } %16, 0 - %18 = extractvalue { , , , } %16, 1 - %19 = extractvalue { , , , } %16, 2 - %20 = extractvalue { , , , } %16, 3 - tail call void @llvm.aarch64.sme.usvdot.lane.za32.vg1x4.nxv16i8(i32 0, %2, %7, %12, %17, undef, i32 0) - tail call void @llvm.aarch64.sme.usvdot.lane.za32.vg1x4.nxv16i8(i32 0, %3, %8, %13, %18, undef, i32 0) - tail call void @llvm.aarch64.sme.usvdot.lane.za32.vg1x4.nxv16i8(i32 0, %4, %9, %14, %19, undef, i32 0) - tail call void @llvm.aarch64.sme.usvdot.lane.za32.vg1x4.nxv16i8(i32 0, %5, %10, %15, %20, undef, i32 0) - ret void -} - -attributes #0 = { nounwind "target-features"="+sme2" "aarch64_pstate_sm_enabled" } -attributes #1 = { nounwind "target-features"="+sme2,+sme-i16i64" "aarch64_pstate_sm_enabled" } ; == FVDOT == declare void @llvm.aarch64.sme.fvdot.lane.za32.vg1x2.nxv8f16(i32, , , , i32) diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll index 6895d1854e87d8..fb169491b0c909 
100644 --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll @@ -13,7 +13,7 @@ define void @hang_when_merging_stores_after_legalisation(ptr %a, <2 x i32> %b) v ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: mov z0.s, s0 ; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: ext z1.b, z1.b, z0.b, #16 +; CHECK-NEXT: ext z1.b, z1.b, z1.b, #16 ; CHECK-NEXT: st2 { v0.4s, v1.4s }, [x0] ; CHECK-NEXT: ret %splat = shufflevector <2 x i32> %b, <2 x i32> undef, <8 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll index 0ecf1b1a988343..8882fc9290386d 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll @@ -15,6 +15,8 @@ define void @st2b_i8_valid_imm( %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2b_i8_valid_imm: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, #2, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 2, i64 0 @@ -28,7 +30,9 @@ define void @st2b_i8_valid_imm( %v0, %v1, < define void @st2b_i8_invalid_imm_not_multiple_of_2( %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2b_i8_invalid_imm_not_multiple_of_2: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: rdvl x8, #3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 3, i64 0 @@ -42,7 +46,9 @@ define void @st2b_i8_invalid_imm_not_multiple_of_2( %v0, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2b_i8_invalid_imm_out_of_lower_bound: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 
killed $z0_z1 def $z0_z1 ; CHECK-NEXT: rdvl x8, #-18 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 -18, i64 0 @@ -56,7 +62,9 @@ define void @st2b_i8_invalid_imm_out_of_lower_bound( %v0, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2b_i8_invalid_imm_out_of_upper_bound: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: rdvl x8, #16 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 16, i64 0 @@ -70,6 +78,8 @@ define void @st2b_i8_invalid_imm_out_of_upper_bound( %v0, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2b_i8_valid_imm_lower_bound: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, #-16, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 -16, i64 0 @@ -83,6 +93,8 @@ define void @st2b_i8_valid_imm_lower_bound( %v0, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2b_i8_valid_imm_upper_bound: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, #14, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 14, i64 0 @@ -100,6 +112,8 @@ define void @st2b_i8_valid_imm_upper_bound( %v0, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2h_i16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2h { z0.h, z1.h }, p0, [x0, #2, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 2, i64 0 @@ -113,6 +127,8 @@ define void @st2h_i16( %v0, 
%v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2h_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2h { z0.h, z1.h }, p0, [x0, #2, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 2, i64 0 @@ -130,6 +146,8 @@ define void @st2h_f16( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2w_i32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2w { z0.s, z1.s }, p0, [x0, #4, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 4, i64 0 @@ -143,6 +161,8 @@ define void @st2w_i32( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2w_f32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2w { z0.s, z1.s }, p0, [x0, #6, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 6, i64 0 @@ -160,6 +180,8 @@ define void @st2w_f32( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2d_i64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2d { z0.d, z1.d }, p0, [x0, #8, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 8, i64 0 @@ -173,6 +195,8 @@ define void @st2d_i64( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2d_f64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2d { z0.d, z1.d }, p0, [x0, #10, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 10, i64 0 @@ -190,6 +214,9 @@ define void @st2d_f64( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: 
st3b_i8_valid_imm: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, #3, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 3, i64 0 @@ -204,7 +231,10 @@ define void @st3b_i8_valid_imm( %v0, %v1, < define void @st3b_i8_invalid_imm_not_multiple_of_3_01( %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3b_i8_invalid_imm_not_multiple_of_3_01: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: rdvl x8, #4 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 4, i64 0 @@ -219,7 +249,10 @@ define void @st3b_i8_invalid_imm_not_multiple_of_3_01( %v0, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3b_i8_invalid_imm_not_multiple_of_3_02: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: rdvl x8, #5 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 5, i64 0 @@ -234,7 +267,10 @@ define void @st3b_i8_invalid_imm_not_multiple_of_3_02( %v0, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3b_i8_invalid_imm_out_of_lower_bound: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: rdvl x8, #-27 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3b { z0.b - 
z2.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 -27, i64 0 @@ -249,7 +285,10 @@ define void @st3b_i8_invalid_imm_out_of_lower_bound( %v0, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3b_i8_invalid_imm_out_of_upper_bound: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: rdvl x8, #24 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 24, i64 0 @@ -264,6 +303,9 @@ define void @st3b_i8_invalid_imm_out_of_upper_bound( %v0, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3b_i8_valid_imm_lower_bound: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, #-24, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 -24, i64 0 @@ -278,6 +320,9 @@ define void @st3b_i8_valid_imm_lower_bound( %v0, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3b_i8_valid_imm_upper_bound: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, #21, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 21, i64 0 @@ -296,6 +341,9 @@ define void @st3b_i8_valid_imm_upper_bound( %v0, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3h_i16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // 
kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0, #6, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 6, i64 0 @@ -310,6 +358,9 @@ define void @st3h_i16( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3h_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0, #9, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 9, i64 0 @@ -328,6 +379,9 @@ define void @st3h_f16( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3w_i32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0, #12, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 12, i64 0 @@ -342,6 +396,9 @@ define void @st3w_i32( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3w_f32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0, #15, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 15, i64 0 @@ -360,6 +417,9 @@ define void @st3w_f32( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3d_i64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3d { z0.d - 
z2.d }, p0, [x0, #18, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 18, i64 0 @@ -374,6 +434,9 @@ define void @st3d_i64( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3d_f64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0, #-3, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 -3, i64 0 @@ -392,6 +455,10 @@ define void @st3d_f64( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4b_i8_valid_imm: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, #4, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 4, i64 0 @@ -407,7 +474,11 @@ define void @st4b_i8_valid_imm( %v0, %v1, < define void @st4b_i8_invalid_imm_not_multiple_of_4_01( %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_01: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: rdvl x8, #5 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 5, i64 0 @@ -423,7 +494,11 @@ define void @st4b_i8_invalid_imm_not_multiple_of_4_01( %v0, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: 
st4b_i8_invalid_imm_not_multiple_of_4_02: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: rdvl x8, #6 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 6, i64 0 @@ -439,7 +514,11 @@ define void @st4b_i8_invalid_imm_not_multiple_of_4_02( %v0, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_03: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: rdvl x8, #7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 7, i64 0 @@ -457,8 +536,12 @@ define void @st4b_i8_invalid_imm_out_of_lower_bound( %v0, %v0, %v0, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4b_i8_valid_imm_lower_bound: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, #-32, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 -32, i64 0 @@ -516,6 +607,10 @@ define void @st4b_i8_valid_imm_lower_bound( %v0, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4b_i8_valid_imm_upper_bound: ; CHECK: // %bb.0: +; 
CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, #28, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 28, i64 0 @@ -535,6 +630,10 @@ define void @st4b_i8_valid_imm_upper_bound( %v0, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4h_i16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0, #8, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 8, i64 0 @@ -550,6 +649,10 @@ define void @st4h_i16( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4h_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0, #12, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 12, i64 0 @@ -569,6 +672,10 @@ define void @st4h_f16( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4w_i32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 
+; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0, #16, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 16, i64 0 @@ -584,6 +691,10 @@ define void @st4w_i32( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4w_f32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0, #20, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 20, i64 0 @@ -603,6 +714,10 @@ define void @st4w_f32( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4d_i64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0, #24, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 24, i64 0 @@ -618,6 +733,10 @@ define void @st4d_i64( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4d_f64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0, #28, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 28, i64 0 diff --git 
a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll index d7b7e59548003b..d6ee787a23f877 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll @@ -9,6 +9,8 @@ define void @st2b_i8( %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2b_i8: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x1] ; CHECK-NEXT: ret %1 = getelementptr i8, ptr %addr, i64 %offset @@ -26,6 +28,8 @@ define void @st2b_i8( %v0, %v1, %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2h_i16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2h { z0.h, z1.h }, p0, [x0, x1, lsl #1] ; CHECK-NEXT: ret %1 = getelementptr i16, ptr %addr, i64 %offset @@ -39,6 +43,8 @@ define void @st2h_i16( %v0, %v1, %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2h_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2h { z0.h, z1.h }, p0, [x0, x1, lsl #1] ; CHECK-NEXT: ret %1 = getelementptr half, ptr %addr, i64 %offset @@ -56,6 +62,8 @@ define void @st2h_f16( %v0, %v1, %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2w_i32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2w { z0.s, z1.s }, p0, [x0, x1, lsl #2] ; CHECK-NEXT: ret %1 = getelementptr i32, ptr %addr, i64 %offset @@ -69,6 +77,8 @@ define void @st2w_i32( %v0, %v1, %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2w_f32: 
; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2w { z0.s, z1.s }, p0, [x0, x1, lsl #2] ; CHECK-NEXT: ret %1 = getelementptr float, ptr %addr, i64 %offset @@ -86,6 +96,8 @@ define void @st2w_f32( %v0, %v1, %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2d_i64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2d { z0.d, z1.d }, p0, [x0, x1, lsl #3] ; CHECK-NEXT: ret %1 = getelementptr i64, ptr %addr, i64 %offset @@ -99,6 +111,8 @@ define void @st2d_i64( %v0, %v1, %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2d_f64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2d { z0.d, z1.d }, p0, [x0, x1, lsl #3] ; CHECK-NEXT: ret %1 = getelementptr double, ptr %addr, i64 %offset @@ -116,6 +130,9 @@ define void @st2d_f64( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3b_i8: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, x1] ; CHECK-NEXT: ret %1 = getelementptr i8, ptr %addr, i64 %offset @@ -134,6 +151,9 @@ define void @st3b_i8( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3h_i16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0, x1, lsl #1] ; CHECK-NEXT: ret %1 = 
getelementptr i16, ptr %addr, i64 %offset @@ -148,6 +168,9 @@ define void @st3h_i16( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3h_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0, x1, lsl #1] ; CHECK-NEXT: ret %1 = getelementptr half, ptr %addr, i64 %offset @@ -166,6 +189,9 @@ define void @st3h_f16( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3w_i32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0, x1, lsl #2] ; CHECK-NEXT: ret %1 = getelementptr i32, ptr %addr, i64 %offset @@ -180,6 +206,9 @@ define void @st3w_i32( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3w_f32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0, x1, lsl #2] ; CHECK-NEXT: ret %1 = getelementptr float, ptr %addr, i64 %offset @@ -198,6 +227,9 @@ define void @st3w_f32( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3d_i64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0, x1, lsl #3] ; CHECK-NEXT: ret %1 = getelementptr i64, ptr %addr, i64 
%offset @@ -212,6 +244,9 @@ define void @st3d_i64( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3d_f64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0, x1, lsl #3] ; CHECK-NEXT: ret %1 = getelementptr double, ptr %addr, i64 %offset @@ -230,6 +265,10 @@ define void @st3d_f64( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4b_i8: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, x1] ; CHECK-NEXT: ret %1 = getelementptr i8, ptr %addr, i64 %offset @@ -249,6 +288,10 @@ define void @st4b_i8( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4h_i16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0, x1, lsl #1] ; CHECK-NEXT: ret %1 = getelementptr i16, ptr %addr, i64 %offset @@ -264,6 +307,10 @@ define void @st4h_i16( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4h_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; 
CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0, x1, lsl #1] ; CHECK-NEXT: ret %1 = getelementptr half, ptr %addr, i64 %offset @@ -283,6 +330,10 @@ define void @st4h_f16( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4w_i32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0, x1, lsl #2] ; CHECK-NEXT: ret %1 = getelementptr i32, ptr %addr, i64 %offset @@ -298,6 +349,10 @@ define void @st4w_i32( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4w_f32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0, x1, lsl #2] ; CHECK-NEXT: ret %1 = getelementptr float, ptr %addr, i64 %offset @@ -317,6 +372,10 @@ define void @st4w_f32( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4d_i64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0, x1, lsl #3] ; 
CHECK-NEXT: ret %1 = getelementptr i64, ptr %addr, i64 %offset @@ -332,6 +391,10 @@ define void @st4d_i64( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4d_f64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0, x1, lsl #3] ; CHECK-NEXT: ret %1 = getelementptr double, ptr %addr, i64 %offset diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll index e03d4379d0ee23..d07fd8785121b3 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll @@ -9,6 +9,8 @@ define void @st2b_i8( %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2b_i8: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st2.nxv16i8( %v0, @@ -25,6 +27,8 @@ define void @st2b_i8( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2h_i16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2h { z0.h, z1.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st2.nxv8i16( %v0, @@ -37,6 +41,8 @@ define void @st2h_i16( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2h_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2h { z0.h, z1.h }, p0, [x0] ; CHECK-NEXT: ret call void 
@llvm.aarch64.sve.st2.nxv8f16( %v0, @@ -49,6 +55,8 @@ define void @st2h_f16( %v0, %v1, %v0, %v1, %pred, ptr %addr) #0 { ; CHECK-LABEL: st2h_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2h { z0.h, z1.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st2.nxv8bf16( %v0, @@ -65,6 +73,8 @@ define void @st2h_bf16( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2w_i32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2w { z0.s, z1.s }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st2.nxv4i32( %v0, @@ -77,6 +87,8 @@ define void @st2w_i32( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2w_f32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2w { z0.s, z1.s }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st2.nxv4f32( %v0, @@ -93,6 +105,8 @@ define void @st2w_f32( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2d_i64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2d { z0.d, z1.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st2.nxv2i64( %v0, @@ -105,6 +119,8 @@ define void @st2d_i64( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2d_f64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2d { z0.d, z1.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st2.nxv2f64( %v0, @@ -117,6 +133,8 @@ define void @st2d_f64( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2d_ptr: ; 
CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2d { z0.d, z1.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st2.nxv2p0( %v0, @@ -133,6 +151,9 @@ define void @st2d_ptr( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3b_i8: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv16i8( %v0, @@ -150,6 +171,9 @@ define void @st3b_i8( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3h_i16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv8i16( %v0, @@ -163,6 +187,9 @@ define void @st3h_i16( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3h_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv8f16( %v0, @@ -176,6 +203,9 @@ define void @st3h_f16( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) #0 { ; CHECK-LABEL: st3h_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def 
$z0_z1_z2 ; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv8bf16( %v0, @@ -193,6 +223,9 @@ define void @st3h_bf16( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3w_i32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv4i32( %v0, @@ -206,6 +239,9 @@ define void @st3w_i32( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3w_f32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv4f32( %v0, @@ -223,6 +259,9 @@ define void @st3w_f32( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3d_i64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv2i64( %v0, @@ -236,6 +275,9 @@ define void @st3d_i64( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3d_f64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv2f64( %v0, @@ -249,6 +291,9 @@ define 
void @st3d_f64( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3d_ptr: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv2p0( %v0, @@ -266,6 +311,10 @@ define void @st3d_ptr( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4b_i8: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv16i8( %v0, @@ -284,6 +333,10 @@ define void @st4b_i8( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4h_i16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv8i16( %v0, @@ -298,6 +351,10 @@ define void @st4h_i16( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4h_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed 
$z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv8f16( %v0, @@ -312,6 +369,10 @@ define void @st4h_f16( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) #0 { ; CHECK-LABEL: st4h_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv8bf16( %v0, @@ -330,6 +391,10 @@ define void @st4h_bf16( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4w_i32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv4i32( %v0, @@ -344,6 +409,10 @@ define void @st4w_i32( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4w_f32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv4f32( %v0, @@ -362,6 +431,10 @@ define void @st4w_f32( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4d_i64: ; CHECK: // %bb.0: +; CHECK-NEXT: 
// kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv2i64( %v0, @@ -376,6 +449,10 @@ define void @st4d_i64( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4d_f64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv2f64( %v0, @@ -390,6 +467,10 @@ define void @st4d_f64( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4d_ptr: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv2p0( %v0, diff --git a/llvm/test/CodeGen/AArch64/sve-merging-stores.ll b/llvm/test/CodeGen/AArch64/sve-merging-stores.ll index f6330c613de842..47758893ce7117 100644 --- a/llvm/test/CodeGen/AArch64/sve-merging-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-merging-stores.ll @@ -1,4 +1,3 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s 
%complex = type { { double, double } } @@ -11,13 +10,11 @@ declare double @llvm.aarch64.sve.faddv.nxv2f64(, %pred, ptr %inptr) { ; CHECK-LABEL: foo1: -; CHECK: // %bb.0: -; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [x1] -; CHECK-NEXT: faddv d0, p0, z0.d -; CHECK-NEXT: faddv d1, p0, z1.d -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: ret +; CHECK: ld2d { z0.d, z1.d }, p0/z, [x1] +; CHECK-NEXT: faddv d2, p0, z0.d +; CHECK-NEXT: faddv d0, p0, z1.d +; CHECK-NEXT: mov v2.d[1], v0.d[0] +; CHECK-NEXT: str q2, [x0] %imagp = getelementptr inbounds %complex, ptr %outval, i64 0, i32 0, i32 1 %1 = call { , } @llvm.aarch64.sve.ld2.sret.nxv2f64( %pred, ptr nonnull %inptr) %2 = extractvalue { , } %1, 0 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll index 4e52258e8b5df7..66d544d0acbf56 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll @@ -21,9 +21,9 @@ define void @alloc_v4i8(ptr %st_ptr) nounwind { ; CHECK-NEXT: ld2b { z0.b, z1.b }, p0/z, [x20] ; CHECK-NEXT: ptrue p0.s, vl2 ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: mov z1.b, z0.b[1] +; CHECK-NEXT: mov z2.b, z0.b[1] ; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: fmov w9, s1 +; CHECK-NEXT: fmov w9, s2 ; CHECK-NEXT: stp w8, w9, [sp, #8] ; CHECK-NEXT: ldr d0, [sp, #8] ; CHECK-NEXT: st1b { z0.s }, p0, [x19] @@ -198,9 +198,9 @@ define void @alloc_v8f64(ptr %st_ptr) nounwind { ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: mov x8, #4 // =0x4 ; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [x20] -; CHECK-NEXT: ld2d { z1.d, z2.d }, p0/z, [x20, x8, lsl #3] +; CHECK-NEXT: ld2d { z2.d, z3.d }, p0/z, [x20, x8, lsl #3] ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload -; CHECK-NEXT: stp q0, q1, [x19] +; CHECK-NEXT: stp q0, q2, [x19] ; CHECK-NEXT: ldp x20, x19, [sp, #80] // 
16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll index dd27097d8bdf75..b66e6d90135730 100644 --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll @@ -38,6 +38,8 @@ define void @interleave_store_without_splat(ptr %a, <4 x i32> %v1, <4 x i32> %v2 ; CHECK-LABEL: interleave_store_without_splat: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2w { z0.s, z1.s }, p0, [x0] ; CHECK-NEXT: ret ; @@ -73,12 +75,13 @@ define void @interleave_store_legalization(ptr %a, <8 x i32> %v1, <8 x i32> %v2) ; CHECK-LABEL: interleave_store_legalization: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z5.d, z2.d -; CHECK-NEXT: mov z4.d, z0.d -; CHECK-NEXT: mov x8, #8 // =0x8 -; CHECK-NEXT: mov z2.d, z3.d +; CHECK-NEXT: // kill: def $q3 killed $q3 def $z2_z3 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: mov x8, #8 // =0x8 +; CHECK-NEXT: mov z4.d, z0.d +; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: st2w { z4.s, z5.s }, p0, [x0] -; CHECK-NEXT: st2w { z1.s, z2.s }, p0, [x0, x8, lsl #2] +; CHECK-NEXT: st2w { z2.s, z3.s }, p0, [x0, x8, lsl #2] ; CHECK-NEXT: ret ; ; NONEON-NOSVE-LABEL: interleave_store_legalization: diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll index b200eb3f23bf2a..9fd1eb616c28c9 100644 --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll @@ -8,8 +8,9 @@ define @tbl2_b( %a, %unused, %b, %c) { ; CHECK-LABEL: tbl2_b: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: tbl z0.b, { z0.b, z1.b }, z3.b +; CHECK-NEXT: // kill: def $z2 
killed $z2 def $z1_z2 +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: tbl z0.b, { z1.b, z2.b }, z3.b ; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.tbl2.nxv16i8( %a, %b, @@ -20,8 +21,9 @@ define @tbl2_b( %a, %unu define @tbl2_h( %a, %unused, %b, %c) { ; CHECK-LABEL: tbl2_h: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: tbl z0.h, { z0.h, z1.h }, z3.h +; CHECK-NEXT: // kill: def $z2 killed $z2 def $z1_z2 +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: tbl z0.h, { z1.h, z2.h }, z3.h ; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.tbl2.nxv8i16( %a, %b, @@ -32,8 +34,9 @@ define @tbl2_h( %a, %unu define @tbl2_s( %a, %unused, %b, %c) { ; CHECK-LABEL: tbl2_s: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: tbl z0.s, { z0.s, z1.s }, z3.s +; CHECK-NEXT: // kill: def $z2 killed $z2 def $z1_z2 +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: tbl z0.s, { z1.s, z2.s }, z3.s ; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.tbl2.nxv4i32( %a, %b, @@ -44,8 +47,9 @@ define @tbl2_s( %a, %unu define @tbl2_d( %a, %unused, %b, %c) { ; CHECK-LABEL: tbl2_d: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: tbl z0.d, { z0.d, z1.d }, z3.d +; CHECK-NEXT: // kill: def $z2 killed $z2 def $z1_z2 +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: tbl z0.d, { z1.d, z2.d }, z3.d ; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.tbl2.nxv2i64( %a, %b, @@ -56,8 +60,9 @@ define @tbl2_d( %a, %unu define @tbl2_fh( %a, %unused, %b, %c) { ; CHECK-LABEL: tbl2_fh: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: tbl z0.h, { z0.h, z1.h }, z3.h +; CHECK-NEXT: // kill: def $z2 killed $z2 def $z1_z2 +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: tbl z0.h, { z1.h, z2.h }, z3.h ; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.tbl2.nxv8f16( %a, %b, @@ -68,8 +73,9 @@ define @tbl2_fh( %a, define @tbl2_bf16( %a, %unused, %b, %c) #0 { ; CHECK-LABEL: tbl2_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: tbl z0.h, { z0.h, z1.h }, z3.h +; CHECK-NEXT: // kill: 
def $z2 killed $z2 def $z1_z2 +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: tbl z0.h, { z1.h, z2.h }, z3.h ; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.tbl2.nxv8bf16( %a, %b, @@ -80,8 +86,9 @@ define @tbl2_bf16( %a, @tbl2_fs( %a, %unused, %b, %c) { ; CHECK-LABEL: tbl2_fs: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: tbl z0.s, { z0.s, z1.s }, z3.s +; CHECK-NEXT: // kill: def $z2 killed $z2 def $z1_z2 +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: tbl z0.s, { z1.s, z2.s }, z3.s ; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.tbl2.nxv4f32( %a, %b, @@ -92,8 +99,9 @@ define @tbl2_fs( %a, @tbl2_fd( %a, %unused, %b, %c) { ; CHECK-LABEL: tbl2_fd: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.d, z2.d -; CHECK-NEXT: tbl z0.d, { z0.d, z1.d }, z3.d +; CHECK-NEXT: // kill: def $z2 killed $z2 def $z1_z2 +; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: tbl z0.d, { z1.d, z2.d }, z3.d ; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.tbl2.nxv2f64( %a, %b, diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll index 5eeca5fec16f1b..7934f831a7e62f 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfclamp.ll @@ -15,6 +15,8 @@ declare @llvm.aarch64.sve.fclamp.nxv8bf16(, } @test_bfclamp_single_x2_f16( %a, %b, %c, %d){ ; CHECK-LABEL: test_bfclamp_single_x2_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfclamp { z0.h, z1.h }, z2.h, z3.h ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.bfclamp.single.x2.nxv8bf16( %a, %b, %c, %d) @@ -24,6 +26,10 @@ define { , } @test_bfclamp_single_x2_ define { , , , } @test_bfclamp_single_x4_f16( %a, %b, %c, %d, %e, %f){ ; CHECK-LABEL: test_bfclamp_single_x4_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // 
kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: bfclamp { z0.h - z3.h }, z4.h, z5.h ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.bfclamp.single.x4.nxv8bf16( %a, %b, %c, %d, %e, %f) diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll index 90a4927cfa5e99..df6b34a3280a7a 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-fclamp.ll @@ -33,6 +33,8 @@ define @test_fclamp_f64( %a, , } @test_fclamp_single_x2_f16( %a, %b, %c, %d) #1 { ; CHECK-LABEL: test_fclamp_single_x2_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fclamp { z0.h, z1.h }, z2.h, z3.h ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv8f16( %a, %b, %c, %d) @@ -42,6 +44,8 @@ define { , } @test_fclamp_single_x2_f16(< define { , } @test_fclamp_single_x2_f32( %a, %b, %c, %d) #1 { ; CHECK-LABEL: test_fclamp_single_x2_f32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fclamp { z0.s, z1.s }, z2.s, z3.s ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.fclamp.single.x2.nxv4f32( %a, %b, %c, %d) @@ -51,6 +55,8 @@ define { , } @test_fclamp_single_x2_f32 define { , } @test_fclamp_single_x2_f64( %a, %b, %c, %d) #1 { ; CHECK-LABEL: test_fclamp_single_x2_f64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fclamp { z0.d, z1.d }, z2.d, z3.d ; CHECK-NEXT: ret %res = call { , } 
@llvm.aarch64.sve.fclamp.single.x2.nxv2f64( %a, %b, %c, %d) @@ -61,6 +67,10 @@ define { , } @test_fclamp_single_x2_f define { , , , } @test_fclamp_single_x4_f16( %a, %b, %c, %d, %e, %f) #1 { ; CHECK-LABEL: test_fclamp_single_x4_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fclamp { z0.h - z3.h }, z4.h, z5.h ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv8f16( %a, %b, %c, %d, %e, %f) @@ -70,6 +80,10 @@ define { , , , , , , } @test_fclamp_single_x4_f32( %a, %b, %c, %d, %e, %f) #1 { ; CHECK-LABEL: test_fclamp_single_x4_f32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fclamp { z0.s - z3.s }, z4.s, z5.s ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv4f32( %a, %b, %c, %d, %e, %f) @@ -79,6 +93,10 @@ define { , , , , , , } @test_fclamp_single_x4_f64( %a, %b, %c, %d, %e, %f) #1 { ; CHECK-LABEL: test_fclamp_single_x4_f64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: fclamp { z0.d - z3.d }, z4.d, z5.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fclamp.single.x4.nxv2f64( %a, %b, %c, %d, %e, 
%f) diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll index 57e1a1e100db0b..8fe0694808c8e9 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-multivec-stores.ll @@ -7,6 +7,8 @@ define void @st2q_ss_i8( %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2q_ss_i8: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -19,6 +21,8 @@ define void @st2q_ss_i8( %v0, %v1, %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2q_ss_i16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -32,6 +36,8 @@ define void @st2q_ss_i16( %v0, %v1, %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2q_ss_i32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -45,6 +51,8 @@ define void @st2q_ss_i32( %v0, %v1, %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2q_ss_i64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -58,6 +66,8 @@ define void @st2q_ss_i64( %v0, %v1, %v0, %v1, %pred, ptr 
%addr, i64 %offset) { ; CHECK-LABEL: st2q_ss_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -71,6 +81,8 @@ define void @st2q_ss_f16( %v0, %v1, %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2q_ss_f32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -84,6 +96,8 @@ define void @st2q_ss_f32( %v0, %v1, %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2q_ss_f64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -97,6 +111,8 @@ define void @st2q_ss_f64( %v0, %v1, < define void @st2q_ss_bf16( %v0, %v1, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st2q_ss_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -111,6 +127,8 @@ define void @st2q_ss_bf16( %v0, %v1, define void @st2q_si_i8_off16( %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2q_si_i8_off16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #-16, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 -16 @@ -124,6 
+142,8 @@ define void @st2q_si_i8_off16( %v0, %v1, %v0, %v1, %pred, ptr %addr) { ; CHECK-LABEL: st2q_si_i8_off14: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 14 @@ -137,6 +157,8 @@ define void @st2q_si_i8_off14( %v0, %v1, %v0, %v1, %pred, ptr %base) { ; CHECK-LABEL: st2q_si_i16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl] ; CHECK-NEXT: ret %gep = getelementptr , ptr %base, i64 14 @@ -150,6 +172,8 @@ define void @st2q_si_i16( %v0, %v1, %v0, %v1, %pred, ptr %base) { ; CHECK-LABEL: st2q_si_i32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl] ; CHECK-NEXT: ret %gep = getelementptr , ptr %base, i64 14 @@ -163,6 +187,8 @@ define void @st2q_si_i32( %v0, %v1, %v0, %v1, %pred, ptr %base) { ; CHECK-LABEL: st2q_si_i64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl] ; CHECK-NEXT: ret %gep = getelementptr , ptr %base, i64 14 @@ -176,6 +202,8 @@ define void @st2q_si_i64( %v0, %v1, %v0, %v1, %pred, ptr %base) { ; CHECK-LABEL: st2q_si_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl] ; CHECK-NEXT: ret %gep = getelementptr , ptr %base, i64 14 @@ -189,6 +217,8 @@ define void @st2q_si_f16( %v0, %v1, %v0, %v1, 
%pred, ptr %base) { ; CHECK-LABEL: st2q_si_f32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl] ; CHECK-NEXT: ret %gep = getelementptr , ptr %base, i64 14 @@ -202,6 +232,8 @@ define void @st2q_si_f32( %v0, %v1, %v0, %v1, %pred, ptr %base) { ; CHECK-LABEL: st2q_si_f64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl] ; CHECK-NEXT: ret %gep= getelementptr , ptr %base, i64 14 @@ -215,6 +247,8 @@ define void @st2q_si_f64( %v0, %v1, < define void @st2q_si_bf16( %v0, %v1, %pred, ptr %base) { ; CHECK-LABEL: st2q_si_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: st2q { z0.q, z1.q }, p0, [x0, #14, mul vl] ; CHECK-NEXT: ret %gep = getelementptr , ptr %base, i64 14 @@ -232,6 +266,9 @@ define void @st2q_si_bf16( %v0, %v1, define void @st3q_ss_i8( %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3q_ss_i8: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -246,6 +283,9 @@ define void @st3q_ss_i8( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3q_ss_i16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 
def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -260,6 +300,9 @@ define void @st3q_ss_i16( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3q_ss_i32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -274,6 +317,9 @@ define void @st3q_ss_i32( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3q_ss_i64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -288,6 +334,9 @@ define void @st3q_ss_i64( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3q_ss_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -302,6 +351,9 @@ define void @st3q_ss_f16( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3q_ss_f32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; 
CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -316,6 +368,9 @@ define void @st3q_ss_f32( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3q_ss_f64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -330,6 +385,9 @@ define void @st3q_ss_f64( %v0, %v1, < define void @st3q_ss_bf16( %v0, %v1, %v2, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st3q_ss_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -344,6 +402,9 @@ define void @st3q_ss_bf16( %v0, %v1, define void @st3q_si_i8_off24( %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3q_si_i8_off24: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #-24, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 -24 @@ -358,6 +419,9 @@ define void @st3q_si_i8_off24( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3q_si_i8_off21: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 
killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 21 @@ -372,6 +436,9 @@ define void @st3q_si_i8_off21( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3q_si_i16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 21 @@ -386,6 +453,9 @@ define void @st3q_si_i16( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3q_si_i32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 21 @@ -400,6 +470,9 @@ define void @st3q_si_i32( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3q_si_i64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 21 @@ -414,6 +487,9 @@ define void @st3q_si_i64( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3q_si_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, 
mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 21 @@ -428,6 +504,9 @@ define void @st3q_si_f16( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3q_si_f32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 21 @@ -442,6 +521,9 @@ define void @st3q_si_f32( %v0, %v1, %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3q_si_f64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 21 @@ -456,6 +538,9 @@ define void @st3q_si_f64( %v0, %v1, < define void @st3q_si_bf16( %v0, %v1, %v2, %pred, ptr %addr) { ; CHECK-LABEL: st3q_si_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: st3q { z0.q - z2.q }, p0, [x0, #21, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 21 @@ -473,6 +558,10 @@ define void @st3q_si_bf16( %v0, %v1, define void @st4q_ss_i8( %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4q_ss_i8: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 
killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -488,6 +577,10 @@ define void @st4q_ss_i8( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4q_ss_i16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -503,6 +596,10 @@ define void @st4q_ss_i16( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4q_ss_i32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -518,6 +615,10 @@ define void @st4q_ss_i32( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4q_ss_i64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -533,6 +634,10 @@ define void 
@st4q_ss_i64( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4q_ss_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -548,6 +653,10 @@ define void @st4q_ss_f16( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4q_ss_f32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -563,6 +672,10 @@ define void @st4q_ss_f32( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4q_ss_f64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -578,6 +691,10 @@ define void @st4q_ss_f64( %v0, %v1, < define void @st4q_ss_bf16( %v0, %v1, %v2, %v3, %pred, ptr %addr, i64 %offset) { ; CHECK-LABEL: st4q_ss_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 
killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %1 = getelementptr i128, ptr %addr, i64 %offset @@ -593,6 +710,10 @@ define void @st4q_ss_bf16( %v0, %v1, define void @st4q_si_i8_off32( %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4q_si_i8_off32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #-32, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 -32 @@ -608,6 +729,10 @@ define void @st4q_si_i8_off32( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4q_si_i8_off28: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 28 @@ -623,6 +748,10 @@ define void @st4q_si_i8_off28( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4q_si_i16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def 
$z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 28 @@ -638,6 +767,10 @@ define void @st4q_si_i16( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4q_si_i32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl] ; CHECK-NEXT: ret %base1 = getelementptr , ptr %addr, i64 28 @@ -653,6 +786,10 @@ define void @st4q_si_i32( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4q_si_i64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 28 @@ -668,6 +805,10 @@ define void @st4q_si_i64( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4q_si_f16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 28 @@ -683,6 +824,10 @@ define void 
@st4q_si_f16( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4q_si_f32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 28 @@ -698,6 +843,10 @@ define void @st4q_si_f32( %v0, %v1, %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4q_si_f64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 28 @@ -713,6 +862,10 @@ define void @st4q_si_f64( %v0, %v1, < define void @st4q_si_bf16( %v0, %v1, %v2, %v3, %pred, ptr %addr) { ; CHECK-LABEL: st4q_si_bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4q { z0.q - z3.q }, p0, [x0, #28, mul vl] ; CHECK-NEXT: ret %base = getelementptr , ptr %addr, i64 28 diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll index 26316caad2bbc2..912d5d853aa8d5 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll +++ 
b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-sclamp.ll @@ -42,6 +42,8 @@ define @test_sclamp_i64( %a, , } @test_sclamp_single_x2_i8( %a, %b, %c, %d) #1 { ; CHECK-LABEL: test_sclamp_single_x2_i8: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sclamp { z0.b, z1.b }, z2.b, z3.b ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv16i8( %a, %b, %c, %d) @@ -51,6 +53,8 @@ define { , } @test_sclamp_single_x2_i8(, } @test_sclamp_single_x2_i16( %a, %b, %c, %d) #1 { ; CHECK-LABEL: test_sclamp_single_x2_i16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sclamp { z0.h, z1.h }, z2.h, z3.h ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv8i16( %a, %b, %c, %d) @@ -60,6 +64,8 @@ define { , } @test_sclamp_single_x2_i16(, } @test_sclamp_single_x2_i32( %a, %b, %c, %d) #1 { ; CHECK-LABEL: test_sclamp_single_x2_i32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sclamp { z0.s, z1.s }, z2.s, z3.s ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv4i32( %a, %b, %c, %d) @@ -69,6 +75,8 @@ define { , } @test_sclamp_single_x2_i32(, } @test_sclamp_single_x2_i64( %a, %b, %c, %d) #1 { ; CHECK-LABEL: test_sclamp_single_x2_i64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sclamp { z0.d, z1.d }, z2.d, z3.d ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.sclamp.single.x2.nxv2i64( %a, %b, %c, %d) @@ -78,6 +86,10 @@ define { , } @test_sclamp_single_x2_i64(, , , } @test_sclamp_single_x4_i8( %a, %b, %c, %d, %e, %f) #1 { ; CHECK-LABEL: 
test_sclamp_single_x4_i8: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: sclamp { z0.b - z3.b }, z4.b, z5.b ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv16i8( %a, %b, %c, %d, %e, %f) @@ -87,6 +99,10 @@ define { , , , , , , } @test_sclamp_single_x4_i16( %a, %b, %c, %d, %e, %f) #1 { ; CHECK-LABEL: test_sclamp_single_x4_i16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: sclamp { z0.h - z3.h }, z4.h, z5.h ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv8i16( %a, %b, %c, %d, %e, %f) @@ -96,6 +112,10 @@ define { , , , , , , } @test_sclamp_single_x4_i32( %a, %b, %c, %d, %e, %f) #1 { ; CHECK-LABEL: test_sclamp_single_x4_i32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: sclamp { z0.s - z3.s }, z4.s, z5.s ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv4i32( %a, %b, %c, %d, %e, %f) @@ -105,6 +125,10 @@ define { , , , , , , } @test_sclamp_single_x4_i64( %a, %b, %c, %d, %e, %f) #1 { ; CHECK-LABEL: test_sclamp_single_x4_i64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed 
$z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: sclamp { z0.d - z3.d }, z4.d, z5.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.sclamp.single.x4.nxv2i64( %a, %b, %c, %d, %e, %f) diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll index d64f06aaef8858..3a21eaead5f72e 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll @@ -8,18 +8,18 @@ define { , , , , , , , , , , , , , , , , , , , , , , , , %unused, %zn0, %unused, %zn0, %unused, %zn0, %unused, %zn0, %unused, %zn0, %unused, %zn0, ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d -; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: st1h { z2.h, z3.h }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -118,9 +118,9 @@ define void @st1_x2_f32( %unused, %zn0, < ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d -; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: st1w { z2.s, z3.s }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -136,9 +136,9 @@ define void @st1_x2_f64( %unused, %zn0, ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d -; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: st1d { z2.d, z3.d }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -154,11 +154,11 @@ define void @st1_x4_i8( %unused, %zn0, %unused, %zn0, %unused, %zn0, %unused, %zn0, %unused, %zn0, %unused, %zn0, ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: st1h { z4.h - z7.h }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -274,11 +274,11 @@ define void @st1_x4_f32( %unused, %zn0, < ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: st1w { z4.s - z7.s }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -294,11 +294,11 @@ define void @st1_x4_f64( %unused, %zn0, ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: st1d { z4.d - z7.d }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -316,9 +316,9 @@ define void @stnt1_x2_i8( %unused, %zn0, %unused, %zn0, < ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d -; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1h { z2.h, z3.h }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -352,9 +352,9 @@ define void @stnt1_x2_i32( %unused, %zn0, < ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d -; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1w { z2.s, z3.s }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -370,9 +370,9 @@ define void @stnt1_x2_i64( %unused, %zn0, < ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d -; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1d { z2.d, z3.d }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -388,9 +388,9 @@ define void @stnt1_x2_f16( %unused, %zn0, ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d -; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1h { z2.h, z3.h }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -406,9 +406,9 @@ define void @stnt1_x2_bf16( %unused, %zn ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d -; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1h { z2.h, z3.h }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -424,9 +424,9 @@ define void @stnt1_x2_f32( %unused, %zn0, ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d -; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1w { z2.s, z3.s }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -442,9 +442,9 @@ define void @stnt1_x2_f64( %unused, %zn0 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z3.d, z2.d -; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1d { z2.d, z3.d }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -460,11 +460,11 @@ define void @stnt1_x4_i8( %unused, %zn0, %unused, %zn0, < ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: stnt1h { z4.h - z7.h }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -500,11 +500,11 @@ define void @stnt1_x4_i32( %unused, %zn0, < ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: stnt1w { z4.s - z7.s }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -520,11 +520,11 @@ define void @stnt1_x4_i64( %unused, %zn0, < ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: stnt1d { z4.d - z7.d }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -540,11 +540,11 @@ define void @stnt1_x4_f16( %unused, %zn0, ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: stnt1h { z4.h - z7.h }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -560,11 +560,11 @@ define void @stnt1_x4_bf16( %unused, %zn ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: stnt1h { z4.h - z7.h }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -580,11 +580,11 @@ define void @stnt1_x4_f32( %unused, %zn0, ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: stnt1w { z4.s - z7.s }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -600,11 +600,11 @@ define void @stnt1_x4_f64( %unused, %zn0 ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: mov z7.d, z4.d -; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: stnt1d { z4.d - z7.d }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll index ca0bad16fe0e9c..de1695162c98eb 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uclamp.ll @@ -42,6 +42,8 @@ define @test_uclamp_i64( %a, , } @test_uclamp_single_x2_i8( %a, %b, %c, %d) #1 { ; CHECK-LABEL: test_uclamp_single_x2_i8: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: uclamp { z0.b, z1.b }, z2.b, z3.b ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv16i8( %a, %b, %c, %d) @@ -51,6 +53,8 @@ define { , } @test_uclamp_single_x2_i8(, } @test_uclamp_single_x2_i16( %a, %b, %c, %d) #1 { ; CHECK-LABEL: test_uclamp_single_x2_i16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: uclamp { z0.h, z1.h }, z2.h, z3.h ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv8i16( %a, %b, %c, %d) @@ -60,6 +64,8 @@ define { , } @test_uclamp_single_x2_i16(, } @test_uclamp_single_x2_i32( %a, %b, %c, %d) #1 { ; CHECK-LABEL: test_uclamp_single_x2_i32: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: uclamp { z0.s, z1.s }, z2.s, z3.s 
; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv4i32( %a, %b, %c, %d) @@ -69,6 +75,8 @@ define { , } @test_uclamp_single_x2_i32(, } @test_uclamp_single_x2_i64( %a, %b, %c, %d) #1 { ; CHECK-LABEL: test_uclamp_single_x2_i64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: uclamp { z0.d, z1.d }, z2.d, z3.d ; CHECK-NEXT: ret %res = call { , } @llvm.aarch64.sve.uclamp.single.x2.nxv2i64( %a, %b, %c, %d) @@ -78,6 +86,10 @@ define { , } @test_uclamp_single_x2_i64(, , , } @test_uclamp_single_x4_i8( %a, %b, %c, %d, %e, %f) #1 { ; CHECK-LABEL: test_uclamp_single_x4_i8: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: uclamp { z0.b - z3.b }, z4.b, z5.b ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv16i8( %a, %b, %c, %d, %e, %f) @@ -87,6 +99,10 @@ define { , , , , , , } @test_uclamp_single_x4_i16( %a, %b, %c, %d, %e, %f) #1 { ; CHECK-LABEL: test_uclamp_single_x4_i16: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: uclamp { z0.h - z3.h }, z4.h, z5.h ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv8i16( %a, %b, %c, %d, %e, %f) @@ -96,6 +112,10 @@ define { , , , , , , } @test_uclamp_single_x4_i32( %a, %b, %c, %d, %e, %f) #1 { ; CHECK-LABEL: test_uclamp_single_x4_i32: ; CHECK: // 
%bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: uclamp { z0.s - z3.s }, z4.s, z5.s ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv4i32( %a, %b, %c, %d, %e, %f) @@ -105,6 +125,10 @@ define { , , , , , , } @test_uclamp_single_x4_i64( %a, %b, %c, %d, %e, %f) #1 { ; CHECK-LABEL: test_uclamp_single_x4_i64: ; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: uclamp { z0.d - z3.d }, z4.d, z5.d ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.uclamp.single.x4.nxv2i64( %a, %b, %c, %d, %e, %f) diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpx4.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpx4.ll index 741afc3a49a69e..fe3ddbf747acec 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpx4.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpx4.ll @@ -97,11 +97,11 @@ define { , , , , , , } @uzp_x4_f64( %unused, %zn1, %zn2, %zn3, %zn4) nounwind { ; CHECK-LABEL: uzp_x4_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z7.d, z5.d -; CHECK-NEXT: mov z6.d, z4.d -; CHECK-NEXT: mov z5.d, z3.d -; CHECK-NEXT: mov z4.d, z2.d -; CHECK-NEXT: uzp { z0.d - z3.d }, { z4.d - z7.d } +; CHECK-NEXT: mov z27.d, z5.d +; CHECK-NEXT: mov z26.d, z4.d +; CHECK-NEXT: mov z25.d, z3.d +; CHECK-NEXT: mov z24.d, z2.d +; CHECK-NEXT: uzp { z0.d - z3.d }, { z24.d - z27.d } ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.uzp.x4.nxv2f64( %zn1, %zn2, %zn3, %zn4) 
ret { , , , } %res @@ -204,11 +204,11 @@ define { , , , , , , } @zipq_x4_f64( %unused, %zn1, %zn2, %zn3, %zn4) nounwind { ; CHECK-LABEL: zipq_x4_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z7.d, z5.d -; CHECK-NEXT: mov z6.d, z4.d -; CHECK-NEXT: mov z5.d, z3.d -; CHECK-NEXT: mov z4.d, z2.d -; CHECK-NEXT: uzp { z0.q - z3.q }, { z4.q - z7.q } +; CHECK-NEXT: mov z27.d, z5.d +; CHECK-NEXT: mov z26.d, z4.d +; CHECK-NEXT: mov z25.d, z3.d +; CHECK-NEXT: mov z24.d, z2.d +; CHECK-NEXT: uzp { z0.q - z3.q }, { z24.q - z27.q } ; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.uzpq.x4.nxv2f64( %zn1, %zn2, %zn3, %zn4) ret { , , , } %res diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll index 638849605a2cb8..ab70f57b488742 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-while-pp.ll @@ -7,6 +7,7 @@ define @whilege_x2_nxv16i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilege_x2_nxv16i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilege { p0.b, p1.b }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilege.x2.nxv16i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -17,6 +18,7 @@ define @whilege_x2_nxv8i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilege_x2_nxv8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilege { p0.h, p1.h }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilege.x2.nxv8i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -27,6 +29,7 @@ define @whilege_x2_nxv4i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilege_x2_nxv4i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilege { p0.s, p1.s }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilege.x2.nxv4i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -37,6 +40,7 @@ define 
@whilege_x2_nxv2i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilege_x2_nxv2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilege { p0.d, p1.d }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilege.x2.nxv2i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -50,6 +54,7 @@ define @whilegt_x2_nxv16i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilegt_x2_nxv16i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilegt { p0.b, p1.b }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilegt.x2.nxv16i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -60,6 +65,7 @@ define @whilegt_x2_nxv8i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilegt_x2_nxv8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilegt { p0.h, p1.h }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilegt.x2.nxv8i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -70,6 +76,7 @@ define @whilegt_x2_nxv4i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilegt_x2_nxv4i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilegt { p0.s, p1.s }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilegt.x2.nxv4i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -80,6 +87,7 @@ define @whilegt_x2_nxv2i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilegt_x2_nxv2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilegt { p0.d, p1.d }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilegt.x2.nxv2i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -93,6 +101,7 @@ define @whilehi_x2_nxv16i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilehi_x2_nxv16i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilehi { p0.b, p1.b }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } 
@llvm.aarch64.sve.whilehi.x2.nxv16i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -103,6 +112,7 @@ define @whilehi_x2_nxv8i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilehi_x2_nxv8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilehi { p0.h, p1.h }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilehi.x2.nxv8i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -113,6 +123,7 @@ define @whilehi_x2_nxv4i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilehi_x2_nxv4i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilehi { p0.s, p1.s }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilehi.x2.nxv4i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -123,6 +134,7 @@ define @whilehi_x2_nxv2i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilehi_x2_nxv2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilehi { p0.d, p1.d }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilehi.x2.nxv2i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -136,6 +148,7 @@ define @whilehs_x2_nxv16i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilehs_x2_nxv16i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilehs { p0.b, p1.b }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilehs.x2.nxv16i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -146,6 +159,7 @@ define @whilehs_x2_nxv8i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilehs_x2_nxv8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilehs { p0.h, p1.h }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilehs.x2.nxv8i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -156,6 +170,7 @@ define @whilehs_x2_nxv4i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilehs_x2_nxv4i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilehs { p0.s, p1.s }, x0, x1 +; 
CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilehs.x2.nxv4i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -166,6 +181,7 @@ define @whilehs_x2_nxv2i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilehs_x2_nxv2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilehs { p0.d, p1.d }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilehs.x2.nxv2i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -179,6 +195,7 @@ define @whilele_x2_nxv16i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilele_x2_nxv16i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilele { p0.b, p1.b }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilele.x2.nxv16i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -189,6 +206,7 @@ define @whilele_x2_nxv8i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilele_x2_nxv8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilele { p0.h, p1.h }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilele.x2.nxv8i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -199,6 +217,7 @@ define @whilele_x2_nxv4i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilele_x2_nxv4i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilele { p0.s, p1.s }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilele.x2.nxv4i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -209,6 +228,7 @@ define @whilele_x2_nxv2i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilele_x2_nxv2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilele { p0.d, p1.d }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilele.x2.nxv2i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -222,6 +242,7 @@ define @whilelo_x2_nxv16i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: 
whilelo_x2_nxv16i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilelo { p0.b, p1.b }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilelo.x2.nxv16i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -232,6 +253,7 @@ define @whilelo_x2_nxv8i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilelo_x2_nxv8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilelo { p0.h, p1.h }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilelo.x2.nxv8i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -242,6 +264,7 @@ define @whilelo_x2_nxv4i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilelo_x2_nxv4i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilelo { p0.s, p1.s }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilelo.x2.nxv4i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -252,6 +275,7 @@ define @whilelo_x2_nxv2i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilelo_x2_nxv2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilelo { p0.d, p1.d }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilelo.x2.nxv2i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -265,6 +289,7 @@ define @whilels_x2_nxv16i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilels_x2_nxv16i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilels { p0.b, p1.b }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilels.x2.nxv16i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -275,6 +300,7 @@ define @whilels_x2_nxv8i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilels_x2_nxv8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilels { p0.h, p1.h }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilels.x2.nxv8i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 
@@ -285,6 +311,7 @@ define @whilels_x2_nxv4i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilels_x2_nxv4i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilels { p0.s, p1.s }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilels.x2.nxv4i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -295,6 +322,7 @@ define @whilels_x2_nxv2i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilels_x2_nxv2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilels { p0.d, p1.d }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilels.x2.nxv2i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -308,6 +336,7 @@ define @whilelt_x2_nxv16i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilelt_x2_nxv16i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilelt { p0.b, p1.b }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilelt.x2.nxv16i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -318,6 +347,7 @@ define @whilelt_x2_nxv8i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilelt_x2_nxv8i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilelt { p0.h, p1.h }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilelt.x2.nxv8i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -328,6 +358,7 @@ define @whilelt_x2_nxv4i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilelt_x2_nxv4i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilelt { p0.s, p1.s }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , } @llvm.aarch64.sve.whilelt.x2.nxv4i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 @@ -338,6 +369,7 @@ define @whilelt_x2_nxv2i1(i64 %m, i64 %n) nounwind { ; CHECK-LABEL: whilelt_x2_nxv2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: whilelt { p0.d, p1.d }, x0, x1 +; CHECK-NEXT: // kill: def $p0 killed $p0 killed $p0_p1 ; CHECK-NEXT: ret %pp = call { , 
} @llvm.aarch64.sve.whilelt.x2.nxv2i1(i64 %m, i64 %n) %res = extractvalue {, } %pp, 0 diff --git a/llvm/test/CodeGen/AArch64/swift-error-unreachable-use.ll b/llvm/test/CodeGen/AArch64/swift-error-unreachable-use.ll index 77415381709d18..d3abc27a53dadc 100644 --- a/llvm/test/CodeGen/AArch64/swift-error-unreachable-use.ll +++ b/llvm/test/CodeGen/AArch64/swift-error-unreachable-use.ll @@ -9,7 +9,6 @@ define void @"func"(ptr swifterror %0) #0 { ; CHECK-NEXT: b {{\.?}}LBB0_2 ; CHECK-NEXT: {{\.?}}LBB0_1:{{.*}}%thirtythree ; CHECK-NEXT: {{.*}}=>This Inner Loop Header: Depth=1 -; CHECK-NEXT: {{.*}}implicit-def: $x0 ; CHECK-NEXT: b {{\.?}}LBB0_1 ; CHECK-NEXT: {{\.?}}LBB0_2:{{.*}}%thirtyeight ; CHECK-NEXT: b {{\.?}}LBB0_3 diff --git a/llvm/test/CodeGen/AArch64/tbl-loops.ll b/llvm/test/CodeGen/AArch64/tbl-loops.ll index dd5ce449bb1d2a..0ad99008655184 100644 --- a/llvm/test/CodeGen/AArch64/tbl-loops.ll +++ b/llvm/test/CodeGen/AArch64/tbl-loops.ll @@ -203,17 +203,16 @@ define void @loop2(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-NEXT: fcmgt v3.4s, v1.4s, v0.4s ; CHECK-NEXT: fcmgt v4.4s, v2.4s, v0.4s ; CHECK-NEXT: fcmlt v5.4s, v1.4s, #0.0 -; CHECK-NEXT: bit v1.16b, v0.16b, v3.16b -; CHECK-NEXT: mov v3.16b, v4.16b -; CHECK-NEXT: bsl v3.16b, v0.16b, v2.16b -; CHECK-NEXT: fcmlt v2.4s, v2.4s, #0.0 -; CHECK-NEXT: bic v1.16b, v1.16b, v5.16b -; CHECK-NEXT: bic v2.16b, v3.16b, v2.16b -; CHECK-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-NEXT: bsl v3.16b, v0.16b, v1.16b +; CHECK-NEXT: bsl v4.16b, v0.16b, v2.16b +; CHECK-NEXT: fcmlt v1.4s, v2.4s, #0.0 +; CHECK-NEXT: bic v2.16b, v3.16b, v5.16b +; CHECK-NEXT: bic v1.16b, v4.16b, v1.16b ; CHECK-NEXT: fcvtzs v2.4s, v2.4s -; CHECK-NEXT: xtn v1.4h, v1.4s +; CHECK-NEXT: fcvtzs v1.4s, v1.4s ; CHECK-NEXT: xtn v2.4h, v2.4s -; CHECK-NEXT: trn1 v1.8b, v1.8b, v2.8b +; CHECK-NEXT: xtn v1.4h, v1.4s +; CHECK-NEXT: trn1 v1.8b, v2.8b, v1.8b ; CHECK-NEXT: str d1, [x0], #8 ; CHECK-NEXT: b.ne .LBB1_9 ; CHECK-NEXT: // %bb.10: // %middle.block 
@@ -353,22 +352,21 @@ define void @loop3(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-NEXT: fcmgt v6.4s, v3.4s, v0.4s ; CHECK-NEXT: fcmgt v7.4s, v4.4s, v0.4s ; CHECK-NEXT: fcmlt v16.4s, v2.4s, #0.0 -; CHECK-NEXT: bit v2.16b, v0.16b, v5.16b -; CHECK-NEXT: fcmlt v5.4s, v3.4s, #0.0 -; CHECK-NEXT: bit v3.16b, v0.16b, v6.16b -; CHECK-NEXT: mov v6.16b, v7.16b -; CHECK-NEXT: bsl v6.16b, v0.16b, v4.16b -; CHECK-NEXT: fcmlt v4.4s, v4.4s, #0.0 -; CHECK-NEXT: bic v2.16b, v2.16b, v16.16b -; CHECK-NEXT: bic v3.16b, v3.16b, v5.16b -; CHECK-NEXT: fcvtzs v2.4s, v2.4s -; CHECK-NEXT: bic v4.16b, v6.16b, v4.16b +; CHECK-NEXT: fcmlt v17.4s, v3.4s, #0.0 +; CHECK-NEXT: bsl v5.16b, v0.16b, v2.16b +; CHECK-NEXT: bsl v6.16b, v0.16b, v3.16b +; CHECK-NEXT: bsl v7.16b, v0.16b, v4.16b +; CHECK-NEXT: fcmlt v2.4s, v4.4s, #0.0 +; CHECK-NEXT: bic v3.16b, v5.16b, v16.16b +; CHECK-NEXT: bic v4.16b, v6.16b, v17.16b +; CHECK-NEXT: bic v2.16b, v7.16b, v2.16b ; CHECK-NEXT: fcvtzs v3.4s, v3.4s ; CHECK-NEXT: fcvtzs v4.4s, v4.4s -; CHECK-NEXT: xtn v2.4h, v2.4s -; CHECK-NEXT: xtn v3.4h, v3.4s -; CHECK-NEXT: xtn v4.4h, v4.4s -; CHECK-NEXT: tbl v2.16b, { v2.16b, v3.16b, v4.16b }, v1.16b +; CHECK-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-NEXT: xtn v5.4h, v3.4s +; CHECK-NEXT: xtn v6.4h, v4.4s +; CHECK-NEXT: xtn v7.4h, v2.4s +; CHECK-NEXT: tbl v2.16b, { v5.16b, v6.16b, v7.16b }, v1.16b ; CHECK-NEXT: st1 { v2.s }[2], [x13] ; CHECK-NEXT: str d2, [x0], #12 ; CHECK-NEXT: b.ne .LBB2_4 @@ -607,27 +605,26 @@ define void @loop4(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-NEXT: fcmgt v16.4s, v4.4s, v0.4s ; CHECK-NEXT: fcmgt v17.4s, v5.4s, v0.4s ; CHECK-NEXT: fcmlt v18.4s, v2.4s, #0.0 -; CHECK-NEXT: bit v2.16b, v0.16b, v6.16b -; CHECK-NEXT: fcmlt v6.4s, v3.4s, #0.0 -; CHECK-NEXT: bit v3.16b, v0.16b, v7.16b -; CHECK-NEXT: fcmlt v7.4s, v4.4s, #0.0 -; CHECK-NEXT: bit v4.16b, v0.16b, v16.16b -; CHECK-NEXT: mov v16.16b, v17.16b -; CHECK-NEXT: bsl v16.16b, v0.16b, v5.16b -; 
CHECK-NEXT: fcmlt v5.4s, v5.4s, #0.0 -; CHECK-NEXT: bic v2.16b, v2.16b, v18.16b -; CHECK-NEXT: bic v3.16b, v3.16b, v6.16b -; CHECK-NEXT: bic v4.16b, v4.16b, v7.16b -; CHECK-NEXT: fcvtzs v2.4s, v2.4s -; CHECK-NEXT: bic v5.16b, v16.16b, v5.16b +; CHECK-NEXT: fcmlt v19.4s, v3.4s, #0.0 +; CHECK-NEXT: fcmlt v20.4s, v4.4s, #0.0 +; CHECK-NEXT: bsl v6.16b, v0.16b, v2.16b +; CHECK-NEXT: bsl v7.16b, v0.16b, v3.16b +; CHECK-NEXT: bsl v16.16b, v0.16b, v4.16b +; CHECK-NEXT: bsl v17.16b, v0.16b, v5.16b +; CHECK-NEXT: fcmlt v2.4s, v5.4s, #0.0 +; CHECK-NEXT: bic v3.16b, v6.16b, v18.16b +; CHECK-NEXT: bic v4.16b, v7.16b, v19.16b +; CHECK-NEXT: bic v5.16b, v16.16b, v20.16b +; CHECK-NEXT: bic v2.16b, v17.16b, v2.16b ; CHECK-NEXT: fcvtzs v3.4s, v3.4s ; CHECK-NEXT: fcvtzs v4.4s, v4.4s ; CHECK-NEXT: fcvtzs v5.4s, v5.4s -; CHECK-NEXT: xtn v2.4h, v2.4s -; CHECK-NEXT: xtn v3.4h, v3.4s -; CHECK-NEXT: xtn v4.4h, v4.4s -; CHECK-NEXT: xtn v5.4h, v5.4s -; CHECK-NEXT: tbl v2.16b, { v2.16b, v3.16b, v4.16b, v5.16b }, v1.16b +; CHECK-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-NEXT: xtn v16.4h, v3.4s +; CHECK-NEXT: xtn v17.4h, v4.4s +; CHECK-NEXT: xtn v18.4h, v5.4s +; CHECK-NEXT: xtn v19.4h, v2.4s +; CHECK-NEXT: tbl v2.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b ; CHECK-NEXT: str q2, [x0], #16 ; CHECK-NEXT: b.ne .LBB3_9 ; CHECK-NEXT: // %bb.10: // %middle.block diff --git a/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll b/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll index 575a4b2e6e0fbe..c4a58ba12dc6be 100644 --- a/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll +++ b/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll @@ -74,8 +74,8 @@ define void @trunc_v16i32_to_v16i8_in_loop(ptr %A, ptr %dst) { ; CHECK-BE-NEXT: add x10, x9, #16 ; CHECK-BE-NEXT: ld1 { v1.16b }, [x9] ; CHECK-BE-NEXT: add x11, x9, #32 -; CHECK-BE-NEXT: add x9, x9, #48 ; CHECK-BE-NEXT: ld1 { v2.16b }, [x10] +; CHECK-BE-NEXT: add x9, x9, #48 ; CHECK-BE-NEXT: ld1 { v3.16b }, [x11] ; CHECK-BE-NEXT: ld1 { v4.16b }, [x9] ; CHECK-BE-NEXT: add x9, x1, x8, lsl #4 
@@ -363,21 +363,21 @@ define void @trunc_v16i64_to_v16i8_in_loop(ptr %A, ptr %dst) { ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-BE-NEXT: add x9, x0, x8, lsl #7 ; CHECK-BE-NEXT: add x13, x9, #64 -; CHECK-BE-NEXT: add x10, x9, #112 -; CHECK-BE-NEXT: add x11, x9, #96 ; CHECK-BE-NEXT: add x12, x9, #80 ; CHECK-BE-NEXT: add x14, x9, #16 ; CHECK-BE-NEXT: ld1 { v1.16b }, [x9] ; CHECK-BE-NEXT: ld1 { v16.16b }, [x13] +; CHECK-BE-NEXT: add x11, x9, #96 ; CHECK-BE-NEXT: add x13, x9, #32 -; CHECK-BE-NEXT: add x9, x9, #48 ; CHECK-BE-NEXT: ld1 { v2.16b }, [x14] ; CHECK-BE-NEXT: ld1 { v17.16b }, [x12] +; CHECK-BE-NEXT: add x10, x9, #112 +; CHECK-BE-NEXT: add x9, x9, #48 ; CHECK-BE-NEXT: ld1 { v3.16b }, [x13] ; CHECK-BE-NEXT: ld1 { v18.16b }, [x11] ; CHECK-BE-NEXT: ld1 { v4.16b }, [x9] -; CHECK-BE-NEXT: ld1 { v19.16b }, [x10] ; CHECK-BE-NEXT: add x9, x1, x8, lsl #4 +; CHECK-BE-NEXT: ld1 { v19.16b }, [x10] ; CHECK-BE-NEXT: add x8, x8, #1 ; CHECK-BE-NEXT: cmp x8, #1000 ; CHECK-BE-NEXT: tbl v1.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b @@ -510,8 +510,8 @@ define void @trunc_v8i64_to_v8i8_in_loop(ptr %A, ptr %dst) { ; CHECK-BE-NEXT: add x10, x9, #16 ; CHECK-BE-NEXT: ld1 { v1.16b }, [x9] ; CHECK-BE-NEXT: add x11, x9, #32 -; CHECK-BE-NEXT: add x9, x9, #48 ; CHECK-BE-NEXT: ld1 { v2.16b }, [x10] +; CHECK-BE-NEXT: add x9, x9, #48 ; CHECK-BE-NEXT: ld1 { v3.16b }, [x11] ; CHECK-BE-NEXT: ld1 { v4.16b }, [x9] ; CHECK-BE-NEXT: add x9, x1, x8, lsl #3 diff --git a/llvm/test/CodeGen/AArch64/vldn_shuffle.ll b/llvm/test/CodeGen/AArch64/vldn_shuffle.ll index e453d618325225..3685e9cf85bd6e 100644 --- a/llvm/test/CodeGen/AArch64/vldn_shuffle.ll +++ b/llvm/test/CodeGen/AArch64/vldn_shuffle.ll @@ -10,9 +10,9 @@ define void @vld2(ptr nocapture readonly %pSrc, ptr noalias nocapture %pDst, i32 ; CHECK-NEXT: .LBB0_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ld2 { v0.4s, v1.4s }, [x0], #32 -; CHECK-NEXT: fmul v0.4s, v0.4s, v0.4s -; CHECK-NEXT: 
fmla v0.4s, v1.4s, v1.4s -; CHECK-NEXT: str q0, [x1, x8] +; CHECK-NEXT: fmul v2.4s, v0.4s, v0.4s +; CHECK-NEXT: fmla v2.4s, v1.4s, v1.4s +; CHECK-NEXT: str q2, [x1, x8] ; CHECK-NEXT: add x8, x8, #16 ; CHECK-NEXT: cmp x8, #1, lsl #12 // =4096 ; CHECK-NEXT: b.ne .LBB0_1 @@ -50,10 +50,10 @@ define void @vld3(ptr nocapture readonly %pSrc, ptr noalias nocapture %pDst, i32 ; CHECK-NEXT: .LBB1_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ld3 { v0.4s, v1.4s, v2.4s }, [x0], #48 -; CHECK-NEXT: fmul v0.4s, v0.4s, v0.4s -; CHECK-NEXT: fmla v0.4s, v1.4s, v1.4s -; CHECK-NEXT: fmla v0.4s, v2.4s, v2.4s -; CHECK-NEXT: str q0, [x1, x8] +; CHECK-NEXT: fmul v3.4s, v0.4s, v0.4s +; CHECK-NEXT: fmla v3.4s, v1.4s, v1.4s +; CHECK-NEXT: fmla v3.4s, v2.4s, v2.4s +; CHECK-NEXT: str q3, [x1, x8] ; CHECK-NEXT: add x8, x8, #16 ; CHECK-NEXT: cmp x8, #1, lsl #12 // =4096 ; CHECK-NEXT: b.ne .LBB1_1 @@ -97,11 +97,11 @@ define void @vld4(ptr nocapture readonly %pSrc, ptr noalias nocapture %pDst, i32 ; CHECK-NEXT: add x9, x1, x8 ; CHECK-NEXT: add x8, x8, #32 ; CHECK-NEXT: cmp x8, #2, lsl #12 // =8192 -; CHECK-NEXT: fmul v0.4s, v0.4s, v0.4s -; CHECK-NEXT: fmla v0.4s, v1.4s, v1.4s -; CHECK-NEXT: fmul v1.4s, v2.4s, v2.4s -; CHECK-NEXT: fmla v1.4s, v3.4s, v3.4s -; CHECK-NEXT: st2 { v0.4s, v1.4s }, [x9] +; CHECK-NEXT: fmul v4.4s, v0.4s, v0.4s +; CHECK-NEXT: fmla v4.4s, v1.4s, v1.4s +; CHECK-NEXT: fmul v5.4s, v2.4s, v2.4s +; CHECK-NEXT: fmla v5.4s, v3.4s, v3.4s +; CHECK-NEXT: st2 { v4.4s, v5.4s }, [x9] ; CHECK-NEXT: b.ne .LBB2_1 ; CHECK-NEXT: // %bb.2: // %while.end ; CHECK-NEXT: ret @@ -149,9 +149,9 @@ define void @twosrc(ptr nocapture readonly %pSrc, ptr nocapture readonly %pSrc2, ; CHECK-NEXT: ld2 { v0.4s, v1.4s }, [x9] ; CHECK-NEXT: cmp x8, #2, lsl #12 // =8192 ; CHECK-NEXT: ld2 { v2.4s, v3.4s }, [x10] -; CHECK-NEXT: fmul v0.4s, v2.4s, v0.4s -; CHECK-NEXT: fmla v0.4s, v1.4s, v3.4s -; CHECK-NEXT: str q0, [x2], #16 +; CHECK-NEXT: fmul v4.4s, v2.4s, v0.4s +; 
CHECK-NEXT: fmla v4.4s, v1.4s, v3.4s +; CHECK-NEXT: str q4, [x2], #16 ; CHECK-NEXT: b.ne .LBB3_1 ; CHECK-NEXT: // %bb.2: // %while.end ; CHECK-NEXT: ret @@ -190,9 +190,9 @@ define void @vld2_multiuse(ptr nocapture readonly %pSrc, ptr noalias nocapture % ; CHECK-NEXT: .LBB4_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ld2 { v0.4s, v1.4s }, [x0], #32 -; CHECK-NEXT: fmul v0.4s, v0.4s, v0.4s -; CHECK-NEXT: fmla v0.4s, v1.4s, v1.4s -; CHECK-NEXT: str q0, [x1, x8] +; CHECK-NEXT: fmul v2.4s, v0.4s, v0.4s +; CHECK-NEXT: fmla v2.4s, v1.4s, v1.4s +; CHECK-NEXT: str q2, [x1, x8] ; CHECK-NEXT: add x8, x8, #16 ; CHECK-NEXT: cmp x8, #1, lsl #12 // =4096 ; CHECK-NEXT: b.ne .LBB4_1 @@ -229,10 +229,10 @@ define void @vld3_multiuse(ptr nocapture readonly %pSrc, ptr noalias nocapture % ; CHECK-NEXT: .LBB5_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ld3 { v0.4s, v1.4s, v2.4s }, [x0], #48 -; CHECK-NEXT: fmul v0.4s, v0.4s, v0.4s -; CHECK-NEXT: fmla v0.4s, v1.4s, v1.4s -; CHECK-NEXT: fmla v0.4s, v2.4s, v2.4s -; CHECK-NEXT: str q0, [x1, x8] +; CHECK-NEXT: fmul v3.4s, v0.4s, v0.4s +; CHECK-NEXT: fmla v3.4s, v1.4s, v1.4s +; CHECK-NEXT: fmla v3.4s, v2.4s, v2.4s +; CHECK-NEXT: str q3, [x1, x8] ; CHECK-NEXT: add x8, x8, #16 ; CHECK-NEXT: cmp x8, #1, lsl #12 // =4096 ; CHECK-NEXT: b.ne .LBB5_1 @@ -274,11 +274,11 @@ define void @vld4_multiuse(ptr nocapture readonly %pSrc, ptr noalias nocapture % ; CHECK-NEXT: add x9, x1, x8 ; CHECK-NEXT: add x8, x8, #32 ; CHECK-NEXT: cmp x8, #2, lsl #12 // =8192 -; CHECK-NEXT: fmul v0.4s, v0.4s, v0.4s -; CHECK-NEXT: fmla v0.4s, v1.4s, v1.4s -; CHECK-NEXT: fmul v1.4s, v2.4s, v2.4s -; CHECK-NEXT: fmla v1.4s, v3.4s, v3.4s -; CHECK-NEXT: st2 { v0.4s, v1.4s }, [x9] +; CHECK-NEXT: fmul v4.4s, v0.4s, v0.4s +; CHECK-NEXT: fmla v4.4s, v1.4s, v1.4s +; CHECK-NEXT: fmul v5.4s, v2.4s, v2.4s +; CHECK-NEXT: fmla v5.4s, v3.4s, v3.4s +; CHECK-NEXT: st2 { v4.4s, v5.4s }, [x9] ; CHECK-NEXT: b.ne .LBB6_1 
; CHECK-NEXT: // %bb.2: // %while.end ; CHECK-NEXT: ret @@ -369,16 +369,16 @@ define void @transpose_s16_8x8_simpler2(ptr nocapture noundef %a) { ; CHECK: .Lfunc_begin8: ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: ldp q2, q3, [x0, #64] -; CHECK-NEXT: ldp q4, q5, [x0, #32] -; CHECK-NEXT: ldp q6, q7, [x0, #96] -; CHECK-NEXT: mov v0.h[5], v1.h[4] -; CHECK-NEXT: zip1 v1.8h, v2.8h, v3.8h -; CHECK-NEXT: zip1 v2.8h, v4.8h, v5.8h -; CHECK-NEXT: mov v6.h[5], v7.h[4] -; CHECK-NEXT: mov v0.s[1], v1.s[0] -; CHECK-NEXT: uzp1 v1.4s, v2.4s, v6.4s +; CHECK-NEXT: ldp q0, q2, [x0] +; CHECK-NEXT: ldp q3, q4, [x0, #64] +; CHECK-NEXT: ldp q5, q6, [x0, #32] +; CHECK-NEXT: ldp q7, q16, [x0, #96] +; CHECK-NEXT: mov v0.h[5], v2.h[4] +; CHECK-NEXT: zip1 v2.8h, v3.8h, v4.8h +; CHECK-NEXT: zip1 v3.8h, v5.8h, v6.8h +; CHECK-NEXT: mov v7.h[5], v16.h[4] +; CHECK-NEXT: mov v0.s[1], v2.s[0] +; CHECK-NEXT: uzp1 v1.4s, v3.4s, v7.4s ; CHECK-NEXT: zip2 v2.4s, v0.4s, v1.4s ; CHECK-NEXT: st2 { v0.2s, v1.2s }, [x0] ; CHECK-NEXT: str q2, [x0, #64] @@ -424,23 +424,23 @@ define void @transpose_s16_8x8(ptr nocapture noundef %0, ptr nocapture noundef % ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: ldr q2, [x2] ; CHECK-NEXT: ldr q3, [x4] ; CHECK-NEXT: ldr q4, [x5] +; CHECK-NEXT: ldr q2, [x2] ; CHECK-NEXT: ldr q5, [x3] ; CHECK-NEXT: trn1 v16.8h, v0.8h, v1.8h ; CHECK-NEXT: trn2 v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ldr q6, [x6] ; CHECK-NEXT: ldr q7, [x7] ; CHECK-NEXT: trn1 v17.8h, v3.8h, v4.8h -; CHECK-NEXT: trn1 v18.8h, v2.8h, v5.8h ; CHECK-NEXT: trn2 v1.8h, v3.8h, v4.8h +; CHECK-NEXT: trn1 v18.8h, v2.8h, v5.8h ; CHECK-NEXT: trn2 v2.8h, v2.8h, v5.8h ; CHECK-NEXT: trn1 v19.8h, v6.8h, v7.8h ; CHECK-NEXT: trn2 v3.8h, v6.8h, v7.8h ; CHECK-NEXT: trn1 v4.4s, v16.4s, v17.4s -; CHECK-NEXT: trn2 v16.4s, v16.4s, v17.4s ; CHECK-NEXT: trn1 v6.4s, v0.4s, v1.4s +; CHECK-NEXT: trn2 v16.4s, v16.4s, v17.4s ; CHECK-NEXT: 
trn2 v0.4s, v0.4s, v1.4s ; CHECK-NEXT: trn1 v5.4s, v18.4s, v19.4s ; CHECK-NEXT: trn1 v7.4s, v2.4s, v3.4s @@ -668,11 +668,11 @@ define void @store_factor3(ptr %ptr, <4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2 ; CHECK-NEXT: .cfi_startproc ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ext v3.16b, v0.16b, v1.16b, #12 -; CHECK-NEXT: ext v4.16b, v1.16b, v2.16b, #12 +; CHECK-NEXT: ext v6.16b, v1.16b, v2.16b, #12 ; CHECK-NEXT: zip2 v3.4s, v0.4s, v3.4s -; CHECK-NEXT: zip2 v4.4s, v1.4s, v4.4s ; CHECK-NEXT: mov v3.s[0], v0.s[0] ; CHECK-NEXT: ext v0.16b, v2.16b, v0.16b, #12 +; CHECK-NEXT: zip2 v4.4s, v1.4s, v6.4s ; CHECK-NEXT: mov v4.s[0], v1.s[0] ; CHECK-NEXT: zip2 v5.4s, v2.4s, v0.4s ; CHECK-NEXT: mov v5.s[0], v2.s[0] From 436872693a8a57487bf4510437183878d1e35cfb Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Fri, 14 Jun 2024 11:52:27 -0700 Subject: [PATCH 142/155] [lldb] Tweak Python interpreter workaround on macOS (#95582) Avoid copying the Python interpreter when running in a virtual environment as it will already have its own copy of the Python interpreter. Also leave a breadcrumb that we're running with a different Python interpreter. --- lldb/test/API/lit.cfg.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/lldb/test/API/lit.cfg.py b/lldb/test/API/lit.cfg.py index d934349fe3ca3d..1e99c8cb95d163 100644 --- a/lldb/test/API/lit.cfg.py +++ b/lldb/test/API/lit.cfg.py @@ -58,6 +58,15 @@ def find_shlibpath_var(): # enabled, we can't inject libraries into system binaries at all, so we need a # copy of the "real" python to work with. def find_python_interpreter(): + # This is only necessary when using DYLD_INSERT_LIBRARIES. + if "DYLD_INSERT_LIBRARIES" not in config.environment: + return None + + # If we're running in a virtual environment, we already have a copy of the + # Python executable. + if "VIRTUAL_ENV" in config.environment: + return None + # Avoid doing any work if we already copied the binary. 
copied_python = os.path.join(config.lldb_build_directory, "copied-python") if os.path.isfile(copied_python): @@ -84,7 +93,7 @@ def find_python_interpreter(): # RPATH and cannot be copied. try: # We don't care about the output, just make sure it runs. - subprocess.check_output([copied_python, "-V"], stderr=subprocess.STDOUT) + subprocess.check_call([copied_python, "-V"]) except subprocess.CalledProcessError: # The copied Python didn't work. Assume we're dealing with the Python # interpreter in Xcode. Given that this is not a system binary SIP @@ -130,8 +139,13 @@ def delete_module_cache(path): "libclang_rt.tsan_osx_dynamic.dylib" ) -if "DYLD_INSERT_LIBRARIES" in config.environment and platform.system() == "Darwin": - config.python_executable = find_python_interpreter() +if platform.system() == "Darwin": + python_executable = find_python_interpreter() + if python_executable: + lit_config.note( + "Using {} instead of {}".format(python_executable, config.python_executable) + ) + config.python_executable = python_executable # Shared library build of LLVM may require LD_LIBRARY_PATH or equivalent. 
if is_configured("shared_libs"): From 2ecb1ab6d701b6b4ec451f2c402c80c9fb9dcb14 Mon Sep 17 00:00:00 2001 From: Aviad Cohen Date: Fri, 14 Jun 2024 21:56:17 +0300 Subject: [PATCH 143/155] [mlir][scf]: Removed LoopParams struct and used Range instead (NFC) (#95501) --- mlir/include/mlir/Dialect/SCF/Utils/Utils.h | 14 ++-------- mlir/lib/Dialect/SCF/Utils/Utils.cpp | 31 ++++++++++----------- 2 files changed, 18 insertions(+), 27 deletions(-) diff --git a/mlir/include/mlir/Dialect/SCF/Utils/Utils.h b/mlir/include/mlir/Dialect/SCF/Utils/Utils.h index f719c002139875..da3fe3ceb86be3 100644 --- a/mlir/include/mlir/Dialect/SCF/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/SCF/Utils/Utils.h @@ -120,14 +120,6 @@ LogicalResult loopUnrollByFactor( scf::ForOp forOp, uint64_t unrollFactor, function_ref annotateFn = nullptr); -/// This structure is to pass and return sets of loop parameters without -/// confusing the order. -struct LoopParams { - OpFoldResult lowerBound; - OpFoldResult upperBound; - OpFoldResult step; -}; - /// Transform a loop with a strictly positive step /// for %i = %lb to %ub step %s /// into a 0-based loop with step 1 @@ -137,9 +129,9 @@ struct LoopParams { /// expected to be either `loop` or another loop perfectly nested under `loop`. /// Insert the definition of new bounds immediate before `outer`, which is /// expected to be either `loop` or its parent in the loop nest. -LoopParams emitNormalizedLoopBounds(RewriterBase &rewriter, Location loc, - OpFoldResult lb, OpFoldResult ub, - OpFoldResult step); +Range emitNormalizedLoopBounds(RewriterBase &rewriter, Location loc, + OpFoldResult lb, OpFoldResult ub, + OpFoldResult step); /// Get back the original induction variable values after loop normalization. 
void denormalizeInductionVariable(RewriterBase &rewriter, Location loc, diff --git a/mlir/lib/Dialect/SCF/Utils/Utils.cpp b/mlir/lib/Dialect/SCF/Utils/Utils.cpp index a031e53fe0ffbb..ff5e3a002263d3 100644 --- a/mlir/lib/Dialect/SCF/Utils/Utils.cpp +++ b/mlir/lib/Dialect/SCF/Utils/Utils.cpp @@ -464,9 +464,9 @@ LogicalResult mlir::loopUnrollByFactor( return success(); } -LoopParams mlir::emitNormalizedLoopBounds(RewriterBase &rewriter, Location loc, - OpFoldResult lb, OpFoldResult ub, - OpFoldResult step) { +Range mlir::emitNormalizedLoopBounds(RewriterBase &rewriter, Location loc, + OpFoldResult lb, OpFoldResult ub, + OpFoldResult step) { // For non-index types, generate `arith` instructions // Check if the loop is already known to have a constant zero lower bound or // a constant one step. @@ -478,8 +478,8 @@ LoopParams mlir::emitNormalizedLoopBounds(RewriterBase &rewriter, Location loc, if (auto stepCst = getConstantIntValue(step)) isStepOne = stepCst.value() == 1; - Type loopParamsType = getType(lb); - assert(loopParamsType == getType(ub) && loopParamsType == getType(step) && + Type rangeType = getType(lb); + assert(rangeType == getType(ub) && rangeType == getType(step) && "expected matching types"); // Compute the number of iterations the loop executes: ceildiv(ub - lb, step) @@ -501,8 +501,8 @@ LoopParams mlir::emitNormalizedLoopBounds(RewriterBase &rewriter, Location loc, getValueOrCreateConstantIntOp(rewriter, loc, step)); } - OpFoldResult newLowerBound = rewriter.getZeroAttr(loopParamsType); - OpFoldResult newStep = rewriter.getOneAttr(loopParamsType); + OpFoldResult newLowerBound = rewriter.getZeroAttr(rangeType); + OpFoldResult newStep = rewriter.getOneAttr(rangeType); return {newLowerBound, newUpperBound, newStep}; } @@ -626,18 +626,17 @@ LogicalResult mlir::coalesceLoops(RewriterBase &rewriter, Value lb = loop.getLowerBound(); Value ub = loop.getUpperBound(); Value step = loop.getStep(); - auto newLoopParams = + auto newLoopRange = 
emitNormalizedLoopBounds(rewriter, loop.getLoc(), lb, ub, step); rewriter.modifyOpInPlace(loop, [&]() { - loop.setLowerBound(getValueOrCreateConstantIntOp( - rewriter, loop.getLoc(), newLoopParams.lowerBound)); - loop.setUpperBound(getValueOrCreateConstantIntOp( - rewriter, loop.getLoc(), newLoopParams.upperBound)); + loop.setLowerBound(getValueOrCreateConstantIntOp(rewriter, loop.getLoc(), + newLoopRange.offset)); + loop.setUpperBound(getValueOrCreateConstantIntOp(rewriter, loop.getLoc(), + newLoopRange.size)); loop.setStep(getValueOrCreateConstantIntOp(rewriter, loop.getLoc(), - newLoopParams.step)); + newLoopRange.stride)); }); - rewriter.setInsertionPointToStart(innermost.getBody()); denormalizeInductionVariable(rewriter, loop.getLoc(), loop.getInductionVar(), lb, step); @@ -780,9 +779,9 @@ void mlir::collapseParallelLoops( Value lb = loops.getLowerBound()[i]; Value ub = loops.getUpperBound()[i]; Value step = loops.getStep()[i]; - auto newLoopParams = emitNormalizedLoopBounds(rewriter, loc, lb, ub, step); + auto newLoopRange = emitNormalizedLoopBounds(rewriter, loc, lb, ub, step); normalizedUpperBounds.push_back(getValueOrCreateConstantIntOp( - rewriter, loops.getLoc(), newLoopParams.upperBound)); + rewriter, loops.getLoc(), newLoopRange.size)); rewriter.setInsertionPointToStart(loops.getBody()); denormalizeInductionVariable(rewriter, loc, loops.getInductionVars()[i], lb, From b1f9440fa9286638bb1fe72a14d220770d1987cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Fri, 14 Jun 2024 20:56:43 +0200 Subject: [PATCH 144/155] [GlobalIsel] Import GEP flags (#93850) https://github.com/llvm/llvm-project/pull/90824 --- .../CodeGen/GlobalISel/GenericMachineInstrs.h | 12 +- llvm/include/llvm/CodeGen/MachineInstr.h | 2 + llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 6 +- llvm/lib/CodeGen/MIRParser/MILexer.cpp | 1 + llvm/lib/CodeGen/MIRParser/MILexer.h | 1 + llvm/lib/CodeGen/MIRPrinter.cpp | 2 + llvm/lib/CodeGen/MachineInstr.cpp | 5 + 
.../GlobalISel/arm64-irtranslator-gep.ll | 8 +- .../GlobalISel/arm64-irtranslator-switch.ll | 4 +- .../GlobalISel/irtranslator-gep-flags.ll | 122 ++++++++++++++++++ .../test/CodeGen/AArch64/arm64-this-return.ll | 2 +- .../AMDGPU/GlobalISel/function-returns.ll | 4 +- .../irtranslator-call-return-values.ll | 8 +- .../GlobalISel/irtranslator-call-sret.ll | 16 +-- .../AMDGPU/GlobalISel/irtranslator-call.ll | 32 ++--- .../GlobalISel/irtranslator-sibling-call.ll | 24 ++-- .../irtranslator/aggregate_struct_return.ll | 24 ++-- .../GlobalISel/irtranslator/sret_pointer.ll | 4 +- .../Mips/GlobalISel/irtranslator/var_arg.ll | 4 +- ...calling-conv-ilp32-ilp32f-ilp32d-common.ll | 24 ++-- .../calling-conv-lp64-lp64f-lp64d-common.ll | 24 ++-- .../RISCV/GlobalISel/irtranslator/vararg.ll | 32 ++--- .../x86_64-irtranslator-struct-return.ll | 24 ++-- 23 files changed, 258 insertions(+), 127 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-gep-flags.ll diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h index 22737256377133..995031f7c00be8 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h @@ -26,6 +26,10 @@ namespace llvm { /// A base class for all GenericMachineInstrs. 
class GenericMachineInstr : public MachineInstr { + constexpr static unsigned PoisonFlags = NoUWrap | NoSWrap | NoUSWrap | + IsExact | Disjoint | NonNeg | + FmNoNans | FmNoInfs; + public: GenericMachineInstr() = delete; @@ -37,14 +41,10 @@ class GenericMachineInstr : public MachineInstr { return isPreISelGenericOpcode(MI->getOpcode()); } - bool hasPoisonGeneratingFlags() const { - return getFlags() & (NoUWrap | NoSWrap | IsExact | Disjoint | NonNeg | - FmNoNans | FmNoInfs); - } + bool hasPoisonGeneratingFlags() const { return getFlags() & PoisonFlags; } void dropPoisonGeneratingFlags() { - clearFlags(NoUWrap | NoSWrap | IsExact | Disjoint | NonNeg | FmNoNans | - FmNoInfs); + clearFlags(PoisonFlags); assert(!hasPoisonGeneratingFlags()); } }; diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h index db48a0ae551451..b3cb5c8b84839d 100644 --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -117,6 +117,8 @@ class MachineInstr NoConvergent = 1 << 17, // Call does not require convergence guarantees. NonNeg = 1 << 18, // The operand is non-negative. Disjoint = 1 << 19, // Each bit is zero in at least one of the inputs. + NoUSWrap = 1 << 20, // Instruction supports geps + // no unsigned signed wrap. 
}; private: diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 25b14c860284dc..7efcf214602605 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1581,10 +1581,8 @@ bool IRTranslator::translateGetElementPtr(const User &U, LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL); uint32_t Flags = 0; - if (isa(U)) { - const Instruction &I = cast(U); - Flags = MachineInstr::copyFlagsFromInstruction(I); - } + if (const Instruction *I = dyn_cast(&U)) + Flags = MachineInstr::copyFlagsFromInstruction(*I); // Normalize Vector GEP - all scalar operands should be converted to the // splat vector. diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/llvm/lib/CodeGen/MIRParser/MILexer.cpp index 7bb21655320474..114f0e8a57108c 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.cpp +++ b/llvm/lib/CodeGen/MIRParser/MILexer.cpp @@ -212,6 +212,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) { .Case("reassoc", MIToken::kw_reassoc) .Case("nuw", MIToken::kw_nuw) .Case("nsw", MIToken::kw_nsw) + .Case("nusw", MIToken::kw_nusw) .Case("exact", MIToken::kw_exact) .Case("nneg", MIToken::kw_nneg) .Case("disjoint", MIToken::kw_disjoint) diff --git a/llvm/lib/CodeGen/MIRParser/MILexer.h b/llvm/lib/CodeGen/MIRParser/MILexer.h index 6617ec68e94150..49a19896804eeb 100644 --- a/llvm/lib/CodeGen/MIRParser/MILexer.h +++ b/llvm/lib/CodeGen/MIRParser/MILexer.h @@ -69,6 +69,7 @@ struct MIToken { kw_contract, kw_afn, kw_reassoc, + kw_nusw, kw_nuw, kw_nsw, kw_exact, diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp index 26d534f369ae5f..49993f7381ec77 100644 --- a/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/llvm/lib/CodeGen/MIRPrinter.cpp @@ -813,6 +813,8 @@ void MIPrinter::print(const MachineInstr &MI) { OS << "nneg "; if (MI.getFlag(MachineInstr::Disjoint)) OS << "disjoint "; + if (MI.getFlag(MachineInstr::NoUSWrap)) + OS << "nusw "; OS << 
TII->getName(MI.getOpcode()); if (I < E) diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index 02479f31f0b692..198af9339c1598 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ -576,6 +576,11 @@ uint32_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) { MIFlags |= MachineInstr::MIFlag::NoSWrap; if (TI->hasNoUnsignedWrap()) MIFlags |= MachineInstr::MIFlag::NoUWrap; + } else if (const GetElementPtrInst *GEP = dyn_cast(&I)) { + if (GEP->hasNoUnsignedSignedWrap()) + MIFlags |= MachineInstr::MIFlag::NoUSWrap; + if (GEP->hasNoUnsignedWrap()) + MIFlags |= MachineInstr::MIFlag::NoUWrap; } // Copy the nonneg flag. diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-gep.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-gep.ll index fcc399d41fad89..f0d9aa4dcd25d2 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-gep.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-gep.ll @@ -20,8 +20,8 @@ define i32 @cse_gep(ptr %ptr, i32 %idx) { ; O0-NEXT: [[MUL1:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]] ; O0-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL1]](s64) ; O0-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; O0-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) - ; O0-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %ir.gep2) + ; O0-NEXT: %11:_(p0) = nuw nusw G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) + ; O0-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD %11(p0) :: (load (s32) from %ir.gep2) ; O0-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]] ; O0-NEXT: $w0 = COPY [[ADD]](s32) ; O0-NEXT: RET_ReallyLR implicit $w0 @@ -39,8 +39,8 @@ define i32 @cse_gep(ptr %ptr, i32 %idx) { ; O3-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0) ; O3-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir.gep1) ; O3-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; 
O3-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[PTR_ADD]], [[C1]](s64) - ; O3-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from %ir.gep2) + ; O3-NEXT: %9:_(p0) = nuw nusw G_PTR_ADD [[PTR_ADD]], [[C1]](s64) + ; O3-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD %9(p0) :: (load (s32) from %ir.gep2) ; O3-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]] ; O3-NEXT: $w0 = COPY [[ADD]](s32) ; O3-NEXT: RET_ReallyLR implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-switch.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-switch.ll index 476b3c709ffc57..3b12885923db65 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-switch.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-switch.ll @@ -795,8 +795,8 @@ define void @jt_multiple_jump_tables(ptr %arg, i32 %arg1, ptr %arg2) { ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[PHI]], [[C111]] ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[GV]], [[MUL]](s64) ; CHECK-NEXT: [[C112:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[PTR_ADD]], [[C112]](s64) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[PTR_ADD1]](p0) :: (load (p0) from %ir.tmp59) + ; CHECK-NEXT: %120:_(p0) = nuw nusw G_PTR_ADD [[PTR_ADD]], [[C112]](s64) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD %120(p0) :: (load (p0) from %ir.tmp59) ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp ; CHECK-NEXT: $x0 = COPY [[COPY]](p0) ; CHECK-NEXT: $x1 = COPY [[LOAD]](p0) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-gep-flags.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-gep-flags.ll new file mode 100644 index 00000000000000..34ac4f6361d963 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-gep-flags.ll @@ -0,0 +1,122 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -O0 -mtriple=aarch64-linux-gnu -global-isel 
-stop-after=irtranslator %s -o - | FileCheck %s + +define i32 @gep_nusw_nuw(ptr %ptr, i32 %idx) { + ; CHECK-LABEL: name: gep_nusw_nuw + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $w1, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL]](s64) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir.gep1) + ; CHECK-NEXT: [[MUL1:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]] + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL1]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: %11:_(p0) = nuw nusw G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD %11(p0) :: (load (s32) from %ir.gep2) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]] + ; CHECK-NEXT: $w0 = COPY [[ADD]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %sidx = sext i32 %idx to i64 + %gep1 = getelementptr inbounds [4 x i32], ptr %ptr, i64 %sidx, i64 0 + %v1 = load i32, ptr %gep1 + %gep2 = getelementptr nusw nuw [4 x i32], ptr %ptr, i64 %sidx, i64 1 + %v2 = load i32, ptr %gep2 + %res = add i32 %v1, %v2 + ret i32 %res + } + +define i32 @gep_nuw(ptr %ptr, i32 %idx) { + ; CHECK-LABEL: name: gep_nuw + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $w1, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]] + ; CHECK-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL]](s64) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir.gep1) + ; CHECK-NEXT: [[MUL1:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]] + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL1]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %ir.gep2) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]] + ; CHECK-NEXT: $w0 = COPY [[ADD]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %sidx = sext i32 %idx to i64 + %gep1 = getelementptr inbounds [4 x i32], ptr %ptr, i64 %sidx, i64 0 + %v1 = load i32, ptr %gep1 + %gep2 = getelementptr nuw [4 x i32], ptr %ptr, i64 %sidx, i64 1 + %v2 = load i32, ptr %gep2 + %res = add i32 %v1, %v2 + ret i32 %res + } + +define i32 @gep_nusw(ptr %ptr, i32 %idx) { + ; CHECK-LABEL: name: gep_nusw + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $w1, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL]](s64) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir.gep1) + ; CHECK-NEXT: [[MUL1:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]] + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL1]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: %11:_(p0) = nusw G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) + ; CHECK-NEXT: 
[[LOAD1:%[0-9]+]]:_(s32) = G_LOAD %11(p0) :: (load (s32) from %ir.gep2) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]] + ; CHECK-NEXT: $w0 = COPY [[ADD]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %sidx = sext i32 %idx to i64 + %gep1 = getelementptr inbounds [4 x i32], ptr %ptr, i64 %sidx, i64 0 + %v1 = load i32, ptr %gep1 + %gep2 = getelementptr nusw [4 x i32], ptr %ptr, i64 %sidx, i64 1 + %v2 = load i32, ptr %gep2 + %res = add i32 %v1, %v2 + ret i32 %res + } + +define i32 @gep_none(ptr %ptr, i32 %idx) { + ; CHECK-LABEL: name: gep_none + ; CHECK: bb.1 (%ir-block.0): + ; CHECK-NEXT: liveins: $w1, $x0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY1]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]] + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL]](s64) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[PTR_ADD]](p0) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir.gep1) + ; CHECK-NEXT: [[MUL1:%[0-9]+]]:_(s64) = G_MUL [[SEXT]], [[C]] + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[MUL1]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[PTR_ADD1]], [[C1]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p0) :: (load (s32) from %ir.gep2) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]] + ; CHECK-NEXT: $w0 = COPY [[ADD]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %sidx = sext i32 %idx to i64 + %gep1 = getelementptr inbounds [4 x i32], ptr %ptr, i64 %sidx, i64 0 + %v1 = load i32, ptr %gep1 + %gep2 = getelementptr [4 x i32], ptr %ptr, i64 %sidx, i64 1 + %v2 = load i32, ptr %gep2 + %res = add i32 %v1, %v2 + ret i32 %res + } diff --git 
a/llvm/test/CodeGen/AArch64/arm64-this-return.ll b/llvm/test/CodeGen/AArch64/arm64-this-return.ll index 1ea0adc3aa1d11..a497ba2f8310fa 100644 --- a/llvm/test/CodeGen/AArch64/arm64-this-return.ll +++ b/llvm/test/CodeGen/AArch64/arm64-this-return.ll @@ -148,7 +148,7 @@ define ptr @E_ctor_base(ptr %this, i32 %x) { ; GISEL-MIR: ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp ; GISEL-MIR: [[COPY2:%[0-9]+]]:_(p0) = COPY [[COPY]](p0) ; GISEL-MIR: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GISEL-MIR: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[COPY]], [[C]](s64) + ; GISEL-MIR: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw nusw G_PTR_ADD [[COPY]], [[C]](s64) ; GISEL-MIR: ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp ; GISEL-MIR: $x0 = COPY [[PTR_ADD]](p0) ; GISEL-MIR: $w1 = COPY [[COPY1]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll index e6c835fa25406a..388ef2497e4356 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll @@ -952,9 +952,9 @@ define void @void_func_sret_struct_i8_i32(ptr addrspace(5) sret({ i8, i32 }) %ar ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (volatile load (s8) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p1) :: (volatile load (s32) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = nuw G_PTR_ADD [[COPY]], [[C]](s32) + ; CHECK-NEXT: %5:_(p5) = nuw nusw G_PTR_ADD [[COPY]], [[C]](s32) ; CHECK-NEXT: G_STORE [[LOAD]](s8), [[COPY]](p5) :: (store (s8) into %ir.arg0, addrspace 5) - ; CHECK-NEXT: G_STORE [[LOAD1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.gep1, addrspace 5) + ; CHECK-NEXT: G_STORE [[LOAD1]](s32), %5(p5) :: (store (s32) into %ir.gep1, addrspace 5) ; CHECK-NEXT: SI_RETURN %val0 = load volatile i8, ptr addrspace(1) undef 
%val1 = load volatile i32, ptr addrspace(1) undef diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll index 37f2118572d84e..097def586e61c4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-return-values.ll @@ -2914,8 +2914,8 @@ define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(ptr addrspa ; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (p1) from %ir.p.kernarg.offset1, align 16, addrspace 4) ; GCN-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[INT]], [[C]](s64) - ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32) from %ir.idx.kernarg.offset, align 8, addrspace 4) + ; GCN-NEXT: %17:_(p4) = nuw nusw G_PTR_ADD [[INT]], [[C]](s64) + ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD %17(p4) :: (dereferenceable invariant load (s32) from %ir.idx.kernarg.offset, align 8, addrspace 4) ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0 ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_v33i32_func_v33i32_i32 @@ -2923,7 +2923,7 @@ define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(ptr addrspa ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: 
[[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -2947,7 +2947,7 @@ define amdgpu_kernel void @test_call_external_v33i32_func_v33i32_i32(ptr addrspa ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll index 854f3463b64d82..cca35d66049cc7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call-sret.ll @@ -24,16 +24,16 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval ; GCN-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.1.out.val ; GCN-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = nuw G_PTR_ADD [[FRAME_INDEX]], [[C2]](s32) + ; GCN-NEXT: %17:_(p5) = nuw nusw G_PTR_ADD [[FRAME_INDEX]], [[C2]](s32) ; GCN-NEXT: G_STORE [[C]](s8), [[FRAME_INDEX]](p5) :: (store (s8) into %ir.in.val, addrspace 5) - ; GCN-NEXT: G_STORE [[C1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.in.gep1, addrspace 5) + ; GCN-NEXT: G_STORE [[C1]](s32), %17(p5) :: (store (s32) into %ir.in.gep1, addrspace 5) ; GCN-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; GCN-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32 ; GCN-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; GCN-NEXT: [[DEF1:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; GCN-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; GCN-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 
- ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C3]](s64) + ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C3]](s64) ; GCN-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] ; GCN-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; GCN-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -50,15 +50,15 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval ; GCN-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; GCN-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg ; GCN-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GCN-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32) + ; GCN-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32) ; GCN-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; GCN-NEXT: G_MEMCPY [[PTR_ADD2]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store (s64) into stack, align 4, addrspace 5), (dereferenceable load (s64) from %ir.in.val, align 4, addrspace 5) + ; GCN-NEXT: G_MEMCPY [[PTR_ADD1]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store (s64) into stack, align 4, addrspace 5), (dereferenceable load (s64) from %ir.in.val, align 4, addrspace 5) ; GCN-NEXT: $vgpr0 = COPY [[FRAME_INDEX1]](p5) ; GCN-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; GCN-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) ; GCN-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) ; GCN-NEXT: $sgpr6_sgpr7 = COPY [[DEF1]](p4) - ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4) + ; GCN-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) ; GCN-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) ; GCN-NEXT: $sgpr12 = COPY [[COPY12]](s32) ; GCN-NEXT: $sgpr13 = COPY [[COPY13]](s32) @@ -67,9 +67,9 @@ define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval ; GCN-NEXT: $vgpr31 = COPY [[OR1]](s32) ; GCN-NEXT: $sgpr30_sgpr31 = noconvergent G_SI_CALL [[GV]](p0), 
@external_void_func_sret_struct_i8_i32_byval_struct_i8_i32, csr_amdgpu, implicit $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr14, implicit $sgpr15, implicit $vgpr31 ; GCN-NEXT: ADJCALLSTACKDOWN 0, 8, implicit-def $scc - ; GCN-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = nuw G_PTR_ADD [[FRAME_INDEX1]], [[C2]](s32) + ; GCN-NEXT: %45:_(p5) = nuw nusw G_PTR_ADD [[FRAME_INDEX1]], [[C2]](s32) ; GCN-NEXT: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[FRAME_INDEX1]](p5) :: (dereferenceable load (s8) from %ir.out.val, addrspace 5) - ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p5) :: (dereferenceable load (s32) from %ir.out.gep1, addrspace 5) + ; GCN-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD %45(p5) :: (dereferenceable load (s32) from %ir.out.gep1, addrspace 5) ; GCN-NEXT: G_STORE [[LOAD]](s8), [[DEF]](p1) :: (volatile store (s8) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: G_STORE [[LOAD1]](s32), [[DEF]](p1) :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) ; GCN-NEXT: S_ENDPGM 0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll index 392b0ae6823e44..c3694158e7b971 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-call.ll @@ -4015,16 +4015,16 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.val ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = nuw G_PTR_ADD [[FRAME_INDEX]], [[C2]](s32) + ; CHECK-NEXT: %14:_(p5) = nuw nusw G_PTR_ADD [[FRAME_INDEX]], [[C2]](s32) ; CHECK-NEXT: G_STORE [[C]](s8), [[FRAME_INDEX]](p5) :: (store (s8) into %ir.val, addrspace 5) - ; 
CHECK-NEXT: G_STORE [[C1]](s32), [[PTR_ADD]](p5) :: (store (s32) into %ir.gep1, addrspace 5) + ; CHECK-NEXT: G_STORE [[C1]](s32), %14(p5) :: (store (s32) into %ir.gep1, addrspace 5) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @external_void_func_byval_struct_i8_i32 ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C3]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C3]](s64) ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -4041,14 +4041,14 @@ define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]] ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32) ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: G_MEMCPY [[PTR_ADD2]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store (s64) into stack, align 4, addrspace 5), (dereferenceable load (s64) from %ir.val, align 4, addrspace 5) + ; CHECK-NEXT: G_MEMCPY [[PTR_ADD1]](p5), [[FRAME_INDEX]](p5), [[C7]](s32), 0 :: (dereferenceable store (s64) into stack, align 4, addrspace 5), (dereferenceable load (s64) from %ir.val, align 4, addrspace 5) ; CHECK-NEXT: [[COPY18:%[0-9]+]]:_(<4 x s32>) = COPY $private_rsrc_reg ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) ; CHECK-NEXT: 
$sgpr4_sgpr5 = COPY [[COPY9]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) @@ -4589,15 +4589,15 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr) ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<32 x s32>) = G_LOAD [[INT]](p4) :: (dereferenceable invariant load (<32 x s32>) from %ir.val.kernarg.offset1, align 16, addrspace 4) ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = nuw G_PTR_ADD [[INT]], [[C]](s64) - ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s64) from %ir.tmp.kernarg.offset, align 16, addrspace 4) + ; CHECK-NEXT: %17:_(p4) = nuw nusw G_PTR_ADD [[INT]], [[C]](s64) + ; CHECK-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD %17(p4) :: (dereferenceable invariant load (s64) from %ir.tmp.kernarg.offset, align 16, addrspace 4) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $scc ; CHECK-NEXT: [[GV:%[0-9]+]]:_(p0) = G_GLOBAL_VALUE @stack_passed_f64_arg ; CHECK-NEXT: [[COPY9:%[0-9]+]]:_(p4) = COPY [[COPY7]] ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p4) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY10:%[0-9]+]]:_(p4) = COPY [[COPY8]](p4) ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 136 - ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY10]], [[C1]](s64) ; CHECK-NEXT: [[COPY11:%[0-9]+]]:_(s64) = COPY [[COPY6]] ; CHECK-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY [[COPY5]] ; CHECK-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY [[COPY4]] @@ -4615,15 +4615,15 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val ; 
CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<32 x s32>) ; CHECK-NEXT: [[AMDGPU_WAVE_ADDRESS:%[0-9]+]]:_(p5) = G_AMDGPU_WAVE_ADDRESS $sp_reg ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32) - ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack, align 16, addrspace 5) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C4]](s32) + ; CHECK-NEXT: G_STORE [[UV31]](s32), [[PTR_ADD1]](p5) :: (store (s32) into stack, align 16, addrspace 5) ; CHECK-NEXT: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](s64) ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C5]](s32) - ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 4, addrspace 5) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C5]](s32) + ; CHECK-NEXT: G_STORE [[UV32]](s32), [[PTR_ADD2]](p5) :: (store (s32) into stack + 4, addrspace 5) ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: 
[[PTR_ADD4:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32) - ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD4]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) + ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p5) = G_PTR_ADD [[AMDGPU_WAVE_ADDRESS]], [[C6]](s32) + ; CHECK-NEXT: G_STORE [[UV33]](s32), [[PTR_ADD3]](p5) :: (store (s32) into stack + 8, align 8, addrspace 5) ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32) ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32) ; CHECK-NEXT: $vgpr2 = COPY [[UV2]](s32) @@ -4659,7 +4659,7 @@ define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val ; CHECK-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY18]](<4 x s32>) ; CHECK-NEXT: $sgpr4_sgpr5 = COPY [[COPY9]](p4) ; CHECK-NEXT: $sgpr6_sgpr7 = COPY [[DEF]](p4) - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD1]](p4) + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY [[PTR_ADD]](p4) ; CHECK-NEXT: $sgpr10_sgpr11 = COPY [[COPY11]](s64) ; CHECK-NEXT: $sgpr12 = COPY [[COPY12]](s32) ; CHECK-NEXT: $sgpr13 = COPY [[COPY13]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll index 02bf7725015151..ecad793ad58987 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-sibling-call.ll @@ -26,8 +26,8 @@ define fastcc i32 @i32_fastcc_i32_i32_stack_object(i32 %arg0, i32 %arg1) #1 { ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = nuw G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32) - ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5) + ; GCN-NEXT: %4:_(p5) = nuw nusw G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32) + ; GCN-NEXT: G_STORE [[C]](s32), %4(p5) :: (volatile store (s32) into %ir.gep, addrspace 5) ; GCN-NEXT: 
[[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[COPY1]] ; GCN-NEXT: $vgpr0 = COPY [[ADD]](s32) ; GCN-NEXT: SI_RETURN implicit $vgpr0 @@ -68,8 +68,8 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_stack_object(i32 %a, i32 %b, ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = nuw G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32) - ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5) + ; GCN-NEXT: %5:_(p5) = nuw nusw G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32) + ; GCN-NEXT: G_STORE [[C]](s32), %5(p5) :: (volatile store (s32) into %ir.gep, addrspace 5) ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32 ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) @@ -95,8 +95,8 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_callee_stack_object(i32 %a, i ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = nuw G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32) - ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5) + ; GCN-NEXT: %5:_(p5) = nuw nusw G_PTR_ADD [[FRAME_INDEX]], [[C1]](s32) + ; GCN-NEXT: G_STORE [[C]](s32), %5(p5) :: (volatile store (s32) into %ir.gep, addrspace 5) ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_stack_object ; GCN-NEXT: $vgpr0 = COPY [[COPY]](s32) ; GCN-NEXT: $vgpr1 = COPY [[COPY1]](s32) @@ -451,8 +451,8 @@ define fastcc i32 @sibling_call_i32_fastcc_i32_i32_a32i32_stack_object(i32 %a, i ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = 
G_CONSTANT i32 20 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = nuw G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s32) - ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5) + ; GCN-NEXT: %39:_(p5) = nuw nusw G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s32) + ; GCN-NEXT: G_STORE [[C]](s32), %39(p5) :: (volatile store (s32) into %ir.gep, addrspace 5) ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32 ; GCN-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; GCN-NEXT: G_STORE [[LOAD]](s32), [[FRAME_INDEX4]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5) @@ -646,8 +646,8 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32(i32 %a, i3 ; GCN-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; GCN-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = nuw G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s32) - ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile store (s32) into %ir.gep, addrspace 5) + ; GCN-NEXT: %39:_(p5) = nuw nusw G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s32) + ; GCN-NEXT: G_STORE [[C]](s32), %39(p5) :: (volatile store (s32) into %ir.gep, addrspace 5) ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32 ; GCN-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; GCN-NEXT: G_STORE [[LOAD]](s32), [[FRAME_INDEX4]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5) @@ -751,8 +751,8 @@ define fastcc i32 @sibling_call_stack_objecti32_fastcc_i32_i32_a32i32_larger_arg ; GCN-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GCN-NEXT: [[FRAME_INDEX7:%[0-9]+]]:_(p5) = G_FRAME_INDEX %stack.0.alloca ; GCN-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; GCN-NEXT: [[PTR_ADD:%[0-9]+]]:_(p5) = nuw G_PTR_ADD [[FRAME_INDEX7]], [[C2]](s32) - ; GCN-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p5) :: (volatile 
store (s32) into %ir.gep, addrspace 5) + ; GCN-NEXT: %47:_(p5) = nuw nusw G_PTR_ADD [[FRAME_INDEX7]], [[C2]](s32) + ; GCN-NEXT: G_STORE [[C]](s32), %47(p5) :: (volatile store (s32) into %ir.gep, addrspace 5) ; GCN-NEXT: [[GV:%[0-9]+]]:ccr_sgpr_64(p0) = G_GLOBAL_VALUE @i32_fastcc_i32_i32_a32i32 ; GCN-NEXT: [[FRAME_INDEX8:%[0-9]+]]:_(p5) = G_FRAME_INDEX %fixed-stack.2 ; GCN-NEXT: G_STORE [[C1]](s32), [[FRAME_INDEX8]](p5) :: (store (s32) into %fixed-stack.2, align 16, addrspace 5) diff --git a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/aggregate_struct_return.ll b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/aggregate_struct_return.ll index 7f42c3ec859260..d1a024856b3751 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/aggregate_struct_return.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/aggregate_struct_return.ll @@ -12,12 +12,12 @@ define { float, float } @add_complex_float(ptr %a, ptr %b) { ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[COPY]](p0) ; MIPS32-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY2]](p0) :: (load (s32) from %ir..realp) ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; MIPS32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (load (s32) from %ir..imagp) + ; MIPS32-NEXT: %5:_(p0) = nuw nusw G_PTR_ADD [[COPY]], [[C]](s32) + ; MIPS32-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD %5(p0) :: (load (s32) from %ir..imagp) ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY [[COPY1]](p0) ; MIPS32-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[COPY3]](p0) :: (load (s32) from %ir..realp1) - ; MIPS32-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[COPY1]], [[C]](s32) - ; MIPS32-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (load (s32) from %ir..imagp3) + ; MIPS32-NEXT: %9:_(p0) = nuw nusw G_PTR_ADD [[COPY1]], [[C]](s32) + ; MIPS32-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD %9(p0) :: (load (s32) from %ir..imagp3) ; MIPS32-NEXT: 
[[FADD:%[0-9]+]]:_(s32) = G_FADD [[LOAD]], [[LOAD2]] ; MIPS32-NEXT: [[FADD1:%[0-9]+]]:_(s32) = G_FADD [[LOAD1]], [[LOAD3]] ; MIPS32-NEXT: $f0 = COPY [[FADD]](s32) @@ -50,12 +50,12 @@ define { double, double } @add_complex_double(ptr %a, ptr %b) { ; MIPS32-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY [[COPY]](p0) ; MIPS32-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[COPY2]](p0) :: (load (s64) from %ir..realp) ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; MIPS32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[COPY]], [[C]](s32) - ; MIPS32-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64) from %ir..imagp) + ; MIPS32-NEXT: %5:_(p0) = nuw nusw G_PTR_ADD [[COPY]], [[C]](s32) + ; MIPS32-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD %5(p0) :: (load (s64) from %ir..imagp) ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY [[COPY1]](p0) ; MIPS32-NEXT: [[LOAD2:%[0-9]+]]:_(s64) = G_LOAD [[COPY3]](p0) :: (load (s64) from %ir..realp1) - ; MIPS32-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[COPY1]], [[C]](s32) - ; MIPS32-NEXT: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (load (s64) from %ir..imagp3) + ; MIPS32-NEXT: %9:_(p0) = nuw nusw G_PTR_ADD [[COPY1]], [[C]](s32) + ; MIPS32-NEXT: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD %9(p0) :: (load (s64) from %ir..imagp3) ; MIPS32-NEXT: [[FADD:%[0-9]+]]:_(s64) = G_FADD [[LOAD]], [[LOAD2]] ; MIPS32-NEXT: [[FADD1:%[0-9]+]]:_(s64) = G_FADD [[LOAD1]], [[LOAD3]] ; MIPS32-NEXT: $d0 = COPY [[FADD]](s64) @@ -91,9 +91,9 @@ define void @call_ret_complex_float(ptr %z) { ; MIPS32-NEXT: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY [[COPY]](p0) ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; MIPS32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[COPY]], [[C]](s32) + ; MIPS32-NEXT: %5:_(p0) = nuw nusw G_PTR_ADD [[COPY]], [[C]](s32) ; MIPS32-NEXT: G_STORE [[COPY1]](s32), [[COPY3]](p0) :: (store (s32) into %ir..realp) - ; MIPS32-NEXT: G_STORE [[COPY2]](s32), 
[[PTR_ADD]](p0) :: (store (s32) into %ir..imagp) + ; MIPS32-NEXT: G_STORE [[COPY2]](s32), %5(p0) :: (store (s32) into %ir..imagp) ; MIPS32-NEXT: RetRA entry: %call = call { float, float } @ret_complex_float() @@ -120,9 +120,9 @@ define void @call_ret_complex_double(ptr %z) { ; MIPS32-NEXT: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp ; MIPS32-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY [[COPY]](p0) ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; MIPS32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[COPY]], [[C]](s32) + ; MIPS32-NEXT: %5:_(p0) = nuw nusw G_PTR_ADD [[COPY]], [[C]](s32) ; MIPS32-NEXT: G_STORE [[COPY1]](s64), [[COPY3]](p0) :: (store (s64) into %ir..realp) - ; MIPS32-NEXT: G_STORE [[COPY2]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir..imagp) + ; MIPS32-NEXT: G_STORE [[COPY2]](s64), %5(p0) :: (store (s64) into %ir..imagp) ; MIPS32-NEXT: RetRA entry: %call = call { double, double } @ret_complex_double() diff --git a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/sret_pointer.ll b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/sret_pointer.ll index f3f762c742eb40..58dc2f12293060 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/sret_pointer.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/sret_pointer.ll @@ -13,8 +13,8 @@ define void @ZeroInit(ptr noalias sret(%struct.S) %agg.result) { ; MIPS32-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY [[COPY]](p0) ; MIPS32-NEXT: G_STORE [[C]](s32), [[COPY1]](p0) :: (store (s32) into %ir.x) ; MIPS32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; MIPS32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[COPY]], [[C1]](s32) - ; MIPS32-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.y) + ; MIPS32-NEXT: %4:_(p0) = nuw nusw G_PTR_ADD [[COPY]], [[C1]](s32) + ; MIPS32-NEXT: G_STORE [[C]](s32), %4(p0) :: (store (s32) into %ir.y) ; MIPS32-NEXT: RetRA entry: %x = getelementptr inbounds %struct.S, ptr %agg.result, i32 0, i32 0 diff --git 
a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/var_arg.ll b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/var_arg.ll index 3e7eb15a22a2d4..214e5aa44d5c01 100644 --- a/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/var_arg.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/irtranslator/var_arg.ll @@ -31,8 +31,8 @@ define void @testVaCopyArg(ptr %fmt, ...) { ; MIPS32-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.va_copy), [[FRAME_INDEX5]](p0), [[FRAME_INDEX4]](p0) ; MIPS32-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX5]](p0) :: (dereferenceable load (p0) from %ir.aq) ; MIPS32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; MIPS32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[LOAD]], [[C]](s32) - ; MIPS32-NEXT: G_STORE [[PTR_ADD]](p0), [[FRAME_INDEX5]](p0) :: (store (p0) into %ir.aq) + ; MIPS32-NEXT: %13:_(p0) = nuw nusw G_PTR_ADD [[LOAD]], [[C]](s32) + ; MIPS32-NEXT: G_STORE %13(p0), [[FRAME_INDEX5]](p0) :: (store (p0) into %ir.aq) ; MIPS32-NEXT: [[LOAD1:%[0-9]+]]:_(p0) = G_LOAD [[LOAD]](p0) :: (load (p0) from %ir.argp.cur) ; MIPS32-NEXT: G_STORE [[LOAD1]](p0), [[FRAME_INDEX6]](p0) :: (store (p0) into %ir.s) ; MIPS32-NEXT: [[LOAD2:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX6]](p0) :: (dereferenceable load (p0) from %ir.s) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll index 1a3489521af19c..e08b0f2c4bc3c9 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll @@ -379,14 +379,14 @@ define void @callee_large_struct_ret(ptr noalias sret(%struct.large) %agg.result ; RV32I-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; RV32I-NEXT: G_STORE [[C]](s32), [[COPY]](p0) :: (store (s32) into %ir.agg.result) ; RV32I-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; RV32I-NEXT: 
[[PTR_ADD:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[COPY]], [[C4]](s32) - ; RV32I-NEXT: G_STORE [[C1]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.b) + ; RV32I-NEXT: %3:_(p0) = nuw nusw G_PTR_ADD [[COPY]], [[C4]](s32) + ; RV32I-NEXT: G_STORE [[C1]](s32), %3(p0) :: (store (s32) into %ir.b) ; RV32I-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; RV32I-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[COPY]], [[C5]](s32) - ; RV32I-NEXT: G_STORE [[C2]](s32), [[PTR_ADD1]](p0) :: (store (s32) into %ir.c) + ; RV32I-NEXT: %6:_(p0) = nuw nusw G_PTR_ADD [[COPY]], [[C5]](s32) + ; RV32I-NEXT: G_STORE [[C2]](s32), %6(p0) :: (store (s32) into %ir.c) ; RV32I-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; RV32I-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[COPY]], [[C6]](s32) - ; RV32I-NEXT: G_STORE [[C3]](s32), [[PTR_ADD2]](p0) :: (store (s32) into %ir.d) + ; RV32I-NEXT: %9:_(p0) = nuw nusw G_PTR_ADD [[COPY]], [[C6]](s32) + ; RV32I-NEXT: G_STORE [[C3]](s32), %9(p0) :: (store (s32) into %ir.d) ; RV32I-NEXT: PseudoRET store i32 1, ptr %agg.result, align 4 %b = getelementptr inbounds %struct.large, ptr %agg.result, i32 0, i32 1 @@ -408,8 +408,8 @@ define i32 @caller_large_struct_ret() nounwind { ; ILP32-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 ; ILP32-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s32) from %ir.1) ; ILP32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; ILP32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[FRAME_INDEX]], [[C]](s32) - ; ILP32-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s32) from %ir.3) + ; ILP32-NEXT: %3:_(p0) = nuw nusw G_PTR_ADD [[FRAME_INDEX]], [[C]](s32) + ; ILP32-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD %3(p0) :: (dereferenceable load (s32) from %ir.3) ; ILP32-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]] ; ILP32-NEXT: $x10 = COPY [[ADD]](s32) ; ILP32-NEXT: PseudoRET implicit $x10 @@ -423,8 +423,8 @@ define i32 
@caller_large_struct_ret() nounwind { ; ILP32F-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 ; ILP32F-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s32) from %ir.1) ; ILP32F-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; ILP32F-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[FRAME_INDEX]], [[C]](s32) - ; ILP32F-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s32) from %ir.3) + ; ILP32F-NEXT: %3:_(p0) = nuw nusw G_PTR_ADD [[FRAME_INDEX]], [[C]](s32) + ; ILP32F-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD %3(p0) :: (dereferenceable load (s32) from %ir.3) ; ILP32F-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]] ; ILP32F-NEXT: $x10 = COPY [[ADD]](s32) ; ILP32F-NEXT: PseudoRET implicit $x10 @@ -438,8 +438,8 @@ define i32 @caller_large_struct_ret() nounwind { ; ILP32D-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 ; ILP32D-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s32) from %ir.1) ; ILP32D-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; ILP32D-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[FRAME_INDEX]], [[C]](s32) - ; ILP32D-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s32) from %ir.3) + ; ILP32D-NEXT: %3:_(p0) = nuw nusw G_PTR_ADD [[FRAME_INDEX]], [[C]](s32) + ; ILP32D-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD %3(p0) :: (dereferenceable load (s32) from %ir.3) ; ILP32D-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD]], [[LOAD1]] ; ILP32D-NEXT: $x10 = COPY [[ADD]](s32) ; ILP32D-NEXT: PseudoRET implicit $x10 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll index b175b8d92e6c9d..625575af229c90 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll +++ 
b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll @@ -402,14 +402,14 @@ define void @callee_large_struct_ret(ptr noalias sret(%struct.large) %agg.result ; RV64I-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 ; RV64I-NEXT: G_STORE [[C]](s64), [[COPY]](p0) :: (store (s64) into %ir.agg.result, align 4) ; RV64I-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; RV64I-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[COPY]], [[C4]](s64) - ; RV64I-NEXT: G_STORE [[C1]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.b, align 4) + ; RV64I-NEXT: %3:_(p0) = nuw nusw G_PTR_ADD [[COPY]], [[C4]](s64) + ; RV64I-NEXT: G_STORE [[C1]](s64), %3(p0) :: (store (s64) into %ir.b, align 4) ; RV64I-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; RV64I-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[COPY]], [[C5]](s64) - ; RV64I-NEXT: G_STORE [[C2]](s64), [[PTR_ADD1]](p0) :: (store (s64) into %ir.c, align 4) + ; RV64I-NEXT: %6:_(p0) = nuw nusw G_PTR_ADD [[COPY]], [[C5]](s64) + ; RV64I-NEXT: G_STORE [[C2]](s64), %6(p0) :: (store (s64) into %ir.c, align 4) ; RV64I-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; RV64I-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[COPY]], [[C6]](s64) - ; RV64I-NEXT: G_STORE [[C3]](s64), [[PTR_ADD2]](p0) :: (store (s64) into %ir.d, align 4) + ; RV64I-NEXT: %9:_(p0) = nuw nusw G_PTR_ADD [[COPY]], [[C6]](s64) + ; RV64I-NEXT: G_STORE [[C3]](s64), %9(p0) :: (store (s64) into %ir.d, align 4) ; RV64I-NEXT: PseudoRET store i64 1, ptr %agg.result, align 4 %b = getelementptr inbounds %struct.large, ptr %agg.result, i64 0, i32 1 @@ -431,8 +431,8 @@ define i64 @caller_large_struct_ret() nounwind { ; LP64-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 ; LP64-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.1) ; LP64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; LP64-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[FRAME_INDEX]], [[C]](s64) - ; 
LP64-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s64) from %ir.3) + ; LP64-NEXT: %3:_(p0) = nuw nusw G_PTR_ADD [[FRAME_INDEX]], [[C]](s64) + ; LP64-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD %3(p0) :: (dereferenceable load (s64) from %ir.3) ; LP64-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LOAD]], [[LOAD1]] ; LP64-NEXT: $x10 = COPY [[ADD]](s64) ; LP64-NEXT: PseudoRET implicit $x10 @@ -446,8 +446,8 @@ define i64 @caller_large_struct_ret() nounwind { ; LP64F-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 ; LP64F-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.1) ; LP64F-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; LP64F-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[FRAME_INDEX]], [[C]](s64) - ; LP64F-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s64) from %ir.3) + ; LP64F-NEXT: %3:_(p0) = nuw nusw G_PTR_ADD [[FRAME_INDEX]], [[C]](s64) + ; LP64F-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD %3(p0) :: (dereferenceable load (s64) from %ir.3) ; LP64F-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LOAD]], [[LOAD1]] ; LP64F-NEXT: $x10 = COPY [[ADD]](s64) ; LP64F-NEXT: PseudoRET implicit $x10 @@ -461,8 +461,8 @@ define i64 @caller_large_struct_ret() nounwind { ; LP64D-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2 ; LP64D-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.1) ; LP64D-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 - ; LP64D-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[FRAME_INDEX]], [[C]](s64) - ; LP64D-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s64) from %ir.3) + ; LP64D-NEXT: %3:_(p0) = nuw nusw G_PTR_ADD [[FRAME_INDEX]], [[C]](s64) + ; LP64D-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD %3(p0) :: (dereferenceable load (s64) from %ir.3) ; LP64D-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[LOAD]], [[LOAD1]] ; LP64D-NEXT: $x10 = COPY [[ADD]](s64) 
; LP64D-NEXT: PseudoRET implicit $x10 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/vararg.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/vararg.ll index d26b3ecff7d3a8..3b12ad57851638 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/vararg.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/vararg.ll @@ -67,8 +67,8 @@ define i32 @va1(ptr %fmt, ...) { ; RV32-NEXT: G_VASTART [[FRAME_INDEX1]](p0) :: (store (s32) into %ir.va) ; RV32-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (dereferenceable load (p0) from %ir.va) ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; RV32-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[LOAD]], [[C1]](s32) - ; RV32-NEXT: G_STORE [[PTR_ADD7]](p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va) + ; RV32-NEXT: %20:_(p0) = nuw nusw G_PTR_ADD [[LOAD]], [[C1]](s32) + ; RV32-NEXT: G_STORE %20(p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va) ; RV32-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.argp.cur) ; RV32-NEXT: $x10 = COPY [[LOAD1]](s32) ; RV32-NEXT: PseudoRET implicit $x10 @@ -105,8 +105,8 @@ define i32 @va1(ptr %fmt, ...) 
{ ; RV64-NEXT: G_VASTART [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.va) ; RV64-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX1]](p0) :: (dereferenceable load (p0) from %ir.va, align 4) ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; RV64-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[LOAD]], [[C1]](s64) - ; RV64-NEXT: G_STORE [[PTR_ADD7]](p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va, align 4) + ; RV64-NEXT: %20:_(p0) = nuw nusw G_PTR_ADD [[LOAD]], [[C1]](s64) + ; RV64-NEXT: G_STORE %20(p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va, align 4) ; RV64-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.argp.cur) ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD1]](s32) ; RV64-NEXT: $x10 = COPY [[ANYEXT]](s64) @@ -687,8 +687,8 @@ define i64 @va2(ptr %fmt, ...) nounwind { ; RV32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]] ; RV32-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[ADD]](s32) ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; RV32-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[INTTOPTR]], [[C3]](s32) - ; RV32-NEXT: G_STORE [[PTR_ADD7]](p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va) + ; RV32-NEXT: %25:_(p0) = nuw nusw G_PTR_ADD [[INTTOPTR]], [[C3]](s32) + ; RV32-NEXT: G_STORE %25(p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va) ; RV32-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p0) = G_INTTOPTR [[AND]](s32) ; RV32-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[INTTOPTR1]](p0) :: (load (s64) from %ir.3) ; RV32-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD1]](s64) @@ -733,8 +733,8 @@ define i64 @va2(ptr %fmt, ...) 
nounwind { ; RV64-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]] ; RV64-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[ADD]](s32) ; RV64-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; RV64-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[INTTOPTR]], [[C3]](s64) - ; RV64-NEXT: G_STORE [[PTR_ADD7]](p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va, align 4) + ; RV64-NEXT: %25:_(p0) = nuw nusw G_PTR_ADD [[INTTOPTR]], [[C3]](s64) + ; RV64-NEXT: G_STORE %25(p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va, align 4) ; RV64-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p0) = G_INTTOPTR [[AND]](s32) ; RV64-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[INTTOPTR1]](p0) :: (load (s64) from %ir.3) ; RV64-NEXT: $x10 = COPY [[LOAD1]](s64) @@ -974,8 +974,8 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind { ; RV32-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]] ; RV32-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[ADD]](s32) ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; RV32-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[INTTOPTR]], [[C3]](s32) - ; RV32-NEXT: G_STORE [[PTR_ADD5]](p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va) + ; RV32-NEXT: %24:_(p0) = nuw nusw G_PTR_ADD [[INTTOPTR]], [[C3]](s32) + ; RV32-NEXT: G_STORE %24(p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va) ; RV32-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p0) = G_INTTOPTR [[AND]](s32) ; RV32-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[INTTOPTR1]](p0) :: (load (s64) from %ir.3) ; RV32-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[MV]], [[LOAD1]] @@ -1020,8 +1020,8 @@ define i64 @va3(i32 %a, i64 %b, ...) 
nounwind { ; RV64-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ADD]], [[C2]] ; RV64-NEXT: [[INTTOPTR:%[0-9]+]]:_(p0) = G_INTTOPTR [[ADD]](s32) ; RV64-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; RV64-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[INTTOPTR]], [[C3]](s64) - ; RV64-NEXT: G_STORE [[PTR_ADD6]](p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va, align 4) + ; RV64-NEXT: %25:_(p0) = nuw nusw G_PTR_ADD [[INTTOPTR]], [[C3]](s64) + ; RV64-NEXT: G_STORE %25(p0), [[FRAME_INDEX1]](p0) :: (store (p0) into %ir.va, align 4) ; RV64-NEXT: [[INTTOPTR1:%[0-9]+]]:_(p0) = G_INTTOPTR [[AND]](s32) ; RV64-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[INTTOPTR1]](p0) :: (load (s64) from %ir.3) ; RV64-NEXT: [[ADD1:%[0-9]+]]:_(s64) = G_ADD [[COPY1]], [[LOAD1]] @@ -1724,8 +1724,8 @@ define i32 @va_large_stack(ptr %fmt, ...) { ; RV32-NEXT: G_VASTART [[FRAME_INDEX2]](p0) :: (store (s32) into %ir.va) ; RV32-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX2]](p0) :: (dereferenceable load (p0) from %ir.va) ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; RV32-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[LOAD]], [[C1]](s32) - ; RV32-NEXT: G_STORE [[PTR_ADD7]](p0), [[FRAME_INDEX2]](p0) :: (store (p0) into %ir.va) + ; RV32-NEXT: %21:_(p0) = nuw nusw G_PTR_ADD [[LOAD]], [[C1]](s32) + ; RV32-NEXT: G_STORE %21(p0), [[FRAME_INDEX2]](p0) :: (store (p0) into %ir.va) ; RV32-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.argp.cur) ; RV32-NEXT: $x10 = COPY [[LOAD1]](s32) ; RV32-NEXT: PseudoRET implicit $x10 @@ -1763,8 +1763,8 @@ define i32 @va_large_stack(ptr %fmt, ...) 
{ ; RV64-NEXT: G_VASTART [[FRAME_INDEX2]](p0) :: (store (s64) into %ir.va) ; RV64-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX2]](p0) :: (dereferenceable load (p0) from %ir.va, align 4) ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; RV64-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[LOAD]], [[C1]](s64) - ; RV64-NEXT: G_STORE [[PTR_ADD7]](p0), [[FRAME_INDEX2]](p0) :: (store (p0) into %ir.va, align 4) + ; RV64-NEXT: %21:_(p0) = nuw nusw G_PTR_ADD [[LOAD]], [[C1]](s64) + ; RV64-NEXT: G_STORE %21(p0), [[FRAME_INDEX2]](p0) :: (store (p0) into %ir.va, align 4) ; RV64-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[LOAD]](p0) :: (load (s32) from %ir.argp.cur) ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD1]](s32) ; RV64-NEXT: $x10 = COPY [[ANYEXT]](s64) diff --git a/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll b/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll index 68d546ab093887..171ccb287f2b97 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll +++ b/llvm/test/CodeGen/X86/GlobalISel/x86_64-irtranslator-struct-return.ll @@ -77,12 +77,12 @@ define { double, double } @test_return_d2(double %d.coerce0, double %d.coerce1) ; ALL-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.d ; ALL-NEXT: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.1) ; ALL-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; ALL-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[FRAME_INDEX1]], [[C1]](s64) - ; ALL-NEXT: G_STORE [[COPY1]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.2) + ; ALL-NEXT: %5:_(p0) = nuw nusw G_PTR_ADD [[FRAME_INDEX1]], [[C1]](s64) + ; ALL-NEXT: G_STORE [[COPY1]](s64), %5(p0) :: (store (s64) into %ir.2) ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.3, align 8), (load (s8) from %ir.4, align 8) ; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) 
from %ir.5) - ; ALL-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64) - ; ALL-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (dereferenceable load (s64) from %ir.5 + 8) + ; ALL-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64) + ; ALL-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s64) from %ir.5 + 8) ; ALL-NEXT: $xmm0 = COPY [[LOAD]](s64) ; ALL-NEXT: $xmm1 = COPY [[LOAD1]](s64) ; ALL-NEXT: RET 0, implicit $xmm0, implicit $xmm1 @@ -170,14 +170,14 @@ define { i64, i32 } @test_return_i3(i64 %i.coerce0, i32 %i.coerce1) { ; ALL-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.3.tmp ; ALL-NEXT: G_STORE [[COPY]](s64), [[FRAME_INDEX2]](p0) :: (store (s64) into %ir.0, align 4) ; ALL-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; ALL-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[FRAME_INDEX2]], [[C1]](s64) - ; ALL-NEXT: G_STORE [[COPY1]](s32), [[PTR_ADD]](p0) :: (store (s32) into %ir.1) + ; ALL-NEXT: %7:_(p0) = nuw nusw G_PTR_ADD [[FRAME_INDEX2]], [[C1]](s64) + ; ALL-NEXT: G_STORE [[COPY1]](s32), %7(p0) :: (store (s32) into %ir.1) ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX1]](p0), [[FRAME_INDEX2]](p0), [[C]](s64), 0 :: (store (s8) into %ir.2, align 4), (load (s8) from %ir.3, align 4) ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.4, align 4), (load (s8) from %ir.5, align 4) ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX3]](p0), [[FRAME_INDEX]](p0), [[C]](s64), 0 :: (store (s8) into %ir.6, align 8), (load (s8) from %ir.7, align 4) ; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX3]](p0) :: (dereferenceable load (s64) from %ir.tmp) - ; ALL-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s64) - ; ALL-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p0) :: (dereferenceable load (s32) from %ir.tmp + 8, align 8) + ; ALL-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX3]], [[C1]](s64) + ; 
ALL-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s32) from %ir.tmp + 8, align 8) ; ALL-NEXT: $rax = COPY [[LOAD]](s64) ; ALL-NEXT: $edx = COPY [[LOAD1]](s32) ; ALL-NEXT: RET 0, implicit $rax, implicit $edx @@ -215,12 +215,12 @@ define { i64, i64 } @test_return_i4(i64 %i.coerce0, i64 %i.coerce1) { ; ALL-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1.i ; ALL-NEXT: G_STORE [[COPY]](s64), [[FRAME_INDEX1]](p0) :: (store (s64) into %ir.1, align 4) ; ALL-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 - ; ALL-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw G_PTR_ADD [[FRAME_INDEX1]], [[C1]](s64) - ; ALL-NEXT: G_STORE [[COPY1]](s64), [[PTR_ADD]](p0) :: (store (s64) into %ir.2, align 4) + ; ALL-NEXT: %5:_(p0) = nuw nusw G_PTR_ADD [[FRAME_INDEX1]], [[C1]](s64) + ; ALL-NEXT: G_STORE [[COPY1]](s64), %5(p0) :: (store (s64) into %ir.2, align 4) ; ALL-NEXT: G_MEMCPY [[FRAME_INDEX]](p0), [[FRAME_INDEX1]](p0), [[C]](s64), 0 :: (store (s8) into %ir.3, align 4), (load (s8) from %ir.4, align 4) ; ALL-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (dereferenceable load (s64) from %ir.5, align 4) - ; ALL-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64) - ; ALL-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p0) :: (dereferenceable load (s64) from %ir.5 + 8, align 4) + ; ALL-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[FRAME_INDEX]], [[C1]](s64) + ; ALL-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (dereferenceable load (s64) from %ir.5 + 8, align 4) ; ALL-NEXT: $rax = COPY [[LOAD]](s64) ; ALL-NEXT: $rdx = COPY [[LOAD1]](s64) ; ALL-NEXT: RET 0, implicit $rax, implicit $rdx From 1af1c9fb98e5c99ce2aa3a9af8ede489ea85c745 Mon Sep 17 00:00:00 2001 From: Stefan Pintilie Date: Fri, 14 Jun 2024 13:56:52 -0500 Subject: [PATCH 145/155] [NFC][PowerPC] Update the option to -enable-subreg-liveness. 
--- llvm/test/CodeGen/PowerPC/subreg-lanemasks.mir | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/PowerPC/subreg-lanemasks.mir b/llvm/test/CodeGen/PowerPC/subreg-lanemasks.mir index 1bc8766cf78d4a..e1fd6180b26621 100644 --- a/llvm/test/CodeGen/PowerPC/subreg-lanemasks.mir +++ b/llvm/test/CodeGen/PowerPC/subreg-lanemasks.mir @@ -1,4 +1,4 @@ -# RUN: llc -mcpu=pwr10 -ppc-track-subreg-liveness -filetype=null \ +# RUN: llc -mcpu=pwr10 -enable-subreg-liveness -filetype=null \ # RUN: -mtriple=powerpc64le-unknown-linux-gnu -run-pass=greedy,virtregrewriter \ # RUN: -debug-only=regalloc -o - %s 2>&1 | FileCheck %s # REQUIRES: asserts From 005758eb6b35aaf548c3a59da860ecd2465a73f0 Mon Sep 17 00:00:00 2001 From: PiJoules <6019989+PiJoules@users.noreply.github.com> Date: Fri, 14 Jun 2024 12:11:49 -0700 Subject: [PATCH 146/155] [libc][stdlib] Make the FreeListHeap constant-initializable (#95453) This refactors some of the FreeListHeap, FreeList, and Block classes to have constexpr ctors so we can constinit a global allocator that does not require running some global function or global ctor to initialize. This is needed to prevent worrying about initialization order and any other module-ctor can invoke malloc without worry. 
--- libc/src/__support/fixedvector.h | 7 + libc/src/stdlib/CMakeLists.txt | 8 +- libc/src/stdlib/block.h | 10 +- libc/src/stdlib/freelist.h | 37 +++-- libc/src/stdlib/freelist_heap.h | 69 +++++++-- libc/src/stdlib/freelist_malloc.cpp | 48 ++++++ libc/src/stdlib/realloc.h | 20 +++ libc/test/src/stdlib/CMakeLists.txt | 2 + libc/test/src/stdlib/freelist_heap_test.cpp | 140 ++++++++---------- libc/test/src/stdlib/freelist_malloc_test.cpp | 56 +++++++ 10 files changed, 283 insertions(+), 114 deletions(-) create mode 100644 libc/src/stdlib/freelist_malloc.cpp create mode 100644 libc/src/stdlib/realloc.h create mode 100644 libc/test/src/stdlib/freelist_malloc_test.cpp diff --git a/libc/src/__support/fixedvector.h b/libc/src/__support/fixedvector.h index 43028a0a84637f..403b1620d20df2 100644 --- a/libc/src/__support/fixedvector.h +++ b/libc/src/__support/fixedvector.h @@ -30,6 +30,13 @@ template class FixedVector { push_back(*begin); } + using const_iterator = typename cpp::array::const_iterator; + constexpr FixedVector(const_iterator begin, const_iterator end) + : store{}, item_count{} { + for (; begin != end; ++begin) + push_back(*begin); + } + constexpr FixedVector(size_t count, const T &value) : store{}, item_count{} { for (size_t i = 0; i < count; ++i) push_back(value); diff --git a/libc/src/stdlib/CMakeLists.txt b/libc/src/stdlib/CMakeLists.txt index 6d2c5acca96057..e26c19f03f5ab7 100644 --- a/libc/src/stdlib/CMakeLists.txt +++ b/libc/src/stdlib/CMakeLists.txt @@ -418,8 +418,14 @@ else() libc.src.string.memory_utils.inline_memcpy libc.src.string.memory_utils.inline_memset ) - add_entrypoint_external( + add_entrypoint_object( malloc + SRCS + freelist_malloc.cpp + HDRS + malloc.h + DEPENDS + .freelist_heap ) add_entrypoint_external( free diff --git a/libc/src/stdlib/block.h b/libc/src/stdlib/block.h index afb18c1ef738fc..b0462a12afb39b 100644 --- a/libc/src/stdlib/block.h +++ b/libc/src/stdlib/block.h @@ -245,7 +245,7 @@ class Block { void mark_free() { info_.used = 0; 
} /// Marks this block as the last one in the chain. - void mark_last() { info_.last = 1; } + constexpr void mark_last() { info_.last = 1; } /// Clears the last bit from this block. void clear_last() { info_.last = 1; } @@ -259,6 +259,8 @@ class Block { return check_status() == internal::BlockStatus::VALID; } + constexpr Block(size_t prev_outer_size, size_t outer_size); + private: /// Consumes the block and returns as a span of bytes. static ByteSpan as_bytes(Block *&&block); @@ -266,8 +268,6 @@ class Block { /// Consumes the span of bytes and uses it to construct and return a block. static Block *as_block(size_t prev_outer_size, ByteSpan bytes); - Block(size_t prev_outer_size, size_t outer_size); - /// Returns a `BlockStatus` that is either VALID or indicates the reason why /// the block is invalid. /// @@ -442,7 +442,9 @@ Block *Block::prev() const { // Private template method implementations. template -Block::Block(size_t prev_outer_size, size_t outer_size) { +constexpr Block::Block(size_t prev_outer_size, + size_t outer_size) + : info_{} { prev_ = prev_outer_size / ALIGNMENT; next_ = outer_size / ALIGNMENT; info_.used = 0; diff --git a/libc/src/stdlib/freelist.h b/libc/src/stdlib/freelist.h index 789bc164fb161b..eb5088b25d43fa 100644 --- a/libc/src/stdlib/freelist.h +++ b/libc/src/stdlib/freelist.h @@ -69,36 +69,44 @@ template class FreeList { /// Removes a chunk from this freelist. bool remove_chunk(cpp::span chunk); -private: - // For a given size, find which index into chunks_ the node should be written - // to. - size_t find_chunk_ptr_for_size(size_t size, bool non_null) const; + /// For a given size, find which index into chunks_ the node should be written + /// to. 
+ constexpr size_t find_chunk_ptr_for_size(size_t size, bool non_null) const; struct FreeListNode { FreeListNode *next; size_t size; }; -public: - explicit FreeList(cpp::array sizes) + constexpr void set_freelist_node(FreeListNode &node, + cpp::span chunk); + + constexpr explicit FreeList(const cpp::array &sizes) : chunks_(NUM_BUCKETS + 1, 0), sizes_(sizes.begin(), sizes.end()) {} +private: FixedVector chunks_; FixedVector sizes_; }; +template +constexpr void FreeList::set_freelist_node(FreeListNode &node, + span chunk) { + // Add it to the correct list. + size_t chunk_ptr = find_chunk_ptr_for_size(chunk.size(), false); + node.size = chunk.size(); + node.next = chunks_[chunk_ptr]; + chunks_[chunk_ptr] = &node; +} + template bool FreeList::add_chunk(span chunk) { // Check that the size is enough to actually store what we need if (chunk.size() < sizeof(FreeListNode)) return false; - // Add it to the correct list. - size_t chunk_ptr = find_chunk_ptr_for_size(chunk.size(), false); - - FreeListNode *node = - ::new (chunk.data()) FreeListNode{chunks_[chunk_ptr], chunk.size()}; - chunks_[chunk_ptr] = node; + FreeListNode *node = ::new (chunk.data()) FreeListNode; + set_freelist_node(*node, chunk); return true; } @@ -163,8 +171,9 @@ bool FreeList::remove_chunk(span chunk) { } template -size_t FreeList::find_chunk_ptr_for_size(size_t size, - bool non_null) const { +constexpr size_t +FreeList::find_chunk_ptr_for_size(size_t size, + bool non_null) const { size_t chunk_ptr = 0; for (chunk_ptr = 0u; chunk_ptr < sizes_.size(); chunk_ptr++) { if (sizes_[chunk_ptr] >= size && diff --git a/libc/src/stdlib/freelist_heap.h b/libc/src/stdlib/freelist_heap.h index b65d361e9ca733..6357c047021df3 100644 --- a/libc/src/stdlib/freelist_heap.h +++ b/libc/src/stdlib/freelist_heap.h @@ -30,6 +30,7 @@ static constexpr cpp::array DEFAULT_BUCKETS{16, 32, 64, template class FreeListHeap { public: using BlockType = Block<>; + using FreeListType = FreeList; struct HeapStats { size_t total_bytes; @@ 
-39,7 +40,19 @@ template class FreeListHeap { size_t total_allocate_calls; size_t total_free_calls; }; - FreeListHeap(span region); + + FreeListHeap(span region) + : FreeListHeap(&*region.begin(), &*region.end(), region.size()) { + auto result = BlockType::init(region); + BlockType *block = *result; + freelist_.add_chunk(block_to_span(block)); + } + + constexpr FreeListHeap(void *start, cpp::byte *end, size_t total_bytes) + : block_region_start_(start), block_region_end_(end), + freelist_(DEFAULT_BUCKETS), heap_stats_{} { + heap_stats_.total_bytes = total_bytes; + } void *allocate(size_t size); void free(void *ptr); @@ -47,27 +60,53 @@ template class FreeListHeap { void *calloc(size_t num, size_t size); const HeapStats &heap_stats() const { return heap_stats_; } + void reset_heap_stats() { heap_stats_ = {}; } + + void *region_start() const { return block_region_start_; } + size_t region_size() const { + return reinterpret_cast(block_region_end_) - + reinterpret_cast(block_region_start_); + } + +protected: + constexpr void set_freelist_node(typename FreeListType::FreeListNode &node, + cpp::span chunk) { + freelist_.set_freelist_node(node, chunk); + } private: span block_to_span(BlockType *block) { return span(block->usable_space(), block->inner_size()); } - span region_; - FreeList freelist_; + bool is_valid_ptr(void *ptr) { + return ptr >= block_region_start_ && ptr < block_region_end_; + } + + void *block_region_start_; + void *block_region_end_; + FreeListType freelist_; HeapStats heap_stats_; }; -template -FreeListHeap::FreeListHeap(span region) - : region_(region), freelist_(DEFAULT_BUCKETS), heap_stats_() { - auto result = BlockType::init(region); - BlockType *block = *result; +template +struct FreeListHeapBuffer : public FreeListHeap { + using parent = FreeListHeap; + using FreeListNode = typename parent::FreeListType::FreeListNode; - freelist_.add_chunk(block_to_span(block)); + constexpr FreeListHeapBuffer() + : FreeListHeap(&block, buffer + sizeof(buffer), 
BUFF_SIZE), + block(0, BUFF_SIZE), node{}, buffer{} { + block.mark_last(); - heap_stats_.total_bytes = region.size(); -} + cpp::span chunk(buffer, sizeof(buffer)); + parent::set_freelist_node(node, chunk); + } + + typename parent::BlockType block; + FreeListNode node; + cpp::byte buffer[BUFF_SIZE - sizeof(block) - sizeof(node)]; +}; template void *FreeListHeap::allocate(size_t size) { @@ -97,7 +136,7 @@ void *FreeListHeap::allocate(size_t size) { template void FreeListHeap::free(void *ptr) { cpp::byte *bytes = static_cast(ptr); - LIBC_ASSERT(bytes >= region_.data() && bytes < region_.data() + region_.size() && "Invalid pointer"); + LIBC_ASSERT(is_valid_ptr(bytes) && "Invalid pointer"); BlockType *chunk_block = BlockType::from_usable_space(bytes); @@ -131,7 +170,7 @@ template void FreeListHeap::free(void *ptr) { heap_stats_.total_free_calls += 1; } -// Follows contract of the C standard realloc() function +// Follows constract of the C standard realloc() function // If ptr is free'd, will return nullptr. template void *FreeListHeap::realloc(void *ptr, size_t size) { @@ -146,7 +185,7 @@ void *FreeListHeap::realloc(void *ptr, size_t size) { cpp::byte *bytes = static_cast(ptr); - if (bytes < region_.data() || bytes >= region_.data() + region_.size()) + if (!is_valid_ptr(bytes)) return nullptr; BlockType *chunk_block = BlockType::from_usable_space(bytes); @@ -177,6 +216,8 @@ void *FreeListHeap::calloc(size_t num, size_t size) { return ptr; } +extern FreeListHeap<> *freelist_heap; + } // namespace LIBC_NAMESPACE #endif // LLVM_LIBC_SRC_STDLIB_FREELIST_HEAP_H diff --git a/libc/src/stdlib/freelist_malloc.cpp b/libc/src/stdlib/freelist_malloc.cpp new file mode 100644 index 00000000000000..185b36444e3716 --- /dev/null +++ b/libc/src/stdlib/freelist_malloc.cpp @@ -0,0 +1,48 @@ +//===-- Implementation for freelist_malloc --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "freelist_heap.h" +#include "src/stdlib/calloc.h" +#include "src/stdlib/free.h" +#include "src/stdlib/malloc.h" +#include "src/stdlib/realloc.h" + +#include + +namespace LIBC_NAMESPACE { + +namespace { +// Users can define LIBC_FREELIST_MALLOC_SIZE for setting the default buffer +// size used by freelist malloc. +#ifdef LIBC_FREELIST_MALLOC_SIZE +constexpr size_t SIZE = LIBC_FREELIST_MALLOC_SIZE; +#else +// TODO: We should probably have something akin to what scudo/sanitizer +// allocators do where each platform defines this. +constexpr size_t SIZE = 0x40000000ULL; // 1GB +#endif +LIBC_CONSTINIT FreeListHeapBuffer freelist_heap_buffer; +} // namespace + +FreeListHeap<> *freelist_heap = &freelist_heap_buffer; + +LLVM_LIBC_FUNCTION(void *, malloc, (size_t size)) { + return freelist_heap->allocate(size); +} + +LLVM_LIBC_FUNCTION(void, free, (void *ptr)) { return freelist_heap->free(ptr); } + +LLVM_LIBC_FUNCTION(void *, calloc, (size_t num, size_t size)) { + return freelist_heap->calloc(num, size); +} + +LLVM_LIBC_FUNCTION(void *, realloc, (void *ptr, size_t size)) { + return freelist_heap->realloc(ptr, size); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdlib/realloc.h b/libc/src/stdlib/realloc.h new file mode 100644 index 00000000000000..6e025faa7a8ce2 --- /dev/null +++ b/libc/src/stdlib/realloc.h @@ -0,0 +1,20 @@ +//===-- Implementation header for realloc -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include + +#ifndef LLVM_LIBC_SRC_STDLIB_REALLOC_H +#define LLVM_LIBC_SRC_STDLIB_REALLOC_H + +namespace LIBC_NAMESPACE { + +void *realloc(void *ptr, size_t size); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDLIB_REALLOC_H diff --git a/libc/test/src/stdlib/CMakeLists.txt b/libc/test/src/stdlib/CMakeLists.txt index f3033a4d958bf6..648404afb5730e 100644 --- a/libc/test/src/stdlib/CMakeLists.txt +++ b/libc/test/src/stdlib/CMakeLists.txt @@ -85,9 +85,11 @@ add_libc_test( libc-stdlib-tests SRCS freelist_heap_test.cpp + freelist_malloc_test.cpp DEPENDS libc.src.__support.CPP.span libc.src.stdlib.freelist_heap + libc.src.stdlib.malloc libc.src.string.memcmp libc.src.string.memcpy ) diff --git a/libc/test/src/stdlib/freelist_heap_test.cpp b/libc/test/src/stdlib/freelist_heap_test.cpp index b89f47f9a2def9..e30c23e724a060 100644 --- a/libc/test/src/stdlib/freelist_heap_test.cpp +++ b/libc/test/src/stdlib/freelist_heap_test.cpp @@ -14,27 +14,47 @@ namespace LIBC_NAMESPACE { -TEST(LlvmLibcFreeListHeap, CanAllocate) { - constexpr size_t N = 2048; - constexpr size_t ALLOC_SIZE = 512; - alignas(FreeListHeap<>::BlockType) cpp::byte buf[N] = {cpp::byte(0)}; +using LIBC_NAMESPACE::freelist_heap; - FreeListHeap<> allocator(buf); +// Similar to `LlvmLibcBlockTest` in block_test.cpp, we'd like to run the same +// tests independently for different parameters. In this case, we'd like to test +// functionality for a `FreeListHeap` and the global `freelist_heap` which was +// constinit'd. Functionally, it should operate the same if the FreeListHeap +// were initialized locally at runtime or at compile-time. +// +// Note that calls to `allocate` for each test case here don't always explicitly +// `free` them afterwards, so when testing the global allocator, allocations +// made in tests leak and aren't free'd. 
This is fine for the purposes of this +// test file. +#define TEST_FOR_EACH_ALLOCATOR(TestCase, BufferSize) \ + class LlvmLibcFreeListHeapTest##TestCase : public testing::Test { \ + public: \ + void RunTest(FreeListHeap<> &allocator, [[maybe_unused]] size_t N); \ + }; \ + TEST_F(LlvmLibcFreeListHeapTest##TestCase, TestCase) { \ + alignas(FreeListHeap<>::BlockType) \ + cpp::byte buf[BufferSize] = {cpp::byte(0)}; \ + FreeListHeap<> allocator(buf); \ + RunTest(allocator, BufferSize); \ + RunTest(*freelist_heap, freelist_heap->region_size()); \ + } \ + void LlvmLibcFreeListHeapTest##TestCase::RunTest(FreeListHeap<> &allocator, \ + size_t N) + +TEST_FOR_EACH_ALLOCATOR(CanAllocate, 2048) { + constexpr size_t ALLOC_SIZE = 512; void *ptr = allocator.allocate(ALLOC_SIZE); ASSERT_NE(ptr, static_cast(nullptr)); // In this case, the allocator should be returning us the start of the chunk. EXPECT_EQ(ptr, static_cast( - &buf[0] + FreeListHeap<>::BlockType::BLOCK_OVERHEAD)); + reinterpret_cast(allocator.region_start()) + + FreeListHeap<>::BlockType::BLOCK_OVERHEAD)); } -TEST(LlvmLibcFreeListHeap, AllocationsDontOverlap) { - constexpr size_t N = 2048; +TEST_FOR_EACH_ALLOCATOR(AllocationsDontOverlap, 2048) { constexpr size_t ALLOC_SIZE = 512; - alignas(FreeListHeap<>::BlockType) cpp::byte buf[N] = {cpp::byte(0)}; - - FreeListHeap<> allocator(buf); void *ptr1 = allocator.allocate(ALLOC_SIZE); void *ptr2 = allocator.allocate(ALLOC_SIZE); @@ -49,14 +69,10 @@ TEST(LlvmLibcFreeListHeap, AllocationsDontOverlap) { EXPECT_GT(ptr2_start, ptr1_end); } -TEST(LlvmLibcFreeListHeap, CanFreeAndRealloc) { +TEST_FOR_EACH_ALLOCATOR(CanFreeAndRealloc, 2048) { // There's not really a nice way to test that free works, apart from to try // and get that value back again. 
- constexpr size_t N = 2048; constexpr size_t ALLOC_SIZE = 512; - alignas(FreeListHeap<>::BlockType) cpp::byte buf[N] = {cpp::byte(0)}; - - FreeListHeap<> allocator(buf); void *ptr1 = allocator.allocate(ALLOC_SIZE); allocator.free(ptr1); @@ -65,15 +81,13 @@ TEST(LlvmLibcFreeListHeap, CanFreeAndRealloc) { EXPECT_EQ(ptr1, ptr2); } -TEST(LlvmLibcFreeListHeap, ReturnsNullWhenAllocationTooLarge) { - constexpr size_t N = 2048; - alignas(FreeListHeap<>::BlockType) cpp::byte buf[N] = {cpp::byte(0)}; - - FreeListHeap<> allocator(buf); - +TEST_FOR_EACH_ALLOCATOR(ReturnsNullWhenAllocationTooLarge, 2048) { EXPECT_EQ(allocator.allocate(N), static_cast(nullptr)); } +// NOTE: This doesn't use TEST_FOR_EACH_ALLOCATOR because the first `allocate` +// here will likely actually return a nullptr since the same global allocator +// is used for other test cases and we don't explicitly free them. TEST(LlvmLibcFreeListHeap, ReturnsNullWhenFull) { constexpr size_t N = 2048; alignas(FreeListHeap<>::BlockType) cpp::byte buf[N] = {cpp::byte(0)}; @@ -85,12 +99,7 @@ TEST(LlvmLibcFreeListHeap, ReturnsNullWhenFull) { EXPECT_EQ(allocator.allocate(1), static_cast(nullptr)); } -TEST(LlvmLibcFreeListHeap, ReturnedPointersAreAligned) { - constexpr size_t N = 2048; - alignas(FreeListHeap<>::BlockType) cpp::byte buf[N] = {cpp::byte(0)}; - - FreeListHeap<> allocator(buf); - +TEST_FOR_EACH_ALLOCATOR(ReturnedPointersAreAligned, 2048) { void *ptr1 = allocator.allocate(1); // Should be aligned to native pointer alignment @@ -105,13 +114,9 @@ TEST(LlvmLibcFreeListHeap, ReturnedPointersAreAligned) { EXPECT_EQ(ptr2_start % alignment, static_cast(0)); } -TEST(LlvmLibcFreeListHeap, CanRealloc) { - constexpr size_t N = 2048; +TEST_FOR_EACH_ALLOCATOR(CanRealloc, 2048) { constexpr size_t ALLOC_SIZE = 512; constexpr size_t kNewAllocSize = 768; - alignas(FreeListHeap<>::BlockType) cpp::byte buf[N] = {cpp::byte(1)}; - - FreeListHeap<> allocator(buf); void *ptr1 = allocator.allocate(ALLOC_SIZE); void *ptr2 = 
allocator.realloc(ptr1, kNewAllocSize); @@ -120,23 +125,19 @@ TEST(LlvmLibcFreeListHeap, CanRealloc) { ASSERT_NE(ptr2, static_cast(nullptr)); } -TEST(LlvmLibcFreeListHeap, ReallocHasSameContent) { - constexpr size_t N = 2048; +TEST_FOR_EACH_ALLOCATOR(ReallocHasSameContent, 2048) { constexpr size_t ALLOC_SIZE = sizeof(int); constexpr size_t kNewAllocSize = sizeof(int) * 2; - alignas(FreeListHeap<>::BlockType) cpp::byte buf[N] = {cpp::byte(1)}; // Data inside the allocated block. cpp::byte data1[ALLOC_SIZE]; // Data inside the reallocated block. cpp::byte data2[ALLOC_SIZE]; - FreeListHeap<> allocator(buf); - int *ptr1 = reinterpret_cast(allocator.allocate(ALLOC_SIZE)); *ptr1 = 42; - memcpy(data1, ptr1, ALLOC_SIZE); + LIBC_NAMESPACE::memcpy(data1, ptr1, ALLOC_SIZE); int *ptr2 = reinterpret_cast(allocator.realloc(ptr1, kNewAllocSize)); - memcpy(data2, ptr2, ALLOC_SIZE); + LIBC_NAMESPACE::memcpy(data2, ptr2, ALLOC_SIZE); ASSERT_NE(ptr1, static_cast(nullptr)); ASSERT_NE(ptr2, static_cast(nullptr)); @@ -144,13 +145,9 @@ TEST(LlvmLibcFreeListHeap, ReallocHasSameContent) { EXPECT_EQ(LIBC_NAMESPACE::memcmp(data1, data2, ALLOC_SIZE), 0); } -TEST(LlvmLibcFreeListHeap, ReturnsNullReallocFreedPointer) { - constexpr size_t N = 2048; +TEST_FOR_EACH_ALLOCATOR(ReturnsNullReallocFreedPointer, 2048) { constexpr size_t ALLOC_SIZE = 512; constexpr size_t kNewAllocSize = 256; - alignas(FreeListHeap<>::BlockType) cpp::byte buf[N] = {cpp::byte(0)}; - - FreeListHeap<> allocator(buf); void *ptr1 = allocator.allocate(ALLOC_SIZE); allocator.free(ptr1); @@ -159,13 +156,9 @@ TEST(LlvmLibcFreeListHeap, ReturnsNullReallocFreedPointer) { EXPECT_EQ(static_cast(nullptr), ptr2); } -TEST(LlvmLibcFreeListHeap, ReallocSmallerSize) { - constexpr size_t N = 2048; +TEST_FOR_EACH_ALLOCATOR(ReallocSmallerSize, 2048) { constexpr size_t ALLOC_SIZE = 512; constexpr size_t kNewAllocSize = 256; - alignas(FreeListHeap<>::BlockType) cpp::byte buf[N] = {cpp::byte(0)}; - - FreeListHeap<> allocator(buf); void *ptr1 = 
allocator.allocate(ALLOC_SIZE); void *ptr2 = allocator.realloc(ptr1, kNewAllocSize); @@ -174,13 +167,9 @@ TEST(LlvmLibcFreeListHeap, ReallocSmallerSize) { EXPECT_EQ(ptr1, ptr2); } -TEST(LlvmLibcFreeListHeap, ReallocTooLarge) { - constexpr size_t N = 2048; +TEST_FOR_EACH_ALLOCATOR(ReallocTooLarge, 2048) { constexpr size_t ALLOC_SIZE = 512; - constexpr size_t kNewAllocSize = 4096; - alignas(FreeListHeap<>::BlockType) cpp::byte buf[N] = {cpp::byte(0)}; - - FreeListHeap<> allocator(buf); + size_t kNewAllocSize = N * 2; // Large enough to fail. void *ptr1 = allocator.allocate(ALLOC_SIZE); void *ptr2 = allocator.realloc(ptr1, kNewAllocSize); @@ -190,49 +179,38 @@ TEST(LlvmLibcFreeListHeap, ReallocTooLarge) { EXPECT_EQ(static_cast(nullptr), ptr2); } -TEST(LlvmLibcFreeListHeap, CanCalloc) { - constexpr size_t N = 2048; +TEST_FOR_EACH_ALLOCATOR(CanCalloc, 2048) { constexpr size_t ALLOC_SIZE = 128; - constexpr size_t kNum = 4; - constexpr int size = kNum * ALLOC_SIZE; - alignas(FreeListHeap<>::BlockType) cpp::byte buf[N] = {cpp::byte(1)}; + constexpr size_t NUM = 4; + constexpr int size = NUM * ALLOC_SIZE; constexpr cpp::byte zero{0}; - FreeListHeap<> allocator(buf); - cpp::byte *ptr1 = - reinterpret_cast(allocator.calloc(kNum, ALLOC_SIZE)); + reinterpret_cast(allocator.calloc(NUM, ALLOC_SIZE)); // calloc'd content is zero. 
- for (int i = 0; i < size; i++) + for (int i = 0; i < size; i++) { EXPECT_EQ(ptr1[i], zero); + } } -TEST(LlvmLibcFreeListHeap, CanCallocWeirdSize) { - constexpr size_t N = 2048; +TEST_FOR_EACH_ALLOCATOR(CanCallocWeirdSize, 2048) { constexpr size_t ALLOC_SIZE = 143; - constexpr size_t kNum = 3; - constexpr int size = kNum * ALLOC_SIZE; - alignas(FreeListHeap<>::BlockType) cpp::byte buf[N] = {cpp::byte(132)}; + constexpr size_t NUM = 3; + constexpr int size = NUM * ALLOC_SIZE; constexpr cpp::byte zero{0}; - FreeListHeap<> allocator(buf); - cpp::byte *ptr1 = - reinterpret_cast(allocator.calloc(kNum, ALLOC_SIZE)); + reinterpret_cast(allocator.calloc(NUM, ALLOC_SIZE)); // calloc'd content is zero. - for (int i = 0; i < size; i++) + for (int i = 0; i < size; i++) { EXPECT_EQ(ptr1[i], zero); + } } -TEST(LlvmLibcFreeListHeap, CallocTooLarge) { - constexpr size_t N = 2048; - constexpr size_t ALLOC_SIZE = 2049; - alignas(FreeListHeap<>::BlockType) cpp::byte buf[N] = {cpp::byte(1)}; - - FreeListHeap<> allocator(buf); - +TEST_FOR_EACH_ALLOCATOR(CallocTooLarge, 2048) { + size_t ALLOC_SIZE = N + 1; EXPECT_EQ(allocator.calloc(1, ALLOC_SIZE), static_cast(nullptr)); } diff --git a/libc/test/src/stdlib/freelist_malloc_test.cpp b/libc/test/src/stdlib/freelist_malloc_test.cpp new file mode 100644 index 00000000000000..b2527c5b571b16 --- /dev/null +++ b/libc/test/src/stdlib/freelist_malloc_test.cpp @@ -0,0 +1,56 @@ +//===-- Unittests for freelist_malloc -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdlib/calloc.h" +#include "src/stdlib/free.h" +#include "src/stdlib/freelist_heap.h" +#include "src/stdlib/malloc.h" +#include "test/UnitTest/Test.h" + +using LIBC_NAMESPACE::freelist_heap; + +TEST(LlvmLibcFreeListMalloc, MallocStats) { + constexpr size_t kAllocSize = 256; + constexpr size_t kCallocNum = 4; + constexpr size_t kCallocSize = 64; + + freelist_heap->reset_heap_stats(); // Do this because other tests might've + // called the same global allocator. + + void *ptr1 = LIBC_NAMESPACE::malloc(kAllocSize); + + const auto &freelist_heap_stats = freelist_heap->heap_stats(); + + ASSERT_NE(ptr1, static_cast(nullptr)); + EXPECT_EQ(freelist_heap_stats.bytes_allocated, kAllocSize); + EXPECT_EQ(freelist_heap_stats.cumulative_allocated, kAllocSize); + EXPECT_EQ(freelist_heap_stats.cumulative_freed, size_t(0)); + + LIBC_NAMESPACE::free(ptr1); + EXPECT_EQ(freelist_heap_stats.bytes_allocated, size_t(0)); + EXPECT_EQ(freelist_heap_stats.cumulative_allocated, kAllocSize); + EXPECT_EQ(freelist_heap_stats.cumulative_freed, kAllocSize); + + void *ptr2 = LIBC_NAMESPACE::calloc(kCallocNum, kCallocSize); + ASSERT_NE(ptr2, static_cast(nullptr)); + EXPECT_EQ(freelist_heap_stats.bytes_allocated, kCallocNum * kCallocSize); + EXPECT_EQ(freelist_heap_stats.cumulative_allocated, + kAllocSize + kCallocNum * kCallocSize); + EXPECT_EQ(freelist_heap_stats.cumulative_freed, kAllocSize); + + for (size_t i = 0; i < kCallocNum * kCallocSize; ++i) { + EXPECT_EQ(reinterpret_cast(ptr2)[i], uint8_t(0)); + } + + LIBC_NAMESPACE::free(ptr2); + EXPECT_EQ(freelist_heap_stats.bytes_allocated, size_t(0)); + EXPECT_EQ(freelist_heap_stats.cumulative_allocated, + kAllocSize + kCallocNum * kCallocSize); + EXPECT_EQ(freelist_heap_stats.cumulative_freed, + kAllocSize + kCallocNum * kCallocSize); +} From cc7a18c18011d1e0c70187ceb60e3e69bf7cd0ee Mon 
Sep 17 00:00:00 2001 From: Jeremy Day Date: Fri, 14 Jun 2024 12:17:39 -0700 Subject: [PATCH 147/155] Set Support system_libs if WIN32, not just MSVC or MINGW (#95505) The previous check was false when compiling with `clang++`, which prevented `ntdll` from being specified as a link library, causing an undefined symbol error when trying to resolve `RtlGetLastNtStatus`. Since we always want to link these libraries on Windows, the check can be simplified to just `if( WIN32 )`. --- llvm/lib/Support/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt index c7f8ac325a97a2..4d8ce329029d00 100644 --- a/llvm/lib/Support/CMakeLists.txt +++ b/llvm/lib/Support/CMakeLists.txt @@ -37,7 +37,7 @@ if(LLVM_ENABLE_ZSTD) list(APPEND imported_libs ${zstd_target}) endif() -if( MSVC OR MINGW ) +if( WIN32 ) # libuuid required for FOLDERID_Profile usage in lib/Support/Windows/Path.inc. # advapi32 required for CryptAcquireContextW in lib/Support/Windows/Path.inc. # ntdll required for RtlGetLastNtStatus in lib/Support/ErrorHandling.cpp. @@ -72,7 +72,7 @@ elseif( CMAKE_HOST_UNIX ) add_compile_definitions(_BSD_SOURCE) set(system_libs ${system_libs} bsd network) endif() -endif( MSVC OR MINGW ) +endif( WIN32 ) # Delay load shell32.dll if possible to speed up process startup. set (delayload_flags) From c63b9a5af72a7d83d936c12ae4bc79828c073edf Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Fri, 14 Jun 2024 15:37:17 -0400 Subject: [PATCH 148/155] [clang-interp] Use -fno-sized-deallocation in two tests (#95546) At least on my Windows machine, these two tests fail due to not being able to look up `??3@YAXPEAX_K@Z` (which is `void __cdecl operator delete(void *, unsigned __int64)` in demangled) after 130e93cc26ca. Since they don't test anything related to sized deallocation, just disable sized allocation for them. Possibly fixes #95451. 
--- clang/test/Interpreter/inline-virtual.cpp | 6 ++++-- clang/unittests/Interpreter/InterpreterTest.cpp | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/clang/test/Interpreter/inline-virtual.cpp b/clang/test/Interpreter/inline-virtual.cpp index d862b3354f61fe..9c31208a4a6421 100644 --- a/clang/test/Interpreter/inline-virtual.cpp +++ b/clang/test/Interpreter/inline-virtual.cpp @@ -3,8 +3,10 @@ // // We disable RTTI to avoid problems on Windows for non-RTTI builds of LLVM // where the JIT cannot find ??_7type_info@@6B@. -// RUN: cat %s | clang-repl -Xcc -fno-rtti | FileCheck %s -// RUN: cat %s | clang-repl -Xcc -fno-rtti -Xcc -O2 | FileCheck %s +// RUN: cat %s | clang-repl -Xcc -fno-rtti -Xcc -fno-sized-deallocation \ +// RUN: | FileCheck %s +// RUN: cat %s | clang-repl -Xcc -fno-rtti -Xcc -fno-sized-deallocation \ +// RUN: -Xcc -O2 | FileCheck %s extern "C" int printf(const char *, ...); diff --git a/clang/unittests/Interpreter/InterpreterTest.cpp b/clang/unittests/Interpreter/InterpreterTest.cpp index 683295a18d5199..bbd854149d5f59 100644 --- a/clang/unittests/Interpreter/InterpreterTest.cpp +++ b/clang/unittests/Interpreter/InterpreterTest.cpp @@ -286,7 +286,8 @@ TEST_F(InterpreterTest, InstantiateTemplate) { // https://github.com/llvm/llvm-project/issues/94994. #ifndef __arm__ TEST_F(InterpreterTest, Value) { - std::unique_ptr Interp = createInterpreter(); + std::vector Args = {"-fno-sized-deallocation"}; + std::unique_ptr Interp = createInterpreter(Args); Value V1; llvm::cantFail(Interp->ParseAndExecute("int x = 42;")); From 445fc51800d391d0c912d8c6c918b016e0604319 Mon Sep 17 00:00:00 2001 From: Felipe de Azevedo Piovezan Date: Fri, 14 Jun 2024 12:40:30 -0700 Subject: [PATCH 149/155] [lldb][test] Force dwarf4 usage in test requiring it (#95449) This test is explicitly checking for dwarf 4 behavior on Apple platforms, so we should explicitly use the dwarf4 flag. 
Related to https://github.com/llvm/llvm-project/pull/95164 --- lldb/test/Shell/SymbolFile/DWARF/x86/apple-index-is-used.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/apple-index-is-used.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/apple-index-is-used.cpp index 00440531e99f73..5bcb2cbcbbe293 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/apple-index-is-used.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/apple-index-is-used.cpp @@ -1,5 +1,5 @@ // Test that we use the apple indexes. -// RUN: %clang %s -g -c -o %t --target=x86_64-apple-macosx +// RUN: %clang %s -g -c -o %t --target=x86_64-apple-macosx -gdwarf-4 // RUN: lldb-test symbols %t | FileCheck %s // CHECK: .apple_names index present From feed66f3eae5006bb05e6cb34801930fd940daa8 Mon Sep 17 00:00:00 2001 From: Cyndy Ishida Date: Fri, 14 Jun 2024 13:08:27 -0700 Subject: [PATCH 150/155] [InstallAPI] Pick up input headers by directory traversal (#94508) Match TAPI behavior and allow input headers to be resolved via a passed directory, which is expected to be a library sitting in a build directory. 
--- .../clang/Basic/DiagnosticInstallAPIKinds.td | 2 + .../clang/InstallAPI/DirectoryScanner.h | 81 +++++ clang/include/clang/InstallAPI/HeaderFile.h | 13 + clang/include/clang/InstallAPI/Library.h | 65 ++++ clang/include/clang/InstallAPI/MachO.h | 1 + clang/lib/InstallAPI/CMakeLists.txt | 2 + clang/lib/InstallAPI/DirectoryScanner.cpp | 300 ++++++++++++++++++ clang/lib/InstallAPI/Library.cpp | 40 +++ clang/test/InstallAPI/asm.test | 2 +- clang/test/InstallAPI/basic.test | 4 +- clang/test/InstallAPI/binary-attributes.test | 6 +- clang/test/InstallAPI/cpp.test | 4 +- clang/test/InstallAPI/diagnostics-dsym.test | 4 +- .../InstallAPI/directory-scanning-dylib.test | 57 ++++ .../directory-scanning-frameworks.test | 88 +++++ clang/test/InstallAPI/functions.test | 2 +- clang/test/InstallAPI/variables.test | 2 +- clang/tools/clang-installapi/Options.cpp | 51 ++- clang/tools/clang-installapi/Options.h | 3 + 19 files changed, 703 insertions(+), 24 deletions(-) create mode 100644 clang/include/clang/InstallAPI/DirectoryScanner.h create mode 100644 clang/include/clang/InstallAPI/Library.h create mode 100644 clang/lib/InstallAPI/DirectoryScanner.cpp create mode 100644 clang/lib/InstallAPI/Library.cpp create mode 100644 clang/test/InstallAPI/directory-scanning-dylib.test create mode 100644 clang/test/InstallAPI/directory-scanning-frameworks.test diff --git a/clang/include/clang/Basic/DiagnosticInstallAPIKinds.td b/clang/include/clang/Basic/DiagnosticInstallAPIKinds.td index cdf27247602f2b..e10fa71011f304 100644 --- a/clang/include/clang/Basic/DiagnosticInstallAPIKinds.td +++ b/clang/include/clang/Basic/DiagnosticInstallAPIKinds.td @@ -26,6 +26,8 @@ def err_unsupported_environment : Error<"environment '%0' is not supported: '%1' def err_unsupported_os : Error<"os '%0' is not supported: '%1'">; def err_cannot_read_input_list : Error<"could not read %0 input list '%1': %2">; def err_invalid_label: Error<"label '%0' is reserved: use a different label name for -X