From 3365d62179011aad6da3e4cbcb31044eec3462a2 Mon Sep 17 00:00:00 2001 From: Congcong Cai Date: Mon, 1 Apr 2024 15:53:57 +0800 Subject: [PATCH 001/201] [clang-tidy] add new check readability-enum-initial-value (#86129) Fixes: #85243. --- .../clang-tidy/readability/CMakeLists.txt | 1 + .../readability/EnumInitialValueCheck.cpp | 200 ++++++++++++++++++ .../readability/EnumInitialValueCheck.h | 38 ++++ .../readability/ReadabilityTidyModule.cpp | 3 + clang-tools-extra/docs/ReleaseNotes.rst | 6 + .../docs/clang-tidy/checks/list.rst | 1 + .../checks/readability/enum-initial-value.rst | 75 +++++++ .../checkers/readability/enum-initial-value.c | 80 +++++++ .../readability/enum-initial-value.cpp | 27 +++ 9 files changed, 431 insertions(+) create mode 100644 clang-tools-extra/clang-tidy/readability/EnumInitialValueCheck.cpp create mode 100644 clang-tools-extra/clang-tidy/readability/EnumInitialValueCheck.h create mode 100644 clang-tools-extra/docs/clang-tidy/checks/readability/enum-initial-value.rst create mode 100644 clang-tools-extra/test/clang-tidy/checkers/readability/enum-initial-value.c create mode 100644 clang-tools-extra/test/clang-tidy/checkers/readability/enum-initial-value.cpp diff --git a/clang-tools-extra/clang-tidy/readability/CMakeLists.txt b/clang-tools-extra/clang-tidy/readability/CMakeLists.txt index 5728c9970fb65d..dd772d69202548 100644 --- a/clang-tools-extra/clang-tidy/readability/CMakeLists.txt +++ b/clang-tools-extra/clang-tidy/readability/CMakeLists.txt @@ -17,6 +17,7 @@ add_clang_library(clangTidyReadabilityModule DeleteNullPointerCheck.cpp DuplicateIncludeCheck.cpp ElseAfterReturnCheck.cpp + EnumInitialValueCheck.cpp FunctionCognitiveComplexityCheck.cpp FunctionSizeCheck.cpp IdentifierLengthCheck.cpp diff --git a/clang-tools-extra/clang-tidy/readability/EnumInitialValueCheck.cpp b/clang-tools-extra/clang-tidy/readability/EnumInitialValueCheck.cpp new file mode 100644 index 00000000000000..8f2841c32259a2 --- /dev/null +++ 
b/clang-tools-extra/clang-tidy/readability/EnumInitialValueCheck.cpp @@ -0,0 +1,200 @@ +//===--- EnumInitialValueCheck.cpp - clang-tidy ---------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "EnumInitialValueCheck.h" +#include "../utils/LexerUtils.h" +#include "clang/AST/Decl.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/Basic/Diagnostic.h" +#include "clang/Basic/SourceLocation.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" + +using namespace clang::ast_matchers; + +namespace clang::tidy::readability { + +static bool isNoneEnumeratorsInitialized(const EnumDecl &Node) { + return llvm::all_of(Node.enumerators(), [](const EnumConstantDecl *ECD) { + return ECD->getInitExpr() == nullptr; + }); +} + +static bool isOnlyFirstEnumeratorInitialized(const EnumDecl &Node) { + bool IsFirst = true; + for (const EnumConstantDecl *ECD : Node.enumerators()) { + if ((IsFirst && ECD->getInitExpr() == nullptr) || + (!IsFirst && ECD->getInitExpr() != nullptr)) + return false; + IsFirst = false; + } + return !IsFirst; +} + +static bool areAllEnumeratorsInitialized(const EnumDecl &Node) { + return llvm::all_of(Node.enumerators(), [](const EnumConstantDecl *ECD) { + return ECD->getInitExpr() != nullptr; + }); +} + +/// Check if \p Enumerator is initialized with a (potentially negated) \c +/// IntegerLiteral. 
+static bool isInitializedByLiteral(const EnumConstantDecl *Enumerator) { + const Expr *const Init = Enumerator->getInitExpr(); + if (!Init) + return false; + return Init->isIntegerConstantExpr(Enumerator->getASTContext()); +} + +static void cleanInitialValue(DiagnosticBuilder &Diag, + const EnumConstantDecl *ECD, + const SourceManager &SM, + const LangOptions &LangOpts) { + const SourceRange InitExprRange = ECD->getInitExpr()->getSourceRange(); + if (InitExprRange.isInvalid() || InitExprRange.getBegin().isMacroID() || + InitExprRange.getEnd().isMacroID()) + return; + std::optional EqualToken = utils::lexer::findNextTokenSkippingComments( + ECD->getLocation(), SM, LangOpts); + if (!EqualToken.has_value() || + EqualToken.value().getKind() != tok::TokenKind::equal) + return; + const SourceLocation EqualLoc{EqualToken->getLocation()}; + if (EqualLoc.isInvalid() || EqualLoc.isMacroID()) + return; + Diag << FixItHint::CreateRemoval(EqualLoc) + << FixItHint::CreateRemoval(InitExprRange); + return; +} + +namespace { + +AST_MATCHER(EnumDecl, isMacro) { + SourceLocation Loc = Node.getBeginLoc(); + return Loc.isMacroID(); +} + +AST_MATCHER(EnumDecl, hasConsistentInitialValues) { + return isNoneEnumeratorsInitialized(Node) || + isOnlyFirstEnumeratorInitialized(Node) || + areAllEnumeratorsInitialized(Node); +} + +AST_MATCHER(EnumDecl, hasZeroInitialValueForFirstEnumerator) { + const EnumDecl::enumerator_range Enumerators = Node.enumerators(); + if (Enumerators.empty()) + return false; + const EnumConstantDecl *ECD = *Enumerators.begin(); + return isOnlyFirstEnumeratorInitialized(Node) && + isInitializedByLiteral(ECD) && ECD->getInitVal().isZero(); +} + +/// Excludes bitfields because enumerators initialized with the result of a +/// bitwise operator on enumeration values or any other expr that is not a +/// potentially negative integer literal. 
+/// Enumerations where it is not directly clear if they are used with +/// bitmask, evident when enumerators are only initialized with (potentially +/// negative) integer literals, are ignored. This is also the case when all +/// enumerators are powers of two (e.g., 0, 1, 2). +AST_MATCHER(EnumDecl, hasSequentialInitialValues) { + const EnumDecl::enumerator_range Enumerators = Node.enumerators(); + if (Enumerators.empty()) + return false; + const EnumConstantDecl *const FirstEnumerator = *Node.enumerator_begin(); + llvm::APSInt PrevValue = FirstEnumerator->getInitVal(); + if (!isInitializedByLiteral(FirstEnumerator)) + return false; + bool AllEnumeratorsArePowersOfTwo = true; + for (const EnumConstantDecl *Enumerator : llvm::drop_begin(Enumerators)) { + const llvm::APSInt NewValue = Enumerator->getInitVal(); + if (NewValue != ++PrevValue) + return false; + if (!isInitializedByLiteral(Enumerator)) + return false; + PrevValue = NewValue; + AllEnumeratorsArePowersOfTwo &= NewValue.isPowerOf2(); + } + return !AllEnumeratorsArePowersOfTwo; +} + +} // namespace + +EnumInitialValueCheck::EnumInitialValueCheck(StringRef Name, + ClangTidyContext *Context) + : ClangTidyCheck(Name, Context), + AllowExplicitZeroFirstInitialValue( + Options.get("AllowExplicitZeroFirstInitialValue", true)), + AllowExplicitSequentialInitialValues( + Options.get("AllowExplicitSequentialInitialValues", true)) {} + +void EnumInitialValueCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { + Options.store(Opts, "AllowExplicitZeroFirstInitialValue", + AllowExplicitZeroFirstInitialValue); + Options.store(Opts, "AllowExplicitSequentialInitialValues", + AllowExplicitSequentialInitialValues); +} + +void EnumInitialValueCheck::registerMatchers(MatchFinder *Finder) { + Finder->addMatcher( + enumDecl(unless(isMacro()), unless(hasConsistentInitialValues())) + .bind("inconsistent"), + this); + if (!AllowExplicitZeroFirstInitialValue) + Finder->addMatcher( + 
enumDecl(hasZeroInitialValueForFirstEnumerator()).bind("zero_first"), + this); + if (!AllowExplicitSequentialInitialValues) + Finder->addMatcher(enumDecl(unless(isMacro()), hasSequentialInitialValues()) + .bind("sequential"), + this); +} + +void EnumInitialValueCheck::check(const MatchFinder::MatchResult &Result) { + if (const auto *Enum = Result.Nodes.getNodeAs("inconsistent")) { + DiagnosticBuilder Diag = + diag(Enum->getBeginLoc(), + "inital values in enum %0 are not consistent, consider explicit " + "initialization of all, none or only the first enumerator") + << Enum; + for (const EnumConstantDecl *ECD : Enum->enumerators()) + if (ECD->getInitExpr() == nullptr) { + const SourceLocation EndLoc = Lexer::getLocForEndOfToken( + ECD->getLocation(), 0, *Result.SourceManager, getLangOpts()); + if (EndLoc.isMacroID()) + continue; + llvm::SmallString<8> Str{" = "}; + ECD->getInitVal().toString(Str); + Diag << FixItHint::CreateInsertion(EndLoc, Str); + } + return; + } + + if (const auto *Enum = Result.Nodes.getNodeAs("zero_first")) { + const EnumConstantDecl *ECD = *Enum->enumerator_begin(); + const SourceLocation Loc = ECD->getLocation(); + if (Loc.isInvalid() || Loc.isMacroID()) + return; + DiagnosticBuilder Diag = diag(Loc, "zero initial value for the first " + "enumerator in %0 can be disregarded") + << Enum; + cleanInitialValue(Diag, ECD, *Result.SourceManager, getLangOpts()); + return; + } + if (const auto *Enum = Result.Nodes.getNodeAs("sequential")) { + DiagnosticBuilder Diag = + diag(Enum->getBeginLoc(), + "sequential initial value in %0 can be ignored") + << Enum; + for (const EnumConstantDecl *ECD : llvm::drop_begin(Enum->enumerators())) + cleanInitialValue(Diag, ECD, *Result.SourceManager, getLangOpts()); + return; + } +} + +} // namespace clang::tidy::readability diff --git a/clang-tools-extra/clang-tidy/readability/EnumInitialValueCheck.h b/clang-tools-extra/clang-tidy/readability/EnumInitialValueCheck.h new file mode 100644 index 
00000000000000..66087e4ee170da --- /dev/null +++ b/clang-tools-extra/clang-tidy/readability/EnumInitialValueCheck.h @@ -0,0 +1,38 @@ +//===--- EnumInitialValueCheck.h - clang-tidy -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_ENUMINITIALVALUECHECK_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_ENUMINITIALVALUECHECK_H + +#include "../ClangTidyCheck.h" + +namespace clang::tidy::readability { + +/// Enforces consistent style for enumerators' initialization, covering three +/// styles: none, first only, or all initialized explicitly. +/// +/// For the user-facing documentation see: +/// http://clang.llvm.org/extra/clang-tidy/checks/readability/enum-initial-value.html +class EnumInitialValueCheck : public ClangTidyCheck { +public: + EnumInitialValueCheck(StringRef Name, ClangTidyContext *Context); + void storeOptions(ClangTidyOptions::OptionMap &Opts) override; + void registerMatchers(ast_matchers::MatchFinder *Finder) override; + void check(const ast_matchers::MatchFinder::MatchResult &Result) override; + std::optional getCheckTraversalKind() const override { + return TK_IgnoreUnlessSpelledInSource; + } + +private: + const bool AllowExplicitZeroFirstInitialValue; + const bool AllowExplicitSequentialInitialValues; +}; + +} // namespace clang::tidy::readability + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_READABILITY_ENUMINITIALVALUECHECK_H diff --git a/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp b/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp index bca2c425111f6c..376b84683df74e 100644 --- a/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp +++ 
b/clang-tools-extra/clang-tidy/readability/ReadabilityTidyModule.cpp @@ -22,6 +22,7 @@ #include "DeleteNullPointerCheck.h" #include "DuplicateIncludeCheck.h" #include "ElseAfterReturnCheck.h" +#include "EnumInitialValueCheck.h" #include "FunctionCognitiveComplexityCheck.h" #include "FunctionSizeCheck.h" #include "IdentifierLengthCheck.h" @@ -92,6 +93,8 @@ class ReadabilityModule : public ClangTidyModule { "readability-duplicate-include"); CheckFactories.registerCheck( "readability-else-after-return"); + CheckFactories.registerCheck( + "readability-enum-initial-value"); CheckFactories.registerCheck( "readability-function-cognitive-complexity"); CheckFactories.registerCheck( diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 78b09d23d4427f..309b844615a121 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -123,6 +123,12 @@ New checks Finds initializer lists for aggregate types that could be written as designated initializers instead. +- New :doc:`readability-enum-initial-value + ` check. + + Enforces consistent style for enumerators' initialization, covering three + styles: none, first only, or all initialized explicitly. + - New :doc:`readability-use-std-min-max ` check. 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/list.rst b/clang-tools-extra/docs/clang-tidy/checks/list.rst index 79e81dd174e4f3..188a42bfddd383 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/list.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/list.rst @@ -352,6 +352,7 @@ Clang-Tidy Checks :doc:`readability-delete-null-pointer `, "Yes" :doc:`readability-duplicate-include `, "Yes" :doc:`readability-else-after-return `, "Yes" + :doc:`readability-enum-initial-value `, "Yes" :doc:`readability-function-cognitive-complexity `, :doc:`readability-function-size `, :doc:`readability-identifier-length `, diff --git a/clang-tools-extra/docs/clang-tidy/checks/readability/enum-initial-value.rst b/clang-tools-extra/docs/clang-tidy/checks/readability/enum-initial-value.rst new file mode 100644 index 00000000000000..660efc1eaff3e5 --- /dev/null +++ b/clang-tools-extra/docs/clang-tidy/checks/readability/enum-initial-value.rst @@ -0,0 +1,75 @@ +.. title:: clang-tidy - readability-enum-initial-value + +readability-enum-initial-value +============================== + +Enforces consistent style for enumerators' initialization, covering three +styles: none, first only, or all initialized explicitly. + +When adding new enumerators, an inconsistent initial value can cause potential +enumeration value conflicts. + +In an enumeration, the following three cases are accepted. +1. none of the enumerators is explicitly initialized. +2. only the first enumerator is explicitly initialized. +3. all of the enumerators are explicitly initialized. + +.. code-block:: c++ + + // valid, none of the enumerators are initialized. + enum A { + e0, + e1, + e2, + }; + + // valid, the first enumerator is initialized. + enum A { + e0 = 0, + e1, + e2, + }; + + // valid, all of the enumerators are initialized. + enum A { + e0 = 0, + e1 = 1, + e2 = 2, + }; + + // invalid, e1 is not explicitly initialized. + enum A { + e0 = 0, + e1, + e2 = 2, + }; + +Options +------- + +.. 
option:: AllowExplicitZeroFirstInitialValue + + If set to `false`, the first enumerator must not be explicitly initialized. + See examples below. Default is `true`. + + .. code-block:: c++ + + enum A { + e0 = 0, // not allowed if AllowExplicitZeroFirstInitialValue is false + e1, + e2, + }; + + +.. option:: AllowExplicitSequentialInitialValues + + If set to `false`, sequential initializations are not allowed. + See examples below. Default is `true`. + + .. code-block:: c++ + + enum A { + e0 = 1, // not allowed if AllowExplicitSequentialInitialValues is false + e1 = 2, + e2 = 3, + }; diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/enum-initial-value.c b/clang-tools-extra/test/clang-tidy/checkers/readability/enum-initial-value.c new file mode 100644 index 00000000000000..c66288cbe3e957 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/enum-initial-value.c @@ -0,0 +1,80 @@ +// RUN: %check_clang_tidy %s readability-enum-initial-value %t +// RUN: %check_clang_tidy -check-suffix=ENABLE %s readability-enum-initial-value %t -- \ +// RUN: -config='{CheckOptions: { \ +// RUN: readability-enum-initial-value.AllowExplicitZeroFirstInitialValue: false, \ +// RUN: readability-enum-initial-value.AllowExplicitSequentialInitialValues: false, \ +// RUN: }}' + +enum EError { + // CHECK-MESSAGES: :[[@LINE-1]]:1: warning: inital values in enum 'EError' are not consistent + // CHECK-MESSAGES-ENABLE: :[[@LINE-2]]:1: warning: inital values in enum 'EError' are not consistent + EError_a = 1, + EError_b, + // CHECK-FIXES: EError_b = 2, + EError_c = 3, +}; + +enum ENone { + ENone_a, + ENone_b, + EENone_c, +}; + +enum EFirst { + EFirst_a = 1, + EFirst_b, + EFirst_c, +}; + +enum EAll { + EAll_a = 1, + EAll_b = 2, + EAll_c = 4, +}; + +#define ENUMERATOR_1 EMacro1_b +enum EMacro1 { + // CHECK-MESSAGES: :[[@LINE-1]]:1: warning: inital values in enum 'EMacro1' are not consistent + // CHECK-MESSAGES-ENABLE: :[[@LINE-2]]:1: warning: inital values in enum 
'EMacro1' are not consistent + EMacro1_a = 1, + ENUMERATOR_1, + // CHECK-FIXES: ENUMERATOR_1 = 2, + EMacro1_c = 3, +}; + + +#define ENUMERATOR_2 EMacro2_b = 2 +enum EMacro2 { + // CHECK-MESSAGES: :[[@LINE-1]]:1: warning: inital values in enum 'EMacro2' are not consistent + // CHECK-MESSAGES-ENABLE: :[[@LINE-2]]:1: warning: inital values in enum 'EMacro2' are not consistent + EMacro2_a = 1, + ENUMERATOR_2, + EMacro2_c, + // CHECK-FIXES: EMacro2_c = 3, +}; + +enum EnumZeroFirstInitialValue { + EnumZeroFirstInitialValue_0 = 0, + // CHECK-MESSAGES-ENABLE: :[[@LINE-1]]:3: warning: zero initial value for the first enumerator in 'EnumZeroFirstInitialValue' can be disregarded + // CHECK-FIXES-ENABLE: EnumZeroFirstInitialValue_0 , + EnumZeroFirstInitialValue_1, + EnumZeroFirstInitialValue_2, +}; + +enum EnumZeroFirstInitialValueWithComment { + EnumZeroFirstInitialValueWithComment_0 = /* == */ 0, + // CHECK-MESSAGES-ENABLE: :[[@LINE-1]]:3: warning: zero initial value for the first enumerator in 'EnumZeroFirstInitialValueWithComment' can be disregarded + // CHECK-FIXES-ENABLE: EnumZeroFirstInitialValueWithComment_0 /* == */ , + EnumZeroFirstInitialValueWithComment_1, + EnumZeroFirstInitialValueWithComment_2, +}; + +enum EnumSequentialInitialValue { + // CHECK-MESSAGES-ENABLE: :[[@LINE-1]]:1: warning: sequential initial value in 'EnumSequentialInitialValue' can be ignored + EnumSequentialInitialValue_0 = 2, + // CHECK-FIXES-ENABLE: EnumSequentialInitialValue_0 = 2, + EnumSequentialInitialValue_1 = 3, + // CHECK-FIXES-ENABLE: EnumSequentialInitialValue_1 , + EnumSequentialInitialValue_2 = 4, + // CHECK-FIXES-ENABLE: EnumSequentialInitialValue_2 , +}; diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/enum-initial-value.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/enum-initial-value.cpp new file mode 100644 index 00000000000000..3c4ba970372a07 --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/enum-initial-value.cpp @@ 
-0,0 +1,27 @@ +// RUN: %check_clang_tidy %s readability-enum-initial-value %t + +enum class EError { + // CHECK-MESSAGES: :[[@LINE-1]]:1: warning: inital values in enum 'EError' are not consistent + EError_a = 1, + EError_b, + // CHECK-FIXES: EError_b = 2, + EError_c = 3, +}; + +enum class ENone { + ENone_a, + ENone_b, + EENone_c, +}; + +enum class EFirst { + EFirst_a = 1, + EFirst_b, + EFirst_c, +}; + +enum class EAll { + EAll_a = 1, + EAll_b = 2, + EAll_c = 3, +}; From a4dec9d6bc67c4d8fbd4a4f54ffaa0399def9627 Mon Sep 17 00:00:00 2001 From: Ryotaro KASUGA Date: Mon, 1 Apr 2024 17:04:44 +0900 Subject: [PATCH 002/201] [CodeGen] Fix register pressure computation in MachinePipeliner (#87030) `RegisterClassInfo::getRegPressureSetLimit` has been changed to return a smaller value than before so the limit may become negative in later calculations. As a workaround, change to use `TargetRegisterInfo::getRegPressureSetLimit`. Also improve tests. --- llvm/lib/CodeGen/MachinePipeliner.cpp | 2 +- llvm/test/CodeGen/AArch64/sms-regpress.mir | 158 +++++++++++++++++ llvm/test/CodeGen/PowerPC/sms-regpress.mir | 186 +++------------------ 3 files changed, 180 insertions(+), 166 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/sms-regpress.mir diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index eb42a78603d407..b9c6765be445a0 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -1268,7 +1268,7 @@ class HighRegisterPressureDetector { // Calculate the upper limit of each pressure set void computePressureSetLimit(const RegisterClassInfo &RCI) { for (unsigned PSet = 0; PSet < PSetNum; PSet++) - PressureSetLimit[PSet] = RCI.getRegPressureSetLimit(PSet); + PressureSetLimit[PSet] = TRI->getRegPressureSetLimit(MF, PSet); // We assume fixed registers, such as stack pointer, are already in use. 
// Therefore subtracting the weight of the fixed registers from the limit of diff --git a/llvm/test/CodeGen/AArch64/sms-regpress.mir b/llvm/test/CodeGen/AArch64/sms-regpress.mir new file mode 100644 index 00000000000000..ad98d5c6124fcf --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sms-regpress.mir @@ -0,0 +1,158 @@ +# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -pipeliner-max-mii=40 -pipeliner-register-pressure -pipeliner-ii-search-range=30 -debug-only=pipeliner 2>&1 | FileCheck %s + +# Check that if the register pressure is too high, the schedule is rejected, II is incremented, and scheduling continues. +# The specific value of II is not important. + +# CHECK: {{^ *}}Try to schedule with {{[0-9]+$}} +# CHECK: {{^ *}}Rejected the schedule because of too high register pressure{{$}} +# CHECK: {{^ *}}Try to schedule with {{[0-9]+$}} +# CHECK: {{^ *}}Schedule Found? 1 (II={{[0-9]+}}){{$}} + +--- | + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + + define dso_local double @kernel(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b, i32 noundef %n) local_unnamed_addr { + entry: + %0 = load double, ptr %a, align 8 + %arrayidx1 = getelementptr inbounds i8, ptr %a, i64 8 + %1 = load double, ptr %arrayidx1, align 8 + %cmp133 = icmp sgt i32 %n, 0 + br i1 %cmp133, label %for.body.preheader, label %for.cond.cleanup + + for.body.preheader: ; preds = %entry + %wide.trip.count = zext nneg i32 %n to i64 + br label %for.body + + for.cond.cleanup: ; preds = %for.body, %entry + %res.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add54, %for.body ] + ret double %res.0.lcssa + + for.body: ; preds = %for.body.preheader, %for.body + %lsr.iv137 = phi i64 [ %wide.trip.count, %for.body.preheader ], [ %lsr.iv.next, %for.body ] + %lsr.iv = phi ptr [ %b, %for.body.preheader ], [ %scevgep, %for.body ] + %res.0135 = phi double [ 0.000000e+00, %for.body.preheader ], [ %add54, %for.body ] + 
%2 = load double, ptr %lsr.iv, align 8 + %3 = tail call double @llvm.fmuladd.f64(double %0, double %2, double %0) + %4 = tail call double @llvm.fmuladd.f64(double %3, double %2, double %3) + %5 = tail call double @llvm.fmuladd.f64(double %4, double %2, double %4) + %6 = tail call double @llvm.fmuladd.f64(double %5, double %2, double %5) + %7 = tail call double @llvm.fmuladd.f64(double %6, double %2, double %6) + %8 = tail call double @llvm.fmuladd.f64(double %7, double %2, double %7) + %9 = tail call double @llvm.fmuladd.f64(double %8, double %2, double %8) + %10 = tail call double @llvm.fmuladd.f64(double %9, double %2, double %9) + %11 = tail call double @llvm.fmuladd.f64(double %10, double %2, double %10) + %12 = tail call double @llvm.fmuladd.f64(double %11, double %2, double %11) + %13 = tail call double @llvm.fmuladd.f64(double %12, double %2, double %12) + %14 = tail call double @llvm.fmuladd.f64(double %13, double %2, double %13) + %15 = tail call double @llvm.fmuladd.f64(double %14, double %2, double %14) + %16 = tail call double @llvm.fmuladd.f64(double %15, double %2, double %15) + %17 = tail call double @llvm.fmuladd.f64(double %16, double %2, double %16) + %18 = tail call double @llvm.fmuladd.f64(double %17, double %2, double %17) + %add = fadd double %17, %18 + %19 = tail call double @llvm.fmuladd.f64(double %18, double %2, double %add) + %add35 = fadd double %10, %19 + %20 = tail call double @llvm.fmuladd.f64(double %3, double %2, double %add35) + %add38 = fadd double %11, %20 + %21 = tail call double @llvm.fmuladd.f64(double %4, double %2, double %add38) + %add41 = fadd double %12, %21 + %22 = tail call double @llvm.fmuladd.f64(double %5, double %2, double %add41) + %add44 = fadd double %14, %15 + %add45 = fadd double %13, %add44 + %add46 = fadd double %add45, %22 + %23 = tail call double @llvm.fmuladd.f64(double %6, double %2, double %add46) + %mul = fmul double %2, %7 + %mul51 = fmul double %1, %mul + %24 = tail call double 
@llvm.fmuladd.f64(double %mul51, double %9, double %23) + %25 = tail call double @llvm.fmuladd.f64(double %8, double %1, double %24) + %add54 = fadd double %res.0135, %25 + %scevgep = getelementptr i8, ptr %lsr.iv, i64 8 + %lsr.iv.next = add nsw i64 %lsr.iv137, -1 + %exitcond.not = icmp eq i64 %lsr.iv.next, 0 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + } + + declare double @llvm.fmuladd.f64(double, double, double) + +... +--- +name: kernel +tracksRegLiveness: true +liveins: + - { reg: '$x0', virtual-reg: '%10' } + - { reg: '$x1', virtual-reg: '%11' } + - { reg: '$w2', virtual-reg: '%12' } +body: | + bb.0.entry: + successors: %bb.1, %bb.4 + liveins: $x0, $x1, $w2 + + %12:gpr32common = COPY $w2 + %11:gpr64 = COPY $x1 + %10:gpr64common = COPY $x0 + dead $wzr = SUBSWri %12, 1, 0, implicit-def $nzcv + Bcc 10, %bb.1, implicit $nzcv + + bb.4: + %13:fpr64 = FMOVD0 + B %bb.2 + + bb.1.for.body.preheader: + %0:fpr64 = LDRDui %10, 0 :: (load (s64) from %ir.a) + %1:fpr64 = LDRDui %10, 1 :: (load (s64) from %ir.arrayidx1) + %16:gpr32 = ORRWrs $wzr, %12, 0 + %2:gpr64all = SUBREG_TO_REG 0, killed %16, %subreg.sub_32 + %15:fpr64 = FMOVD0 + B %bb.3 + + bb.2.for.cond.cleanup: + %3:fpr64 = PHI %13, %bb.4, %7, %bb.3 + $d0 = COPY %3 + RET_ReallyLR implicit $d0 + + bb.3.for.body: + successors: %bb.2, %bb.3 + + %4:gpr64sp = PHI %2, %bb.1, %9, %bb.3 + %5:gpr64sp = PHI %11, %bb.1, %8, %bb.3 + %6:fpr64 = PHI %15, %bb.1, %7, %bb.3 + early-clobber %17:gpr64sp, %18:fpr64 = LDRDpost %5, 8 :: (load (s64) from %ir.lsr.iv) + %19:fpr64 = nofpexcept FMADDDrrr %0, %18, %0, implicit $fpcr + %20:fpr64 = nofpexcept FMADDDrrr %19, %18, %19, implicit $fpcr + %21:fpr64 = nofpexcept FMADDDrrr %20, %18, %20, implicit $fpcr + %22:fpr64 = nofpexcept FMADDDrrr %21, %18, %21, implicit $fpcr + %23:fpr64 = nofpexcept FMADDDrrr %22, %18, %22, implicit $fpcr + %24:fpr64 = nofpexcept FMADDDrrr %23, %18, %23, implicit $fpcr + %25:fpr64 = nofpexcept FMADDDrrr %24, %18, %24, implicit $fpcr + 
%26:fpr64 = nofpexcept FMADDDrrr %25, %18, %25, implicit $fpcr + %27:fpr64 = nofpexcept FMADDDrrr %26, %18, %26, implicit $fpcr + %28:fpr64 = nofpexcept FMADDDrrr %27, %18, %27, implicit $fpcr + %29:fpr64 = nofpexcept FMADDDrrr %28, %18, %28, implicit $fpcr + %30:fpr64 = nofpexcept FMADDDrrr %29, %18, %29, implicit $fpcr + %31:fpr64 = nofpexcept FMADDDrrr %30, %18, %30, implicit $fpcr + %32:fpr64 = nofpexcept FMADDDrrr %31, %18, %31, implicit $fpcr + %33:fpr64 = nofpexcept FMADDDrrr %32, %18, %32, implicit $fpcr + %34:fpr64 = nofpexcept FMADDDrrr %33, %18, %33, implicit $fpcr + %35:fpr64 = nofpexcept FADDDrr %33, %34, implicit $fpcr + %36:fpr64 = nofpexcept FMADDDrrr %34, %18, killed %35, implicit $fpcr + %37:fpr64 = nofpexcept FADDDrr %26, killed %36, implicit $fpcr + %38:fpr64 = nofpexcept FMADDDrrr %19, %18, killed %37, implicit $fpcr + %39:fpr64 = nofpexcept FADDDrr %27, killed %38, implicit $fpcr + %40:fpr64 = nofpexcept FMADDDrrr %20, %18, killed %39, implicit $fpcr + %41:fpr64 = nofpexcept FADDDrr %28, killed %40, implicit $fpcr + %42:fpr64 = nofpexcept FMADDDrrr %21, %18, killed %41, implicit $fpcr + %43:fpr64 = nofpexcept FADDDrr %30, %31, implicit $fpcr + %44:fpr64 = nofpexcept FADDDrr %29, killed %43, implicit $fpcr + %45:fpr64 = nofpexcept FADDDrr killed %44, killed %42, implicit $fpcr + %46:fpr64 = nofpexcept FMADDDrrr %22, %18, killed %45, implicit $fpcr + %47:fpr64 = nofpexcept FMULDrr %18, %23, implicit $fpcr + %48:fpr64 = nofpexcept FMULDrr %1, killed %47, implicit $fpcr + %49:fpr64 = nofpexcept FMADDDrrr killed %48, %25, killed %46, implicit $fpcr + %50:fpr64 = nofpexcept FMADDDrrr %24, %1, killed %49, implicit $fpcr + %7:fpr64 = nofpexcept FADDDrr %6, killed %50, implicit $fpcr + %8:gpr64all = COPY %17 + %51:gpr64 = nsw SUBSXri %4, 1, 0, implicit-def $nzcv + %9:gpr64all = COPY %51 + Bcc 0, %bb.2, implicit $nzcv + B %bb.3 + +... 
diff --git a/llvm/test/CodeGen/PowerPC/sms-regpress.mir b/llvm/test/CodeGen/PowerPC/sms-regpress.mir index cebd78af882dfd..b01115c49fd8d5 100644 --- a/llvm/test/CodeGen/PowerPC/sms-regpress.mir +++ b/llvm/test/CodeGen/PowerPC/sms-regpress.mir @@ -1,41 +1,30 @@ -# RUN: llc --verify-machineinstrs -mcpu=pwr9 -o - %s -run-pass=pipeliner -ppc-enable-pipeliner -pipeliner-register-pressure -pipeliner-max-mii=50 -pipeliner-ii-search-range=30 -pipeliner-max-stages=10 -debug-only=pipeliner 2>&1 | FileCheck %s +# RUN: llc --verify-machineinstrs -mcpu=pwr9 -o - %s -run-pass=pipeliner -ppc-enable-pipeliner -pipeliner-register-pressure -pipeliner-max-mii=50 -pipeliner-ii-search-range=30 -pipeliner-max-stages=10 -debug-only=pipeliner 2>&1 | FileCheck %s # REQUIRES: asserts # Check that if the register pressure is too high, the schedule is rejected, II is incremented, and scheduling continues. # The specific value of II is not important. -# CHECK: Try to schedule with 21 -# CHECK: Can't schedule -# CHECK: Try to schedule with 22 -# CHECK: Can't schedule -# CHECK: Try to schedule with 23 -# CHECK: Rejected the schedule because of too high register pressure -# CHECK: Try to schedule with 24 -# CHECK: Rejected the schedule because of too high register pressure -# CHECK: Try to schedule with 25 -# CHECK: Rejected the schedule because of too high register pressure -# CHECK: Try to schedule with 26 -# CHECK: Schedule Found? 1 (II=26) +# CHECK: {{^ *}}Try to schedule with {{[0-9]+$}} +# CHECK: {{^ *}}Rejected the schedule because of too high register pressure{{$}} +# CHECK: {{^ *}}Try to schedule with {{[0-9]+$}} +# CHECK: {{^ *}}Schedule Found? 
1 (II={{[0-9]+}}){{$}} --- | - ; ModuleID = 'a.ll' - source_filename = "a.c" target datalayout = "e-m:e-Fn32-i64:64-n32:64" target triple = "ppc64le" - ; Function Attrs: nofree nosync nounwind memory(argmem: read) uwtable - define dso_local double @kernel(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b, i32 noundef signext %n) local_unnamed_addr #0 { + define dso_local double @kernel(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b, i32 noundef signext %n) local_unnamed_addr { entry: - %0 = load double, ptr %a, align 8, !tbaa !3 - %arrayidx1 = getelementptr inbounds double, ptr %a, i64 1 - %1 = load double, ptr %arrayidx1, align 8, !tbaa !3 + %0 = load double, ptr %a, align 8 + %arrayidx1 = getelementptr inbounds i8, ptr %a, i64 8 + %1 = load double, ptr %arrayidx1, align 8 %cmp163 = icmp sgt i32 %n, 0 br i1 %cmp163, label %for.body.preheader, label %for.cond.cleanup for.body.preheader: ; preds = %entry - %wide.trip.count = zext i32 %n to i64 - %scevgep1 = getelementptr i8, ptr %b, i64 -8 + %wide.trip.count = zext nneg i32 %n to i64 + %scevgep167 = getelementptr i8, ptr %b, i64 -8 call void @llvm.set.loop.iterations.i64(i64 %wide.trip.count) br label %for.body @@ -43,11 +32,11 @@ %res.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %30, %for.body ] ret double %res.0.lcssa - for.body: ; preds = %for.body, %for.body.preheader + for.body: ; preds = %for.body.preheader, %for.body %res.0165 = phi double [ 0.000000e+00, %for.body.preheader ], [ %30, %for.body ] - %2 = phi ptr [ %scevgep1, %for.body.preheader ], [ %3, %for.body ] + %2 = phi ptr [ %scevgep167, %for.body.preheader ], [ %3, %for.body ] %3 = getelementptr i8, ptr %2, i64 8 - %4 = load double, ptr %3, align 8, !tbaa !3 + %4 = load double, ptr %3, align 8 %5 = tail call double @llvm.fmuladd.f64(double %0, double %4, double %0) %6 = tail call double @llvm.fmuladd.f64(double %5, double %4, double %5) %7 = tail call double @llvm.fmuladd.f64(double %6, double %4, double %6) 
@@ -92,152 +81,23 @@ %mul66 = fmul double %12, %mul65 %30 = tail call double @llvm.fmuladd.f64(double %mul66, double %10, double %res.0165) %31 = call i1 @llvm.loop.decrement.i64(i64 1) - br i1 %31, label %for.body, label %for.cond.cleanup, !llvm.loop !7 + br i1 %31, label %for.body, label %for.cond.cleanup } - ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) - declare double @llvm.fmuladd.f64(double, double, double) #1 + declare double @llvm.fmuladd.f64(double, double, double) - ; Function Attrs: nocallback noduplicate nofree nosync nounwind willreturn - declare void @llvm.set.loop.iterations.i64(i64) #2 + declare void @llvm.set.loop.iterations.i64(i64) - ; Function Attrs: nocallback noduplicate nofree nosync nounwind willreturn - declare i1 @llvm.loop.decrement.i64(i64) #2 + declare i1 @llvm.loop.decrement.i64(i64) - attributes #0 = { nofree nosync nounwind memory(argmem: read) uwtable "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crbits,+crypto,+direct-move,+extdiv,+htm,+isa-v206-instructions,+isa-v207-instructions,+isa-v30-instructions,+power8-vector,+power9-vector,+quadword-atomics,+vsx,-aix-small-local-exec-tls,-privileged,-rop-protect,-spe" } - attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } - attributes #2 = { nocallback noduplicate nofree nosync nounwind willreturn } - - !llvm.module.flags = !{!0, !1} - !llvm.ident = !{!2} - - !0 = !{i32 1, !"wchar_size", i32 4} - !1 = !{i32 7, !"uwtable", i32 2} - !2 = !{!"clang version 18.0.0 (https://miratech-soft@dev.azure.com/miratech-soft/llvm/_git/llvm c8d01fb665fc5d9378100a6d92ebcd3be49be655)"} - !3 = !{!4, !4, i64 0} - !4 = !{!"double", !5, i64 0} - !5 = !{!"omnipotent char", !6, i64 0} - !6 = !{!"Simple C/C++ TBAA"} - !7 = distinct !{!7, !8, !9} - !8 = !{!"llvm.loop.mustprogress"} - !9 = !{!"llvm.loop.unroll.disable"} - ... 
--- name: kernel -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false tracksRegLiveness: true -hasWinCFI: false -callsEHReturn: false -callsUnwindInit: false -hasEHCatchret: false -hasEHScopes: false -hasEHFunclets: false -isOutlined: false -debugInstrRef: false -failsVerification: false -tracksDebugUserValues: false -registers: - - { id: 0, class: vsfrc, preferred-register: '' } - - { id: 1, class: vsfrc, preferred-register: '' } - - { id: 2, class: g8rc, preferred-register: '' } - - { id: 3, class: vsfrc, preferred-register: '' } - - { id: 4, class: vsfrc, preferred-register: '' } - - { id: 5, class: g8rc_and_g8rc_nox0, preferred-register: '' } - - { id: 6, class: g8rc, preferred-register: '' } - - { id: 7, class: vsfrc, preferred-register: '' } - - { id: 8, class: g8rc_and_g8rc_nox0, preferred-register: '' } - - { id: 9, class: g8rc_and_g8rc_nox0, preferred-register: '' } - - { id: 10, class: g8rc, preferred-register: '' } - - { id: 11, class: gprc, preferred-register: '' } - - { id: 12, class: vsfrc, preferred-register: '' } - - { id: 13, class: crrc, preferred-register: '' } - - { id: 14, class: vsfrc, preferred-register: '' } - - { id: 15, class: g8rc, preferred-register: '' } - - { id: 16, class: g8rc, preferred-register: '' } - - { id: 17, class: g8rc, preferred-register: '' } - - { id: 18, class: f8rc, preferred-register: '' } - - { id: 19, class: g8rc_and_g8rc_nox0, preferred-register: '' } - - { id: 20, class: vsfrc, preferred-register: '' } - - { id: 21, class: vsfrc, preferred-register: '' } - - { id: 22, class: vsfrc, preferred-register: '' } - - { id: 23, class: vsfrc, preferred-register: '' } - - { id: 24, class: vsfrc, preferred-register: '' } - - { id: 25, class: vsfrc, preferred-register: '' } - - { id: 26, class: vsfrc, preferred-register: '' } - - { id: 27, class: vsfrc, preferred-register: '' } - - { id: 28, class: vsfrc, preferred-register: '' } - - { id: 29, class: vsfrc, 
preferred-register: '' } - - { id: 30, class: vsfrc, preferred-register: '' } - - { id: 31, class: vsfrc, preferred-register: '' } - - { id: 32, class: vsfrc, preferred-register: '' } - - { id: 33, class: vsfrc, preferred-register: '' } - - { id: 34, class: vsfrc, preferred-register: '' } - - { id: 35, class: vsfrc, preferred-register: '' } - - { id: 36, class: vsfrc, preferred-register: '' } - - { id: 37, class: vsfrc, preferred-register: '' } - - { id: 38, class: vsfrc, preferred-register: '' } - - { id: 39, class: vsfrc, preferred-register: '' } - - { id: 40, class: vsfrc, preferred-register: '' } - - { id: 41, class: vsfrc, preferred-register: '' } - - { id: 42, class: vsfrc, preferred-register: '' } - - { id: 43, class: vsfrc, preferred-register: '' } - - { id: 44, class: vsfrc, preferred-register: '' } - - { id: 45, class: vsfrc, preferred-register: '' } - - { id: 46, class: vsfrc, preferred-register: '' } - - { id: 47, class: vsfrc, preferred-register: '' } - - { id: 48, class: vsfrc, preferred-register: '' } - - { id: 49, class: vsfrc, preferred-register: '' } - - { id: 50, class: vsfrc, preferred-register: '' } - - { id: 51, class: vsfrc, preferred-register: '' } - - { id: 52, class: vsfrc, preferred-register: '' } - - { id: 53, class: vsfrc, preferred-register: '' } - - { id: 54, class: vsfrc, preferred-register: '' } - - { id: 55, class: vsfrc, preferred-register: '' } - - { id: 56, class: vsfrc, preferred-register: '' } - - { id: 57, class: vsfrc, preferred-register: '' } - - { id: 58, class: vsfrc, preferred-register: '' } - - { id: 59, class: vsfrc, preferred-register: '' } - - { id: 60, class: vsfrc, preferred-register: '' } - - { id: 61, class: vsfrc, preferred-register: '' } - - { id: 62, class: crbitrc, preferred-register: '' } liveins: - { reg: '$x3', virtual-reg: '%8' } - { reg: '$x4', virtual-reg: '%9' } - { reg: '$x5', virtual-reg: '%10' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - 
hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 1 - adjustsStack: false - hasCalls: false - stackProtector: '' - functionContext: '' - maxCallFrameSize: 4294967295 - cvBytesOfCalleeSavedRegisters: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - hasTailCall: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: [] -stack: [] -entry_values: [] -callSites: [] -debugValueSubstitutions: [] -constants: [] -machineFunctionInfo: {} body: | bb.0.entry: successors: %bb.2(0x50000000), %bb.1(0x30000000) @@ -251,16 +111,12 @@ body: | BCC 44, killed %13, %bb.2 bb.1: - successors: %bb.3(0x80000000) - %12:vsfrc = XXLXORdpz B %bb.3 bb.2.for.body.preheader: - successors: %bb.4(0x80000000) - - %0:vsfrc = DFLOADf64 0, %8 :: (load (s64) from %ir.a, !tbaa !3) - %1:vsfrc = DFLOADf64 8, killed %8 :: (load (s64) from %ir.arrayidx1, !tbaa !3) + %0:vsfrc = DFLOADf64 0, %8 :: (load (s64) from %ir.a) + %1:vsfrc = DFLOADf64 8, killed %8 :: (load (s64) from %ir.arrayidx1) %16:g8rc = IMPLICIT_DEF %15:g8rc = INSERT_SUBREG killed %16, killed %11, %subreg.sub_32 %17:g8rc = RLDICL killed %15, 0, 32 @@ -279,7 +135,7 @@ body: | %4:vsfrc = PHI %14, %bb.2, %7, %bb.4 %5:g8rc_and_g8rc_nox0 = PHI %2, %bb.2, %6, %bb.4 - %18:f8rc, %19:g8rc_and_g8rc_nox0 = LFDU 8, killed %5 :: (load (s64) from %ir.3, !tbaa !3) + %18:f8rc, %19:g8rc_and_g8rc_nox0 = LFDU 8, killed %5 :: (load (s64) from %ir.3) %6:g8rc = COPY killed %19 %20:vsfrc = nofpexcept XSMADDADP %0, %0, %18, implicit $rm %21:vsfrc = nofpexcept XSMADDADP %20, %20, %18, implicit $rm From ef0291e5f4451abbafab0c839bf51a6382f735f3 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Mon, 1 Apr 2024 16:10:10 +0800 Subject: [PATCH 003/201] [NFC] [Serialization] Reordering lexical and visible TU block after type decl offsets This patch reorders the lexical block for the translation unit, visible update block for the TU and the visible update block for the extern C context after the
type decl offsets block. This should be an NFC patch. This is helpful for later optimizations for eliding unreachable declarations in the global module fragment. See the comments in https://github.com/llvm/llvm-project/pull/76930. Simply, if we want to get the reachable sets of declarations during the writing process, we need to write the file-level context later than the process of writing declarations (which is the main process to determine the reachable set). --- clang/lib/Serialization/ASTWriter.cpp | 71 ++++++++++++++---------- clang/test/Modules/language-linkage.cppm | 2 +- 2 files changed, 42 insertions(+), 31 deletions(-) diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 1e5734c9c834eb..2438fbc166062f 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -4959,38 +4959,12 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot, Stream.EmitRecord(METADATA_OLD_FORMAT, Record); } - // Create a lexical update block containing all of the declarations in the - // translation unit that do not come from other AST files. const TranslationUnitDecl *TU = Context.getTranslationUnitDecl(); - SmallVector NewGlobalKindDeclPairs; - for (const auto *D : TU->noload_decls()) { - if (!D->isFromASTFile()) { - NewGlobalKindDeclPairs.push_back(D->getKind()); - NewGlobalKindDeclPairs.push_back(GetDeclRef(D)); - } - } - - auto Abv = std::make_shared(); - Abv->Add(llvm::BitCodeAbbrevOp(TU_UPDATE_LEXICAL)); - Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Blob)); - unsigned TuUpdateLexicalAbbrev = Stream.EmitAbbrev(std::move(Abv)); - { - RecordData::value_type Record[] = {TU_UPDATE_LEXICAL}; - Stream.EmitRecordWithBlob(TuUpdateLexicalAbbrev, Record, - bytes(NewGlobalKindDeclPairs)); - } - // And a visible updates block for the translation unit.
- Abv = std::make_shared(); - Abv->Add(llvm::BitCodeAbbrevOp(UPDATE_VISIBLE)); - Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6)); - Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Blob)); - UpdateVisibleAbbrev = Stream.EmitAbbrev(std::move(Abv)); - WriteDeclContextVisibleUpdate(TU); - - // If we have any extern "C" names, write out a visible update for them. - if (Context.ExternCContext) - WriteDeclContextVisibleUpdate(Context.ExternCContext); + // Force all top level declarations to be emitted. + for (const auto *D : TU->noload_decls()) + if (!D->isFromASTFile()) + GetDeclRef(D); // If the translation unit has an anonymous namespace, and we don't already // have an update block for it, write it as an update block. @@ -5131,6 +5105,14 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot, for (auto *D : SemaRef.DeclsToCheckForDeferredDiags) DeclsToCheckForDeferredDiags.push_back(GetDeclRef(D)); + { + auto Abv = std::make_shared(); + Abv->Add(llvm::BitCodeAbbrevOp(UPDATE_VISIBLE)); + Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6)); + Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Blob)); + UpdateVisibleAbbrev = Stream.EmitAbbrev(std::move(Abv)); + } + RecordData DeclUpdatesOffsetsRecord; // Keep writing types, declarations, and declaration update records @@ -5158,6 +5140,35 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot, WriteTypeDeclOffsets(); if (!DeclUpdatesOffsetsRecord.empty()) Stream.EmitRecord(DECL_UPDATE_OFFSETS, DeclUpdatesOffsetsRecord); + + // Create a lexical update block containing all of the declarations in the + // translation unit that do not come from other AST files. 
+ { + SmallVector NewGlobalKindDeclPairs; + for (const auto *D : TU->noload_decls()) { + if (!D->isFromASTFile()) { + NewGlobalKindDeclPairs.push_back(D->getKind()); + NewGlobalKindDeclPairs.push_back(GetDeclRef(D)); + } + } + + auto Abv = std::make_shared(); + Abv->Add(llvm::BitCodeAbbrevOp(TU_UPDATE_LEXICAL)); + Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Blob)); + unsigned TuUpdateLexicalAbbrev = Stream.EmitAbbrev(std::move(Abv)); + + RecordData::value_type Record[] = {TU_UPDATE_LEXICAL}; + Stream.EmitRecordWithBlob(TuUpdateLexicalAbbrev, Record, + bytes(NewGlobalKindDeclPairs)); + } + + // And a visible updates block for the translation unit. + WriteDeclContextVisibleUpdate(TU); + + // If we have any extern "C" names, write out a visible update for them. + if (Context.ExternCContext) + WriteDeclContextVisibleUpdate(Context.ExternCContext); + WriteFileDeclIDsMap(); WriteSourceManagerBlock(Context.getSourceManager(), PP); WriteComments(); diff --git a/clang/test/Modules/language-linkage.cppm b/clang/test/Modules/language-linkage.cppm index bf7982cd9207d6..a5db9e9ebc07ec 100644 --- a/clang/test/Modules/language-linkage.cppm +++ b/clang/test/Modules/language-linkage.cppm @@ -14,5 +14,5 @@ void foo() {} extern "C" void bar() {} -// CHECK: define {{.*}}@bar( // CHECK: define {{.*}}@_Z3foov( +// CHECK: define {{.*}}@bar( From da1d3d8fb9e7dba1cc89327f5119fa7c0cadef81 Mon Sep 17 00:00:00 2001 From: superZWT123 Date: Mon, 1 Apr 2024 17:19:46 +0800 Subject: [PATCH 004/201] =?UTF-8?q?[TableGen]=20Introduce=20a=20less=20agg?= =?UTF-8?q?ressive=20suppression=20for=20HwMode=20Decoder=E2=80=A6=20(#860?= =?UTF-8?q?60)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Remove 'AllModes' and 'DefaultMode' suffixes for DecoderTables under default HwMode. 2. Introduce a less aggressive suppression for HwMode DecoderTable, only reduce necessary tables duplications. 
This allows encodings under different HwModes to retain the original DecoderNamespace. 3. Change 'suppress-per-hwmode-duplicates' command option from bool type to enum type, allowing users to choose what level of suppression to use. --- llvm/test/TableGen/HwModeEncodeDecode2.td | 9 +- llvm/test/TableGen/HwModeEncodeDecode3.td | 71 ++++++++++----- llvm/utils/TableGen/DecoderEmitter.cpp | 104 +++++++++++++++++----- 3 files changed, 137 insertions(+), 47 deletions(-) diff --git a/llvm/test/TableGen/HwModeEncodeDecode2.td b/llvm/test/TableGen/HwModeEncodeDecode2.td index 5159501d8148eb..cf96dda6c8bf3b 100644 --- a/llvm/test/TableGen/HwModeEncodeDecode2.td +++ b/llvm/test/TableGen/HwModeEncodeDecode2.td @@ -1,6 +1,6 @@ // RUN: llvm-tblgen -gen-disassembler -I %p/../../include %s | \ // RUN: FileCheck %s --check-prefix=DECODER -// RUN: llvm-tblgen -gen-disassembler --suppress-per-hwmode-duplicates -I \ +// RUN: llvm-tblgen -gen-disassembler --suppress-per-hwmode-duplicates=O2 -I \ // RUN: %p/../../include %s | FileCheck %s --check-prefix=DECODER-SUPPRESS // Test duplicate table suppression for per-HwMode decoders. 
@@ -105,11 +105,10 @@ let OutOperandList = (outs) in { // DECODER-DAG: Opcode: fooTypeEncA:baz // DECODER-DAG: Opcode: bar - -// DECODER-SUPPRESS-LABEL: DecoderTableAlt_AllModes32[] = -// DECODER-SUPPRESS-DAG: Opcode: unrelated -// DECODER-SUPPRESS-LABEL: DecoderTable_AllModes32[] = +// DECODER-SUPPRESS-LABEL: DecoderTable32[] = // DECODER-SUPPRESS-DAG: Opcode: bar +// DECODER-SUPPRESS-LABEL: DecoderTableAlt32[] = +// DECODER-SUPPRESS-DAG: Opcode: unrelated // DECODER-SUPPRESS-LABEL: DecoderTable_ModeA32[] = // DECODER-SUPPRESS-DAG: Opcode: fooTypeEncA:foo // DECODER-SUPPRESS-NOT: Opcode: bar diff --git a/llvm/test/TableGen/HwModeEncodeDecode3.td b/llvm/test/TableGen/HwModeEncodeDecode3.td index 406e52d25be706..8e0266b2c55af9 100644 --- a/llvm/test/TableGen/HwModeEncodeDecode3.td +++ b/llvm/test/TableGen/HwModeEncodeDecode3.td @@ -2,8 +2,10 @@ // RUN: FileCheck %s --check-prefix=ENCODER // RUN: llvm-tblgen -gen-disassembler -I %p/../../include %s | \ // RUN: FileCheck %s --check-prefix=DECODER -// RUN: llvm-tblgen -gen-disassembler --suppress-per-hwmode-duplicates -I \ -// RUN: %p/../../include %s | FileCheck %s --check-prefix=DECODER-SUPPRESS +// RUN: llvm-tblgen -gen-disassembler --suppress-per-hwmode-duplicates=O1 -I \ +// RUN: %p/../../include %s | FileCheck %s --check-prefix=DECODER-SUPPRESS-O1 +// RUN: llvm-tblgen -gen-disassembler --suppress-per-hwmode-duplicates=O2 -I \ +// RUN: %p/../../include %s | FileCheck %s --check-prefix=DECODER-SUPPRESS-O2 include "llvm/Target/Target.td" @@ -99,16 +101,20 @@ def unrelated: Instruction { } -// DECODER-LABEL: DecoderTableAlt_DefaultMode32[] = +// Under default settings, using 'HwMode' to dictate instruction encodings results in +// significant duplication of DecoderTables. The three tables ‘DecoderTableAlt32’, +// ‘DecoderTableAlt_ModeA32’, and ‘DecoderTableAlt_ModeB32’ are exact duplicates and +// could effectively be merged into one. 
+// DECODER-LABEL: DecoderTable32[] = +// DECODER-DAG: Opcode: bar +// DECODER-LABEL: DecoderTable64[] = +// DECODER-DAG: Opcode: fooTypeEncDefault:foo +// DECODER-LABEL: DecoderTableAlt32[] = // DECODER-DAG: Opcode: unrelated // DECODER-LABEL: DecoderTableAlt_ModeA32[] = // DECODER-DAG: Opcode: unrelated // DECODER-LABEL: DecoderTableAlt_ModeB32[] = // DECODER-DAG: Opcode: unrelated -// DECODER-LABEL: DecoderTable_DefaultMode32[] = -// DECODER-DAG: Opcode: bar -// DECODER-LABEL: DecoderTable_DefaultMode64[] = -// DECODER-DAG: Opcode: fooTypeEncDefault:foo // DECODER-LABEL: DecoderTable_ModeA32[] = // DECODER-DAG: Opcode: fooTypeEncA:foo // DECODER-DAG: Opcode: bar @@ -117,21 +123,42 @@ def unrelated: Instruction { // DECODER-DAG: Opcode: fooTypeEncA:baz // DECODER-DAG: Opcode: bar - -// DECODER-SUPPRESS-LABEL: DecoderTableAlt_AllModes32[] = -// DECODER-SUPPRESS-DAG: Opcode: unrelated -// DECODER-SUPPRESS-LABEL: DecoderTable_AllModes32[] = -// DECODER-SUPPRESS-DAG: Opcode: bar -// DECODER-SUPPRESS-LABEL: DecoderTable_DefaultMode64[] = -// DECODER-SUPPRESS-NOT: Opcode: bar -// DECODER-SUPPRESS-DAG: Opcode: fooTypeEncDefault:foo -// DECODER-SUPPRESS-LABEL: DecoderTable_ModeA32[] = -// DECODER-SUPPRESS-DAG: Opcode: fooTypeEncA:foo -// DECODER-SUPPRESS-NOT: Opcode: bar -// DECODER-SUPPRESS-LABEL: DecoderTable_ModeB32[] = -// DECODER-SUPPRESS-DAG: Opcode: fooTypeEncB:foo -// DECODER-SUPPRESS-DAG: Opcode: fooTypeEncA:baz -// DECODER-SUPPRESS-NOT: Opcode: bar +// Under the 'O1' optimization level, unnecessary duplicate tables will be eliminated, +// reducing the three ‘Alt’ tables down to just one. 
+// DECODER-SUPPRESS-O1-LABEL: DecoderTable32[] = +// DECODER-SUPPRESS-O1-DAG: Opcode: bar +// DECODER-SUPPRESS-O1-LABEL: DecoderTable64[] = +// DECODER-SUPPRESS-O1-DAG: Opcode: fooTypeEncDefault:foo +// DECODER-SUPPRESS-O1-LABEL: DecoderTableAlt32[] = +// DECODER-SUPPRESS-O1-DAG: Opcode: unrelated +// DECODER-SUPPRESS-O1-LABEL: DecoderTable_ModeA32[] = +// DECODER-SUPPRESS-O1-DAG: Opcode: fooTypeEncA:foo +// DECODER-SUPPRESS-O1-DAG: Opcode: bar +// DECODER-SUPPRESS-O1-LABEL: DecoderTable_ModeB32[] = +// DECODER-SUPPRESS-O1-DAG: Opcode: fooTypeEncB:foo +// DECODER-SUPPRESS-O1-DAG: Opcode: fooTypeEncA:baz +// DECODER-SUPPRESS-O1-DAG: Opcode: bar + +// Under the 'O2' optimization condition, instructions possessing the 'EncodingByHwMode' +// attribute will be extracted from their original DecoderNamespace and placed into their +// respective HwMode tables. Meanwhile, other instructions that do not have the 'EncodingByHwMode' +// attribute but are within the same DecoderNamespace will be stored in the 'Default' table. This +// approach will significantly reduce instruction redundancy, but it necessitates users to thoroughly +// consider the interplay between HwMode and DecoderNamespace for their instructions. 
+// DECODER-SUPPRESS-O2-LABEL: DecoderTable32[] = +// DECODER-SUPPRESS-O2-DAG: Opcode: bar +// DECODER-SUPPRESS-O2-LABEL: DecoderTable64[] = +// DECODER-SUPPRESS-O2-NOT: Opcode: bar +// DECODER-SUPPRESS-O2-DAG: Opcode: fooTypeEncDefault:foo +// DECODER-SUPPRESS-O2-LABEL: DecoderTableAlt32[] = +// DECODER-SUPPRESS-O2-DAG: Opcode: unrelated +// DECODER-SUPPRESS-O2-LABEL: DecoderTable_ModeA32[] = +// DECODER-SUPPRESS-O2-DAG: Opcode: fooTypeEncA:foo +// DECODER-SUPPRESS-O2-NOT: Opcode: bar +// DECODER-SUPPRESS-O2-LABEL: DecoderTable_ModeB32[] = +// DECODER-SUPPRESS-O2-DAG: Opcode: fooTypeEncB:foo +// DECODER-SUPPRESS-O2-DAG: Opcode: fooTypeEncA:baz +// DECODER-SUPPRESS-O2-NOT: Opcode: bar // ENCODER-LABEL: static const uint64_t InstBits_DefaultMode[] = { // ENCODER: UINT64_C(2), // bar diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp index 494dc93faacef8..3bd7f432ff9ad6 100644 --- a/llvm/utils/TableGen/DecoderEmitter.cpp +++ b/llvm/utils/TableGen/DecoderEmitter.cpp @@ -54,10 +54,27 @@ using namespace llvm; extern cl::OptionCategory DisassemblerEmitterCat; -cl::opt DecoderEmitterSuppressDuplicates( +enum SuppressLevel { + SUPPRESSION_DISABLE, + SUPPRESSION_LEVEL1, + SUPPRESSION_LEVEL2 +}; + +cl::opt DecoderEmitterSuppressDuplicates( "suppress-per-hwmode-duplicates", cl::desc("Suppress duplication of instrs into per-HwMode decoder tables"), - cl::init(false), cl::cat(DisassemblerEmitterCat)); + cl::values( + clEnumValN( + SUPPRESSION_DISABLE, "O0", + "Do not prevent DecoderTable duplications caused by HwModes"), + clEnumValN( + SUPPRESSION_LEVEL1, "O1", + "Remove duplicate DecoderTable entries generated due to HwModes"), + clEnumValN( + SUPPRESSION_LEVEL2, "O2", + "Extract HwModes-specific instructions into new DecoderTables, " + "significantly reducing Table Duplications")), + cl::init(SUPPRESSION_DISABLE), cl::cat(DisassemblerEmitterCat)); namespace { @@ -128,6 +145,7 @@ struct EncodingIDAndOpcode { }; using EncodingIDsVec = 
std::vector; +using NamespacesHwModesMap = std::map>; raw_ostream &operator<<(raw_ostream &OS, const EncodingAndInst &Value) { if (Value.EncodingDef != Value.Inst->TheDef) @@ -2417,21 +2435,65 @@ static bool Check(DecodeStatus &Out, DecodeStatus In) { // Collect all HwModes referenced by the target for encoding purposes, // returning a vector of corresponding names. -static void -collectHwModesReferencedForEncodings(const CodeGenHwModes &HWM, - std::vector &Names) { +static void collectHwModesReferencedForEncodings( + const CodeGenHwModes &HWM, std::vector &Names, + NamespacesHwModesMap &NamespacesWithHwModes) { SmallBitVector BV(HWM.getNumModeIds()); for (const auto &MS : HWM.getHwModeSelects()) { for (const HwModeSelect::PairType &P : MS.second.Items) { - if (P.second->isSubClassOf("InstructionEncoding")) + if (P.second->isSubClassOf("InstructionEncoding")) { + std::string DecoderNamespace = + std::string(P.second->getValueAsString("DecoderNamespace")); + if (P.first == DefaultMode) { + NamespacesWithHwModes[DecoderNamespace].insert(""); + } else { + NamespacesWithHwModes[DecoderNamespace].insert( + HWM.getMode(P.first).Name); + } BV.set(P.first); + } } } transform(BV.set_bits(), std::back_inserter(Names), [&HWM](const int &M) { + if (M == DefaultMode) + return StringRef(""); return HWM.getModeName(M, /*IncludeDefault=*/true); }); } +static void +handleHwModesUnrelatedEncodings(const CodeGenInstruction *Instr, + const std::vector &HwModeNames, + NamespacesHwModesMap &NamespacesWithHwModes, + std::vector &GlobalEncodings) { + const Record *InstDef = Instr->TheDef; + + switch (DecoderEmitterSuppressDuplicates) { + case SUPPRESSION_DISABLE: { + for (StringRef HwModeName : HwModeNames) + GlobalEncodings.emplace_back(InstDef, Instr, HwModeName); + break; + } + case SUPPRESSION_LEVEL1: { + std::string DecoderNamespace = + std::string(InstDef->getValueAsString("DecoderNamespace")); + auto It = NamespacesWithHwModes.find(DecoderNamespace); + if (It != 
NamespacesWithHwModes.end()) { + for (StringRef HwModeName : It->second) + GlobalEncodings.emplace_back(InstDef, Instr, HwModeName); + } else { + // Only emit the encoding once, as its DecoderNamespace doesn't + // contain any HwModes. + GlobalEncodings.emplace_back(InstDef, Instr, ""); + } + break; + } + case SUPPRESSION_LEVEL2: + GlobalEncodings.emplace_back(InstDef, Instr, ""); + break; + } +} + // Emits disassembler code for instruction decoding. void DecoderEmitter::run(raw_ostream &o) { formatted_raw_ostream OS(o); @@ -2457,10 +2519,12 @@ namespace llvm { // Parameterize the decoders based on namespace and instruction width. // First, collect all encoding-related HwModes referenced by the target. + // And establish a mapping table between DecoderNamespace and HwMode. // If HwModeNames is empty, add the empty string so we always have one HwMode. const CodeGenHwModes &HWM = Target.getHwModes(); std::vector HwModeNames; - collectHwModesReferencedForEncodings(HWM, HwModeNames); + NamespacesHwModesMap NamespacesWithHwModes; + collectHwModesReferencedForEncodings(HWM, HwModeNames, NamespacesWithHwModes); if (HwModeNames.empty()) HwModeNames.push_back(""); @@ -2471,22 +2535,22 @@ namespace llvm { if (const RecordVal *RV = InstDef->getValue("EncodingInfos")) { if (DefInit *DI = dyn_cast_or_null(RV->getValue())) { EncodingInfoByHwMode EBM(DI->getDef(), HWM); - for (auto &KV : EBM) - NumberedEncodings.emplace_back( - KV.second, NumberedInstruction, - HWM.getModeName(KV.first, /*IncludeDefault=*/true)); + for (auto &[ModeId, Encoding] : EBM) { + // DecoderTables with DefaultMode should not have any suffix. + if (ModeId == DefaultMode) { + NumberedEncodings.emplace_back(Encoding, NumberedInstruction, ""); + } else { + NumberedEncodings.emplace_back(Encoding, NumberedInstruction, + HWM.getMode(ModeId).Name); + } + } continue; } } - // This instruction is encoded the same on all HwModes. Emit it for all - // HwModes by default, otherwise leave it in a single common table.
- if (DecoderEmitterSuppressDuplicates) { - NumberedEncodings.emplace_back(InstDef, NumberedInstruction, "AllModes"); - } else { - for (StringRef HwModeName : HwModeNames) - NumberedEncodings.emplace_back(InstDef, NumberedInstruction, - HwModeName); - } + // This instruction is encoded the same on all HwModes. + // According to user needs, provide varying degrees of suppression. + handleHwModesUnrelatedEncodings(NumberedInstruction, HwModeNames, + NamespacesWithHwModes, NumberedEncodings); } for (const auto &NumberedAlias : RK.getAllDerivedDefinitions("AdditionalEncoding")) From 8827ff92b96d78ef455157574061d745df2909af Mon Sep 17 00:00:00 2001 From: Victor Perez Date: Mon, 1 Apr 2024 11:57:14 +0200 Subject: [PATCH 005/201] [MLIR][Arith] Add rounding mode attribute to `truncf` (#86152) Add rounding mode attribute to `arith`. This attribute can be used in different FP `arith` operations to control rounding mode. Rounding modes correspond to IEEE 754-specified rounding modes. Use in `arith.truncf` folding. As this is not supported in dialects other than LLVM, conversion should fail for now in case this attribute is present. 
--------- Signed-off-by: Victor Perez --- .../ArithCommon/AttrToLLVMConverter.h | 48 +++++++++++++++++++ .../mlir/Dialect/Arith/IR/ArithBase.td | 25 ++++++++++ .../include/mlir/Dialect/Arith/IR/ArithOps.td | 21 ++++++-- .../Dialect/Arith/IR/ArithOpsInterfaces.td | 33 +++++++++++++ .../ArithCommon/AttrToLLVMConverter.cpp | 31 ++++++++++++ .../ArithToAMDGPU/ArithToAMDGPU.cpp | 3 ++ .../Conversion/ArithToLLVM/ArithToLLVM.cpp | 32 ++++++++++++- .../Conversion/ArithToSPIRV/ArithToSPIRV.cpp | 9 ++++ mlir/lib/Dialect/Arith/IR/ArithOps.cpp | 46 ++++++++++++++---- .../Dialect/Arith/Transforms/ExpandOps.cpp | 5 ++ .../Conversion/ArithToLLVM/arith-to-llvm.mlir | 15 ++++++ mlir/test/Dialect/Arith/canonicalize.mlir | 45 +++++++++++++++++ mlir/test/Dialect/Arith/ops.mlir | 10 ++++ 13 files changed, 309 insertions(+), 14 deletions(-) diff --git a/mlir/include/mlir/Conversion/ArithCommon/AttrToLLVMConverter.h b/mlir/include/mlir/Conversion/ArithCommon/AttrToLLVMConverter.h index 32d7979c32dfb2..0891e2ba7be760 100644 --- a/mlir/include/mlir/Conversion/ArithCommon/AttrToLLVMConverter.h +++ b/mlir/include/mlir/Conversion/ArithCommon/AttrToLLVMConverter.h @@ -36,6 +36,20 @@ convertArithOverflowFlagsToLLVM(arith::IntegerOverflowFlags arithFlags); LLVM::IntegerOverflowFlagsAttr convertArithOverflowAttrToLLVM(arith::IntegerOverflowFlagsAttr flagsAttr); +/// Creates an LLVM rounding mode enum value from a given arithmetic rounding +/// mode enum value. +LLVM::RoundingMode +convertArithRoundingModeToLLVM(arith::RoundingMode roundingMode); + +/// Creates an LLVM rounding mode attribute from a given arithmetic rounding +/// mode attribute. +LLVM::RoundingModeAttr +convertArithRoundingModeAttrToLLVM(arith::RoundingModeAttr roundingModeAttr); + +/// Returns an attribute for the default LLVM FP exception behavior. 
+LLVM::FPExceptionBehaviorAttr +getLLVMDefaultFPExceptionBehavior(MLIRContext &context); + // Attribute converter that populates a NamedAttrList by removing the fastmath // attribute from the source operation attributes, and replacing it with an // equivalent LLVM fastmath attribute. @@ -89,6 +103,40 @@ class AttrConvertOverflowToLLVM { private: NamedAttrList convertedAttr; }; + +template +class AttrConverterConstrainedFPToLLVM { + static_assert(TargetOp::template hasTrait< + LLVM::FPExceptionBehaviorOpInterface::Trait>(), + "Target constrained FP operations must implement " + "LLVM::FPExceptionBehaviorOpInterface"); + +public: + AttrConverterConstrainedFPToLLVM(SourceOp srcOp) { + // Copy the source attributes. + convertedAttr = NamedAttrList{srcOp->getAttrs()}; + + if constexpr (TargetOp::template hasTrait< + LLVM::RoundingModeOpInterface::Trait>()) { + // Get the name of the rounding mode attribute. + StringRef arithAttrName = srcOp.getRoundingModeAttrName(); + // Remove the source attribute. + auto arithAttr = + cast(convertedAttr.erase(arithAttrName)); + // Set the target attribute. 
+ convertedAttr.set(TargetOp::getRoundingModeAttrName(), + convertArithRoundingModeAttrToLLVM(arithAttr)); + } + convertedAttr.set(TargetOp::getFPExceptionBehaviorAttrName(), + getLLVMDefaultFPExceptionBehavior(*srcOp->getContext())); + } + + ArrayRef getAttrs() const { return convertedAttr.getAttrs(); } + +private: + NamedAttrList convertedAttr; +}; + } // namespace arith } // namespace mlir diff --git a/mlir/include/mlir/Dialect/Arith/IR/ArithBase.td b/mlir/include/mlir/Dialect/Arith/IR/ArithBase.td index c8a42c43c880b0..19a2ade2e95a0e 100644 --- a/mlir/include/mlir/Dialect/Arith/IR/ArithBase.td +++ b/mlir/include/mlir/Dialect/Arith/IR/ArithBase.td @@ -156,4 +156,29 @@ def Arith_IntegerOverflowAttr : let assemblyFormat = "`<` $value `>`"; } +//===----------------------------------------------------------------------===// +// Arith_RoundingMode +//===----------------------------------------------------------------------===// + +// These correspond to LLVM's values defined in: +// llvm/include/llvm/ADT/FloatingPointMode.h + +def Arith_RToNearestTiesToEven // Round to nearest, ties to even + : I32EnumAttrCase<"to_nearest_even", 0>; +def Arith_RDownward // Round toward -inf + : I32EnumAttrCase<"downward", 1>; +def Arith_RUpward // Round toward +inf + : I32EnumAttrCase<"upward", 2>; +def Arith_RTowardZero // Round toward 0 + : I32EnumAttrCase<"toward_zero", 3>; +def Arith_RToNearestTiesAwayFromZero // Round to nearest, ties away from zero + : I32EnumAttrCase<"to_nearest_away", 4>; + +def Arith_RoundingModeAttr : I32EnumAttr< + "RoundingMode", "Floating point rounding mode", + [Arith_RToNearestTiesToEven, Arith_RDownward, Arith_RUpward, + Arith_RTowardZero, Arith_RToNearestTiesAwayFromZero]> { + let cppNamespace = "::mlir::arith"; +} + #endif // ARITH_BASE diff --git a/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td b/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td index c9df50d0395d1f..ead19c69a0831c 100644 --- a/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td +++ 
b/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td @@ -1227,17 +1227,32 @@ def Arith_TruncIOp : Arith_IToICastOp<"trunci"> { // TruncFOp //===----------------------------------------------------------------------===// -def Arith_TruncFOp : Arith_FToFCastOp<"truncf"> { +def Arith_TruncFOp : + Arith_Op<"truncf", + [Pure, SameOperandsAndResultShape, + DeclareOpInterfaceMethods, + DeclareOpInterfaceMethods]>, + Arguments<(ins FloatLike:$in, + OptionalAttr:$roundingmode)>, + Results<(outs FloatLike:$out)> { let summary = "cast from floating-point to narrower floating-point"; let description = [{ Truncate a floating-point value to a smaller floating-point-typed value. The destination type must be strictly narrower than the source type. - If the value cannot be exactly represented, it is rounded using the default - rounding mode. When operating on vectors, casts elementwise. + If the value cannot be exactly represented, it is rounded using the + provided rounding mode or the default one if no rounding mode is provided. + When operating on vectors, casts elementwise. }]; + let builders = [ + OpBuilder<(ins "Type":$out, "Value":$in), [{ + $_state.addOperands(in); + $_state.addTypes(out); + }]> + ]; let hasFolder = 1; let hasVerifier = 1; + let assemblyFormat = "$in ($roundingmode^)? attr-dict `:` type($in) `to` type($out)"; } //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/Arith/IR/ArithOpsInterfaces.td b/mlir/include/mlir/Dialect/Arith/IR/ArithOpsInterfaces.td index 73a5d9c32ef205..82d6c9ad6b03da 100644 --- a/mlir/include/mlir/Dialect/Arith/IR/ArithOpsInterfaces.td +++ b/mlir/include/mlir/Dialect/Arith/IR/ArithOpsInterfaces.td @@ -106,4 +106,37 @@ def ArithIntegerOverflowFlagsInterface : OpInterface<"ArithIntegerOverflowFlagsI ]; } +def ArithRoundingModeInterface : OpInterface<"ArithRoundingModeInterface"> { + let description = [{ + Access to op rounding mode. 
+ }]; + + let cppNamespace = "::mlir::arith"; + + let methods = [ + InterfaceMethod< + /*desc=*/ "Returns a RoundingModeAttr attribute for the operation", + /*returnType=*/ "RoundingModeAttr", + /*methodName=*/ "getRoundingModeAttr", + /*args=*/ (ins), + /*methodBody=*/ [{}], + /*defaultImpl=*/ [{ + auto op = cast(this->getOperation()); + return op.getRoundingmodeAttr(); + }] + >, + StaticInterfaceMethod< + /*desc=*/ [{Returns the name of the RoundingModeAttr attribute for + the operation}], + /*returnType=*/ "StringRef", + /*methodName=*/ "getRoundingModeAttrName", + /*args=*/ (ins), + /*methodBody=*/ [{}], + /*defaultImpl=*/ [{ + return "roundingmode"; + }] + > + ]; +} + #endif // ARITH_OPS_INTERFACES diff --git a/mlir/lib/Conversion/ArithCommon/AttrToLLVMConverter.cpp b/mlir/lib/Conversion/ArithCommon/AttrToLLVMConverter.cpp index dab064a3a954ec..f12eba98480d33 100644 --- a/mlir/lib/Conversion/ArithCommon/AttrToLLVMConverter.cpp +++ b/mlir/lib/Conversion/ArithCommon/AttrToLLVMConverter.cpp @@ -55,3 +55,34 @@ LLVM::IntegerOverflowFlagsAttr mlir::arith::convertArithOverflowAttrToLLVM( return LLVM::IntegerOverflowFlagsAttr::get( flagsAttr.getContext(), convertArithOverflowFlagsToLLVM(arithFlags)); } + +LLVM::RoundingMode +mlir::arith::convertArithRoundingModeToLLVM(arith::RoundingMode roundingMode) { + switch (roundingMode) { + case arith::RoundingMode::downward: + return LLVM::RoundingMode::TowardNegative; + case arith::RoundingMode::to_nearest_away: + return LLVM::RoundingMode::NearestTiesToAway; + case arith::RoundingMode::to_nearest_even: + return LLVM::RoundingMode::NearestTiesToEven; + case arith::RoundingMode::toward_zero: + return LLVM::RoundingMode::TowardZero; + case arith::RoundingMode::upward: + return LLVM::RoundingMode::TowardPositive; + } + llvm_unreachable("Unhandled rounding mode"); +} + +LLVM::RoundingModeAttr mlir::arith::convertArithRoundingModeAttrToLLVM( + arith::RoundingModeAttr roundingModeAttr) { + assert(roundingModeAttr && "Expecting 
valid attribute"); + return LLVM::RoundingModeAttr::get( + roundingModeAttr.getContext(), + convertArithRoundingModeToLLVM(roundingModeAttr.getValue())); +} + +LLVM::FPExceptionBehaviorAttr +mlir::arith::getLLVMDefaultFPExceptionBehavior(MLIRContext &context) { + return LLVM::FPExceptionBehaviorAttr::get(&context, + LLVM::FPExceptionBehavior::Ignore); +} diff --git a/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp b/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp index b51a13ae362e92..0113a3df0b8e3d 100644 --- a/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp +++ b/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp @@ -175,6 +175,9 @@ static Value clampInput(PatternRewriter &rewriter, Location loc, } LogicalResult TruncFToFloat8RewritePattern::match(arith::TruncFOp op) const { + // Only supporting default rounding mode as of now. + if (op.getRoundingmodeAttr()) + return failure(); Type outType = op.getOut().getType(); if (auto outVecType = outType.dyn_cast()) { if (outVecType.isScalable()) diff --git a/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp b/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp index 1f01f4a75c5b3e..d882f1184f4570 100644 --- a/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp +++ b/mlir/lib/Conversion/ArithToLLVM/ArithToLLVM.cpp @@ -28,6 +28,31 @@ using namespace mlir; namespace { +/// Operations whose conversion will depend on whether they are passed a +/// rounding mode attribute or not. +/// +/// `SourceOp` is the source operation; `TargetOp`, the operation it will lower +/// to; `AttrConvert` is the attribute conversion to convert the rounding mode +/// attribute. 
+template typename AttrConvert = + AttrConvertPassThrough> +struct ConstrainedVectorConvertToLLVMPattern + : public VectorConvertToLLVMPattern { + using VectorConvertToLLVMPattern::VectorConvertToLLVMPattern; + + LogicalResult + matchAndRewrite(SourceOp op, typename SourceOp::Adaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + if (Constrained != static_cast(op.getRoundingModeAttr())) + return failure(); + return VectorConvertToLLVMPattern::matchAndRewrite(op, adaptor, + rewriter); + } +}; + //===----------------------------------------------------------------------===// // Straightforward Op Lowerings //===----------------------------------------------------------------------===// @@ -112,7 +137,11 @@ using SubIOpLowering = VectorConvertToLLVMPattern; using TruncFOpLowering = - VectorConvertToLLVMPattern; + ConstrainedVectorConvertToLLVMPattern; +using ConstrainedTruncFOpLowering = ConstrainedVectorConvertToLLVMPattern< + arith::TruncFOp, LLVM::ConstrainedFPTruncIntr, true, + arith::AttrConverterConstrainedFPToLLVM>; using TruncIOpLowering = VectorConvertToLLVMPattern; using UIToFPOpLowering = @@ -537,6 +566,7 @@ void mlir::arith::populateArithToLLVMConversionPatterns( SubFOpLowering, SubIOpLowering, TruncFOpLowering, + ConstrainedTruncFOpLowering, TruncIOpLowering, UIToFPOpLowering, XOrIOpLowering diff --git a/mlir/lib/Conversion/ArithToSPIRV/ArithToSPIRV.cpp b/mlir/lib/Conversion/ArithToSPIRV/ArithToSPIRV.cpp index 7456bf7a87a3b7..806981728561c1 100644 --- a/mlir/lib/Conversion/ArithToSPIRV/ArithToSPIRV.cpp +++ b/mlir/lib/Conversion/ArithToSPIRV/ArithToSPIRV.cpp @@ -805,6 +805,15 @@ struct TypeCastingOpPattern final : public OpConversionPattern { } else { rewriter.template replaceOpWithNewOp(op, dstType, adaptor.getOperands()); + if (auto roundingModeOp = + dyn_cast(*op)) { + if (arith::RoundingModeAttr roundingMode = + roundingModeOp.getRoundingModeAttr()) { + // TODO: Perform rounding mode attribute conversion and attach to new + // 
operation when defined in the dialect. + return failure(); + } + } } return success(); } diff --git a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp index 2f32d9a26e7752..0d466795fac0de 100644 --- a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp +++ b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp @@ -91,6 +91,29 @@ arith::CmpIPredicate arith::invertPredicate(arith::CmpIPredicate pred) { llvm_unreachable("unknown cmpi predicate kind"); } +/// Equivalent to +/// convertRoundingModeToLLVM(convertArithRoundingModeToLLVM(roundingMode)). +/// +/// Not possible to implement as chain of calls as this would introduce a +/// circular dependency with MLIRArithAttrToLLVMConversion and make arith depend +/// on the LLVM dialect and on translation to LLVM. +static llvm::RoundingMode +convertArithRoundingModeToLLVMIR(RoundingMode roundingMode) { + switch (roundingMode) { + case RoundingMode::downward: + return llvm::RoundingMode::TowardNegative; + case RoundingMode::to_nearest_away: + return llvm::RoundingMode::NearestTiesToAway; + case RoundingMode::to_nearest_even: + return llvm::RoundingMode::NearestTiesToEven; + case RoundingMode::toward_zero: + return llvm::RoundingMode::TowardZero; + case RoundingMode::upward: + return llvm::RoundingMode::TowardPositive; + } + llvm_unreachable("Unhandled rounding mode"); +} + static arith::CmpIPredicateAttr invertPredicate(arith::CmpIPredicateAttr pred) { return arith::CmpIPredicateAttr::get(pred.getContext(), invertPredicate(pred.getValue())); @@ -1233,13 +1256,12 @@ static bool checkWidthChangeCast(TypeRange inputs, TypeRange outputs) { } /// Attempts to convert `sourceValue` to an APFloat value with -/// `targetSemantics`, without any information loss or rounding. -static FailureOr -convertFloatValue(APFloat sourceValue, - const llvm::fltSemantics &targetSemantics) { +/// `targetSemantics` and `roundingMode`, without any information loss. 
+static FailureOr convertFloatValue( + APFloat sourceValue, const llvm::fltSemantics &targetSemantics, + llvm::RoundingMode roundingMode = llvm::RoundingMode::NearestTiesToEven) { bool losesInfo = false; - auto status = sourceValue.convert( - targetSemantics, llvm::RoundingMode::NearestTiesToEven, &losesInfo); + auto status = sourceValue.convert(targetSemantics, roundingMode, &losesInfo); if (losesInfo || status != APFloat::opOK) return failure(); @@ -1391,15 +1413,19 @@ LogicalResult arith::TruncIOp::verify() { //===----------------------------------------------------------------------===// /// Perform safe const propagation for truncf, i.e., only propagate if FP value -/// can be represented without precision loss or rounding. This is because the -/// semantics of `arith.truncf` do not assume a specific rounding mode. +/// can be represented without precision loss. OpFoldResult arith::TruncFOp::fold(FoldAdaptor adaptor) { auto resElemType = cast(getElementTypeOrSelf(getType())); const llvm::fltSemantics &targetSemantics = resElemType.getFloatSemantics(); return constFoldCastOp( adaptor.getOperands(), getType(), - [&targetSemantics](const APFloat &a, bool &castStatus) { - FailureOr result = convertFloatValue(a, targetSemantics); + [this, &targetSemantics](const APFloat &a, bool &castStatus) { + RoundingMode roundingMode = + getRoundingmode().value_or(RoundingMode::to_nearest_even); + llvm::RoundingMode llvmRoundingMode = + convertArithRoundingModeToLLVMIR(roundingMode); + FailureOr result = + convertFloatValue(a, targetSemantics, llvmRoundingMode); if (failed(result)) { castStatus = false; return a; diff --git a/mlir/lib/Dialect/Arith/Transforms/ExpandOps.cpp b/mlir/lib/Dialect/Arith/Transforms/ExpandOps.cpp index 71e14a153cfda9..dd04a599655894 100644 --- a/mlir/lib/Dialect/Arith/Transforms/ExpandOps.cpp +++ b/mlir/lib/Dialect/Arith/Transforms/ExpandOps.cpp @@ -253,6 +253,11 @@ struct BFloat16TruncFOpConverter : public OpRewritePattern { return 
rewriter.notifyMatchFailure(op, "not a trunc of f32 to bf16."); } + if (op.getRoundingmodeAttr()) { + return rewriter.notifyMatchFailure( + op, "only applicable to default rounding mode."); + } + Type i16Ty = b.getI16Type(); Type i32Ty = b.getI32Type(); Type f32Ty = b.getF32Type(); diff --git a/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir b/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir index 29268eef47e853..56ae930e6d6273 100644 --- a/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir +++ b/mlir/test/Conversion/ArithToLLVM/arith-to-llvm.mlir @@ -289,6 +289,21 @@ func.func @fptrunc_vector(%arg0 : vector<2xf32>, %arg1 : vector<2xf64>) { return } +// CHECK-LABEL: experimental_constrained_fptrunc +func.func @experimental_constrained_fptrunc(%arg0 : f64) { +// CHECK-NEXT: = llvm.intr.experimental.constrained.fptrunc {{.*}} tonearest ignore : f64 to f32 + %0 = arith.truncf %arg0 to_nearest_even : f64 to f32 +// CHECK-NEXT: = llvm.intr.experimental.constrained.fptrunc {{.*}} downward ignore : f64 to f32 + %1 = arith.truncf %arg0 downward : f64 to f32 +// CHECK-NEXT: = llvm.intr.experimental.constrained.fptrunc {{.*}} upward ignore : f64 to f32 + %2 = arith.truncf %arg0 upward : f64 to f32 +// CHECK-NEXT: = llvm.intr.experimental.constrained.fptrunc {{.*}} towardzero ignore : f64 to f32 + %3 = arith.truncf %arg0 toward_zero : f64 to f32 +// CHECK-NEXT: = llvm.intr.experimental.constrained.fptrunc {{.*}} tonearestaway ignore : f64 to f32 + %4 = arith.truncf %arg0 to_nearest_away : f64 to f32 + return +} + // Check sign and zero extension and truncation of integers. 
// CHECK-LABEL: @integer_extension_and_truncation func.func @integer_extension_and_truncation(%arg0 : i3) { diff --git a/mlir/test/Dialect/Arith/canonicalize.mlir b/mlir/test/Dialect/Arith/canonicalize.mlir index bdc6c91d926775..79a318565e98f9 100644 --- a/mlir/test/Dialect/Arith/canonicalize.mlir +++ b/mlir/test/Dialect/Arith/canonicalize.mlir @@ -757,6 +757,51 @@ func.func @truncFPConstant() -> bf16 { return %0 : bf16 } +// CHECK-LABEL: @truncFPToNearestEvenConstant +// CHECK: %[[cres:.+]] = arith.constant 1.000000e+00 : bf16 +// CHECK: return %[[cres]] +func.func @truncFPToNearestEvenConstant() -> bf16 { + %cst = arith.constant 1.000000e+00 : f32 + %0 = arith.truncf %cst to_nearest_even : f32 to bf16 + return %0 : bf16 +} + +// CHECK-LABEL: @truncFPDownwardConstant +// CHECK: %[[cres:.+]] = arith.constant 1.000000e+00 : bf16 +// CHECK: return %[[cres]] +func.func @truncFPDownwardConstant() -> bf16 { + %cst = arith.constant 1.000000e+00 : f32 + %0 = arith.truncf %cst downward : f32 to bf16 + return %0 : bf16 +} + +// CHECK-LABEL: @truncFPUpwardConstant +// CHECK: %[[cres:.+]] = arith.constant 1.000000e+00 : bf16 +// CHECK: return %[[cres]] +func.func @truncFPUpwardConstant() -> bf16 { + %cst = arith.constant 1.000000e+00 : f32 + %0 = arith.truncf %cst upward : f32 to bf16 + return %0 : bf16 +} + +// CHECK-LABEL: @truncFPTowardZeroConstant +// CHECK: %[[cres:.+]] = arith.constant 1.000000e+00 : bf16 +// CHECK: return %[[cres]] +func.func @truncFPTowardZeroConstant() -> bf16 { + %cst = arith.constant 1.000000e+00 : f32 + %0 = arith.truncf %cst toward_zero : f32 to bf16 + return %0 : bf16 +} + +// CHECK-LABEL: @truncFPToNearestAwayConstant +// CHECK: %[[cres:.+]] = arith.constant 1.000000e+00 : bf16 +// CHECK: return %[[cres]] +func.func @truncFPToNearestAwayConstant() -> bf16 { + %cst = arith.constant 1.000000e+00 : f32 + %0 = arith.truncf %cst to_nearest_away : f32 to bf16 + return %0 : bf16 +} + // CHECK-LABEL: @truncFPVectorConstant // CHECK: %[[cres:.+]] = 
arith.constant dense<[0.000000e+00, 1.000000e+00]> : vector<2xbf16> // CHECK: return %[[cres]] diff --git a/mlir/test/Dialect/Arith/ops.mlir b/mlir/test/Dialect/Arith/ops.mlir index e499573e324b5f..f684e02344a517 100644 --- a/mlir/test/Dialect/Arith/ops.mlir +++ b/mlir/test/Dialect/Arith/ops.mlir @@ -703,6 +703,16 @@ func.func @test_truncf_scalable_vector(%arg0 : vector<[8]xf32>) -> vector<[8]xbf return %0 : vector<[8]xbf16> } +// CHECK-LABEL: test_truncf_rounding_mode +func.func @test_truncf_rounding_mode(%arg0 : f64) -> (f32, f32, f32, f32, f32) { + %0 = arith.truncf %arg0 to_nearest_even : f64 to f32 + %1 = arith.truncf %arg0 downward : f64 to f32 + %2 = arith.truncf %arg0 upward : f64 to f32 + %3 = arith.truncf %arg0 toward_zero : f64 to f32 + %4 = arith.truncf %arg0 to_nearest_away : f64 to f32 + return %0, %1, %2, %3, %4 : f32, f32, f32, f32, f32 +} + // CHECK-LABEL: test_uitofp func.func @test_uitofp(%arg0 : i32) -> f32 { %0 = arith.uitofp %arg0 : i32 to f32 From b342d87f89a7cc588abd0d28f69b8dfd9e5cfa0a Mon Sep 17 00:00:00 2001 From: Pengcheng Wang Date: Mon, 1 Apr 2024 18:04:28 +0800 Subject: [PATCH 006/201] [TableGen][NFC] Add maybe_unused to MRI (#87044) This suppresses warning `unused variable 'MRI' [-Wunused-variable]` for those fusions that don't need `MRI`. 
--- llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp b/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp index e9e63fa8d0de8c..63b827a35177e3 100644 --- a/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp +++ b/llvm/utils/TableGen/MacroFusionPredicatorEmitter.cpp @@ -108,7 +108,8 @@ void MacroFusionPredicatorEmitter::emitMacroFusionImpl( OS.indent(4) << "const TargetSubtargetInfo &STI,\n"; OS.indent(4) << "const MachineInstr *FirstMI,\n"; OS.indent(4) << "const MachineInstr &SecondMI) {\n"; - OS.indent(2) << "auto &MRI = SecondMI.getMF()->getRegInfo();\n"; + OS.indent(2) + << "[[maybe_unused]] auto &MRI = SecondMI.getMF()->getRegInfo();\n"; emitPredicates(Predicates, IsCommutable, PE, OS); From a34834138a53f7eb28ba24d325b258ac6f51ae8c Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 1 Apr 2024 11:12:21 +0100 Subject: [PATCH 007/201] [VPlan] Inline addVPValue into single caller (NFCI). Inline the function into its single caller. --- llvm/lib/Transforms/Vectorize/VPlan.h | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 6c90c793e66cae..fdb5c125434ade 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -3006,13 +3006,6 @@ class VPlan { void setName(const Twine &newName) { Name = newName.str(); } - void addVPValue(Value *V, VPValue *VPV) { - assert(VPV->isLiveIn() && "VPV must be a live-in."); - assert(V && "Trying to add a null Value to VPlan"); - assert(!Value2VPValue.count(V) && "Value already exists in VPlan"); - Value2VPValue[V] = VPV; - } - /// Gets the live-in VPValue for \p V or adds a new live-in (if none exists /// yet) for \p V. 
VPValue *getOrAddLiveIn(Value *V) { @@ -3020,7 +3013,9 @@ class VPlan { if (!Value2VPValue.count(V)) { VPValue *VPV = new VPValue(V); VPLiveInsToFree.push_back(VPV); - addVPValue(V, VPV); + assert(VPV->isLiveIn() && "VPV must be a live-in."); + assert(!Value2VPValue.count(V) && "Value already exists in VPlan"); + Value2VPValue[V] = VPV; } assert(Value2VPValue.count(V) && "Value does not exist in VPlan"); From c9bcb2b7ddd08e09d75b263273ddb6e0a49a82da Mon Sep 17 00:00:00 2001 From: Wang Pengcheng Date: Mon, 1 Apr 2024 18:16:52 +0800 Subject: [PATCH 008/201] [TableGen] Fix MacroFusion.td We are missing `[[maybe_unused]]`. --- llvm/test/TableGen/MacroFusion.td | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/test/TableGen/MacroFusion.td b/llvm/test/TableGen/MacroFusion.td index b54b0506c56359..6cf22f5447150b 100644 --- a/llvm/test/TableGen/MacroFusion.td +++ b/llvm/test/TableGen/MacroFusion.td @@ -83,7 +83,7 @@ def TestSingleFusion: SingleFusion<"test-single-fusion", "HasTestSingleFusion", // CHECK-PREDICATOR-NEXT: const TargetSubtargetInfo &STI, // CHECK-PREDICATOR-NEXT: const MachineInstr *FirstMI, // CHECK-PREDICATOR-NEXT: const MachineInstr &SecondMI) { -// CHECK-PREDICATOR-NEXT: auto &MRI = SecondMI.getMF()->getRegInfo(); +// CHECK-PREDICATOR-NEXT: {{[[]}}{{[[]}}maybe_unused{{[]]}}{{[]]}} auto &MRI = SecondMI.getMF()->getRegInfo(); // CHECK-PREDICATOR-NEXT: { // CHECK-PREDICATOR-NEXT: const MachineInstr *MI = FirstMI; // CHECK-PREDICATOR-NEXT: if (MI->getOperand(0).getReg() != Test::X0) @@ -101,7 +101,7 @@ def TestSingleFusion: SingleFusion<"test-single-fusion", "HasTestSingleFusion", // CHECK-PREDICATOR-NEXT: const TargetSubtargetInfo &STI, // CHECK-PREDICATOR-NEXT: const MachineInstr *FirstMI, // CHECK-PREDICATOR-NEXT: const MachineInstr &SecondMI) { -// CHECK-PREDICATOR-NEXT: auto &MRI = SecondMI.getMF()->getRegInfo(); +// CHECK-PREDICATOR-NEXT: {{[[]}}{{[[]}}maybe_unused{{[]]}}{{[]]}} auto &MRI = SecondMI.getMF()->getRegInfo(); // 
CHECK-PREDICATOR-NEXT: { // CHECK-PREDICATOR-NEXT: const MachineInstr *MI = &SecondMI; // CHECK-PREDICATOR-NEXT: if (!( @@ -149,7 +149,7 @@ def TestSingleFusion: SingleFusion<"test-single-fusion", "HasTestSingleFusion", // CHECK-PREDICATOR-NEXT: const TargetSubtargetInfo &STI, // CHECK-PREDICATOR-NEXT: const MachineInstr *FirstMI, // CHECK-PREDICATOR-NEXT: const MachineInstr &SecondMI) { -// CHECK-PREDICATOR-NEXT: auto &MRI = SecondMI.getMF()->getRegInfo(); +// CHECK-PREDICATOR-NEXT: {{[[]}}{{[[]}}maybe_unused{{[]]}}{{[]]}} auto &MRI = SecondMI.getMF()->getRegInfo(); // CHECK-PREDICATOR-NEXT: { // CHECK-PREDICATOR-NEXT: const MachineInstr *MI = &SecondMI; // CHECK-PREDICATOR-NEXT: if (!( @@ -185,7 +185,7 @@ def TestSingleFusion: SingleFusion<"test-single-fusion", "HasTestSingleFusion", // CHECK-PREDICATOR-NEXT: const TargetSubtargetInfo &STI, // CHECK-PREDICATOR-NEXT: const MachineInstr *FirstMI, // CHECK-PREDICATOR-NEXT: const MachineInstr &SecondMI) { -// CHECK-PREDICATOR-NEXT: auto &MRI = SecondMI.getMF()->getRegInfo(); +// CHECK-PREDICATOR-NEXT: {{[[]}}{{[[]}}maybe_unused{{[]]}}{{[]]}} auto &MRI = SecondMI.getMF()->getRegInfo(); // CHECK-PREDICATOR-NEXT: { // CHECK-PREDICATOR-NEXT: const MachineInstr *MI = &SecondMI; // CHECK-PREDICATOR-NEXT: if (!( From e701c1a653088488ef67a9fa5b01ab37a482b690 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 1 Apr 2024 12:07:30 +0100 Subject: [PATCH 009/201] [VPlan] Use recipe's debug loc for VPWidenMemoryInstructionRecipe (NFCI) Now that VPRecipeBase manages debug locations for recipes, use it in VPWidenMemoryInstructionRecipe. 
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 10 +++++----- llvm/lib/Transforms/Vectorize/VPlan.h | 16 +++++++++------- .../lib/Transforms/Vectorize/VPlanTransforms.cpp | 6 ++++-- .../unittests/Transforms/Vectorize/VPlanTest.cpp | 6 +++--- 4 files changed, 21 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 452c84f2dcf505..0834865173b2f1 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8074,11 +8074,11 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef Operands, } if (LoadInst *Load = dyn_cast(I)) return new VPWidenMemoryInstructionRecipe(*Load, Ptr, Mask, Consecutive, - Reverse); + Reverse, I->getDebugLoc()); StoreInst *Store = cast(I); - return new VPWidenMemoryInstructionRecipe(*Store, Ptr, Operands[0], Mask, - Consecutive, Reverse); + return new VPWidenMemoryInstructionRecipe( + *Store, Ptr, Operands[0], Mask, Consecutive, Reverse, I->getDebugLoc()); } /// Creates a VPWidenIntOrFpInductionRecpipe for \p Phi. If needed, it will also @@ -9340,7 +9340,7 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { // Handle Stores: if (SI) { - State.setDebugLocFrom(SI->getDebugLoc()); + State.setDebugLocFrom(getDebugLoc()); for (unsigned Part = 0; Part < State.UF; ++Part) { Instruction *NewSI = nullptr; @@ -9372,7 +9372,7 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { // Handle loads. 
assert(LI && "Must have a load instruction"); - State.setDebugLocFrom(LI->getDebugLoc()); + State.setDebugLocFrom(getDebugLoc()); for (unsigned Part = 0; Part < State.UF; ++Part) { Value *NewLI; if (CreateGatherScatter) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index fdb5c125434ade..3baca43f72767d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2278,8 +2278,8 @@ class VPWidenMemoryInstructionRecipe : public VPRecipeBase { public: VPWidenMemoryInstructionRecipe(LoadInst &Load, VPValue *Addr, VPValue *Mask, - bool Consecutive, bool Reverse) - : VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr}), + bool Consecutive, bool Reverse, DebugLoc DL) + : VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr}, DL), Ingredient(Load), Consecutive(Consecutive), Reverse(Reverse) { assert((Consecutive || !Reverse) && "Reverse implies consecutive"); new VPValue(this, &Load); @@ -2288,8 +2288,9 @@ class VPWidenMemoryInstructionRecipe : public VPRecipeBase { VPWidenMemoryInstructionRecipe(StoreInst &Store, VPValue *Addr, VPValue *StoredValue, VPValue *Mask, - bool Consecutive, bool Reverse) - : VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr, StoredValue}), + bool Consecutive, bool Reverse, DebugLoc DL) + : VPRecipeBase(VPDef::VPWidenMemoryInstructionSC, {Addr, StoredValue}, + DL), Ingredient(Store), Consecutive(Consecutive), Reverse(Reverse) { assert((Consecutive || !Reverse) && "Reverse implies consecutive"); setMask(Mask); @@ -2299,10 +2300,11 @@ class VPWidenMemoryInstructionRecipe : public VPRecipeBase { if (isStore()) return new VPWidenMemoryInstructionRecipe( cast(Ingredient), getAddr(), getStoredValue(), getMask(), - Consecutive, Reverse); + Consecutive, Reverse, getDebugLoc()); - return new VPWidenMemoryInstructionRecipe( - cast(Ingredient), getAddr(), getMask(), Consecutive, Reverse); + return new VPWidenMemoryInstructionRecipe(cast(Ingredient), + getAddr(), 
getMask(), Consecutive, + Reverse, getDebugLoc()); } VP_CLASSOF_IMPL(VPDef::VPWidenMemoryInstructionSC) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 6f881d4328f601..957c97cdea5d02 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -64,11 +64,13 @@ void VPlanTransforms::VPInstructionsToVPRecipes( if (LoadInst *Load = dyn_cast(Inst)) { NewRecipe = new VPWidenMemoryInstructionRecipe( *Load, Ingredient.getOperand(0), nullptr /*Mask*/, - false /*Consecutive*/, false /*Reverse*/); + false /*Consecutive*/, false /*Reverse*/, + Ingredient.getDebugLoc()); } else if (StoreInst *Store = dyn_cast(Inst)) { NewRecipe = new VPWidenMemoryInstructionRecipe( *Store, Ingredient.getOperand(1), Ingredient.getOperand(0), - nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/); + nullptr /*Mask*/, false /*Consecutive*/, false /*Reverse*/, + Ingredient.getDebugLoc()); } else if (GetElementPtrInst *GEP = dyn_cast(Inst)) { NewRecipe = new VPWidenGEPRecipe(GEP, Ingredient.operands()); } else if (CallInst *CI = dyn_cast(Inst)) { diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp index e537aac75515ca..02e7ca341fe229 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -1036,7 +1036,7 @@ TEST(VPRecipeTest, CastVPWidenMemoryInstructionRecipeToVPUserAndVPDef) { new LoadInst(Int32, UndefValue::get(Int32Ptr), "", false, Align(1)); VPValue Addr; VPValue Mask; - VPWidenMemoryInstructionRecipe Recipe(*Load, &Addr, &Mask, true, false); + VPWidenMemoryInstructionRecipe Recipe(*Load, &Addr, &Mask, true, false, {}); EXPECT_TRUE(isa(&Recipe)); VPRecipeBase *BaseR = &Recipe; EXPECT_TRUE(isa(BaseR)); @@ -1131,7 +1131,7 @@ TEST(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) { new LoadInst(Int32, 
UndefValue::get(Int32Ptr), "", false, Align(1)); VPValue Addr; VPValue Mask; - VPWidenMemoryInstructionRecipe Recipe(*Load, &Addr, &Mask, true, false); + VPWidenMemoryInstructionRecipe Recipe(*Load, &Addr, &Mask, true, false, {}); EXPECT_FALSE(Recipe.mayHaveSideEffects()); EXPECT_TRUE(Recipe.mayReadFromMemory()); EXPECT_FALSE(Recipe.mayWriteToMemory()); @@ -1146,7 +1146,7 @@ TEST(VPRecipeTest, MayHaveSideEffectsAndMayReadWriteMemory) { VPValue Mask; VPValue StoredV; VPWidenMemoryInstructionRecipe Recipe(*Store, &Addr, &StoredV, &Mask, false, - false); + false, {}); EXPECT_TRUE(Recipe.mayHaveSideEffects()); EXPECT_FALSE(Recipe.mayReadFromMemory()); EXPECT_TRUE(Recipe.mayWriteToMemory()); From f546b6ef3c15a156959dde16fa5f03a350a0a2be Mon Sep 17 00:00:00 2001 From: harishch4 Date: Mon, 1 Apr 2024 17:13:22 +0530 Subject: [PATCH 010/201] =?UTF-8?q?[Flang]=20Relaxing=20an=20error=20when?= =?UTF-8?q?=20contiguous=20pointer=20is=20assigned=20to=20a=20non-contig?= =?UTF-8?q?=E2=80=A6=20(#86781)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …uous function. Fix from [thtsikas](https://github.com/thtsikas) based on a discussion in [slack](https://flang-compiler.slack.com/archives/C5C58TT32/p1711124374836079). Example: ``` Program test Integer, Pointer, Contiguous :: cont(:) Interface Function f() Integer, Pointer :: f(:) End Function End Interface cont => f() Print *, cont(3) End Program Function f() Integer, Pointer :: f(:) Allocate (f(4),Source=[1,1,42,1]) ! f => f(4:1:-1) !! not contiguous, runtime error End Function f ``` Understanding is that the standard intended to allow this pattern. The restriction 10.2.2.3 p6 Data pointer assignment "If the pointer object has the CONTIGUOUS attribute, the pointer target shall be contiguous." is not associated with a numbered constraint. If there is a mechanism for injecting runtime checks, this would be a place to do it. Absent that, a warning is the best we can do. 
No other compiler treats contigPtr => func() as an error when func() is not CONTIGUOUS, so a warning would probably be better for consistency. https://godbolt.org/z/5cM6roeEE --- flang/lib/Semantics/pointer-assignment.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flang/lib/Semantics/pointer-assignment.cpp b/flang/lib/Semantics/pointer-assignment.cpp index 58155a29da1ee5..4b4ce153084d8e 100644 --- a/flang/lib/Semantics/pointer-assignment.cpp +++ b/flang/lib/Semantics/pointer-assignment.cpp @@ -266,7 +266,7 @@ bool PointerAssignmentChecker::Check(const evaluate::FunctionRef &f) { } else if (isContiguous_ && !funcResult->attrs.test(FunctionResult::Attr::Contiguous)) { msg = "CONTIGUOUS %s is associated with the result of reference to" - " function '%s' that is not contiguous"_err_en_US; + " function '%s' that is not known to be contiguous"_warn_en_US; } else if (lhsType_) { const auto *frTypeAndShape{funcResult->GetTypeAndShape()}; CHECK(frTypeAndShape); From 4213f4a9ae0ef70e02da9f40653b4e04eea00c74 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 1 Apr 2024 07:28:34 -0500 Subject: [PATCH 011/201] [Libomptarget] Fix resizing the buffer of RPC handles Summary: The previous code would potentially make it smaller if a device with a lower ID touched it later. Also we should minimize changes to the state for multi threaded reasons. This just sets up an owned slot for each at initialization time. 
--- openmp/libomptarget/plugins-nextgen/common/include/RPC.h | 4 ++++ .../plugins-nextgen/common/src/PluginInterface.cpp | 2 +- openmp/libomptarget/plugins-nextgen/common/src/RPC.cpp | 4 +++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/openmp/libomptarget/plugins-nextgen/common/include/RPC.h b/openmp/libomptarget/plugins-nextgen/common/include/RPC.h index b621cc0da4587d..01bf539bcb3f32 100644 --- a/openmp/libomptarget/plugins-nextgen/common/include/RPC.h +++ b/openmp/libomptarget/plugins-nextgen/common/include/RPC.h @@ -23,6 +23,7 @@ namespace llvm::omp::target { namespace plugin { +struct GenericPluginTy; struct GenericDeviceTy; class GenericGlobalHandlerTy; class DeviceImageTy; @@ -33,6 +34,9 @@ class DeviceImageTy; /// these routines will perform no action. struct RPCServerTy { public: + /// Initializes the handles to the number of devices we may need to service. + RPCServerTy(plugin::GenericPluginTy &Plugin); + /// Check if this device image is using an RPC server. This checks for the /// precense of an externally visible symbol in the device image that will /// be present whenever RPC code is called. 
diff --git a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp index 55e2865d6aae42..b5f3c45c835fdb 100644 --- a/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp +++ b/openmp/libomptarget/plugins-nextgen/common/src/PluginInterface.cpp @@ -1492,7 +1492,7 @@ Error GenericPluginTy::init() { GlobalHandler = createGlobalHandler(); assert(GlobalHandler && "Invalid global handler"); - RPCServer = new RPCServerTy(); + RPCServer = new RPCServerTy(*this); assert(RPCServer && "Invalid RPC server"); return Plugin::success(); diff --git a/openmp/libomptarget/plugins-nextgen/common/src/RPC.cpp b/openmp/libomptarget/plugins-nextgen/common/src/RPC.cpp index fab0f6838f4a87..faa2cbd4f02fe1 100644 --- a/openmp/libomptarget/plugins-nextgen/common/src/RPC.cpp +++ b/openmp/libomptarget/plugins-nextgen/common/src/RPC.cpp @@ -21,6 +21,9 @@ using namespace llvm; using namespace omp; using namespace target; +RPCServerTy::RPCServerTy(plugin::GenericPluginTy &Plugin) + : Handles(Plugin.getNumDevices()) {} + llvm::Expected RPCServerTy::isDeviceUsingRPC(plugin::GenericDeviceTy &Device, plugin::GenericGlobalHandlerTy &Handler, @@ -101,7 +104,6 @@ Error RPCServerTy::initDevice(plugin::GenericDeviceTy &Device, if (auto Err = Device.dataSubmit(ClientPtr, ClientBuffer, rpc_get_client_size(), nullptr)) return Err; - Handles.resize(Device.getDeviceId() + 1); Handles[Device.getDeviceId()] = RPCDevice.handle; #endif return Error::success(); From da9f06c9b1179423302e3e7ccb27431ced44e548 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Mon, 1 Apr 2024 08:46:22 -0400 Subject: [PATCH 012/201] [GISEL] G_SPLAT_VECTOR can take a splat that is larger than the vector element (#86974) This is what SelectionDAG does. We'd like to reuse SelectionDAG patterns. 
--- llvm/docs/GlobalISel/GenericOpcode.rst | 4 ++++ llvm/lib/CodeGen/MachineVerifier.cpp | 17 ++++++++++++----- .../MachineVerifier/test_g_splat_vector.mir | 4 ++-- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/llvm/docs/GlobalISel/GenericOpcode.rst b/llvm/docs/GlobalISel/GenericOpcode.rst index cae2c21b80d7e7..a12627c01d205b 100644 --- a/llvm/docs/GlobalISel/GenericOpcode.rst +++ b/llvm/docs/GlobalISel/GenericOpcode.rst @@ -690,6 +690,10 @@ G_SPLAT_VECTOR Create a vector where all elements are the scalar from the source operand. +The type of the operand must be equal to or larger than the vector element +type. If the operand is larger than the vector element type, the scalar is +implicitly truncated to the vector element type. + Vector Reduction Operations --------------------------- diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp index e4e05ce9278caf..fd7ea28426470a 100644 --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -1768,16 +1768,23 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); LLT SrcTy = MRI->getType(MI->getOperand(1).getReg()); - if (!DstTy.isScalableVector()) + if (!DstTy.isScalableVector()) { report("Destination type must be a scalable vector", MI); + break; + } - if (!SrcTy.isScalar()) + if (!SrcTy.isScalar()) { report("Source type must be a scalar", MI); + break; + } - if (DstTy.getScalarType() != SrcTy) - report("Element type of the destination must be the same type as the " - "source type", + if (TypeSize::isKnownGT(DstTy.getElementType().getSizeInBits(), + SrcTy.getSizeInBits())) { + report("Element type of the destination must be the same size or smaller " + "than the source type", MI); + break; + } break; } diff --git a/llvm/test/MachineVerifier/test_g_splat_vector.mir b/llvm/test/MachineVerifier/test_g_splat_vector.mir index 0d1d8a3e6dcc64..00074349776fa7 
100644 --- a/llvm/test/MachineVerifier/test_g_splat_vector.mir +++ b/llvm/test/MachineVerifier/test_g_splat_vector.mir @@ -22,6 +22,6 @@ body: | ; CHECK: Source type must be a scalar %6:_() = G_SPLAT_VECTOR %2 - ; CHECK: Element type of the destination must be the same type as the source type - %7:_() = G_SPLAT_VECTOR %0 + ; CHECK: Element type of the destination must be the same size or smaller than the source type + %7:_() = G_SPLAT_VECTOR %0 ... From 41afef9066eec8daf517ac357a628cdf30c95e39 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Mon, 1 Apr 2024 04:20:08 -0700 Subject: [PATCH 013/201] [SLP]Fix PR87011: Missing sign extension of demoted type before zero extension Need to drop skipping of the first zext/sext nodes, it leads to incorrect and less profitable code. --- .../Transforms/Vectorize/SLPVectorizer.cpp | 6 ++--- .../RISCV/init-ext-node-not-truncable.ll | 2 +- .../SLPVectorizer/X86/sext-inseltpoison.ll | 22 +++++-------------- .../test/Transforms/SLPVectorizer/X86/sext.ll | 22 +++++-------------- .../SLPVectorizer/X86/zext-inseltpoison.ll | 9 ++------ .../test/Transforms/SLPVectorizer/X86/zext.ll | 9 ++------ 6 files changed, 19 insertions(+), 51 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 2bc0c5dcc6069d..1ffc39a9067431 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -14263,11 +14263,9 @@ void BoUpSLP::computeMinimumValueSizes() { SmallVector RootDemotes; if (NodeIdx != 0 && VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize && - (VectorizableTree[NodeIdx]->getOpcode() == Instruction::ZExt || - VectorizableTree[NodeIdx]->getOpcode() == Instruction::SExt || - VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc)) { + VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc) { assert(IsStoreOrInsertElt && "Expected store/insertelement seeded graph."); - IsTruncRoot = 
VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc; + IsTruncRoot = true; RootDemotes.push_back(NodeIdx); IsProfitableToDemoteRoot = true; ++NodeIdx; diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/init-ext-node-not-truncable.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/init-ext-node-not-truncable.ll index 436fba3261d602..1166b1fca826b6 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/init-ext-node-not-truncable.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/init-ext-node-not-truncable.ll @@ -7,7 +7,7 @@ define void @test() { ; CHECK-LABEL: define void @test( ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: store <2 x i64> , ptr @h, align 8 +; CHECK-NEXT: store <2 x i64> , ptr @h, align 8 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll index 5ae0ad932fdddb..b64743aaa283c7 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE,SSE2 -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE,SLM +; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX @@ -11,20 +11,10 @@ ; define 
<2 x i64> @loadext_2i8_to_2i64(ptr %p0) { -; SSE2-LABEL: @loadext_2i8_to_2i64( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P0:%.*]], i64 1 -; SSE2-NEXT: [[I0:%.*]] = load i8, ptr [[P0]], align 1 -; SSE2-NEXT: [[I1:%.*]] = load i8, ptr [[P1]], align 1 -; SSE2-NEXT: [[X0:%.*]] = sext i8 [[I0]] to i64 -; SSE2-NEXT: [[X1:%.*]] = sext i8 [[I1]] to i64 -; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> poison, i64 [[X0]], i32 0 -; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1 -; SSE2-NEXT: ret <2 x i64> [[V1]] -; -; SLM-LABEL: @loadext_2i8_to_2i64( -; SLM-NEXT: [[TMP2:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1 -; SLM-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64> -; SLM-NEXT: ret <2 x i64> [[TMP3]] +; SSE-LABEL: @loadext_2i8_to_2i64( +; SSE-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1 +; SSE-NEXT: [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i64> +; SSE-NEXT: ret <2 x i64> [[TMP2]] ; ; AVX-LABEL: @loadext_2i8_to_2i64( ; AVX-NEXT: [[TMP2:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sext.ll b/llvm/test/Transforms/SLPVectorizer/X86/sext.ll index 7d38aeb0c36357..744a50906cfc48 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/sext.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/sext.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE,SSE2 -; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE,SLM +; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE +; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX ; RUN: opt < %s 
-mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX @@ -11,20 +11,10 @@ ; define <2 x i64> @loadext_2i8_to_2i64(ptr %p0) { -; SSE2-LABEL: @loadext_2i8_to_2i64( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P0:%.*]], i64 1 -; SSE2-NEXT: [[I0:%.*]] = load i8, ptr [[P0]], align 1 -; SSE2-NEXT: [[I1:%.*]] = load i8, ptr [[P1]], align 1 -; SSE2-NEXT: [[X0:%.*]] = sext i8 [[I0]] to i64 -; SSE2-NEXT: [[X1:%.*]] = sext i8 [[I1]] to i64 -; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0 -; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1 -; SSE2-NEXT: ret <2 x i64> [[V1]] -; -; SLM-LABEL: @loadext_2i8_to_2i64( -; SLM-NEXT: [[TMP2:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1 -; SLM-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64> -; SLM-NEXT: ret <2 x i64> [[TMP3]] +; SSE-LABEL: @loadext_2i8_to_2i64( +; SSE-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1 +; SSE-NEXT: [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i64> +; SSE-NEXT: ret <2 x i64> [[TMP2]] ; ; AVX-LABEL: @loadext_2i8_to_2i64( ; AVX-NEXT: [[TMP2:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1 diff --git a/llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll index d1f6c41e5c30ec..27996a7064c0d8 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll @@ -12,13 +12,8 @@ define <2 x i64> @loadext_2i8_to_2i64(ptr %p0) { ; SSE2-LABEL: @loadext_2i8_to_2i64( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P0:%.*]], i64 1 -; SSE2-NEXT: [[I0:%.*]] = load i8, ptr [[P0]], align 1 -; SSE2-NEXT: [[I1:%.*]] = load i8, ptr [[P1]], align 1 -; SSE2-NEXT: [[X0:%.*]] = zext i8 [[I0]] to i64 -; SSE2-NEXT: [[X1:%.*]] = zext i8 
[[I1]] to i64 -; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> poison, i64 [[X0]], i32 0 -; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1 +; SSE2-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1 +; SSE2-NEXT: [[V1:%.*]] = zext <2 x i8> [[TMP1]] to <2 x i64> ; SSE2-NEXT: ret <2 x i64> [[V1]] ; ; SLM-LABEL: @loadext_2i8_to_2i64( diff --git a/llvm/test/Transforms/SLPVectorizer/X86/zext.ll b/llvm/test/Transforms/SLPVectorizer/X86/zext.ll index 829e4bab20ffaf..94870420f2bfc6 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/zext.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/zext.ll @@ -12,13 +12,8 @@ define <2 x i64> @loadext_2i8_to_2i64(ptr %p0) { ; SSE2-LABEL: @loadext_2i8_to_2i64( -; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P0:%.*]], i64 1 -; SSE2-NEXT: [[I0:%.*]] = load i8, ptr [[P0]], align 1 -; SSE2-NEXT: [[I1:%.*]] = load i8, ptr [[P1]], align 1 -; SSE2-NEXT: [[X0:%.*]] = zext i8 [[I0]] to i64 -; SSE2-NEXT: [[X1:%.*]] = zext i8 [[I1]] to i64 -; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0 -; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1 +; SSE2-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1 +; SSE2-NEXT: [[V1:%.*]] = zext <2 x i8> [[TMP1]] to <2 x i64> ; SSE2-NEXT: ret <2 x i64> [[V1]] ; ; SLM-LABEL: @loadext_2i8_to_2i64( From a7206a6fa32ada15578e3afddcc1480364c25f4c Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Mon, 1 Apr 2024 09:35:42 -0400 Subject: [PATCH 014/201] [RISCV] ReadStoreData is read later in the pipeline for SiFive7 (#86454) Store data is read later in the pipeline, so we use SiFive7AnyToGPRBypass to model that a store instruction can begin some cycles before that data is ready. 
--- llvm/lib/Target/RISCV/RISCVSchedSiFive7.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td index 3586d235bdbbb9..54a13889ee698a 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td @@ -950,7 +950,7 @@ def : InstRW<[WriteIALU], (instrs COPY)>; def : SiFive7AnyToGPRBypass; def : SiFive7AnyToGPRBypass; def : ReadAdvance; -def : ReadAdvance; +def : SiFive7AnyToGPRBypass; def : ReadAdvance; def : SiFive7AnyToGPRBypass; def : SiFive7AnyToGPRBypass; From 971b852546a7d96bc8887ced913724b884cf40df Mon Sep 17 00:00:00 2001 From: Jakub Kuderski Date: Mon, 1 Apr 2024 11:40:09 -0400 Subject: [PATCH 015/201] [mlir][NFC] Simplify type checks with isa predicates (#87183) For more context on isa predicates, see: https://github.com/llvm/llvm-project/pull/83753. --- .../Conversion/GPUCommon/GPUOpsLowering.cpp | 3 +- .../Conversion/VectorToGPU/VectorToGPU.cpp | 10 ++--- .../Affine/Analysis/AffineAnalysis.cpp | 3 +- .../Affine/Transforms/SuperVectorize.cpp | 5 +-- .../Dialect/Affine/Utils/LoopFusionUtils.cpp | 3 +- .../FuncBufferizableOpInterfaceImpl.cpp | 2 +- .../Transforms/OneShotModuleBufferize.cpp | 8 ++-- mlir/lib/Dialect/EmitC/IR/EmitC.cpp | 3 +- .../GPU/TransformOps/GPUTransformOps.cpp | 24 ++++------- .../GPU/Transforms/AsyncRegionRewriter.cpp | 5 +-- mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp | 6 +-- mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 6 +-- .../Linalg/Transforms/ElementwiseToLinalg.cpp | 3 +- .../Linalg/Transforms/Vectorization.cpp | 9 ++-- mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp | 2 +- .../Dialect/SPIRV/IR/CooperativeMatrixOps.cpp | 3 +- mlir/lib/Dialect/Shape/IR/Shape.cpp | 5 +-- mlir/lib/Dialect/Traits.cpp | 15 ++++--- .../lib/Dialect/Transform/IR/TransformOps.cpp | 2 +- mlir/lib/Dialect/Vector/IR/VectorOps.cpp | 9 ++-- .../Vector/Transforms/VectorDistribute.cpp | 41 +++++++++---------- 
mlir/lib/IR/AffineMap.cpp | 4 +- mlir/lib/IR/Operation.cpp | 4 +- mlir/lib/TableGen/Class.cpp | 4 +- mlir/lib/Target/Cpp/TranslateToCpp.cpp | 5 +-- .../Target/SPIRV/Serialization/Serializer.cpp | 8 ++-- .../Transforms/Utils/DialectConversion.cpp | 6 +-- .../Transforms/Utils/OneToNTypeConversion.cpp | 3 +- 28 files changed, 83 insertions(+), 118 deletions(-) diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp index 73d418cb841327..993c09b03c0fde 100644 --- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp +++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp @@ -545,8 +545,7 @@ LogicalResult impl::scalarizeVectorOp(Operation *op, ValueRange operands, ConversionPatternRewriter &rewriter, const LLVMTypeConverter &converter) { TypeRange operandTypes(operands); - if (llvm::none_of(operandTypes, - [](Type type) { return isa(type); })) { + if (llvm::none_of(operandTypes, llvm::IsaPred)) { return rewriter.notifyMatchFailure(op, "expected vector operand"); } if (op->getNumRegions() != 0 || op->getNumSuccessors() != 0) diff --git a/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp b/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp index 85fb8a539912f7..399c0450824ee5 100644 --- a/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp +++ b/mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp @@ -202,9 +202,7 @@ template static bool integerExtendSupportsMMAMatrixType(ExtOpTy extOp) { if (!isa(extOp.getOperand().getDefiningOp())) return false; - return llvm::all_of(extOp->getUsers(), [](Operation *user) { - return isa(user); - }); + return llvm::all_of(extOp->getUsers(), llvm::IsaPred); } static bool fpExtendSupportsMMAMatrixType(arith::ExtFOp extOp) { return true; } @@ -345,15 +343,13 @@ getSliceContract(Operation *op, static SetVector getOpToConvert(mlir::Operation *op, bool useNvGpu) { auto hasVectorDest = [](Operation *op) { - return llvm::any_of(op->getResultTypes(), - [](Type t) { return isa(t); }); + return 
llvm::any_of(op->getResultTypes(), llvm::IsaPred); }; BackwardSliceOptions backwardSliceOptions; backwardSliceOptions.filter = hasVectorDest; auto hasVectorSrc = [](Operation *op) { - return llvm::any_of(op->getOperandTypes(), - [](Type t) { return isa(t); }); + return llvm::any_of(op->getOperandTypes(), llvm::IsaPred); }; ForwardSliceOptions forwardSliceOptions; forwardSliceOptions.filter = hasVectorSrc; diff --git a/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp b/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp index 61244921bc38ac..69b3d41e17c2d4 100644 --- a/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp +++ b/mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp @@ -136,8 +136,7 @@ static bool isLocallyDefined(Value v, Operation *enclosingOp) { bool mlir::affine::isLoopMemoryParallel(AffineForOp forOp) { // Any memref-typed iteration arguments are treated as serializing. - if (llvm::any_of(forOp.getResultTypes(), - [](Type type) { return isa(type); })) + if (llvm::any_of(forOp.getResultTypes(), llvm::IsaPred)) return false; // Collect all load and store ops in loop nest rooted at 'forOp'. 
diff --git a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp index 46c7871f40232f..71e9648a5e00fa 100644 --- a/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp @@ -609,9 +609,8 @@ makePattern(const DenseSet ¶llelLoops, int vectorRank, } static NestedPattern &vectorTransferPattern() { - static auto pattern = affine::matcher::Op([](Operation &op) { - return isa(op); - }); + static auto pattern = affine::matcher::Op( + llvm::IsaPred); return pattern; } diff --git a/mlir/lib/Dialect/Affine/Utils/LoopFusionUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopFusionUtils.cpp index fb45528ad5e7d1..84ae4b52dcf4e8 100644 --- a/mlir/lib/Dialect/Affine/Utils/LoopFusionUtils.cpp +++ b/mlir/lib/Dialect/Affine/Utils/LoopFusionUtils.cpp @@ -211,8 +211,7 @@ static unsigned getMaxLoopDepth(ArrayRef srcOps, unsigned loopDepth = getInnermostCommonLoopDepth(targetDstOps); // Return common loop depth for loads if there are no store ops. - if (all_of(targetDstOps, - [&](Operation *op) { return isa(op); })) + if (all_of(targetDstOps, llvm::IsaPred)) return loopDepth; // Check dependences on all pairs of ops in 'targetDstOps' and store the diff --git a/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp index 4cdbbf35dc876b..053ea7935260a2 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp @@ -326,7 +326,7 @@ struct FuncOpInterface static bool supportsUnstructuredControlFlow() { return true; } bool hasTensorSemantics(Operation *op) const { - auto isaTensor = [](Type type) { return isa(type); }; + auto isaTensor = llvm::IsaPred; // A function has tensor semantics if it has tensor arguments/results. 
auto funcOp = cast(op); diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp index 33feea0b956ca0..0a4072605c265f 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp @@ -67,6 +67,7 @@ #include "mlir/Dialect/Bufferization/Transforms/Transforms.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/Operation.h" using namespace mlir; @@ -277,9 +278,10 @@ static void equivalenceAnalysis(func::FuncOp funcOp, /// Return "true" if the given function signature has tensor semantics. static bool hasTensorSignature(func::FuncOp funcOp) { - auto isaTensor = [](Type t) { return isa(t); }; - return llvm::any_of(funcOp.getFunctionType().getInputs(), isaTensor) || - llvm::any_of(funcOp.getFunctionType().getResults(), isaTensor); + return llvm::any_of(funcOp.getFunctionType().getInputs(), + llvm::IsaPred) || + llvm::any_of(funcOp.getFunctionType().getResults(), + llvm::IsaPred); } /// Store all functions of the `moduleOp` in `orderedFuncOps`, sorted by diff --git a/mlir/lib/Dialect/EmitC/IR/EmitC.cpp b/mlir/lib/Dialect/EmitC/IR/EmitC.cpp index ab5c418e844fbf..f4a9dc3ca509c8 100644 --- a/mlir/lib/Dialect/EmitC/IR/EmitC.cpp +++ b/mlir/lib/Dialect/EmitC/IR/EmitC.cpp @@ -224,8 +224,7 @@ LogicalResult emitc::CallOpaqueOp::verify() { } } - if (llvm::any_of(getResultTypes(), - [](Type type) { return isa(type); })) { + if (llvm::any_of(getResultTypes(), llvm::IsaPred)) { return emitOpError() << "cannot return array type"; } diff --git a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp index fc3a4375694588..b584f63f16e0aa 100644 --- a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp +++ 
b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp @@ -296,22 +296,14 @@ checkMappingAttributeTypes(std::optional transformOp, "scf.forall op requires a mapping attribute"); } - bool hasBlockMapping = - llvm::any_of(forallOp.getMapping().value(), [](Attribute attr) { - return isa(attr); - }); - bool hasWarpgroupMapping = - llvm::any_of(forallOp.getMapping().value(), [](Attribute attr) { - return isa(attr); - }); - bool hasWarpMapping = - llvm::any_of(forallOp.getMapping().value(), [](Attribute attr) { - return isa(attr); - }); - bool hasThreadMapping = - llvm::any_of(forallOp.getMapping().value(), [](Attribute attr) { - return isa(attr); - }); + bool hasBlockMapping = llvm::any_of(forallOp.getMapping().value(), + llvm::IsaPred); + bool hasWarpgroupMapping = llvm::any_of( + forallOp.getMapping().value(), llvm::IsaPred); + bool hasWarpMapping = llvm::any_of(forallOp.getMapping().value(), + llvm::IsaPred); + bool hasThreadMapping = llvm::any_of(forallOp.getMapping().value(), + llvm::IsaPred); int64_t countMappingTypes = 0; countMappingTypes += hasBlockMapping ? 1 : 0; countMappingTypes += hasWarpgroupMapping ? 1 : 0; diff --git a/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp b/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp index 40903f199afddd..b2fa3a99c53fc3 100644 --- a/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp @@ -232,9 +232,8 @@ struct GpuAsyncRegionPass::DeferWaitCallback { // control flow code. static bool areAllUsersExecuteOrAwait(Value token) { return !token.use_empty() && - llvm::all_of(token.getUsers(), [](Operation *user) { - return isa(user); - }); + llvm::all_of(token.getUsers(), + llvm::IsaPred); } // Add the `asyncToken` as dependency as needed after `op`. 
diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index 3ba6ac6ccc8142..e5c19a916392e1 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -2786,10 +2786,8 @@ LogicalResult LLVM::BitcastOp::verify() { if (!resultType) return success(); - auto isVector = [](Type type) { - return llvm::isa( - type); - }; + auto isVector = + llvm::IsaPred; // Due to bitcast requiring both operands to be of the same size, it is not // possible for only one of the two to be a pointer of vectors. diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index 6954eee93efd14..2d7219fef87c64 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -28,6 +28,7 @@ #include "mlir/IR/AffineExprVisitor.h" #include "mlir/IR/AffineMap.h" #include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/BuiltinTypeInterfaces.h" #include "mlir/IR/Matchers.h" #include "mlir/IR/OpImplementation.h" #include "mlir/IR/OperationSupport.h" @@ -119,8 +120,7 @@ static void fillStructuredOpRegion(OpBuilder &opBuilder, Region ®ion, TypeRange inputTypes, TypeRange outputTypes, ArrayRef attrs, RegionBuilderFn regionBuilder) { - assert(llvm::all_of(outputTypes, - [](Type t) { return llvm::isa(t); })); + assert(llvm::all_of(outputTypes, llvm::IsaPred)); SmallVector argTypes; SmallVector argLocs; @@ -162,7 +162,7 @@ static void buildStructuredOp(OpBuilder &b, OperationState &state, resultTensorTypes.value_or(TypeRange()); if (!resultTensorTypes) copy_if(outputs.getTypes(), std::back_inserter(derivedResultTypes), - [](Type type) { return llvm::isa(type); }); + llvm::IsaPred); state.addOperands(inputs); state.addOperands(outputs); diff --git a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseToLinalg.cpp b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseToLinalg.cpp index 5508aaf9d87537..28d6752fc2d388 100644 --- 
a/mlir/lib/Dialect/Linalg/Transforms/ElementwiseToLinalg.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ElementwiseToLinalg.cpp @@ -27,8 +27,7 @@ static bool isElementwiseMappableOpOnRankedTensors(Operation *op) { // TODO: The conversion pattern can be made to work for `any_of` here, but // it's more complex as it requires tracking which operands are scalars. - return llvm::all_of(op->getOperandTypes(), - [](Type type) { return isa(type); }); + return llvm::all_of(op->getOperandTypes(), llvm::IsaPred); } /// Given `op` assumed `isElementwiseMappableOpOnRankedTensors`, iterate over diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index c74ab1e6448bec..25785653a71675 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -3537,15 +3537,14 @@ struct Conv1DGenerator // Otherwise, check for one or zero `ext` predecessor. The `ext` operands // must be block arguments or extension of block arguments. bool setOperKind(Operation *reduceOp) { - int numBlockArguments = llvm::count_if( - reduceOp->getOperands(), [](Value v) { return isa(v); }); + int numBlockArguments = + llvm::count_if(reduceOp->getOperands(), llvm::IsaPred); switch (numBlockArguments) { case 1: { // Will be convolution if feeder is a MulOp. // Otherwise, if it can be pooling. 
- auto feedValIt = llvm::find_if(reduceOp->getOperands(), [](Value v) { - return !isa(v); - }); + auto feedValIt = llvm::find_if_not(reduceOp->getOperands(), + llvm::IsaPred); Operation *feedOp = (*feedValIt).getDefiningOp(); if (isCastOfBlockArgument(feedOp)) { oper = Pool; diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp index c09a3403f9a3e3..9ba96e4be7d1fc 100644 --- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -457,7 +457,7 @@ static ParseResult parseRegions(OpAsmParser &parser, OperationState &state, } static bool isComputeOperation(Operation *op) { - return isa(op) || isa(op); + return isa(op); } namespace { diff --git a/mlir/lib/Dialect/SPIRV/IR/CooperativeMatrixOps.cpp b/mlir/lib/Dialect/SPIRV/IR/CooperativeMatrixOps.cpp index d532d466334a56..2ff3efdc96a7f8 100644 --- a/mlir/lib/Dialect/SPIRV/IR/CooperativeMatrixOps.cpp +++ b/mlir/lib/Dialect/SPIRV/IR/CooperativeMatrixOps.cpp @@ -125,8 +125,7 @@ LogicalResult KHRCooperativeMatrixMulAddOp::verify() { if (getMatrixOperands()) { Type elementTypes[] = {typeA.getElementType(), typeB.getElementType(), typeC.getElementType()}; - if (!llvm::all_of(elementTypes, - [](Type ty) { return isa(ty); })) { + if (!llvm::all_of(elementTypes, llvm::IsaPred)) { return emitOpError("Matrix Operands require all matrix element types to " "be Integer Types"); } diff --git a/mlir/lib/Dialect/Shape/IR/Shape.cpp b/mlir/lib/Dialect/Shape/IR/Shape.cpp index f5a3717f815de5..58c3f4c334577c 100644 --- a/mlir/lib/Dialect/Shape/IR/Shape.cpp +++ b/mlir/lib/Dialect/Shape/IR/Shape.cpp @@ -65,9 +65,8 @@ LogicalResult shape::getShapeVec(Value input, } static bool isErrorPropagationPossible(TypeRange operandTypes) { - return llvm::any_of(operandTypes, [](Type ty) { - return llvm::isa(ty); - }); + return llvm::any_of(operandTypes, + llvm::IsaPred); } static LogicalResult verifySizeOrIndexOp(Operation *op) { diff --git a/mlir/lib/Dialect/Traits.cpp 
b/mlir/lib/Dialect/Traits.cpp index d4e0f8a3137053..2efc157ce79617 100644 --- a/mlir/lib/Dialect/Traits.cpp +++ b/mlir/lib/Dialect/Traits.cpp @@ -188,9 +188,8 @@ Type OpTrait::util::getBroadcastedType(Type type1, Type type2, /// Returns a tuple corresponding to whether range has tensor or vector type. template static std::tuple hasTensorOrVectorType(iterator_range types) { - return std::make_tuple( - llvm::any_of(types, [](Type t) { return isa(t); }), - llvm::any_of(types, [](Type t) { return isa(t); })); + return {llvm::any_of(types, llvm::IsaPred), + llvm::any_of(types, llvm::IsaPred)}; } static bool isCompatibleInferredReturnShape(ArrayRef inferred, @@ -202,7 +201,7 @@ static bool isCompatibleInferredReturnShape(ArrayRef inferred, }; if (inferred.size() != existing.size()) return false; - for (auto [inferredDim, existingDim] : llvm::zip(inferred, existing)) + for (auto [inferredDim, existingDim] : llvm::zip_equal(inferred, existing)) if (!isCompatible(inferredDim, existingDim)) return false; return true; @@ -238,8 +237,8 @@ LogicalResult OpTrait::impl::verifyCompatibleOperandBroadcast(Operation *op) { std::get<1>(resultsHasTensorVectorType))) return op->emitError("cannot broadcast vector with tensor"); - auto rankedOperands = make_filter_range( - op->getOperandTypes(), [](Type t) { return isa(t); }); + auto rankedOperands = + make_filter_range(op->getOperandTypes(), llvm::IsaPred); // If all operands are unranked, then all result shapes are possible. if (rankedOperands.empty()) @@ -257,8 +256,8 @@ LogicalResult OpTrait::impl::verifyCompatibleOperandBroadcast(Operation *op) { return op->emitOpError("operands don't have broadcast-compatible shapes"); } - auto rankedResults = make_filter_range( - op->getResultTypes(), [](Type t) { return isa(t); }); + auto rankedResults = + make_filter_range(op->getResultTypes(), llvm::IsaPred); // If all of the results are unranked then no further verification. 
if (rankedResults.empty()) diff --git a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp index 578b2492bbab46..c8d06ba157b904 100644 --- a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp +++ b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp @@ -819,7 +819,7 @@ bool transform::CastOp::areCastCompatible(TypeRange inputs, TypeRange outputs) { assert(outputs.size() == 1 && "expected one output"); return llvm::all_of( std::initializer_list{inputs.front(), outputs.front()}, - [](Type ty) { return isa(ty); }); + llvm::IsaPred); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp index e566bfacf37984..3e6425879cc67f 100644 --- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp +++ b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp @@ -898,13 +898,12 @@ static LogicalResult verifyOutputShape( AffineMap resMap = op.getIndexingMapsArray()[2]; auto extentsMap = AffineMap::get(/*dimCount=*/extents.size(), - /*symCount=*/0, extents, ctx); + /*symbolCount=*/0, extents, ctx); // Compose the resMap with the extentsMap, which is a constant map. AffineMap expectedMap = simplifyAffineMap(resMap.compose(extentsMap)); - assert( - llvm::all_of(expectedMap.getResults(), - [](AffineExpr e) { return isa(e); }) && - "expected constant extent along all dimensions."); + assert(llvm::all_of(expectedMap.getResults(), + llvm::IsaPred) && + "expected constant extent along all dimensions."); // Extract the expected shape and build the type. 
auto expectedShape = llvm::to_vector<4>( llvm::map_range(expectedMap.getResults(), [](AffineExpr e) { diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp index b3ab4a916121e3..a67e03e85f7145 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/VectorDistribute.cpp @@ -598,9 +598,8 @@ struct WarpOpTransferWrite : public OpRewritePattern { } // Do not process warp ops that contain only TransferWriteOps. - if (llvm::all_of(warpOp.getOps(), [](Operation &op) { - return isa(&op); - })) + if (llvm::all_of(warpOp.getOps(), + llvm::IsaPred)) return failure(); SmallVector yieldValues = {writeOp.getVector()}; @@ -746,8 +745,8 @@ struct WarpOpConstant : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(WarpExecuteOnLane0Op warpOp, PatternRewriter &rewriter) const override { - OpOperand *yieldOperand = getWarpResult( - warpOp, [](Operation *op) { return isa(op); }); + OpOperand *yieldOperand = + getWarpResult(warpOp, llvm::IsaPred); if (!yieldOperand) return failure(); auto constantOp = yieldOperand->get().getDefiningOp(); @@ -1060,8 +1059,8 @@ struct WarpOpBroadcast : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(WarpExecuteOnLane0Op warpOp, PatternRewriter &rewriter) const override { - OpOperand *operand = getWarpResult( - warpOp, [](Operation *op) { return isa(op); }); + OpOperand *operand = + getWarpResult(warpOp, llvm::IsaPred); if (!operand) return failure(); unsigned int operandNumber = operand->getOperandNumber(); @@ -1097,8 +1096,8 @@ struct WarpOpShapeCast : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(WarpExecuteOnLane0Op warpOp, PatternRewriter &rewriter) const override { - OpOperand *operand = getWarpResult( - warpOp, [](Operation *op) { return isa(op); }); + OpOperand *operand = + 
getWarpResult(warpOp, llvm::IsaPred); if (!operand) return failure(); @@ -1156,8 +1155,8 @@ struct WarpOpCreateMask : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(WarpExecuteOnLane0Op warpOp, PatternRewriter &rewriter) const override { - OpOperand *yieldOperand = getWarpResult( - warpOp, [](Operation *op) { return isa(op); }); + OpOperand *yieldOperand = + getWarpResult(warpOp, llvm::IsaPred); if (!yieldOperand) return failure(); @@ -1222,8 +1221,8 @@ struct WarpOpExtract : public OpRewritePattern { using OpRewritePattern::OpRewritePattern; LogicalResult matchAndRewrite(WarpExecuteOnLane0Op warpOp, PatternRewriter &rewriter) const override { - OpOperand *operand = getWarpResult( - warpOp, [](Operation *op) { return isa(op); }); + OpOperand *operand = + getWarpResult(warpOp, llvm::IsaPred); if (!operand) return failure(); unsigned int operandNumber = operand->getOperandNumber(); @@ -1325,9 +1324,8 @@ struct WarpOpExtractElement : public OpRewritePattern { warpShuffleFromIdxFn(std::move(fn)) {} LogicalResult matchAndRewrite(WarpExecuteOnLane0Op warpOp, PatternRewriter &rewriter) const override { - OpOperand *operand = getWarpResult(warpOp, [](Operation *op) { - return isa(op); - }); + OpOperand *operand = + getWarpResult(warpOp, llvm::IsaPred); if (!operand) return failure(); unsigned int operandNumber = operand->getOperandNumber(); @@ -1422,8 +1420,8 @@ struct WarpOpInsertElement : public OpRewritePattern { LogicalResult matchAndRewrite(WarpExecuteOnLane0Op warpOp, PatternRewriter &rewriter) const override { - OpOperand *operand = getWarpResult( - warpOp, [](Operation *op) { return isa(op); }); + OpOperand *operand = + getWarpResult(warpOp, llvm::IsaPred); if (!operand) return failure(); unsigned int operandNumber = operand->getOperandNumber(); @@ -1503,8 +1501,7 @@ struct WarpOpInsert : public OpRewritePattern { LogicalResult matchAndRewrite(WarpExecuteOnLane0Op warpOp, PatternRewriter &rewriter) const override { 
- OpOperand *operand = getWarpResult( - warpOp, [](Operation *op) { return isa(op); }); + OpOperand *operand = getWarpResult(warpOp, llvm::IsaPred); if (!operand) return failure(); unsigned int operandNumber = operand->getOperandNumber(); @@ -1808,8 +1805,8 @@ struct WarpOpReduction : public OpRewritePattern { LogicalResult matchAndRewrite(WarpExecuteOnLane0Op warpOp, PatternRewriter &rewriter) const override { - OpOperand *yieldOperand = getWarpResult( - warpOp, [](Operation *op) { return isa(op); }); + OpOperand *yieldOperand = + getWarpResult(warpOp, llvm::IsaPred); if (!yieldOperand) return failure(); diff --git a/mlir/lib/IR/AffineMap.cpp b/mlir/lib/IR/AffineMap.cpp index 00a0f05b633303..6cdc2682753fc7 100644 --- a/mlir/lib/IR/AffineMap.cpp +++ b/mlir/lib/IR/AffineMap.cpp @@ -359,9 +359,7 @@ bool AffineMap::isSingleConstant() const { } bool AffineMap::isConstant() const { - return llvm::all_of(getResults(), [](AffineExpr expr) { - return isa(expr); - }); + return llvm::all_of(getResults(), llvm::IsaPred); } int64_t AffineMap::getSingleConstantResult() const { diff --git a/mlir/lib/IR/Operation.cpp b/mlir/lib/IR/Operation.cpp index d6d59837d48ac8..ca5ff9f72e3e29 100644 --- a/mlir/lib/IR/Operation.cpp +++ b/mlir/lib/IR/Operation.cpp @@ -1288,9 +1288,7 @@ LogicalResult OpTrait::impl::verifyNoRegionArguments(Operation *op) { } LogicalResult OpTrait::impl::verifyElementwise(Operation *op) { - auto isMappableType = [](Type type) { - return llvm::isa(type); - }; + auto isMappableType = llvm::IsaPred; auto resultMappableTypes = llvm::to_vector<1>( llvm::make_filter_range(op->getResultTypes(), isMappableType)); auto operandMappableTypes = llvm::to_vector<2>( diff --git a/mlir/lib/TableGen/Class.cpp b/mlir/lib/TableGen/Class.cpp index 9092adcc627c08..fedf64fd96b0d4 100644 --- a/mlir/lib/TableGen/Class.cpp +++ b/mlir/lib/TableGen/Class.cpp @@ -369,9 +369,7 @@ void Class::finalize() { Visibility Class::getLastVisibilityDecl() const { auto reverseDecls = 
llvm::reverse(declarations); - auto it = llvm::find_if(reverseDecls, [](auto &decl) { - return isa(decl); - }); + auto it = llvm::find_if(reverseDecls, llvm::IsaPred); return it == reverseDecls.end() ? (isStruct ? Visibility::Public : Visibility::Private) : cast(**it).getVisibility(); diff --git a/mlir/lib/Target/Cpp/TranslateToCpp.cpp b/mlir/lib/Target/Cpp/TranslateToCpp.cpp index 95c7af2f07be46..0b07b4b06dfc71 100644 --- a/mlir/lib/Target/Cpp/TranslateToCpp.cpp +++ b/mlir/lib/Target/Cpp/TranslateToCpp.cpp @@ -1000,8 +1000,7 @@ static LogicalResult printOperation(CppEmitter &emitter, "with multiple blocks needs variables declared at top"); } - if (llvm::any_of(functionOp.getResultTypes(), - [](Type type) { return isa(type); })) { + if (llvm::any_of(functionOp.getResultTypes(), llvm::IsaPred)) { return functionOp.emitOpError() << "cannot emit array type as result type"; } @@ -1576,7 +1575,7 @@ LogicalResult CppEmitter::emitTypes(Location loc, ArrayRef types) { } LogicalResult CppEmitter::emitTupleType(Location loc, ArrayRef types) { - if (llvm::any_of(types, [](Type type) { return isa(type); })) { + if (llvm::any_of(types, llvm::IsaPred)) { return emitError(loc, "cannot emit tuple of array type"); } os << "std::tuple<"; diff --git a/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp b/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp index 4a4e878d8af915..9a74ac115e9555 100644 --- a/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp +++ b/mlir/lib/Target/SPIRV/Serialization/Serializer.cpp @@ -1031,9 +1031,9 @@ Serializer::processBlock(Block *block, bool omitLabel, // into multiple basic blocks. If that's the case, we need to emit the merge // right now and then create new blocks for further serialization of the ops // in this block. 
- if (emitMerge && llvm::any_of(block->getOperations(), [](Operation &op) { - return isa(op); - })) { + if (emitMerge && + llvm::any_of(block->getOperations(), + llvm::IsaPred)) { if (failed(emitMerge())) return failure(); emitMerge = nullptr; @@ -1045,7 +1045,7 @@ Serializer::processBlock(Block *block, bool omitLabel, } // Process each op in this block except the terminator. - for (auto &op : llvm::make_range(block->begin(), std::prev(block->end()))) { + for (Operation &op : llvm::drop_end(*block)) { if (failed(processOperation(&op))) return failure(); } diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index 2ec0b964b304f6..8671c1008902a0 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -2825,9 +2825,9 @@ static void computeNecessaryMaterializations( } // Check to see if this is an argument materialization. - auto isBlockArg = [](Value v) { return isa(v); }; - if (llvm::any_of(op->getOperands(), isBlockArg) || - llvm::any_of(inverseMapping[op->getResult(0)], isBlockArg)) { + if (llvm::any_of(op->getOperands(), llvm::IsaPred) || + llvm::any_of(inverseMapping[op->getResult(0)], + llvm::IsaPred)) { mat->setMaterializationKind(MaterializationKind::Argument); } diff --git a/mlir/lib/Transforms/Utils/OneToNTypeConversion.cpp b/mlir/lib/Transforms/Utils/OneToNTypeConversion.cpp index 7cb957d5ec29ea..fef9d8eb0fef74 100644 --- a/mlir/lib/Transforms/Utils/OneToNTypeConversion.cpp +++ b/mlir/lib/Transforms/Utils/OneToNTypeConversion.cpp @@ -392,8 +392,7 @@ applyPartialOneToNConversion(Operation *op, OneToNTypeConverter &typeConverter, // Argument materialization. 
assert(castKind == getCastKindName(CastKind::Argument) && "unexpected value of cast kind attribute"); - assert(llvm::all_of(operands, - [&](Value v) { return isa(v); })); + assert(llvm::all_of(operands, llvm::IsaPred)); maybeResult = typeConverter.materializeArgumentConversion( rewriter, castOp->getLoc(), resultTypes.front(), castOp.getOperands()); From 1351d17826e1efa3da3b29b6e345d44cb0ce3bc9 Mon Sep 17 00:00:00 2001 From: Mingming Liu Date: Mon, 1 Apr 2024 08:52:35 -0700 Subject: [PATCH 016/201] [InstrFDO][TypeProf] Implement binary instrumentation and profile read/write (#66825) (The profile format change is split into a standalone change into https://github.com/llvm/llvm-project/pull/81691) * For InstrFDO value profiling, implement instrumentation and lowering for virtual table address. * This is controlled by `-enable-vtable-value-profiling` and off by default. * When the option is on, raw profiles will carry serialized `VTableProfData` structs and compressed vtables as payloads. * Implement profile reader and writer support * Raw profile reader is used by `llvm-profdata` but not compiler. Raw profile reader will construct InstrProfSymtab with symbol names, and map profiled runtime address to vtable symbols. * Indexed profile reader is used by `llvm-profdata` and compiler. When initialized, the reader stores a pointer to the beginning of in-memory compressed vtable names and the length of string. When used in `llvm-profdata`, reader decompress the string to show symbols of a profiled site. When used in compiler, string decompression doesn't happen since IR is used to construct InstrProfSymtab. * Indexed profile writer collects the list of vtable names, and stores that to index profiles. * Text profile reader and writer support are added but mostly follow the implementation for indirect-call value type. * `llvm-profdata show -show-vtables ` is implemented. 
rfc in https://discourse.llvm.org/t/rfc-dynamic-type-profiling-and-optimizations-in-llvm/74600#pick-instrumentation-points-and-instrument-runtime-types-7 --- .../Linux/instrprof-vtable-value-prof.cpp | 142 ++++ .../llvm/Analysis/IndirectCallVisitor.h | 62 +- llvm/include/llvm/ProfileData/InstrProf.h | 128 +++- .../llvm/ProfileData/InstrProfReader.h | 6 + .../llvm/ProfileData/InstrProfWriter.h | 4 + llvm/lib/ProfileData/InstrProf.cpp | 66 +- llvm/lib/ProfileData/InstrProfReader.cpp | 33 +- llvm/lib/ProfileData/InstrProfWriter.cpp | 29 +- .../Instrumentation/InstrProfiling.cpp | 157 ++++- .../Instrumentation/PGOInstrumentation.cpp | 14 + .../Instrumentation/ValueProfilePlugins.inc | 35 +- .../PGOProfile/vtable_prof_unsupported.ll | 34 + .../Transforms/PGOProfile/vtable_profile.ll | 98 +++ .../Inputs/vtable-value-prof.proftext | 74 +++ .../llvm-profdata/vtable-value-prof.test | 83 +++ llvm/tools/llvm-profdata/llvm-profdata.cpp | 26 + llvm/unittests/ProfileData/InstrProfTest.cpp | 620 ++++++++++++++---- 17 files changed, 1419 insertions(+), 192 deletions(-) create mode 100644 compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp create mode 100644 llvm/test/Transforms/PGOProfile/vtable_prof_unsupported.ll create mode 100644 llvm/test/Transforms/PGOProfile/vtable_profile.ll create mode 100644 llvm/test/tools/llvm-profdata/Inputs/vtable-value-prof.proftext create mode 100644 llvm/test/tools/llvm-profdata/vtable-value-prof.test diff --git a/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp b/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp new file mode 100644 index 00000000000000..5c8426b40892f6 --- /dev/null +++ b/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp @@ -0,0 +1,142 @@ +// REQUIRES: lld-available + +// RUN: %clangxx_pgogen -fuse-ld=lld -O2 -g -fprofile-generate=. 
-mllvm -enable-vtable-value-profiling %s -o %t-test +// RUN: env LLVM_PROFILE_FILE=%t-test.profraw %t-test + +// Show vtable profiles from raw profile. +// RUN: llvm-profdata show --function=main --ic-targets --show-vtables %t-test.profraw | FileCheck %s --check-prefixes=COMMON,RAW + +// Generate indexed profile from raw profile and show the data. +// RUN: llvm-profdata merge %t-test.profraw -o %t-test.profdata +// RUN: llvm-profdata show --function=main --ic-targets --show-vtables %t-test.profdata | FileCheck %s --check-prefixes=COMMON,INDEXED + +// Generate text profile from raw and indexed profiles respectively and show the data. +// RUN: llvm-profdata merge --text %t-test.profraw -o %t-raw.proftext +// RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text %t-raw.proftext | FileCheck %s --check-prefix=ICTEXT +// RUN: llvm-profdata merge --text %t-test.profdata -o %t-indexed.proftext +// RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text %t-indexed.proftext | FileCheck %s --check-prefix=ICTEXT + +// Generate indexed profile from text profiles and show the data +// RUN: llvm-profdata merge --binary %t-raw.proftext -o %t-text.profraw +// RUN: llvm-profdata show --function=main --ic-targets --show-vtables %t-text.profraw | FileCheck %s --check-prefixes=COMMON,INDEXED +// RUN: llvm-profdata merge --binary %t-indexed.proftext -o %t-text.profdata +// RUN: llvm-profdata show --function=main --ic-targets --show-vtables %t-text.profdata | FileCheck %s --check-prefixes=COMMON,INDEXED + +// COMMON: Counters: +// COMMON-NEXT: main: +// COMMON-NEXT: Hash: 0x0f9a16fe6d398548 +// COMMON-NEXT: Counters: 2 +// COMMON-NEXT: Indirect Call Site Count: 2 +// COMMON-NEXT: Number of instrumented vtables: 2 +// RAW: Indirect Target Results: +// RAW-NEXT: [ 0, _ZN8Derived15func1Eii, 250 ] (25.00%) +// RAW-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func1Eii, 750 ] (75.00%) +// RAW-NEXT: [ 1, 
_ZN8Derived15func2Eii, 250 ] (25.00%) +// RAW-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func2Eii, 750 ] (75.00%) +// RAW-NEXT: VTable Results: +// RAW-NEXT: [ 0, _ZTV8Derived1, 250 ] (25.00%) +// RAW-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%) +// RAW-NEXT: [ 1, _ZTV8Derived1, 250 ] (25.00%) +// RAW-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%) +// INDEXED: Indirect Target Results: +// INDEXED-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func1Eii, 750 ] (75.00%) +// INDEXED-NEXT: [ 0, _ZN8Derived15func1Eii, 250 ] (25.00%) +// INDEXED-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func2Eii, 750 ] (75.00%) +// INDEXED-NEXT: [ 1, _ZN8Derived15func2Eii, 250 ] (25.00%) +// INDEXED-NEXT: VTable Results: +// INDEXED-NEXT: [ 0, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%) +// INDEXED-NEXT: [ 0, _ZTV8Derived1, 250 ] (25.00%) +// INDEXED-NEXT: [ 1, {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%) +// INDEXED-NEXT: [ 1, _ZTV8Derived1, 250 ] (25.00%) +// COMMON: Instrumentation level: IR entry_first = 0 +// COMMON-NEXT: Functions shown: 1 +// COMMON-NEXT: Total functions: 6 +// COMMON-NEXT: Maximum function count: 1000 +// COMMON-NEXT: Maximum internal block count: 250 +// COMMON-NEXT: Statistics for indirect call sites profile: +// COMMON-NEXT: Total number of sites: 2 +// COMMON-NEXT: Total number of sites with values: 2 +// COMMON-NEXT: Total number of profiled values: 4 +// COMMON-NEXT: Value sites histogram: +// COMMON-NEXT: NumTargets, SiteCount +// COMMON-NEXT: 2, 2 +// COMMON-NEXT: Statistics for vtable profile: +// COMMON-NEXT: Total number of sites: 2 +// COMMON-NEXT: Total number of sites with values: 2 +// COMMON-NEXT: Total number of profiled values: 4 +// COMMON-NEXT: Value sites histogram: +// 
COMMON-NEXT: NumTargets, SiteCount +// COMMON-NEXT: 2, 2 + +// ICTEXT: :ir +// ICTEXT: main +// ICTEXT: # Func Hash: +// ICTEXT: 1124236338992350536 +// ICTEXT: # Num Counters: +// ICTEXT: 2 +// ICTEXT: # Counter Values: +// ICTEXT: 1000 +// ICTEXT: 1 +// ICTEXT: # Num Value Kinds: +// ICTEXT: 2 +// ICTEXT: # ValueKind = IPVK_IndirectCallTarget: +// ICTEXT: 0 +// ICTEXT: # NumValueSites: +// ICTEXT: 2 +// ICTEXT: 2 +// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func1Eii:750 +// ICTEXT: _ZN8Derived15func1Eii:250 +// ICTEXT: 2 +// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZN12_GLOBAL__N_18Derived25func2Eii:750 +// ICTEXT: _ZN8Derived15func2Eii:250 +// ICTEXT: # ValueKind = IPVK_VTableTarget: +// ICTEXT: 2 +// ICTEXT: # NumValueSites: +// ICTEXT: 2 +// ICTEXT: 2 +// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E:750 +// ICTEXT: _ZTV8Derived1:250 +// ICTEXT: 2 +// ICTEXT: {{.*}}instrprof-vtable-value-prof.cpp;_ZTVN12_GLOBAL__N_18Derived2E:750 +// ICTEXT: _ZTV8Derived1:250 + +#include +#include +class Base { +public: + virtual int func1(int a, int b) = 0; + virtual int func2(int a, int b) = 0; +}; +class Derived1 : public Base { +public: + int func1(int a, int b) override { return a + b; } + + int func2(int a, int b) override { return a * b; } +}; +namespace { +class Derived2 : public Base { +public: + int func1(int a, int b) override { return a - b; } + + int func2(int a, int b) override { return a * (a - b); } +}; +} // namespace +__attribute__((noinline)) Base *createType(int a) { + Base *base = nullptr; + if (a % 4 == 0) + base = new Derived1(); + else + base = new Derived2(); + return base; +} +int main(int argc, char **argv) { + int sum = 0; + for (int i = 0; i < 1000; i++) { + int a = rand(); + int b = rand(); + Base *ptr = createType(i); + sum += ptr->func1(a, b) + ptr->func2(b, a); + } + printf("sum is %d\n", sum); + return 0; +} diff --git a/llvm/include/llvm/Analysis/IndirectCallVisitor.h 
b/llvm/include/llvm/Analysis/IndirectCallVisitor.h index 0825e19ecd2d24..50815f4e3e8398 100644 --- a/llvm/include/llvm/Analysis/IndirectCallVisitor.h +++ b/llvm/include/llvm/Analysis/IndirectCallVisitor.h @@ -16,23 +16,75 @@ #include namespace llvm { -// Visitor class that finds all indirect call. +// Visitor class that finds indirect calls or instructions that gives vtable +// value, depending on Type. struct PGOIndirectCallVisitor : public InstVisitor { + enum class InstructionType { + kIndirectCall = 0, + kVTableVal = 1, + }; std::vector IndirectCalls; - PGOIndirectCallVisitor() = default; + std::vector ProfiledAddresses; + PGOIndirectCallVisitor(InstructionType Type) : Type(Type) {} void visitCallBase(CallBase &Call) { - if (Call.isIndirectCall()) + if (!Call.isIndirectCall()) + return; + + if (Type == InstructionType::kIndirectCall) { IndirectCalls.push_back(&Call); + return; + } + + assert(Type == InstructionType::kVTableVal && "Control flow guaranteed"); + + LoadInst *LI = dyn_cast(Call.getCalledOperand()); + // The code pattern to look for + // + // %vtable = load ptr, ptr %b + // %vfn = getelementptr inbounds ptr, ptr %vtable, i64 1 + // %2 = load ptr, ptr %vfn + // %call = tail call i32 %2(ptr %b) + // + // %vtable is the vtable address value to profile, and + // %2 is the indirect call target address to profile. + if (LI != nullptr) { + Value *Ptr = LI->getPointerOperand(); + Value *VTablePtr = Ptr->stripInBoundsConstantOffsets(); + // This is a heuristic to find address feeding instructions. + // FIXME: Add support in the frontend so LLVM type intrinsics are + // emitted without LTO. This way, added intrinsics could filter + // non-vtable instructions and reduce instrumentation overhead. + // Since a non-vtable profiled address is not within the address + // range of vtable objects, it's stored as zero in indexed profiles. 
+ // A pass that looks up symbol with an zero hash will (almost) always + // find nullptr and skip the actual transformation (e.g., comparison + // of symbols). So the performance overhead from non-vtable profiled + // address is negligible if exists at all. Comparing loaded address + // with symbol address guarantees correctness. + if (VTablePtr != nullptr && isa(VTablePtr)) + ProfiledAddresses.push_back(cast(VTablePtr)); + } } + +private: + InstructionType Type; }; -// Helper function that finds all indirect call sites. inline std::vector findIndirectCalls(Function &F) { - PGOIndirectCallVisitor ICV; + PGOIndirectCallVisitor ICV( + PGOIndirectCallVisitor::InstructionType::kIndirectCall); ICV.visit(F); return ICV.IndirectCalls; } + +inline std::vector findVTableAddrs(Function &F) { + PGOIndirectCallVisitor ICV( + PGOIndirectCallVisitor::InstructionType::kVTableVal); + ICV.visit(F); + return ICV.ProfiledAddresses; +} + } // namespace llvm #endif diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index 3a71c02d6d5cfb..fd66c4ed948f36 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -17,6 +17,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitmaskEnum.h" +#include "llvm/ADT/IntervalMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" @@ -89,6 +90,9 @@ inline StringRef getInstrProfValueProfMemOpFuncName() { /// Return the name prefix of variables containing instrumented function names. inline StringRef getInstrProfNameVarPrefix() { return "__profn_"; } +/// Return the name prefix of variables containing virtual table profile data. +inline StringRef getInstrProfVTableVarPrefix() { return "__profvt_"; } + /// Return the name prefix of variables containing per-function control data. 
inline StringRef getInstrProfDataVarPrefix() { return "__profd_"; } @@ -106,9 +110,9 @@ inline StringRef getInstrProfVNodesVarName() { return "__llvm_prf_vnodes"; } /// Return the name of the variable holding the strings (possibly compressed) /// of all function's PGO names. -inline StringRef getInstrProfNamesVarName() { - return "__llvm_prf_nm"; -} +inline StringRef getInstrProfNamesVarName() { return "__llvm_prf_nm"; } + +inline StringRef getInstrProfVTableNamesVarName() { return "__llvm_prf_vnm"; } /// Return the name of a covarage mapping variable (internal linkage) /// for each instrumented source module. Such variables are allocated @@ -140,7 +144,8 @@ inline StringRef getInstrProfRegFuncName() { return "__llvm_profile_register_function"; } -/// Return the name of the runtime interface that registers the PGO name strings. +/// Return the name of the runtime interface that registers the PGO name +/// strings. inline StringRef getInstrProfNamesRegFuncName() { return "__llvm_profile_register_names_function"; } @@ -246,6 +251,9 @@ Error collectGlobalObjectNameStrings(ArrayRef NameStrs, Error collectPGOFuncNameStrings(ArrayRef NameVars, std::string &Result, bool doCompression = true); +Error collectVTableStrings(ArrayRef VTables, + std::string &Result, bool doCompression); + /// Check if INSTR_PROF_RAW_VERSION_VAR is defined. This global is only being /// set in IR PGO compilation. bool isIRPGOFlagSet(const Module *M); @@ -288,6 +296,8 @@ inline StringRef getPGOFuncNameMetadataName() { return "PGOFuncName"; } /// Return the PGOFuncName meta data associated with a function. MDNode *getPGOFuncNameMetadata(const Function &F); +std::string getPGOName(const GlobalVariable &V, bool InLTO = false); + /// Create the PGOFuncName meta data if PGOFuncName is different from /// function's raw name. This should only apply to internal linkage functions /// declared by users only. 
@@ -295,7 +305,7 @@ void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName); /// Check if we can use Comdat for profile variables. This will eliminate /// the duplicated profile variables for Comdat functions. -bool needsComdatForCounter(const Function &F, const Module &M); +bool needsComdatForCounter(const GlobalObject &GV, const Module &M); /// An enum describing the attributes of an instrumented profile. enum class InstrProfKind { @@ -431,23 +441,34 @@ class InstrProfSymtab { using AddrHashMap = std::vector>; private: + using AddrIntervalMap = + IntervalMap>; StringRef Data; uint64_t Address = 0; - // Unique name strings. + // Unique name strings. Used to ensure entries in MD5NameMap (a vector that's + // going to be sorted) has unique MD5 keys in the first place. StringSet<> NameTab; + // Records the unique virtual table names. This is used by InstrProfWriter to + // write out an on-disk chained hash table of virtual table names. + // InstrProfWriter stores per function profile data (keyed by function names) + // so it doesn't use a StringSet for function names. + StringSet<> VTableNames; // A map from MD5 keys to function name strings. std::vector> MD5NameMap; + // A map from MD5 keys to function define. We only populate this map // when build the Symtab from a Module. std::vector> MD5FuncMap; // A map from function runtime address to function name MD5 hash. // This map is only populated and used by raw instr profile reader. AddrHashMap AddrToMD5Map; + + AddrIntervalMap::Allocator VTableAddrMapAllocator; + // This map is only populated and used by raw instr profile reader. + AddrIntervalMap VTableAddrMap; bool Sorted = false; - static StringRef getExternalSymbol() { - return "** External Symbol **"; - } + static StringRef getExternalSymbol() { return "** External Symbol **"; } // Returns the canonial name of the given PGOName. In a canonical name, all // suffixes that begins with "." except ".__uniq." are stripped. 
@@ -469,7 +490,7 @@ class InstrProfSymtab { inline void finalizeSymtab(); public: - InstrProfSymtab() = default; + InstrProfSymtab() : VTableAddrMap(VTableAddrMapAllocator) {} // Not copyable or movable. // Consider std::unique_ptr for move. @@ -488,9 +509,19 @@ class InstrProfSymtab { /// \c NameStrings is a string composed of one of more sub-strings /// encoded in the format described in \c collectPGOFuncNameStrings. - /// This method is a wrapper to \c readPGOFuncNameStrings method. + /// This method is a wrapper to \c readAndDecodeStrings method. Error create(StringRef NameStrings); + /// Initialize symtab states with function names and vtable names. \c + /// FuncNameStrings is a string composed of one or more encoded function name + /// strings, and \c VTableNameStrings composes of one or more encoded vtable + /// names. This interface is solely used by raw profile reader. + Error create(StringRef FuncNameStrings, StringRef VTableNameStrings); + + /// Initialize 'this' with the set of vtable names encoded in + /// \c CompressedVTableNames. + Error initVTableNamesFromCompressedStrings(StringRef CompressedVTableNames); + /// This interface is used by reader of CoverageMapping test /// format. inline Error create(StringRef D, uint64_t BaseAddr); @@ -503,32 +534,69 @@ class InstrProfSymtab { /// Create InstrProfSymtab from a set of names iteratable from /// \p IterRange. This interface is used by IndexedProfReader. - template Error create(const NameIterRange &IterRange); - - /// Update the symtab by adding \p FuncName to the table. This interface - /// is used by the raw and text profile readers. - Error addFuncName(StringRef FuncName) { - if (FuncName.empty()) + template + Error create(const NameIterRange &IterRange); + + /// Create InstrProfSymtab from a set of function names and vtable + /// names iteratable from \p IterRange. This interface is used by + /// IndexedProfReader. 
+ template + Error create(const FuncNameIterRange &FuncIterRange, + const VTableNameIterRange &VTableIterRange); + + Error addSymbolName(StringRef SymbolName) { + if (SymbolName.empty()) return make_error(instrprof_error::malformed, - "function name is empty"); - auto Ins = NameTab.insert(FuncName); + "symbol name is empty"); + + // Insert into NameTab so that MD5NameMap (a vector that will be sorted) + // won't have duplicated entries in the first place. + auto Ins = NameTab.insert(SymbolName); if (Ins.second) { MD5NameMap.push_back(std::make_pair( - IndexedInstrProf::ComputeHash(FuncName), Ins.first->getKey())); + IndexedInstrProf::ComputeHash(SymbolName), Ins.first->getKey())); Sorted = false; } return Error::success(); } + /// The method name is kept since there are many callers. + /// It just forwards to 'addSymbolName'. + Error addFuncName(StringRef FuncName) { return addSymbolName(FuncName); } + + /// Adds VTableName as a known symbol, and inserts it to a map that + /// tracks all vtable names. + Error addVTableName(StringRef VTableName) { + if (Error E = addSymbolName(VTableName)) + return E; + + // Record VTableName. InstrProfWriter uses this set. The comment around + // class member explains why. + VTableNames.insert(VTableName); + return Error::success(); + } + + const StringSet<> &getVTableNames() const { return VTableNames; } + /// Map a function address to its name's MD5 hash. This interface /// is only used by the raw profiler reader. void mapAddress(uint64_t Addr, uint64_t MD5Val) { AddrToMD5Map.push_back(std::make_pair(Addr, MD5Val)); } + /// Map the address range (i.e., [start_address, end_address)) of a variable + /// to its names' MD5 hash. This interface is only used by the raw profile + /// reader. + void mapVTableAddress(uint64_t StartAddr, uint64_t EndAddr, uint64_t MD5Val) { + VTableAddrMap.insert(StartAddr, EndAddr, MD5Val); + } + /// Return a function's hash, or 0, if the function isn't in this SymTab. 
uint64_t getFunctionHashFromAddress(uint64_t Address); + /// Return a vtable's hash, or 0 if the vtable doesn't exist in this SymTab. + uint64_t getVTableHashFromAddress(uint64_t Address); + /// Return function's PGO name from the function name's symbol /// address in the object file. If an error occurs, return /// an empty string. @@ -574,6 +642,24 @@ Error InstrProfSymtab::create(const NameIterRange &IterRange) { return Error::success(); } +template +Error InstrProfSymtab::create(const FuncNameIterRange &FuncIterRange, + const VTableNameIterRange &VTableIterRange) { + // Iterate elements by StringRef rather than by const reference. + // StringRef is small enough, so the loop is efficient whether + // element in the range is std::string or StringRef. + for (StringRef Name : FuncIterRange) + if (Error E = addFuncName(Name)) + return E; + + for (StringRef VTableName : VTableIterRange) + if (Error E = addVTableName(VTableName)) + return E; + + finalizeSymtab(); + return Error::success(); +} + void InstrProfSymtab::finalizeSymtab() { if (Sorted) return; @@ -877,6 +963,8 @@ struct InstrProfRecord { return ValueData->IndirectCallSites; case IPVK_MemOPSize: return ValueData->MemOPSizes; + case IPVK_VTableTarget: + return ValueData->VTableTargets; default: llvm_unreachable("Unknown value kind!"); } diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h index cfde5d3fc77d65..e46570af3873f9 100644 --- a/llvm/include/llvm/ProfileData/InstrProfReader.h +++ b/llvm/include/llvm/ProfileData/InstrProfReader.h @@ -626,6 +626,12 @@ class InstrProfReaderIndex : public InstrProfReaderIndexBase { InstrProfKind getProfileKind() const override; Error populateSymtab(InstrProfSymtab &Symtab) override { + // FIXME: the create method calls 'finalizeSymtab' and sorts a bunch of + // arrays/maps. Since there are other data sources other than 'HashTable' to + // populate a symtab, it might make sense to have something like this + // 1. 
Let each data source populate Symtab and init the arrays/maps without + // calling 'finalizeSymtab' + // 2. Call 'finalizeSymtab' once to get all arrays/maps sorted if needed. return Symtab.create(HashTable->keys()); } }; diff --git a/llvm/include/llvm/ProfileData/InstrProfWriter.h b/llvm/include/llvm/ProfileData/InstrProfWriter.h index d2156c86787273..4b42392bc1e061 100644 --- a/llvm/include/llvm/ProfileData/InstrProfWriter.h +++ b/llvm/include/llvm/ProfileData/InstrProfWriter.h @@ -63,6 +63,9 @@ class InstrProfWriter { // List of binary ids. std::vector BinaryIds; + // Read the vtable names from raw instr profile reader. + StringSet<> VTableNames; + // An enum describing the attributes of the profile. InstrProfKind ProfileKind = InstrProfKind::Unknown; // Use raw pointer here for the incomplete type object. @@ -95,6 +98,7 @@ class InstrProfWriter { void addRecord(NamedInstrProfRecord &&I, function_ref Warn) { addRecord(std::move(I), 1, Warn); } + void addVTableName(StringRef VTableName) { VTableNames.insert(VTableName); } /// Add \p SrcTraces using reservoir sampling where \p SrcStreamSize is the /// total number of temporal profiling traces the source has seen. 
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index 8cf97fcb1dab57..90c3cfc45b98ae 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -34,6 +34,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Compression.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" @@ -57,6 +58,8 @@ using namespace llvm; +#define DEBUG_TYPE "instrprof" + static cl::opt StaticFuncFullModulePrefix( "static-func-full-module-prefix", cl::init(true), cl::Hidden, cl::desc("Use full module build paths in the profile counter names for " @@ -219,6 +222,12 @@ cl::opt DoInstrProfNameCompression( "enable-name-compression", cl::desc("Enable name/filename string compression"), cl::init(true)); +cl::opt EnableVTableValueProfiling( + "enable-vtable-value-profiling", cl::init(false), + cl::desc("If true, the virtual table address will be instrumented to know " + "the types of a C++ pointer. The information is used in indirect " + "call promotion to do selective vtable-based comparison.")); + std::string getInstrProfSectionName(InstrProfSectKind IPSK, Triple::ObjectFormatType OF, bool AddSegmentInfo) { @@ -378,6 +387,13 @@ std::string getPGOFuncName(const Function &F, bool InLTO, uint64_t Version) { return getPGOFuncName(F.getName(), GlobalValue::ExternalLinkage, ""); } +std::string getPGOName(const GlobalVariable &V, bool InLTO) { + // PGONameMetadata should be set by compiler at profile use time + // and read by symtab creation to look up symbols corresponding to + // a MD5 hash. + return getIRPGOObjectName(V, InLTO, /*PGONameMetadata=*/nullptr); +} + // See getIRPGOObjectName() for a discription of the format. 
std::pair getParsedIRPGOName(StringRef IRPGOName) { auto [FileName, MangledName] = IRPGOName.split(kGlobalIdentifierDelimiter); @@ -459,6 +475,7 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) { if (Error E = addFuncWithName(F, getPGOFuncName(F, InLTO))) return E; } + Sorted = false; finalizeSymtab(); return Error::success(); @@ -517,6 +534,25 @@ Error InstrProfSymtab::create(StringRef NameStrings) { std::bind(&InstrProfSymtab::addFuncName, this, std::placeholders::_1)); } +Error InstrProfSymtab::create(StringRef FuncNameStrings, + StringRef VTableNameStrings) { + if (Error E = readAndDecodeStrings(FuncNameStrings, + std::bind(&InstrProfSymtab::addFuncName, + this, std::placeholders::_1))) + return E; + + return readAndDecodeStrings( + VTableNameStrings, + std::bind(&InstrProfSymtab::addVTableName, this, std::placeholders::_1)); +} + +Error InstrProfSymtab::initVTableNamesFromCompressedStrings( + StringRef CompressedVTableStrings) { + return readAndDecodeStrings( + CompressedVTableStrings, + std::bind(&InstrProfSymtab::addVTableName, this, std::placeholders::_1)); +} + StringRef InstrProfSymtab::getCanonicalName(StringRef PGOName) { // In ThinLTO, local function may have been promoted to global and have // suffix ".llvm." added to the function name. We need to add the @@ -560,6 +596,12 @@ Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) { return Error::success(); } +uint64_t InstrProfSymtab::getVTableHashFromAddress(uint64_t Address) { + // Given a runtime address, look up the hash value in the interval map, and + // fallback to value 0 if a hash value is not found. 
+ return VTableAddrMap.lookup(Address, 0); +} + uint64_t InstrProfSymtab::getFunctionHashFromAddress(uint64_t Address) { finalizeSymtab(); auto It = partition_point(AddrToMD5Map, [=](std::pair A) { @@ -636,6 +678,16 @@ Error collectPGOFuncNameStrings(ArrayRef NameVars, NameStrs, compression::zlib::isAvailable() && doCompression, Result); } +Error collectVTableStrings(ArrayRef VTables, + std::string &Result, bool doCompression) { + std::vector VTableNameStrs; + for (auto *VTable : VTables) + VTableNameStrs.push_back(getPGOName(*VTable)); + return collectGlobalObjectNameStrings( + VTableNameStrs, compression::zlib::isAvailable() && doCompression, + Result); +} + void InstrProfRecord::accumulateCounts(CountSumOrPercent &Sum) const { uint64_t FuncSum = 0; Sum.NumEntries += Counts.size(); @@ -898,6 +950,9 @@ uint64_t InstrProfRecord::remapValue(uint64_t Value, uint32_t ValueKind, if (ValueKind == IPVK_IndirectCallTarget) return SymTab->getFunctionHashFromAddress(Value); + if (ValueKind == IPVK_VTableTarget) + return SymTab->getVTableHashFromAddress(Value); + return Value; } @@ -1288,8 +1343,8 @@ void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName) { F.setMetadata(getPGOFuncNameMetadataName(), N); } -bool needsComdatForCounter(const Function &F, const Module &M) { - if (F.hasComdat()) +bool needsComdatForCounter(const GlobalObject &GO, const Module &M) { + if (GO.hasComdat()) return true; if (!Triple(M.getTargetTriple()).supportsCOMDAT()) @@ -1305,7 +1360,7 @@ bool needsComdatForCounter(const Function &F, const Module &M) { // available_externally functions will end up being duplicated in raw profile // data. This can result in distorted profile as the counts of those dups // will be accumulated by the profile merger. 
- GlobalValue::LinkageTypes Linkage = F.getLinkage(); + GlobalValue::LinkageTypes Linkage = GO.getLinkage(); if (Linkage != GlobalValue::ExternalWeakLinkage && Linkage != GlobalValue::AvailableExternallyLinkage) return false; @@ -1461,7 +1516,7 @@ void OverlapStats::dump(raw_fd_ostream &OS) const { for (unsigned I = 0; I < IPVK_Last - IPVK_First + 1; I++) { if (Base.ValueCounts[I] < 1.0f && Test.ValueCounts[I] < 1.0f) continue; - char ProfileKindName[20]; + char ProfileKindName[20] = {0}; switch (I) { case IPVK_IndirectCallTarget: strncpy(ProfileKindName, "IndirectCall", 19); @@ -1469,6 +1524,9 @@ void OverlapStats::dump(raw_fd_ostream &OS) const { case IPVK_MemOPSize: strncpy(ProfileKindName, "MemOP", 19); break; + case IPVK_VTableTarget: + strncpy(ProfileKindName, "VTable", 19); + break; default: snprintf(ProfileKindName, 19, "VP[%d]", I); break; diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index a275d4852c1562..7ac5c561dc0809 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -370,8 +370,11 @@ TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) { } else if (ValueKind == IPVK_VTableTarget) { if (InstrProfSymtab::isExternalSymbol(VD.first)) Value = 0; - else + else { + if (Error E = Symtab->addVTableName(VD.first)) + return E; Value = IndexedInstrProf::ComputeHash(VD.first); + } } else { READ_NUM(VD.first, Value); } @@ -539,7 +542,8 @@ Error RawInstrProfReader::readNextHeader(const char *CurrentPos) { template Error RawInstrProfReader::createSymtab(InstrProfSymtab &Symtab) { - if (Error E = Symtab.create(StringRef(NamesStart, NamesEnd - NamesStart))) + if (Error E = Symtab.create(StringRef(NamesStart, NamesEnd - NamesStart), + StringRef(VNamesStart, VNamesEnd - VNamesStart))) return error(std::move(E)); for (const RawInstrProf::ProfileData *I = Data; I != DataEnd; ++I) { const IntPtrT FPtr = swap(I->FunctionPointer); @@ -547,6 +551,21 @@ Error 
RawInstrProfReader::createSymtab(InstrProfSymtab &Symtab) { continue; Symtab.mapAddress(FPtr, swap(I->NameRef)); } + + if (VTableBegin != nullptr && VTableEnd != nullptr) { + for (const RawInstrProf::VTableProfileData *I = VTableBegin; + I != VTableEnd; ++I) { + const IntPtrT VPtr = swap(I->VTablePointer); + if (!VPtr) + continue; + // Map both begin and end address to the name hash, since the instrumented + // address could be somewhere in the middle. + // VPtr is of type uint32_t or uint64_t so 'VPtr + I->VTableSize' marks + // the end of vtable address. + Symtab.mapVTableAddress(VPtr, VPtr + swap(I->VTableSize), + swap(I->VTableNameHash)); + } + } return success(); } @@ -1397,7 +1416,15 @@ InstrProfSymtab &IndexedInstrProfReader::getSymtab() { if (Symtab) return *Symtab; - std::unique_ptr NewSymtab = std::make_unique(); + auto NewSymtab = std::make_unique(); + + if (Error E = NewSymtab->initVTableNamesFromCompressedStrings( + StringRef(VTableNamePtr, CompressedVTableNamesLen))) { + auto [ErrCode, Msg] = InstrProfError::take(std::move(E)); + consumeError(error(ErrCode, Msg)); + } + + // finalizeSymtab is called inside populateSymtab. 
if (Error E = Index->populateSymtab(*NewSymtab)) { auto [ErrCode, Msg] = InstrProfError::take(std::move(E)); consumeError(error(ErrCode, Msg)); diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index 8f067f8d05e2b9..c2c94ba30c6583 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -19,6 +19,7 @@ #include "llvm/ProfileData/InstrProf.h" #include "llvm/ProfileData/MemProf.h" #include "llvm/ProfileData/ProfileCommon.h" +#include "llvm/Support/Compression.h" #include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/Error.h" @@ -636,13 +637,18 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { uint64_t VTableNamesSectionStart = OS.tell(); if (!WritePrevVersion) { - // Use a dummy (and uncompressed) string as compressed vtable names and get - // the necessary profile format change in place for version 12. - // TODO: Store the list of vtable names in InstrProfWriter and use the - // real compressed name. - std::string CompressedVTableNames = "VTableNames"; + std::vector VTableNameStrs; + for (StringRef VTableName : VTableNames.keys()) + VTableNameStrs.push_back(VTableName.str()); + + std::string CompressedVTableNames; + if (!VTableNameStrs.empty()) + if (Error E = collectGlobalObjectNameStrings( + VTableNameStrs, compression::zlib::isAvailable(), + CompressedVTableNames)) + return E; - uint64_t CompressedStringLen = CompressedVTableNames.length(); + const uint64_t CompressedStringLen = CompressedVTableNames.length(); // Record the length of compressed string. OS.write(CompressedStringLen); @@ -652,12 +658,11 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { OS.writeByte(static_cast(c)); // Pad up to a multiple of 8. - // InstrProfReader would read bytes according to 'CompressedStringLen'. - uint64_t PaddedLength = alignTo(CompressedStringLen, 8); + // InstrProfReader could read bytes according to 'CompressedStringLen'. 
+ const uint64_t PaddedLength = alignTo(CompressedStringLen, 8); - for (uint64_t K = CompressedStringLen; K < PaddedLength; K++) { + for (uint64_t K = CompressedStringLen; K < PaddedLength; K++) OS.writeByte(0); - } } uint64_t TemporalProfTracesSectionStart = 0; @@ -866,6 +871,10 @@ Error InstrProfWriter::writeText(raw_fd_ostream &OS) { } } + for (const auto &VTableName : VTableNames) + if (Error E = Symtab.addVTableName(VTableName.getKey())) + return E; + if (static_cast(ProfileKind & InstrProfKind::TemporalProfile)) writeTextTemporalProfTraceData(OS, Symtab); diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index c42c53edd51190..f9b58d9f278214 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -64,6 +64,9 @@ using namespace llvm; #define DEBUG_TYPE "instrprof" namespace llvm { +// Command line option to enable vtable value profiling. Defined in +// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling= +extern cl::opt EnableVTableValueProfiling; // TODO: Remove -debug-info-correlate in next LLVM release, in favor of // -profile-correlate=debug-info. cl::opt DebugInfoCorrelate( @@ -219,12 +222,18 @@ class InstrLowerer final { PerFunctionProfileData() = default; }; DenseMap ProfileDataMap; + // Key is virtual table variable, value is 'VTableProfData' in the form of + // GlobalVariable. + DenseMap VTableDataMap; /// If runtime relocation is enabled, this maps functions to the load /// instruction that produces the profile relocation bias. DenseMap FunctionToProfileBiasMap; std::vector CompilerUsedVars; std::vector UsedVars; std::vector ReferencedNames; + // The list of virtual table variables of which the VTableProfData is + // collected. 
+ std::vector ReferencedVTables; GlobalVariable *NamesVar = nullptr; size_t NamesSize = 0; @@ -308,7 +317,7 @@ class InstrLowerer final { GlobalValue::LinkageTypes Linkage); /// Set Comdat property of GV, if required. - void maybeSetComdat(GlobalVariable *GV, Function *Fn, StringRef VarName); + void maybeSetComdat(GlobalVariable *GV, GlobalObject *GO, StringRef VarName); /// Setup the sections into which counters and bitmaps are allocated. GlobalVariable *setupProfileSection(InstrProfInstBase *Inc, @@ -317,9 +326,15 @@ class InstrLowerer final { /// Create INSTR_PROF_DATA variable for counters and bitmaps. void createDataVariable(InstrProfCntrInstBase *Inc); + /// Get the counters for virtual table values, creating them if necessary. + void getOrCreateVTableProfData(GlobalVariable *GV); + /// Emit the section with compressed function names. void emitNameData(); + /// Emit the section with compressed vtable names. + void emitVTableNames(); + /// Emit value nodes section for value profiling. void emitVNodes(); @@ -763,6 +778,12 @@ bool InstrLowerer::lower() { } } + if (EnableVTableValueProfiling) + for (GlobalVariable &GV : M.globals()) + // Global variables with type metadata are virtual table variables. + if (GV.hasMetadata(LLVMContext::MD_type)) + getOrCreateVTableProfData(&GV); + for (Function &F : M) MadeChange |= lowerIntrinsics(&F); @@ -776,6 +797,7 @@ bool InstrLowerer::lower() { emitVNodes(); emitNameData(); + emitVTableNames(); // Emit runtime hook for the cases where the target does not unconditionally // require pulling in profile runtime, and coverage is enabled on code that is @@ -1193,13 +1215,13 @@ static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) { return true; } -void InstrLowerer::maybeSetComdat(GlobalVariable *GV, Function *Fn, +void InstrLowerer::maybeSetComdat(GlobalVariable *GV, GlobalObject *GO, StringRef CounterGroupName) { // Place lowered global variables in a comdat group if the associated function - // is a COMDAT. 
This will make sure that only one copy of global variable - // (e.g. function counters) of the COMDAT function will be emitted after - // linking. - bool NeedComdat = needsComdatForCounter(*Fn, M); + // or global variable is a COMDAT. This will make sure that only one copy of + // global variable (e.g. function counters) of the COMDAT function will be + // emitted after linking. + bool NeedComdat = needsComdatForCounter(*GO, M); bool UseComdat = (NeedComdat || TT.isOSBinFormatELF()); if (!UseComdat) @@ -1237,6 +1259,104 @@ void InstrLowerer::maybeSetComdat(GlobalVariable *GV, Function *Fn, GV->setLinkage(GlobalValue::InternalLinkage); } +static inline bool shouldRecordVTableAddr(GlobalVariable *GV) { + if (!profDataReferencedByCode(*GV->getParent())) + return false; + + if (!GV->hasLinkOnceLinkage() && !GV->hasLocalLinkage() && + !GV->hasAvailableExternallyLinkage()) + return true; + + // This avoids the profile data from referencing internal symbols in + // COMDAT. + if (GV->hasLocalLinkage() && GV->hasComdat()) + return false; + + return true; +} + +// FIXME: Introduce an internal alias like what's done for functions to reduce +// the number of relocation entries. +static inline Constant *getVTableAddrForProfData(GlobalVariable *GV) { + auto *Int8PtrTy = PointerType::getUnqual(GV->getContext()); + + // Store a nullptr in __profvt_ if a real address shouldn't be used. + if (!shouldRecordVTableAddr(GV)) + return ConstantPointerNull::get(Int8PtrTy); + + return ConstantExpr::getBitCast(GV, Int8PtrTy); +} + +void InstrLowerer::getOrCreateVTableProfData(GlobalVariable *GV) { + assert(!DebugInfoCorrelate && + "Value profiling is not supported with lightweight instrumentation"); + if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage()) + return; + + // Skip llvm internal global variable or __prof variables. 
+ if (GV->getName().starts_with("llvm.") || + GV->getName().starts_with("__llvm") || + GV->getName().starts_with("__prof")) + return; + + // VTableProfData already created + auto It = VTableDataMap.find(GV); + if (It != VTableDataMap.end() && It->second) + return; + + GlobalValue::LinkageTypes Linkage = GV->getLinkage(); + GlobalValue::VisibilityTypes Visibility = GV->getVisibility(); + + // This is to keep consistent with per-function profile data + // for correctness. + if (TT.isOSBinFormatXCOFF()) { + Linkage = GlobalValue::InternalLinkage; + Visibility = GlobalValue::DefaultVisibility; + } + + LLVMContext &Ctx = M.getContext(); + Type *DataTypes[] = { +#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) LLVMType, +#include "llvm/ProfileData/InstrProfData.inc" +#undef INSTR_PROF_VTABLE_DATA + }; + + auto *DataTy = StructType::get(Ctx, ArrayRef(DataTypes)); + + // Used by INSTR_PROF_VTABLE_DATA MACRO + Constant *VTableAddr = getVTableAddrForProfData(GV); + const std::string PGOVTableName = getPGOName(*GV); + // Record the length of the vtable. This is needed since vtable pointers + // loaded from C++ objects might be from the middle of a vtable definition. + uint32_t VTableSizeVal = + M.getDataLayout().getTypeAllocSize(GV->getValueType()); + + Constant *DataVals[] = { +#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) Init, +#include "llvm/ProfileData/InstrProfData.inc" +#undef INSTR_PROF_VTABLE_DATA + }; + + auto *Data = + new GlobalVariable(M, DataTy, /*constant=*/false, Linkage, + ConstantStruct::get(DataTy, DataVals), + getInstrProfVTableVarPrefix() + PGOVTableName); + + Data->setVisibility(Visibility); + Data->setSection(getInstrProfSectionName(IPSK_vtab, TT.getObjectFormat())); + Data->setAlignment(Align(8)); + + maybeSetComdat(Data, GV, Data->getName()); + + VTableDataMap[GV] = Data; + + ReferencedVTables.push_back(GV); + + // VTable is used by runtime but not referenced by other + // sections. Conservatively mark it linker retained. 
+ UsedVars.push_back(Data); +} + GlobalVariable *InstrLowerer::setupProfileSection(InstrProfInstBase *Inc, InstrProfSectKind IPSK) { GlobalVariable *NamePtr = Inc->getName(); @@ -1633,6 +1753,31 @@ void InstrLowerer::emitNameData() { NamePtr->eraseFromParent(); } +void InstrLowerer::emitVTableNames() { + if (!EnableVTableValueProfiling || ReferencedVTables.empty()) + return; + + // Collect the PGO names of referenced vtables and compress them. + std::string CompressedVTableNames; + if (Error E = collectVTableStrings(ReferencedVTables, CompressedVTableNames, + DoInstrProfNameCompression)) { + report_fatal_error(Twine(toString(std::move(E))), false); + } + + auto &Ctx = M.getContext(); + auto *VTableNamesVal = ConstantDataArray::getString( + Ctx, StringRef(CompressedVTableNames), false /* AddNull */); + GlobalVariable *VTableNamesVar = + new GlobalVariable(M, VTableNamesVal->getType(), true /* constant */, + GlobalValue::PrivateLinkage, VTableNamesVal, + getInstrProfVTableNamesVarName()); + VTableNamesVar->setSection( + getInstrProfSectionName(IPSK_vname, TT.getObjectFormat())); + VTableNamesVar->setAlignment(Align(1)); + // Make VTableNames linker retained. + UsedVars.push_back(VTableNamesVar); +} + void InstrLowerer::emitRegistration() { if (!needsRuntimeRegistrationOfSectionRange(TT)) return; diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 50eccc69a38a00..98c6f8cbf5afef 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -327,6 +327,9 @@ extern cl::opt PGOViewCounts; // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name= extern cl::opt ViewBlockFreqFuncName; +// Command line option to enable vtable value profiling. 
Defined in +// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling= +extern cl::opt EnableVTableValueProfiling; extern cl::opt ProfileCorrelate; } // namespace llvm @@ -581,6 +584,8 @@ template class FuncPGOInstrumentation { NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); NumOfPGOBB += MST.bbInfoSize(); ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget); + if (EnableVTableValueProfiling) + ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget); } else { NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts(); NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size(); @@ -1775,6 +1780,15 @@ static bool InstrumentAllFunctions( // (before LTO/ThinLTO linking) to create these variables. if (!IsCS) createIRLevelProfileFlagVar(M, /*IsCS=*/false); + + Triple TT(M.getTargetTriple()); + LLVMContext &Ctx = M.getContext(); + if (!TT.isOSBinFormatELF() && EnableVTableValueProfiling) + Ctx.diagnose(DiagnosticInfoPGOProfile( + M.getName().data(), + Twine("VTable value profiling is presently not " + "supported for non-ELF object formats"), + DS_Warning)); std::unordered_multimap ComdatMembers; collectComdatMembers(M, ComdatMembers); diff --git a/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc b/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc index 3a129de1acd02d..b47ef8523ea112 100644 --- a/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc +++ b/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc @@ -90,9 +90,38 @@ public: } }; +///--------------------- VirtualTableValueProfilingPlugin -------------------- +class VTableProfilingPlugin { + Function &F; + +public: + static constexpr InstrProfValueKind Kind = IPVK_VTableTarget; + + VTableProfilingPlugin(Function &Fn, TargetLibraryInfo &TLI) : F(Fn) {} + + void run(std::vector &Candidates) { + std::vector Result = findVTableAddrs(F); + for (Instruction *I : Result) { + Instruction *InsertPt = I->getNextNonDebugInstruction(); + // When finding an 
insertion point, keep PHI and EH pad instructions + // before vp intrinsics. This is similar to + // `BasicBlock::getFirstInsertionPt`. + while (InsertPt && (dyn_cast(InsertPt) || InsertPt->isEHPad())) + InsertPt = InsertPt->getNextNonDebugInstruction(); + // Skip instrumentating the value if InsertPt is the last instruction. + // FIXME: Set InsertPt to the end of basic block to instrument the value + // if InsertPt is the last instruction. + if (InsertPt == nullptr) + continue; + + Instruction *AnnotatedInst = I; + Candidates.emplace_back(CandidateInfo{I, InsertPt, AnnotatedInst}); + } + } +}; + ///----------------------- Registration of the plugins ------------------------- /// For now, registering a plugin with the ValueProfileCollector is done by /// adding the plugin type to the VP_PLUGIN_LIST macro. -#define VP_PLUGIN_LIST \ - MemIntrinsicPlugin, \ - IndirectCallPromotionPlugin +#define VP_PLUGIN_LIST \ + MemIntrinsicPlugin, IndirectCallPromotionPlugin, VTableProfilingPlugin diff --git a/llvm/test/Transforms/PGOProfile/vtable_prof_unsupported.ll b/llvm/test/Transforms/PGOProfile/vtable_prof_unsupported.ll new file mode 100644 index 00000000000000..f72a20fdc71a6e --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/vtable_prof_unsupported.ll @@ -0,0 +1,34 @@ +; RUN: opt < %s -passes=pgo-instr-gen -enable-vtable-value-profiling -S 2>&1 | FileCheck %s + +; Test that unsupported warning is emitted for non-ELF object files. 
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx14.0.0" + +; CHECK: warning: {{.*}} VTable value profiling is presently not supported for non-ELF object formats + +@_ZTV4Base = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN4Base4funcEi] }, !type !0, !type !1 +@_ZTV7Derived = constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN7Derived4funcEi] }, !type !0, !type !1, !type !2, !type !3 + +@llvm.compiler.used = appending global [2 x ptr] [ptr @_ZTV4Base, ptr @_ZTV7Derived], section "llvm.metadata" + +define i32 @_Z4funci(i32 %a) { +entry: + %call = call ptr @_Z10createTypev() + %vtable = load ptr, ptr %call + %0 = call i1 @llvm.public.type.test(ptr %vtable, metadata !"_ZTS7Derived") + call void @llvm.assume(i1 %0) + %1 = load ptr, ptr %vtable + %call1 = call i32 %1(ptr %call, i32 %a) + ret i32 %call1 +} + +declare ptr @_Z10createTypev() +declare i1 @llvm.public.type.test(ptr, metadata) +declare void @llvm.assume(i1) +declare i32 @_ZN4Base4funcEi(ptr, i32) +declare i32 @_ZN7Derived4funcEi(ptr , i32) + +!0 = !{i64 16, !"_ZTS4Base"} +!1 = !{i64 16, !"_ZTSM4BaseFiiE.virtual"} +!2 = !{i64 16, !"_ZTS7Derived"} +!3 = !{i64 16, !"_ZTSM7DerivedFiiE.virtual"} diff --git a/llvm/test/Transforms/PGOProfile/vtable_profile.ll b/llvm/test/Transforms/PGOProfile/vtable_profile.ll new file mode 100644 index 00000000000000..a8440031e1493d --- /dev/null +++ b/llvm/test/Transforms/PGOProfile/vtable_profile.ll @@ -0,0 +1,98 @@ +; RUN: opt < %s -passes=pgo-instr-gen -enable-vtable-value-profiling -S 2>&1 | FileCheck %s --check-prefix=GEN --implicit-check-not="VTable value profiling is presently not supported" +; RUN: opt < %s -passes=pgo-instr-gen,instrprof -enable-vtable-value-profiling -S 2>&1 | FileCheck %s --check-prefix=LOWER --implicit-check-not="VTable value profiling is presently not supported" + +; __llvm_prf_vnm stores zlib-compressed vtable names. 
+; REQUIRES: zlib + +source_filename = "vtable_local.ll" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; The test IR is generated based on the following C++ program. +; Base1 has external linkage and Base2 has local linkage. +; class Derived uses multiple inheritance so its virtual table +; global variable contains two vtables. func1 is loaded from +; the vtable compatible with class Base1, and func2 is loaded +; from the vtable compatible with class Base2. + +; class Base1 { +; public: +; virtual int func1(int a) ; +; }; +; +; namespace { +; class Base2 { +; public: +; __attribute__((noinline)) virtual int func2(int a) { +; return a; +; } +; }; +; } + +; class Derived : public Base1, public Base2 { +; public: +; Derived(int c) : v(c) {} +; private: +; int v; +; }; +; +; Derived* createType(); + +; int func(int a) { +; Derived* d = createType(); +; return d->func2(a) + d->func1(a); +; } + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@_ZTV7Derived = constant { [3 x ptr], [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN5Base15func1Ei], [3 x ptr] [ptr inttoptr (i64 -8 to ptr), ptr null, ptr @_ZN12_GLOBAL__N_15Base25func2Ei] }, !type !0, !type !3, !type !6, !type !8, !type !10 +@_ZTV5Base1 = available_externally constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN5Base15func1Ei] }, !type !0 +@_ZTVN12_GLOBAL__N_15Base2E = internal constant { [3 x ptr] } { [3 x ptr] [ptr null, ptr null, ptr @_ZN12_GLOBAL__N_15Base25func2Ei] }, !type !11, !type !8; !vcall_visibility !12 +@llvm.compiler.used = appending global [1 x ptr] [ptr @_ZTV5Base1], section "llvm.metadata" + +; GEN: __llvm_profile_raw_version = comdat any +; GEN: __llvm_profile_raw_version = hidden constant i64 72057594037927946, comdat +; GEN: __profn__Z4funci = private constant [8 x i8] c"_Z4funci" + 
+; LOWER: $__profvt__ZTV7Derived = comdat nodeduplicate +; LOWER: $"__profvt_vtable_local.ll;_ZTVN12_GLOBAL__N_15Base2E" = comdat nodeduplicate +; LOWER: @__profvt__ZTV7Derived = global { i64, ptr, i32 } { i64 -4576307468236080025, ptr @_ZTV7Derived, i32 48 }, section "__llvm_prf_vtab", comdat, align 8 +; LOWER: @"__profvt_vtable_local.ll;_ZTVN12_GLOBAL__N_15Base2E" = internal global { i64, ptr, i32 } { i64 1419990121885302679, ptr @_ZTVN12_GLOBAL__N_15Base2E, i32 24 }, section "__llvm_prf_vtab", comdat, align 8 +; LOWER: @__llvm_prf_vnm = private constant [64 x i8] c"7>x\DA\8B\8F\0A\093wI-\CA,KMa,+IL\CAI\8D\CF\C9ON\CC\D1\CB\C9\B1\8E\07J\FA\19\1A\C5\BB\FB\F8;9\FA\C4\C7\FB\C5\1B\9A:%\16\A7\1A\B9\02\00\19:\12o", section "__llvm_prf_vns", align 1 +; LOWER: @llvm.used = appending global [5 x ptr] [ptr @__profvt__ZTV7Derived, ptr @"__profvt_vtable_local.ll;_ZTVN12_GLOBAL__N_15Base2E", ptr @__llvm_prf_vnodes, ptr @__llvm_prf_nm, ptr @__llvm_prf_vnm], section "llvm.metadata" + +define i32 @_Z4funci(i32 %a) { +entry: + %call = call ptr @_Z10createTypev() + %add.ptr = getelementptr inbounds i8, ptr %call, i64 8 + %vtable = load ptr, ptr %add.ptr +; GEN: [[P1:%[0-9]+]] = ptrtoint ptr %vtable to i64 +; GEN: call void @llvm.instrprof.value.profile(ptr @__profn__Z4funci, i64 [[CFGHash:[0-9]+]], i64 [[P1]], i32 2, i32 0) +; LOWER: [[P1:%[0-9]+]] = ptrtoint ptr %vtable to i64 +; LOWER: call void @__llvm_profile_instrument_target(i64 [[P1]], ptr @__profd__Z4funci, i32 2) + %vfunc1 = load ptr, ptr %vtable + %call1 = call i32 %vfunc1(ptr %add.ptr, i32 %a) + %vtable2 = load ptr, ptr %call +; GEN: [[P2:%[0-9]+]] = ptrtoint ptr %vtable2 to i64 +; GEN: call void @llvm.instrprof.value.profile(ptr @__profn__Z4funci, i64 [[CFGHash]], i64 [[P2]], i32 2, i32 1) +; LOWER: [[P2:%[0-9]+]] = ptrtoint ptr %vtable2 to i64 +; LOWER: call void @__llvm_profile_instrument_target(i64 [[P2]], ptr @__profd__Z4funci, i32 3) + %vfunc2 = load ptr, ptr %vtable2 + %call4 = call i32 %vfunc2(ptr %call, i32 %a) 
+ %add = add nsw i32 %call1, %call4 + ret i32 %add +} + +declare ptr @_Z10createTypev() +declare i32 @_ZN12_GLOBAL__N_15Base25func2Ei(ptr %this, i32 %a) +declare i32 @_ZN5Base15func1Ei(ptr, i32) + +!0 = !{i64 16, !"_ZTS5Base1"} +!3 = !{i64 16, !"_ZTS7Derived"} +!6 = !{i64 40, !7} +!7 = distinct !{} +!8 = !{i64 16, !9} +!9 = distinct !{} +!10 = !{i64 40, !9} +!11 = !{i64 16, !7} diff --git a/llvm/test/tools/llvm-profdata/Inputs/vtable-value-prof.proftext b/llvm/test/tools/llvm-profdata/Inputs/vtable-value-prof.proftext new file mode 100644 index 00000000000000..372f9f97b16454 --- /dev/null +++ b/llvm/test/tools/llvm-profdata/Inputs/vtable-value-prof.proftext @@ -0,0 +1,74 @@ +# IR level Instrumentation Flag +:ir +_Z10createTypei +# Func Hash: +146835647075900052 +# Num Counters: +2 +# Counter Values: +750 +250 + +_ZN8Derived15func1Eii +# Func Hash: +742261418966908927 +# Num Counters: +1 +# Counter Values: +250 + +_ZN8Derived15func2Eii +# Func Hash: +742261418966908927 +# Num Counters: +1 +# Counter Values: +250 + +main +# Func Hash: +1124236338992350536 +# Num Counters: +2 +# Counter Values: +1000 +1 +# Num Value Kinds: +2 +# ValueKind = IPVK_IndirectCallTarget: +0 +# NumValueSites: +2 +2 +vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func1Eii:750 +_ZN8Derived15func1Eii:250 +2 +vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func2Eii:750 +_ZN8Derived15func2Eii:250 +# ValueKind = IPVK_VTableTarget: +2 +# NumValueSites: +2 +2 +vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E:750 +_ZTV8Derived1:250 +2 +vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E:750 +_ZTV8Derived1:250 + +vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func1Eii +# Func Hash: +742261418966908927 +# Num Counters: +1 +# Counter Values: +750 + +vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func2Eii +# Func Hash: +742261418966908927 +# Num Counters: +1 +# Counter Values: +750 + diff --git a/llvm/test/tools/llvm-profdata/vtable-value-prof.test b/llvm/test/tools/llvm-profdata/vtable-value-prof.test new file mode 100644 index 
00000000000000..378c2e11b236ba --- /dev/null +++ b/llvm/test/tools/llvm-profdata/vtable-value-prof.test @@ -0,0 +1,83 @@ +; RUN: rm -rf %t && mkdir %t && cd %t + +; Generate indexed profiles from text profiles +RUN: llvm-profdata merge %S/Inputs/vtable-value-prof.proftext -o indexed.profdata + +; Show indexed profiles +RUN: llvm-profdata show --function=main --ic-targets --show-vtables indexed.profdata | FileCheck %s --check-prefix=INDEXED + +; Show text profiles +RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text %S/Inputs/vtable-value-prof.proftext | FileCheck %s --check-prefix=ICTEXT + +; Convert indexed profiles to its textual output and show it. +RUN: llvm-profdata merge --text -o text-from-indexed.proftext indexed.profdata +RUN: llvm-profdata show --function=main --ic-targets --show-vtables text-from-indexed.proftext | FileCheck %s --check-prefix=INDEXED +RUN: llvm-profdata show --function=main --ic-targets --show-vtables --text text-from-indexed.proftext | FileCheck %s --check-prefix=ICTEXT + +INDEXED: Counters: +INDEXED-NEXT: main: +INDEXED-NEXT: Hash: 0x0f9a16fe6d398548 +INDEXED-NEXT: Counters: 2 +INDEXED-NEXT: Indirect Call Site Count: 2 +INDEXED-NEXT: Number of instrumented vtables: 2 +INDEXED-NEXT: Indirect Target Results: +INDEXED-NEXT: [ 0, {{.*}}vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func1Eii, 750 ] (75.00%) +INDEXED-NEXT: [ 0, _ZN8Derived15func1Eii, 250 ] (25.00%) +INDEXED-NEXT: [ 1, {{.*}}vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func2Eii, 750 ] (75.00%) +INDEXED-NEXT: [ 1, _ZN8Derived15func2Eii, 250 ] (25.00%) +INDEXED-NEXT: VTable Results: +INDEXED-NEXT: [ 0, {{.*}}vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%) +INDEXED-NEXT: [ 0, _ZTV8Derived1, 250 ] (25.00%) +INDEXED-NEXT: [ 1, {{.*}}vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E, 750 ] (75.00%) +INDEXED-NEXT: [ 1, _ZTV8Derived1, 250 ] (25.00%) +INDEXED-NEXT: Instrumentation level: IR entry_first = 0 +INDEXED-NEXT: Functions shown: 1 +INDEXED-NEXT: Total 
functions: 6 +INDEXED-NEXT: Maximum function count: 1000 +INDEXED-NEXT: Maximum internal block count: 250 +INDEXED-NEXT: Statistics for indirect call sites profile: +INDEXED-NEXT: Total number of sites: 2 +INDEXED-NEXT: Total number of sites with values: 2 +INDEXED-NEXT: Total number of profiled values: 4 +INDEXED-NEXT: Value sites histogram: +INDEXED-NEXT: NumTargets, SiteCount +INDEXED-NEXT: 2, 2 +INDEXED-NEXT: Statistics for vtable profile: +INDEXED-NEXT: Total number of sites: 2 +INDEXED-NEXT: Total number of sites with values: 2 +INDEXED-NEXT: Total number of profiled values: 4 +INDEXED-NEXT: Value sites histogram: +INDEXED-NEXT: NumTargets, SiteCount +INDEXED-NEXT: 2, 2 + +ICTEXT: :ir +ICTEXT: main +ICTEXT: # Func Hash: +ICTEXT: 1124236338992350536 +ICTEXT: # Num Counters: +ICTEXT: 2 +ICTEXT: # Counter Values: +ICTEXT: 1000 +ICTEXT: 1 +ICTEXT: # Num Value Kinds: +ICTEXT: 2 +ICTEXT: # ValueKind = IPVK_IndirectCallTarget: +ICTEXT: 0 +ICTEXT: # NumValueSites: +ICTEXT: 2 +ICTEXT: 2 +ICTEXT: {{.*}}vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func1Eii:750 +ICTEXT: _ZN8Derived15func1Eii:250 +ICTEXT: 2 +ICTEXT: {{.*}}vtable_prof.cc;_ZN12_GLOBAL__N_18Derived25func2Eii:750 +ICTEXT: _ZN8Derived15func2Eii:250 +ICTEXT: # ValueKind = IPVK_VTableTarget: +ICTEXT: 2 +ICTEXT: # NumValueSites: +ICTEXT: 2 +ICTEXT: 2 +ICTEXT: {{.*}}vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E:750 +ICTEXT: _ZTV8Derived1:250 +ICTEXT: 2 +ICTEXT: {{.*}}vtable_prof.cc;_ZTVN12_GLOBAL__N_18Derived2E:750 +ICTEXT: _ZTV8Derived1:250 diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp index e8ee3c238194e8..0b78564ccea379 100644 --- a/llvm/tools/llvm-profdata/llvm-profdata.cpp +++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp @@ -361,6 +361,9 @@ cl::opt ShowIndirectCallTargets( "ic-targets", cl::init(false), cl::desc("Show indirect call site target values for shown functions"), cl::sub(ShowSubcommand)); +cl::opt ShowVTables("show-vtables", cl::init(false), + 
cl::desc("Show vtable names for shown functions"), + cl::sub(ShowSubcommand)); cl::opt ShowMemOPSizes( "memop-sizes", cl::init(false), cl::desc("Show the profiled sizes of the memory intrinsic calls " @@ -739,6 +742,13 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper, }); } + const InstrProfSymtab &symtab = Reader->getSymtab(); + const auto &VTableNames = symtab.getVTableNames(); + + for (const auto &kv : VTableNames) { + WC->Writer.addVTableName(kv.getKey()); + } + if (Reader->hasTemporalProfile()) { auto &Traces = Reader->getTemporalProfTraces(Input.Weight); if (!Traces.empty()) @@ -2834,6 +2844,10 @@ static int showInstrProfile(ShowFormat SFormat, raw_fd_ostream &OS) { OS << " Indirect Call Site Count: " << Func.getNumValueSites(IPVK_IndirectCallTarget) << "\n"; + if (ShowVTables) + OS << " Number of instrumented vtables: " + << Func.getNumValueSites(IPVK_VTableTarget) << "\n"; + uint32_t NumMemOPCalls = Func.getNumValueSites(IPVK_MemOPSize); if (ShowMemOPSizes && NumMemOPCalls > 0) OS << " Number of Memory Intrinsics Calls: " << NumMemOPCalls @@ -2855,6 +2869,13 @@ static int showInstrProfile(ShowFormat SFormat, raw_fd_ostream &OS) { &(Reader->getSymtab())); } + if (ShowVTables) { + OS << " VTable Results:\n"; + traverseAllValueSites(Func, IPVK_VTableTarget, + VPStats[IPVK_VTableTarget], OS, + &(Reader->getSymtab())); + } + if (ShowMemOPSizes && NumMemOPCalls > 0) { OS << " Memory Intrinsic Size Results:\n"; traverseAllValueSites(Func, IPVK_MemOPSize, VPStats[IPVK_MemOPSize], OS, @@ -2903,6 +2924,11 @@ static int showInstrProfile(ShowFormat SFormat, raw_fd_ostream &OS) { VPStats[IPVK_IndirectCallTarget]); } + if (ShownFunctions && ShowVTables) { + OS << "Statistics for vtable profile:\n"; + showValueSitesStats(OS, IPVK_VTableTarget, VPStats[IPVK_VTableTarget]); + } + if (ShownFunctions && ShowMemOPSizes) { OS << "Statistics for memory intrinsic calls sizes profile:\n"; showValueSitesStats(OS, IPVK_MemOPSize, VPStats[IPVK_MemOPSize]); 
diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp index c9323420bda79b..732f8fd792f8de 100644 --- a/llvm/unittests/ProfileData/InstrProfTest.cpp +++ b/llvm/unittests/ProfileData/InstrProfTest.cpp @@ -638,32 +638,78 @@ TEST_F(InstrProfTest, test_irpgo_read_deprecated_names) { Succeeded()); } +// callee1 to callee6 are from vtable1 to vtable6 respectively. static const char callee1[] = "callee1"; static const char callee2[] = "callee2"; static const char callee3[] = "callee3"; static const char callee4[] = "callee4"; static const char callee5[] = "callee5"; static const char callee6[] = "callee6"; +// callee7 and callee8 are not from any vtables. +static const char callee7[] = "callee7"; +static const char callee8[] = "callee8"; +// 'callee' is primarily used to create multiple-element vtables. +static const char callee[] = "callee"; +static const uint64_t vtable1[] = {uint64_t(callee), uint64_t(callee1)}; +static const uint64_t vtable2[] = {uint64_t(callee2), uint64_t(callee)}; +static const uint64_t vtable3[] = { + uint64_t(callee), + uint64_t(callee3), +}; +static const uint64_t vtable4[] = {uint64_t(callee4), uint64_t(callee)}; +static const uint64_t vtable5[] = {uint64_t(callee5), uint64_t(callee)}; +static const uint64_t vtable6[] = {uint64_t(callee6), uint64_t(callee)}; + +// Returns the address of callee with a numbered suffix in vtable. +static uint64_t getCalleeAddress(const uint64_t *vtableAddr) { + uint64_t CalleeAddr; + // Callee with a numbered suffix is the 2nd element in vtable1 and vtable3, + // and the 1st element in the rest of vtables. 
+ if (vtableAddr == vtable1 || vtableAddr == vtable3) + CalleeAddr = uint64_t(vtableAddr) + 8; + else + CalleeAddr = uint64_t(vtableAddr); + return CalleeAddr; +} -TEST_P(InstrProfReaderWriterTest, icall_data_read_write) { +TEST_P(InstrProfReaderWriterTest, icall_and_vtable_data_read_write) { NamedInstrProfRecord Record1("caller", 0x1234, {1, 2}); - // 4 value sites. - Record1.reserveSites(IPVK_IndirectCallTarget, 4); - InstrProfValueData VD0[] = { - {(uint64_t)callee1, 1}, {(uint64_t)callee2, 2}, {(uint64_t)callee3, 3}}; - Record1.addValueData(IPVK_IndirectCallTarget, 0, VD0, 3, nullptr); - // No value profile data at the second site. - Record1.addValueData(IPVK_IndirectCallTarget, 1, nullptr, 0, nullptr); - InstrProfValueData VD2[] = {{(uint64_t)callee1, 1}, {(uint64_t)callee2, 2}}; - Record1.addValueData(IPVK_IndirectCallTarget, 2, VD2, 2, nullptr); - InstrProfValueData VD3[] = {{(uint64_t)callee1, 1}}; - Record1.addValueData(IPVK_IndirectCallTarget, 3, VD3, 1, nullptr); + // 4 indirect call value sites. + { + Record1.reserveSites(IPVK_IndirectCallTarget, 4); + InstrProfValueData VD0[] = { + {(uint64_t)callee1, 1}, {(uint64_t)callee2, 2}, {(uint64_t)callee3, 3}}; + Record1.addValueData(IPVK_IndirectCallTarget, 0, VD0, 3, nullptr); + // No value profile data at the second site. + Record1.addValueData(IPVK_IndirectCallTarget, 1, nullptr, 0, nullptr); + InstrProfValueData VD2[] = {{(uint64_t)callee1, 1}, {(uint64_t)callee2, 2}}; + Record1.addValueData(IPVK_IndirectCallTarget, 2, VD2, 2, nullptr); + InstrProfValueData VD3[] = {{(uint64_t)callee7, 1}, {(uint64_t)callee8, 2}}; + Record1.addValueData(IPVK_IndirectCallTarget, 3, VD3, 2, nullptr); + } + + // 2 vtable value sites. 
+ { + InstrProfValueData VD0[] = { + {getCalleeAddress(vtable1), 1}, + {getCalleeAddress(vtable2), 2}, + {getCalleeAddress(vtable3), 3}, + }; + InstrProfValueData VD2[] = { + {getCalleeAddress(vtable1), 1}, + {getCalleeAddress(vtable2), 2}, + }; + Record1.addValueData(IPVK_VTableTarget, 0, VD0, 3, nullptr); + Record1.addValueData(IPVK_VTableTarget, 2, VD2, 2, nullptr); + } Writer.addRecord(std::move(Record1), getProfWeight(), Err); Writer.addRecord({"callee1", 0x1235, {3, 4}}, Err); Writer.addRecord({"callee2", 0x1235, {3, 4}}, Err); Writer.addRecord({"callee3", 0x1235, {3, 4}}, Err); + Writer.addRecord({"callee7", 0x1235, {3, 4}}, Err); + Writer.addRecord({"callee8", 0x1235, {3, 4}}, Err); // Set writer value prof data endianness. Writer.setValueProfDataEndianness(getEndianness()); @@ -676,24 +722,63 @@ TEST_P(InstrProfReaderWriterTest, icall_data_read_write) { Expected R = Reader->getInstrProfRecord("caller", 0x1234); ASSERT_THAT_ERROR(R.takeError(), Succeeded()); + + // Test the number of instrumented indirect call sites and the number of + // profiled values at each site. ASSERT_EQ(4U, R->getNumValueSites(IPVK_IndirectCallTarget)); EXPECT_EQ(3U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 0)); EXPECT_EQ(0U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 1)); EXPECT_EQ(2U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 2)); - EXPECT_EQ(1U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 3)); + EXPECT_EQ(2U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 3)); + + // Test the number of instrumented vtable sites and the number of profiled + // values at each site. + ASSERT_EQ(R->getNumValueSites(IPVK_VTableTarget), 2U); + EXPECT_EQ(R->getNumValueDataForSite(IPVK_VTableTarget, 0), 3U); + EXPECT_EQ(R->getNumValueDataForSite(IPVK_VTableTarget, 1), 2U); + + // First indirect site. 
+ { + uint64_t TotalC; + auto VD = R->getValueForSite(IPVK_IndirectCallTarget, 0, &TotalC); + + EXPECT_EQ(VD[0].Count, 3U * getProfWeight()); + EXPECT_EQ(VD[1].Count, 2U * getProfWeight()); + EXPECT_EQ(VD[2].Count, 1U * getProfWeight()); + EXPECT_EQ(TotalC, 6U * getProfWeight()); + + EXPECT_STREQ((const char *)VD[0].Value, "callee3"); + EXPECT_STREQ((const char *)VD[1].Value, "callee2"); + EXPECT_STREQ((const char *)VD[2].Value, "callee1"); + } - uint64_t TotalC; - std::unique_ptr VD = - R->getValueForSite(IPVK_IndirectCallTarget, 0, &TotalC); + // First vtable site. + { + uint64_t TotalC; + auto VD = R->getValueForSite(IPVK_VTableTarget, 0, &TotalC); + + EXPECT_EQ(VD[0].Count, 3U * getProfWeight()); + EXPECT_EQ(VD[1].Count, 2U * getProfWeight()); + EXPECT_EQ(VD[2].Count, 1U * getProfWeight()); + EXPECT_EQ(TotalC, 6U * getProfWeight()); + + EXPECT_EQ(VD[0].Value, getCalleeAddress(vtable3)); + EXPECT_EQ(VD[1].Value, getCalleeAddress(vtable2)); + EXPECT_EQ(VD[2].Value, getCalleeAddress(vtable1)); + } + + // Second vtable site. 
+ { + uint64_t TotalC; + auto VD = R->getValueForSite(IPVK_VTableTarget, 1, &TotalC); - EXPECT_EQ(3U * getProfWeight(), VD[0].Count); - EXPECT_EQ(2U * getProfWeight(), VD[1].Count); - EXPECT_EQ(1U * getProfWeight(), VD[2].Count); - EXPECT_EQ(6U * getProfWeight(), TotalC); + EXPECT_EQ(VD[0].Count, 2U * getProfWeight()); + EXPECT_EQ(VD[1].Count, 1U * getProfWeight()); + EXPECT_EQ(TotalC, 3U * getProfWeight()); - EXPECT_EQ(StringRef((const char *)VD[0].Value, 7), StringRef("callee3")); - EXPECT_EQ(StringRef((const char *)VD[1].Value, 7), StringRef("callee2")); - EXPECT_EQ(StringRef((const char *)VD[2].Value, 7), StringRef("callee1")); + EXPECT_EQ(VD[0].Value, getCalleeAddress(vtable2)); + EXPECT_EQ(VD[1].Value, getCalleeAddress(vtable1)); + } } INSTANTIATE_TEST_SUITE_P( @@ -801,33 +886,53 @@ TEST_P(MaybeSparseInstrProfTest, annotate_vp_data) { ASSERT_EQ(1U, ValueData[3].Count); } -TEST_P(MaybeSparseInstrProfTest, icall_data_merge) { +TEST_P(MaybeSparseInstrProfTest, icall_and_vtable_data_merge) { static const char caller[] = "caller"; NamedInstrProfRecord Record11(caller, 0x1234, {1, 2}); NamedInstrProfRecord Record12(caller, 0x1234, {1, 2}); - // 5 value sites. - Record11.reserveSites(IPVK_IndirectCallTarget, 5); - InstrProfValueData VD0[] = {{uint64_t(callee1), 1}, - {uint64_t(callee2), 2}, - {uint64_t(callee3), 3}, - {uint64_t(callee4), 4}}; - Record11.addValueData(IPVK_IndirectCallTarget, 0, VD0, 4, nullptr); + // 5 value sites for indirect calls. + { + Record11.reserveSites(IPVK_IndirectCallTarget, 5); + InstrProfValueData VD0[] = {{uint64_t(callee1), 1}, + {uint64_t(callee2), 2}, + {uint64_t(callee3), 3}, + {uint64_t(callee4), 4}}; + Record11.addValueData(IPVK_IndirectCallTarget, 0, VD0, 4, nullptr); - // No value profile data at the second site. - Record11.addValueData(IPVK_IndirectCallTarget, 1, nullptr, 0, nullptr); + // No value profile data at the second site. 
+ Record11.addValueData(IPVK_IndirectCallTarget, 1, nullptr, 0, nullptr); - InstrProfValueData VD2[] = { - {uint64_t(callee1), 1}, {uint64_t(callee2), 2}, {uint64_t(callee3), 3}}; - Record11.addValueData(IPVK_IndirectCallTarget, 2, VD2, 3, nullptr); + InstrProfValueData VD2[] = { + {uint64_t(callee1), 1}, {uint64_t(callee2), 2}, {uint64_t(callee3), 3}}; + Record11.addValueData(IPVK_IndirectCallTarget, 2, VD2, 3, nullptr); - InstrProfValueData VD3[] = {{uint64_t(callee1), 1}}; - Record11.addValueData(IPVK_IndirectCallTarget, 3, VD3, 1, nullptr); + InstrProfValueData VD3[] = {{uint64_t(callee7), 1}, {uint64_t(callee8), 2}}; + Record11.addValueData(IPVK_IndirectCallTarget, 3, VD3, 2, nullptr); - InstrProfValueData VD4[] = {{uint64_t(callee1), 1}, - {uint64_t(callee2), 2}, - {uint64_t(callee3), 3}}; - Record11.addValueData(IPVK_IndirectCallTarget, 4, VD4, 3, nullptr); + InstrProfValueData VD4[] = { + {uint64_t(callee1), 1}, {uint64_t(callee2), 2}, {uint64_t(callee3), 3}}; + Record11.addValueData(IPVK_IndirectCallTarget, 4, VD4, 3, nullptr); + } + // 3 value sites for vtables. + { + Record11.reserveSites(IPVK_VTableTarget, 3); + InstrProfValueData VD0[] = {{getCalleeAddress(vtable1), 1}, + {getCalleeAddress(vtable2), 2}, + {getCalleeAddress(vtable3), 3}, + {getCalleeAddress(vtable4), 4}}; + Record11.addValueData(IPVK_VTableTarget, 0, VD0, 4, nullptr); + + InstrProfValueData VD2[] = {{getCalleeAddress(vtable1), 1}, + {getCalleeAddress(vtable2), 2}, + {getCalleeAddress(vtable3), 3}}; + Record11.addValueData(IPVK_VTableTarget, 1, VD2, 3, nullptr); + + InstrProfValueData VD4[] = {{getCalleeAddress(vtable1), 1}, + {getCalleeAddress(vtable2), 2}, + {getCalleeAddress(vtable3), 3}}; + Record11.addValueData(IPVK_VTableTarget, 3, VD4, 3, nullptr); + } // A different record for the same caller. 
Record12.reserveSites(IPVK_IndirectCallTarget, 5); @@ -843,11 +948,28 @@ TEST_P(MaybeSparseInstrProfTest, icall_data_merge) { Record12.addValueData(IPVK_IndirectCallTarget, 3, nullptr, 0, nullptr); - InstrProfValueData VD42[] = {{uint64_t(callee1), 1}, - {uint64_t(callee2), 2}, - {uint64_t(callee3), 3}}; + InstrProfValueData VD42[] = { + {uint64_t(callee1), 1}, {uint64_t(callee2), 2}, {uint64_t(callee3), 3}}; Record12.addValueData(IPVK_IndirectCallTarget, 4, VD42, 3, nullptr); + // 3 value sites for vtables. + { + Record12.reserveSites(IPVK_VTableTarget, 3); + InstrProfValueData VD0[] = {{getCalleeAddress(vtable2), 5}, + {getCalleeAddress(vtable3), 3}}; + Record12.addValueData(IPVK_VTableTarget, 0, VD0, 2, nullptr); + + InstrProfValueData VD2[] = {{getCalleeAddress(vtable2), 1}, + {getCalleeAddress(vtable3), 3}, + {getCalleeAddress(vtable4), 4}}; + Record12.addValueData(IPVK_VTableTarget, 1, VD2, 3, nullptr); + + InstrProfValueData VD4[] = {{getCalleeAddress(vtable1), 1}, + {getCalleeAddress(vtable2), 2}, + {getCalleeAddress(vtable3), 3}}; + Record12.addValueData(IPVK_VTableTarget, 3, VD4, 3, nullptr); + } + Writer.addRecord(std::move(Record11), Err); // Merge profile data. Writer.addRecord(std::move(Record12), Err); @@ -857,53 +979,95 @@ TEST_P(MaybeSparseInstrProfTest, icall_data_merge) { Writer.addRecord({callee3, 0x1235, {3, 4}}, Err); Writer.addRecord({callee3, 0x1235, {3, 4}}, Err); Writer.addRecord({callee4, 0x1235, {3, 5}}, Err); + Writer.addRecord({callee7, 0x1235, {3, 5}}, Err); + Writer.addRecord({callee8, 0x1235, {3, 5}}, Err); auto Profile = Writer.writeBuffer(); readProfile(std::move(Profile)); + // Test the number of instrumented value sites and the number of profiled + // values for each site. Expected R = Reader->getInstrProfRecord("caller", 0x1234); EXPECT_THAT_ERROR(R.takeError(), Succeeded()); + // For indirect calls. 
ASSERT_EQ(5U, R->getNumValueSites(IPVK_IndirectCallTarget)); ASSERT_EQ(4U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 0)); ASSERT_EQ(0U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 1)); ASSERT_EQ(4U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 2)); - ASSERT_EQ(1U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 3)); + ASSERT_EQ(2U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 3)); ASSERT_EQ(3U, R->getNumValueDataForSite(IPVK_IndirectCallTarget, 4)); + // For vtables. + ASSERT_EQ(R->getNumValueSites(IPVK_VTableTarget), 3U); + ASSERT_EQ(R->getNumValueDataForSite(IPVK_VTableTarget, 0), 4U); + ASSERT_EQ(R->getNumValueDataForSite(IPVK_VTableTarget, 1), 4U); + ASSERT_EQ(R->getNumValueDataForSite(IPVK_VTableTarget, 2), 3U); + + // Test the merged values for indirect calls. + { + auto VD = R->getValueForSite(IPVK_IndirectCallTarget, 0); + EXPECT_STREQ((const char *)VD[0].Value, "callee2"); + EXPECT_EQ(VD[0].Count, 7U); + EXPECT_STREQ((const char *)VD[1].Value, "callee3"); + EXPECT_EQ(VD[1].Count, 6U); + EXPECT_STREQ((const char *)VD[2].Value, "callee4"); + EXPECT_EQ(VD[2].Count, 4U); + EXPECT_STREQ((const char *)VD[3].Value, "callee1"); + EXPECT_EQ(VD[3].Count, 1U); + + auto VD_2(R->getValueForSite(IPVK_IndirectCallTarget, 2)); + EXPECT_STREQ((const char *)VD_2[0].Value, "callee3"); + EXPECT_EQ(VD_2[0].Count, 6U); + EXPECT_STREQ((const char *)VD_2[1].Value, "callee4"); + EXPECT_EQ(VD_2[1].Count, 4U); + EXPECT_STREQ((const char *)VD_2[2].Value, "callee2"); + EXPECT_EQ(VD_2[2].Count, 3U); + EXPECT_STREQ((const char *)VD_2[3].Value, "callee1"); + EXPECT_EQ(VD_2[3].Count, 1U); + + auto VD_3(R->getValueForSite(IPVK_IndirectCallTarget, 3)); + EXPECT_STREQ((const char *)VD_3[0].Value, "callee8"); + EXPECT_EQ(VD_3[0].Count, 2U); + EXPECT_STREQ((const char *)VD_3[1].Value, "callee7"); + EXPECT_EQ(VD_3[1].Count, 1U); + + auto VD_4(R->getValueForSite(IPVK_IndirectCallTarget, 4)); + EXPECT_STREQ((const char *)VD_4[0].Value, "callee3"); + 
EXPECT_EQ(VD_4[0].Count, 6U); + EXPECT_STREQ((const char *)VD_4[1].Value, "callee2"); + EXPECT_EQ(VD_4[1].Count, 4U); + EXPECT_STREQ((const char *)VD_4[2].Value, "callee1"); + EXPECT_EQ(VD_4[2].Count, 2U); + } - std::unique_ptr VD = - R->getValueForSite(IPVK_IndirectCallTarget, 0); - ASSERT_EQ(StringRef((const char *)VD[0].Value, 7), StringRef("callee2")); - ASSERT_EQ(7U, VD[0].Count); - ASSERT_EQ(StringRef((const char *)VD[1].Value, 7), StringRef("callee3")); - ASSERT_EQ(6U, VD[1].Count); - ASSERT_EQ(StringRef((const char *)VD[2].Value, 7), StringRef("callee4")); - ASSERT_EQ(4U, VD[2].Count); - ASSERT_EQ(StringRef((const char *)VD[3].Value, 7), StringRef("callee1")); - ASSERT_EQ(1U, VD[3].Count); - - std::unique_ptr VD_2( - R->getValueForSite(IPVK_IndirectCallTarget, 2)); - ASSERT_EQ(StringRef((const char *)VD_2[0].Value, 7), StringRef("callee3")); - ASSERT_EQ(6U, VD_2[0].Count); - ASSERT_EQ(StringRef((const char *)VD_2[1].Value, 7), StringRef("callee4")); - ASSERT_EQ(4U, VD_2[1].Count); - ASSERT_EQ(StringRef((const char *)VD_2[2].Value, 7), StringRef("callee2")); - ASSERT_EQ(3U, VD_2[2].Count); - ASSERT_EQ(StringRef((const char *)VD_2[3].Value, 7), StringRef("callee1")); - ASSERT_EQ(1U, VD_2[3].Count); - - std::unique_ptr VD_3( - R->getValueForSite(IPVK_IndirectCallTarget, 3)); - ASSERT_EQ(StringRef((const char *)VD_3[0].Value, 7), StringRef("callee1")); - ASSERT_EQ(1U, VD_3[0].Count); - - std::unique_ptr VD_4( - R->getValueForSite(IPVK_IndirectCallTarget, 4)); - ASSERT_EQ(StringRef((const char *)VD_4[0].Value, 7), StringRef("callee3")); - ASSERT_EQ(6U, VD_4[0].Count); - ASSERT_EQ(StringRef((const char *)VD_4[1].Value, 7), StringRef("callee2")); - ASSERT_EQ(4U, VD_4[1].Count); - ASSERT_EQ(StringRef((const char *)VD_4[2].Value, 7), StringRef("callee1")); - ASSERT_EQ(2U, VD_4[2].Count); + // Test the merged values for vtables + { + auto VD0 = R->getValueForSite(IPVK_VTableTarget, 0); + EXPECT_EQ(VD0[0].Value, getCalleeAddress(vtable2)); + EXPECT_EQ(VD0[0].Count, 
7U); + EXPECT_EQ(VD0[1].Value, getCalleeAddress(vtable3)); + EXPECT_EQ(VD0[1].Count, 6U); + EXPECT_EQ(VD0[2].Value, getCalleeAddress(vtable4)); + EXPECT_EQ(VD0[2].Count, 4U); + EXPECT_EQ(VD0[3].Value, getCalleeAddress(vtable1)); + EXPECT_EQ(VD0[3].Count, 1U); + + auto VD1 = R->getValueForSite(IPVK_VTableTarget, 1); + EXPECT_EQ(VD1[0].Value, getCalleeAddress(vtable3)); + EXPECT_EQ(VD1[0].Count, 6U); + EXPECT_EQ(VD1[1].Value, getCalleeAddress(vtable4)); + EXPECT_EQ(VD1[1].Count, 4U); + EXPECT_EQ(VD1[2].Value, getCalleeAddress(vtable2)); + EXPECT_EQ(VD1[2].Count, 3U); + EXPECT_EQ(VD1[3].Value, getCalleeAddress(vtable1)); + EXPECT_EQ(VD1[3].Count, 1U); + + auto VD2 = R->getValueForSite(IPVK_VTableTarget, 2); + EXPECT_EQ(VD2[0].Value, getCalleeAddress(vtable3)); + EXPECT_EQ(VD2[0].Count, 6U); + EXPECT_EQ(VD2[1].Value, getCalleeAddress(vtable2)); + EXPECT_EQ(VD2[1].Count, 4U); + EXPECT_EQ(VD2[2].Value, getCalleeAddress(vtable1)); + EXPECT_EQ(VD2[2].Count, 2U); + } } struct ValueProfileMergeEdgeCaseTest @@ -1027,30 +1191,62 @@ INSTANTIATE_TEST_SUITE_P( EdgeCaseTest, ValueProfileMergeEdgeCaseTest, ::testing::Combine(::testing::Bool(), /* Sparse */ ::testing::Values(IPVK_IndirectCallTarget, - IPVK_MemOPSize) /* ValueKind */ + IPVK_MemOPSize, + IPVK_VTableTarget) /* ValueKind */ )); static void addValueProfData(InstrProfRecord &Record) { - Record.reserveSites(IPVK_IndirectCallTarget, 5); - InstrProfValueData VD0[] = {{uint64_t(callee1), 400}, - {uint64_t(callee2), 1000}, - {uint64_t(callee3), 500}, - {uint64_t(callee4), 300}, - {uint64_t(callee5), 100}}; - Record.addValueData(IPVK_IndirectCallTarget, 0, VD0, 5, nullptr); - InstrProfValueData VD1[] = {{uint64_t(callee5), 800}, - {uint64_t(callee3), 1000}, - {uint64_t(callee2), 2500}, - {uint64_t(callee1), 1300}}; - Record.addValueData(IPVK_IndirectCallTarget, 1, VD1, 4, nullptr); - InstrProfValueData VD2[] = {{uint64_t(callee6), 800}, - {uint64_t(callee3), 1000}, - {uint64_t(callee4), 5500}}; - 
Record.addValueData(IPVK_IndirectCallTarget, 2, VD2, 3, nullptr); - InstrProfValueData VD3[] = {{uint64_t(callee2), 1800}, - {uint64_t(callee3), 2000}}; - Record.addValueData(IPVK_IndirectCallTarget, 3, VD3, 2, nullptr); - Record.addValueData(IPVK_IndirectCallTarget, 4, nullptr, 0, nullptr); + // Add test data for indirect calls. + { + Record.reserveSites(IPVK_IndirectCallTarget, 6); + InstrProfValueData VD0[] = {{uint64_t(callee1), 400}, + {uint64_t(callee2), 1000}, + {uint64_t(callee3), 500}, + {uint64_t(callee4), 300}, + {uint64_t(callee5), 100}}; + Record.addValueData(IPVK_IndirectCallTarget, 0, VD0, 5, nullptr); + InstrProfValueData VD1[] = {{uint64_t(callee5), 800}, + {uint64_t(callee3), 1000}, + {uint64_t(callee2), 2500}, + {uint64_t(callee1), 1300}}; + Record.addValueData(IPVK_IndirectCallTarget, 1, VD1, 4, nullptr); + InstrProfValueData VD2[] = {{uint64_t(callee6), 800}, + {uint64_t(callee3), 1000}, + {uint64_t(callee4), 5500}}; + Record.addValueData(IPVK_IndirectCallTarget, 2, VD2, 3, nullptr); + InstrProfValueData VD3[] = {{uint64_t(callee2), 1800}, + {uint64_t(callee3), 2000}}; + Record.addValueData(IPVK_IndirectCallTarget, 3, VD3, 2, nullptr); + Record.addValueData(IPVK_IndirectCallTarget, 4, nullptr, 0, nullptr); + InstrProfValueData VD5[] = {{uint64_t(callee7), 1234}, + {uint64_t(callee8), 5678}}; + Record.addValueData(IPVK_IndirectCallTarget, 5, VD5, 2, nullptr); + } + + // Add test data for vtables + { + Record.reserveSites(IPVK_VTableTarget, 4); + InstrProfValueData VD0[] = { + {getCalleeAddress(vtable1), 400}, {getCalleeAddress(vtable2), 1000}, + {getCalleeAddress(vtable3), 500}, {getCalleeAddress(vtable4), 300}, + {getCalleeAddress(vtable5), 100}, + }; + InstrProfValueData VD1[] = {{getCalleeAddress(vtable5), 800}, + {getCalleeAddress(vtable3), 1000}, + {getCalleeAddress(vtable2), 2500}, + {getCalleeAddress(vtable1), 1300}}; + InstrProfValueData VD2[] = { + {getCalleeAddress(vtable6), 800}, + {getCalleeAddress(vtable3), 1000}, + 
{getCalleeAddress(vtable4), 5500}, + }; + InstrProfValueData VD3[] = {{getCalleeAddress(vtable2), 1800}, + {getCalleeAddress(vtable3), 2000}}; + Record.addValueData(IPVK_VTableTarget, 0, VD0, 5, nullptr); + Record.addValueData(IPVK_VTableTarget, 1, VD1, 4, nullptr); + Record.addValueData(IPVK_VTableTarget, 2, VD2, 3, nullptr); + Record.addValueData(IPVK_VTableTarget, 3, VD3, 2, nullptr); + } } TEST(ValueProfileReadWriteTest, value_prof_data_read_write) { @@ -1063,59 +1259,107 @@ TEST(ValueProfileReadWriteTest, value_prof_data_read_write) { VPData->deserializeTo(Record, nullptr); // Now read data from Record and sanity check the data - ASSERT_EQ(5U, Record.getNumValueSites(IPVK_IndirectCallTarget)); + ASSERT_EQ(6U, Record.getNumValueSites(IPVK_IndirectCallTarget)); ASSERT_EQ(5U, Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 0)); ASSERT_EQ(4U, Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 1)); ASSERT_EQ(3U, Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 2)); ASSERT_EQ(2U, Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 3)); ASSERT_EQ(0U, Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 4)); + ASSERT_EQ(2U, Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 5)); auto Cmp = [](const InstrProfValueData &VD1, const InstrProfValueData &VD2) { return VD1.Count > VD2.Count; }; + std::unique_ptr VD_0( Record.getValueForSite(IPVK_IndirectCallTarget, 0)); llvm::sort(&VD_0[0], &VD_0[5], Cmp); - ASSERT_EQ(StringRef((const char *)VD_0[0].Value, 7), StringRef("callee2")); - ASSERT_EQ(1000U, VD_0[0].Count); - ASSERT_EQ(StringRef((const char *)VD_0[1].Value, 7), StringRef("callee3")); - ASSERT_EQ(500U, VD_0[1].Count); - ASSERT_EQ(StringRef((const char *)VD_0[2].Value, 7), StringRef("callee1")); - ASSERT_EQ(400U, VD_0[2].Count); - ASSERT_EQ(StringRef((const char *)VD_0[3].Value, 7), StringRef("callee4")); - ASSERT_EQ(300U, VD_0[3].Count); - ASSERT_EQ(StringRef((const char *)VD_0[4].Value, 7), StringRef("callee5")); - ASSERT_EQ(100U, 
VD_0[4].Count); + EXPECT_STREQ((const char *)VD_0[0].Value, "callee2"); + EXPECT_EQ(1000U, VD_0[0].Count); + EXPECT_STREQ((const char *)VD_0[1].Value, "callee3"); + EXPECT_EQ(500U, VD_0[1].Count); + EXPECT_STREQ((const char *)VD_0[2].Value, "callee1"); + EXPECT_EQ(400U, VD_0[2].Count); + EXPECT_STREQ((const char *)VD_0[3].Value, "callee4"); + EXPECT_EQ(300U, VD_0[3].Count); + EXPECT_STREQ((const char *)VD_0[4].Value, "callee5"); + EXPECT_EQ(100U, VD_0[4].Count); std::unique_ptr VD_1( Record.getValueForSite(IPVK_IndirectCallTarget, 1)); llvm::sort(&VD_1[0], &VD_1[4], Cmp); - ASSERT_EQ(StringRef((const char *)VD_1[0].Value, 7), StringRef("callee2")); - ASSERT_EQ(2500U, VD_1[0].Count); - ASSERT_EQ(StringRef((const char *)VD_1[1].Value, 7), StringRef("callee1")); - ASSERT_EQ(1300U, VD_1[1].Count); - ASSERT_EQ(StringRef((const char *)VD_1[2].Value, 7), StringRef("callee3")); - ASSERT_EQ(1000U, VD_1[2].Count); - ASSERT_EQ(StringRef((const char *)VD_1[3].Value, 7), StringRef("callee5")); - ASSERT_EQ(800U, VD_1[3].Count); + EXPECT_STREQ((const char *)VD_1[0].Value, "callee2"); + EXPECT_EQ(VD_1[0].Count, 2500U); + EXPECT_STREQ((const char *)VD_1[1].Value, "callee1"); + EXPECT_EQ(VD_1[1].Count, 1300U); + EXPECT_STREQ((const char *)VD_1[2].Value, "callee3"); + EXPECT_EQ(VD_1[2].Count, 1000U); + EXPECT_STREQ((const char *)VD_1[3].Value, "callee5"); + EXPECT_EQ(VD_1[3].Count, 800U); std::unique_ptr VD_2( Record.getValueForSite(IPVK_IndirectCallTarget, 2)); llvm::sort(&VD_2[0], &VD_2[3], Cmp); - ASSERT_EQ(StringRef((const char *)VD_2[0].Value, 7), StringRef("callee4")); - ASSERT_EQ(5500U, VD_2[0].Count); - ASSERT_EQ(StringRef((const char *)VD_2[1].Value, 7), StringRef("callee3")); - ASSERT_EQ(1000U, VD_2[1].Count); - ASSERT_EQ(StringRef((const char *)VD_2[2].Value, 7), StringRef("callee6")); - ASSERT_EQ(800U, VD_2[2].Count); + EXPECT_STREQ((const char *)VD_2[0].Value, "callee4"); + EXPECT_EQ(VD_2[0].Count, 5500U); + EXPECT_STREQ((const char *)VD_2[1].Value, "callee3"); + 
EXPECT_EQ(VD_2[1].Count, 1000U); + EXPECT_STREQ((const char *)VD_2[2].Value, "callee6"); + EXPECT_EQ(VD_2[2].Count, 800U); std::unique_ptr VD_3( Record.getValueForSite(IPVK_IndirectCallTarget, 3)); llvm::sort(&VD_3[0], &VD_3[2], Cmp); - ASSERT_EQ(StringRef((const char *)VD_3[0].Value, 7), StringRef("callee3")); - ASSERT_EQ(2000U, VD_3[0].Count); - ASSERT_EQ(StringRef((const char *)VD_3[1].Value, 7), StringRef("callee2")); - ASSERT_EQ(1800U, VD_3[1].Count); + EXPECT_STREQ((const char *)VD_3[0].Value, "callee3"); + EXPECT_EQ(VD_3[0].Count, 2000U); + EXPECT_STREQ((const char *)VD_3[1].Value, "callee2"); + EXPECT_EQ(VD_3[1].Count, 1800U); + + ASSERT_EQ(Record.getNumValueSites(IPVK_VTableTarget), 4U); + ASSERT_EQ(Record.getNumValueDataForSite(IPVK_VTableTarget, 0), 5U); + ASSERT_EQ(Record.getNumValueDataForSite(IPVK_VTableTarget, 1), 4U); + ASSERT_EQ(Record.getNumValueDataForSite(IPVK_VTableTarget, 2), 3U); + ASSERT_EQ(Record.getNumValueDataForSite(IPVK_VTableTarget, 3), 2U); + + auto VD0(Record.getValueForSite(IPVK_VTableTarget, 0)); + llvm::sort(&VD0[0], &VD0[5], Cmp); + EXPECT_EQ(VD0[0].Value, getCalleeAddress(vtable2)); + EXPECT_EQ(VD0[0].Count, 1000U); + EXPECT_EQ(VD0[1].Value, getCalleeAddress(vtable3)); + EXPECT_EQ(VD0[1].Count, 500U); + EXPECT_EQ(VD0[2].Value, getCalleeAddress(vtable1)); + EXPECT_EQ(VD0[2].Count, 400U); + EXPECT_EQ(VD0[3].Value, getCalleeAddress(vtable4)); + EXPECT_EQ(VD0[3].Count, 300U); + EXPECT_EQ(VD0[4].Value, getCalleeAddress(vtable5)); + EXPECT_EQ(VD0[4].Count, 100U); + + auto VD1(Record.getValueForSite(IPVK_VTableTarget, 1)); + llvm::sort(&VD1[0], &VD1[4], Cmp); + EXPECT_EQ(VD1[0].Value, getCalleeAddress(vtable2)); + EXPECT_EQ(VD1[0].Count, 2500U); + EXPECT_EQ(VD1[1].Value, getCalleeAddress(vtable1)); + EXPECT_EQ(VD1[1].Count, 1300U); + EXPECT_EQ(VD1[2].Value, getCalleeAddress(vtable3)); + EXPECT_EQ(VD1[2].Count, 1000U); + EXPECT_EQ(VD1[3].Value, getCalleeAddress(vtable5)); + EXPECT_EQ(VD1[3].Count, 800U); + + auto 
VD2(Record.getValueForSite(IPVK_VTableTarget, 2)); + llvm::sort(&VD2[0], &VD2[3], Cmp); + EXPECT_EQ(VD2[0].Value, getCalleeAddress(vtable4)); + EXPECT_EQ(VD2[0].Count, 5500U); + EXPECT_EQ(VD2[1].Value, getCalleeAddress(vtable3)); + EXPECT_EQ(VD2[1].Count, 1000U); + EXPECT_EQ(VD2[2].Value, getCalleeAddress(vtable6)); + EXPECT_EQ(VD2[2].Count, 800U); + + auto VD3(Record.getValueForSite(IPVK_VTableTarget, 3)); + llvm::sort(&VD3[0], &VD3[2], Cmp); + EXPECT_EQ(VD3[0].Value, getCalleeAddress(vtable3)); + EXPECT_EQ(VD3[0].Count, 2000U); + EXPECT_EQ(VD3[1].Value, getCalleeAddress(vtable2)); + EXPECT_EQ(VD3[1].Count, 1800U); } TEST(ValueProfileReadWriteTest, symtab_mapping) { @@ -1132,27 +1376,121 @@ TEST(ValueProfileReadWriteTest, symtab_mapping) { Symtab.mapAddress(uint64_t(callee4), 0x4000ULL); // Missing mapping for callee5 + auto getVTableStartAddr = [](const uint64_t *vtable) -> uint64_t { + return uint64_t(vtable); + }; + auto getVTableEndAddr = [](const uint64_t *vtable) -> uint64_t { + return uint64_t(vtable) + 16; + }; + auto getVTableMidAddr = [](const uint64_t *vtable) -> uint64_t { + return uint64_t(vtable) + 8; + }; + // vtable1, vtable2, vtable3, vtable4 get mapped; vtable5, vtable6 are not + // mapped. 
+ Symtab.mapVTableAddress(getVTableStartAddr(vtable1), + getVTableEndAddr(vtable1), MD5Hash("vtable1")); + Symtab.mapVTableAddress(getVTableStartAddr(vtable2), + getVTableEndAddr(vtable2), MD5Hash("vtable2")); + Symtab.mapVTableAddress(getVTableStartAddr(vtable3), + getVTableEndAddr(vtable3), MD5Hash("vtable3")); + Symtab.mapVTableAddress(getVTableStartAddr(vtable4), + getVTableEndAddr(vtable4), MD5Hash("vtable4")); + VPData->deserializeTo(Record, &Symtab); // Now read data from Record and sanity check the data - ASSERT_EQ(5U, Record.getNumValueSites(IPVK_IndirectCallTarget)); - ASSERT_EQ(5U, Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 0)); + ASSERT_EQ(Record.getNumValueSites(IPVK_IndirectCallTarget), 6U); + ASSERT_EQ(Record.getNumValueDataForSite(IPVK_IndirectCallTarget, 0), 5U); + + // Look up the value correpsonding to the middle of a vtable in symtab and + // test that it's the hash of the name. + EXPECT_EQ(Symtab.getVTableHashFromAddress(getVTableMidAddr(vtable1)), + MD5Hash("vtable1")); + EXPECT_EQ(Symtab.getVTableHashFromAddress(getVTableMidAddr(vtable2)), + MD5Hash("vtable2")); + EXPECT_EQ(Symtab.getVTableHashFromAddress(getVTableMidAddr(vtable3)), + MD5Hash("vtable3")); + EXPECT_EQ(Symtab.getVTableHashFromAddress(getVTableMidAddr(vtable4)), + MD5Hash("vtable4")); auto Cmp = [](const InstrProfValueData &VD1, const InstrProfValueData &VD2) { return VD1.Count > VD2.Count; }; - std::unique_ptr VD_0( - Record.getValueForSite(IPVK_IndirectCallTarget, 0)); + auto VD_0(Record.getValueForSite(IPVK_IndirectCallTarget, 0)); llvm::sort(&VD_0[0], &VD_0[5], Cmp); ASSERT_EQ(VD_0[0].Value, 0x2000ULL); - ASSERT_EQ(1000U, VD_0[0].Count); + ASSERT_EQ(VD_0[0].Count, 1000U); ASSERT_EQ(VD_0[1].Value, 0x3000ULL); - ASSERT_EQ(500U, VD_0[1].Count); + ASSERT_EQ(VD_0[1].Count, 500U); ASSERT_EQ(VD_0[2].Value, 0x1000ULL); - ASSERT_EQ(400U, VD_0[2].Count); + ASSERT_EQ(VD_0[2].Count, 400U); // callee5 does not have a mapped value -- default to 0. 
ASSERT_EQ(VD_0[4].Value, 0ULL); + + // Sanity check the vtable value data + ASSERT_EQ(Record.getNumValueSites(IPVK_VTableTarget), 4U); + + { + // The first vtable site. + auto VD(Record.getValueForSite(IPVK_VTableTarget, 0)); + ASSERT_EQ(Record.getNumValueDataForSite(IPVK_VTableTarget, 0), 5U); + llvm::sort(&VD[0], &VD[5], Cmp); + EXPECT_EQ(VD[0].Count, 1000U); + EXPECT_EQ(VD[0].Value, MD5Hash("vtable2")); + EXPECT_EQ(VD[1].Count, 500U); + EXPECT_EQ(VD[1].Value, MD5Hash("vtable3")); + EXPECT_EQ(VD[2].Value, MD5Hash("vtable1")); + EXPECT_EQ(VD[2].Count, 400U); + EXPECT_EQ(VD[3].Value, MD5Hash("vtable4")); + EXPECT_EQ(VD[3].Count, 300U); + + // vtable5 isn't mapped -- default to 0. + EXPECT_EQ(VD[4].Value, 0U); + EXPECT_EQ(VD[4].Count, 100U); + } + + { + // The second vtable site. + auto VD(Record.getValueForSite(IPVK_VTableTarget, 1)); + ASSERT_EQ(Record.getNumValueDataForSite(IPVK_VTableTarget, 1), 4U); + llvm::sort(&VD[0], &VD[4], Cmp); + EXPECT_EQ(VD[0].Value, MD5Hash("vtable2")); + EXPECT_EQ(VD[0].Count, 2500U); + EXPECT_EQ(VD[1].Value, MD5Hash("vtable1")); + EXPECT_EQ(VD[1].Count, 1300U); + + EXPECT_EQ(VD[2].Value, MD5Hash("vtable3")); + EXPECT_EQ(VD[2].Count, 1000U); + // vtable5 isn't mapped -- default to 0. + EXPECT_EQ(VD[3].Value, 0U); + EXPECT_EQ(VD[3].Count, 800U); + } + + { + // The third vtable site. + auto VD(Record.getValueForSite(IPVK_VTableTarget, 2)); + ASSERT_EQ(Record.getNumValueDataForSite(IPVK_VTableTarget, 2), 3U); + llvm::sort(&VD[0], &VD[3], Cmp); + EXPECT_EQ(VD[0].Count, 5500U); + EXPECT_EQ(VD[0].Value, MD5Hash("vtable4")); + EXPECT_EQ(VD[1].Count, 1000U); + EXPECT_EQ(VD[1].Value, MD5Hash("vtable3")); + // vtable6 isn't mapped -- default to 0. + EXPECT_EQ(VD[2].Value, 0U); + EXPECT_EQ(VD[2].Count, 800U); + } + + { + // The fourth vtable site. 
+ auto VD(Record.getValueForSite(IPVK_VTableTarget, 3)); + ASSERT_EQ(Record.getNumValueDataForSite(IPVK_VTableTarget, 3), 2U); + llvm::sort(&VD[0], &VD[2], Cmp); + EXPECT_EQ(VD[0].Count, 2000U); + EXPECT_EQ(VD[0].Value, MD5Hash("vtable3")); + EXPECT_EQ(VD[1].Count, 1800U); + EXPECT_EQ(VD[1].Value, MD5Hash("vtable2")); + } } TEST_P(MaybeSparseInstrProfTest, get_max_function_count) { @@ -1278,13 +1616,13 @@ TEST(SymtabTest, instr_prof_symtab_module_test) { std::string IRPGOName = getIRPGOFuncName(*F); auto IRPGOFuncName = ProfSymtab.getFuncOrVarName(IndexedInstrProf::ComputeHash(IRPGOName)); - EXPECT_EQ(StringRef(IRPGOName), IRPGOFuncName); - EXPECT_EQ(StringRef(Funcs[I]), getParsedIRPGOName(IRPGOFuncName).second); + EXPECT_EQ(IRPGOName, IRPGOFuncName); + EXPECT_EQ(Funcs[I], getParsedIRPGOName(IRPGOFuncName).second); // Ensure we can still read this old record name. std::string PGOName = getPGOFuncName(*F); auto PGOFuncName = ProfSymtab.getFuncOrVarName(IndexedInstrProf::ComputeHash(PGOName)); - EXPECT_EQ(StringRef(PGOName), PGOFuncName); + EXPECT_EQ(PGOName, PGOFuncName); EXPECT_THAT(PGOFuncName.str(), EndsWith(Funcs[I].str())); } } From 985c1a44f8d49e0afeba907fe29d881c19b319fc Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Mon, 1 Apr 2024 18:21:51 +0200 Subject: [PATCH 017/201] [libc++] Optimize the two range overload of mismatch (#86853) ``` ----------------------------------------------------------------------------- Benchmark old new ----------------------------------------------------------------------------- bm_mismatch_two_range_overload/1 0.941 ns 1.88 ns bm_mismatch_two_range_overload/2 1.43 ns 2.15 ns bm_mismatch_two_range_overload/3 1.95 ns 2.55 ns bm_mismatch_two_range_overload/4 2.58 ns 2.90 ns bm_mismatch_two_range_overload/5 3.75 ns 3.31 ns bm_mismatch_two_range_overload/6 5.00 ns 3.83 ns bm_mismatch_two_range_overload/7 5.59 ns 4.35 ns bm_mismatch_two_range_overload/8 6.37 ns 4.84 ns bm_mismatch_two_range_overload/16 11.8 ns 6.72 ns 
bm_mismatch_two_range_overload/64 45.5 ns 2.59 ns bm_mismatch_two_range_overload/512 366 ns 12.6 ns bm_mismatch_two_range_overload/4096 2890 ns 91.6 ns bm_mismatch_two_range_overload/32768 23038 ns 758 ns bm_mismatch_two_range_overload/262144 142813 ns 6573 ns bm_mismatch_two_range_overload/1048576 366679 ns 26710 ns bm_mismatch_two_range_overload/1 0.934 ns 1.88 ns bm_mismatch_two_range_overload/2 1.30 ns 2.58 ns bm_mismatch_two_range_overload/3 1.76 ns 3.28 ns bm_mismatch_two_range_overload/4 2.24 ns 3.98 ns bm_mismatch_two_range_overload/5 2.80 ns 4.92 ns bm_mismatch_two_range_overload/6 3.58 ns 6.01 ns bm_mismatch_two_range_overload/7 4.29 ns 7.03 ns bm_mismatch_two_range_overload/8 4.67 ns 7.39 ns bm_mismatch_two_range_overload/16 9.86 ns 13.1 ns bm_mismatch_two_range_overload/64 38.9 ns 4.55 ns bm_mismatch_two_range_overload/512 348 ns 27.7 ns bm_mismatch_two_range_overload/4096 2881 ns 225 ns bm_mismatch_two_range_overload/32768 23111 ns 1715 ns bm_mismatch_two_range_overload/262144 184846 ns 14416 ns bm_mismatch_two_range_overload/1048576 742885 ns 57264 ns bm_mismatch_two_range_overload/1 0.838 ns 1.19 ns bm_mismatch_two_range_overload/2 1.19 ns 1.65 ns bm_mismatch_two_range_overload/3 1.83 ns 2.06 ns bm_mismatch_two_range_overload/4 2.38 ns 2.42 ns bm_mismatch_two_range_overload/5 3.60 ns 2.47 ns bm_mismatch_two_range_overload/6 3.68 ns 3.05 ns bm_mismatch_two_range_overload/7 4.32 ns 3.36 ns bm_mismatch_two_range_overload/8 5.18 ns 3.58 ns bm_mismatch_two_range_overload/16 10.6 ns 2.84 ns bm_mismatch_two_range_overload/64 39.0 ns 7.78 ns bm_mismatch_two_range_overload/512 247 ns 53.9 ns bm_mismatch_two_range_overload/4096 1927 ns 429 ns bm_mismatch_two_range_overload/32768 15569 ns 3393 ns bm_mismatch_two_range_overload/262144 125413 ns 28504 ns bm_mismatch_two_range_overload/1048576 504549 ns 112729 ns ``` --- .../benchmarks/algorithms/mismatch.bench.cpp | 16 +++++++++ libcxx/include/__algorithm/mismatch.h | 34 ++++++++++++++++--- 
libcxx/include/__algorithm/ranges_mismatch.h | 22 +++++++----- libcxx/include/__algorithm/simd_utils.h | 9 ++++- .../test/libcxx/transitive_includes/cxx23.csv | 1 + .../test/libcxx/transitive_includes/cxx26.csv | 1 + .../mismatch/mismatch.pass.cpp | 8 ++--- 7 files changed, 74 insertions(+), 17 deletions(-) diff --git a/libcxx/benchmarks/algorithms/mismatch.bench.cpp b/libcxx/benchmarks/algorithms/mismatch.bench.cpp index 06289068bb0492..791782879011e2 100644 --- a/libcxx/benchmarks/algorithms/mismatch.bench.cpp +++ b/libcxx/benchmarks/algorithms/mismatch.bench.cpp @@ -37,4 +37,20 @@ BENCHMARK(bm_mismatch)->Apply(BenchmarkSizes); BENCHMARK(bm_mismatch)->Apply(BenchmarkSizes); BENCHMARK(bm_mismatch)->Apply(BenchmarkSizes); +template +static void bm_mismatch_two_range_overload(benchmark::State& state) { + std::vector vec1(state.range(), '1'); + std::vector vec2(state.range(), '1'); + std::mt19937_64 rng(std::random_device{}()); + + vec1.back() = '2'; + for (auto _ : state) { + benchmark::DoNotOptimize(vec1); + benchmark::DoNotOptimize(std::mismatch(vec1.begin(), vec1.end(), vec2.begin(), vec2.end())); + } +} +BENCHMARK(bm_mismatch_two_range_overload)->DenseRange(1, 8)->Range(16, 1 << 20); +BENCHMARK(bm_mismatch_two_range_overload)->DenseRange(1, 8)->Range(16, 1 << 20); +BENCHMARK(bm_mismatch_two_range_overload)->DenseRange(1, 8)->Range(16, 1 << 20); + BENCHMARK_MAIN(); diff --git a/libcxx/include/__algorithm/mismatch.h b/libcxx/include/__algorithm/mismatch.h index 1cb83b01b2ebe5..8abb273ac17822 100644 --- a/libcxx/include/__algorithm/mismatch.h +++ b/libcxx/include/__algorithm/mismatch.h @@ -11,6 +11,7 @@ #define _LIBCPP___ALGORITHM_MISMATCH_H #include <__algorithm/comp.h> +#include <__algorithm/min.h> #include <__algorithm/simd_utils.h> #include <__algorithm/unwrap_iter.h> #include <__config> @@ -136,6 +137,25 @@ mismatch(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __fi } #if _LIBCPP_STD_VER >= 14 +template +[[__nodiscard__]] 
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Iter1, _Iter2> __mismatch( + _Iter1 __first1, _Sent1 __last1, _Iter2 __first2, _Sent2 __last2, _Pred& __pred, _Proj1& __proj1, _Proj2& __proj2) { + while (__first1 != __last1 && __first2 != __last2) { + if (!std::__invoke(__pred, std::__invoke(__proj1, *__first1), std::__invoke(__proj2, *__first2))) + break; + ++__first1; + ++__first2; + } + return {std::move(__first1), std::move(__first2)}; +} + +template +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Tp*, _Tp*> +__mismatch(_Tp* __first1, _Tp* __last1, _Tp* __first2, _Tp* __last2, _Pred& __pred, _Proj1& __proj1, _Proj2& __proj2) { + auto __len = std::min(__last1 - __first1, __last2 - __first2); + return std::__mismatch(__first1, __first1 + __len, __first2, __pred, __proj1, __proj2); +} + template _LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InputIterator1, _InputIterator2> mismatch(_InputIterator1 __first1, @@ -143,10 +163,16 @@ mismatch(_InputIterator1 __first1, _InputIterator2 __first2, _InputIterator2 __last2, _BinaryPredicate __pred) { - for (; __first1 != __last1 && __first2 != __last2; ++__first1, (void)++__first2) - if (!__pred(*__first1, *__first2)) - break; - return pair<_InputIterator1, _InputIterator2>(__first1, __first2); + __identity __proj; + auto __res = std::__mismatch( + std::__unwrap_iter(__first1), + std::__unwrap_iter(__last1), + std::__unwrap_iter(__first2), + std::__unwrap_iter(__last2), + __pred, + __proj, + __proj); + return {std::__rewrap_iter(__first1, __res.first), std::__rewrap_iter(__first2, __res.second)}; } template diff --git a/libcxx/include/__algorithm/ranges_mismatch.h b/libcxx/include/__algorithm/ranges_mismatch.h index 037af39126230a..d8a7dd43af09d5 100644 --- a/libcxx/include/__algorithm/ranges_mismatch.h +++ b/libcxx/include/__algorithm/ranges_mismatch.h @@ -10,6 +10,8 @@ #define _LIBCPP___ALGORITHM_RANGES_MISMATCH_H #include 
<__algorithm/in_in_result.h> +#include <__algorithm/mismatch.h> +#include <__algorithm/unwrap_range.h> #include <__config> #include <__functional/identity.h> #include <__functional/invoke.h> @@ -42,13 +44,17 @@ struct __fn { template static _LIBCPP_HIDE_FROM_ABI constexpr mismatch_result<_I1, _I2> __go(_I1 __first1, _S1 __last1, _I2 __first2, _S2 __last2, _Pred& __pred, _Proj1& __proj1, _Proj2& __proj2) { - while (__first1 != __last1 && __first2 != __last2) { - if (!std::invoke(__pred, std::invoke(__proj1, *__first1), std::invoke(__proj2, *__first2))) - break; - ++__first1; - ++__first2; + if constexpr (forward_iterator<_I1> && forward_iterator<_I2>) { + auto __range1 = std::__unwrap_range(__first1, __last1); + auto __range2 = std::__unwrap_range(__first2, __last2); + auto __res = + std::__mismatch(__range1.first, __range1.second, __range2.first, __range2.second, __pred, __proj1, __proj2); + return {std::__rewrap_range<_S1>(__first1, __res.first), std::__rewrap_range<_S2>(__first2, __res.second)}; + } else { + auto __res = std::__mismatch( + std::move(__first1), std::move(__last1), std::move(__first2), std::move(__last2), __pred, __proj1, __proj2); + return {std::move(__res.first), std::move(__res.second)}; } - return {std::move(__first1), std::move(__first2)}; } template requires indirectly_comparable, iterator_t<_R2>, _Pred, _Proj1, _Proj2> - _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr mismatch_result, - borrowed_iterator_t<_R2>> + _LIBCPP_NODISCARD_EXT + _LIBCPP_HIDE_FROM_ABI constexpr mismatch_result, borrowed_iterator_t<_R2>> operator()(_R1&& __r1, _R2&& __r2, _Pred __pred = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) const { return __go( ranges::begin(__r1), ranges::end(__r1), ranges::begin(__r2), ranges::end(__r2), __pred, __proj1, __proj2); diff --git a/libcxx/include/__algorithm/simd_utils.h b/libcxx/include/__algorithm/simd_utils.h index 1aedb3db010f77..989a1957987e1e 100644 --- a/libcxx/include/__algorithm/simd_utils.h +++ 
b/libcxx/include/__algorithm/simd_utils.h @@ -9,6 +9,7 @@ #ifndef _LIBCPP___ALGORITHM_SIMD_UTILS_H #define _LIBCPP___ALGORITHM_SIMD_UTILS_H +#include <__algorithm/min.h> #include <__bit/bit_cast.h> #include <__bit/countr.h> #include <__config> @@ -22,6 +23,9 @@ # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + // TODO: Find out how altivec changes things and allow vectorizations there too. #if _LIBCPP_STD_VER >= 14 && defined(_LIBCPP_CLANG_VER) && _LIBCPP_CLANG_VER >= 1700 && !defined(__ALTIVEC__) # define _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS 1 @@ -94,7 +98,8 @@ _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI size_t __find_first_set(__simd_vector<_T // This has MSan disabled du to https://github.com/llvm/llvm-project/issues/85876 auto __impl = [&](_MaskT) _LIBCPP_NO_SANITIZE("memory") noexcept { - return std::__countr_zero(__builtin_bit_cast(_MaskT, __builtin_convertvector(__vec, __mask_vec))); + return std::min( + _Np, std::__countr_zero(__builtin_bit_cast(_MaskT, __builtin_convertvector(__vec, __mask_vec)))); }; if constexpr (sizeof(__mask_vec) == sizeof(uint8_t)) { @@ -120,4 +125,6 @@ _LIBCPP_END_NAMESPACE_STD #endif // _LIBCPP_HAS_ALGORITHM_VECTOR_UTILS +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_SIMD_UTILS_H diff --git a/libcxx/test/libcxx/transitive_includes/cxx23.csv b/libcxx/test/libcxx/transitive_includes/cxx23.csv index 79c67dc00cfb9b..69429b5bce8250 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx23.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx23.csv @@ -452,6 +452,7 @@ random vector random version ranges compare ranges cstddef +ranges cstdint ranges cwchar ranges initializer_list ranges iterator diff --git a/libcxx/test/libcxx/transitive_includes/cxx26.csv b/libcxx/test/libcxx/transitive_includes/cxx26.csv index 79c67dc00cfb9b..69429b5bce8250 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx26.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx26.csv @@ -452,6 +452,7 @@ random vector random version 
ranges compare ranges cstddef +ranges cstdint ranges cwchar ranges initializer_list ranges iterator diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/mismatch/mismatch.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/mismatch/mismatch.pass.cpp index 55c9eea863c3ff..eb5f7cacdde34b 100644 --- a/libcxx/test/std/algorithms/alg.nonmodifying/mismatch/mismatch.pass.cpp +++ b/libcxx/test/std/algorithms/alg.nonmodifying/mismatch/mismatch.pass.cpp @@ -187,8 +187,8 @@ int main(int, char**) { { // check the tail of the vectorized loop for (size_t vec_size = 1; vec_size != 256; ++vec_size) { { - std::vector lhs(256); - std::vector rhs(256); + std::vector lhs(vec_size); + std::vector rhs(vec_size); check(lhs, rhs, lhs.size()); lhs.back() = 1; @@ -199,8 +199,8 @@ int main(int, char**) { rhs.back() = 0; } { - std::vector lhs(256); - std::vector rhs(256); + std::vector lhs(vec_size); + std::vector rhs(vec_size); check(lhs, rhs, lhs.size()); lhs.back() = 1; From a8cfa7cbdf6cc1a94ed25c90897d2e031f77a5a9 Mon Sep 17 00:00:00 2001 From: Jakub Kuderski Date: Mon, 1 Apr 2024 12:32:23 -0400 Subject: [PATCH 018/201] [mlir][TD] Allow op printing flags as `transform.print` attrs (#86846) Introduce 3 new optional attributes to the `transform.print` ops: * `assume_verified` * `use_local_scope` * `skip_regions` The primary motivation is to allow printing on large inputs that otherwise take forever to print and verify. For the full context, see this IREE issue: https://github.com/openxla/iree/issues/16901. Also add some tests and fix the op description. 
--- .../mlir/Dialect/Transform/IR/TransformOps.td | 19 ++++++- .../lib/Dialect/Transform/IR/TransformOps.cpp | 19 ++++++- mlir/test/Dialect/Transform/ops.mlir | 10 ++-- .../Transform/test-interpreter-printing.mlir | 56 +++++++++++++++++++ 4 files changed, 94 insertions(+), 10 deletions(-) create mode 100644 mlir/test/Dialect/Transform/test-interpreter-printing.mlir diff --git a/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td b/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td index bf1a8016cd9df6..21c9595860d4c5 100644 --- a/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td +++ b/mlir/include/mlir/Dialect/Transform/IR/TransformOps.td @@ -1098,15 +1098,28 @@ def PrintOp : TransformDialectOp<"print", MatchOpInterface]> { let summary = "Dump each payload op"; let description = [{ - This op dumps each payload op that is associated with the `target` operand - to stderr. It also prints the `name` string attribute. If no target is + Prints each payload op that is associated with the `target` operand to + `stdout`. It also prints the `name` string attribute. If no target is specified, the top-level op is dumped. This op is useful for printf-style debugging. + + Supported printing flag attributes: + * `assume_verified` -- skips verification when the unit attribute is + specified. This improves performace but may lead to crashes and + unexpected behavior when the printed payload op is invalid. + * `use_local_scope` -- prints in local scope when the unit attribute is + specified. This improves performance but may not be identical to + printing within the full module. + * `skip_regions` -- does not print regions of operations when the unit + attribute is specified. 
}]; let arguments = (ins Optional:$target, - OptionalAttr:$name); + OptionalAttr:$name, + OptionalAttr:$assume_verified, + OptionalAttr:$use_local_scope, + OptionalAttr:$skip_regions); let results = (outs); let builders = [ diff --git a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp index c8d06ba157b904..dc19022219e5b2 100644 --- a/mlir/lib/Dialect/Transform/IR/TransformOps.cpp +++ b/mlir/lib/Dialect/Transform/IR/TransformOps.cpp @@ -19,6 +19,7 @@ #include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/Diagnostics.h" #include "mlir/IR/Dominance.h" +#include "mlir/IR/OperationSupport.h" #include "mlir/IR/PatternMatch.h" #include "mlir/IR/Verifier.h" #include "mlir/Interfaces/CallInterfaces.h" @@ -2627,14 +2628,26 @@ transform::PrintOp::apply(transform::TransformRewriter &rewriter, if (getName().has_value()) llvm::outs() << *getName() << " "; + OpPrintingFlags printFlags; + if (getAssumeVerified().value_or(false)) + printFlags.assumeVerified(); + if (getUseLocalScope().value_or(false)) + printFlags.useLocalScope(); + if (getSkipRegions().value_or(false)) + printFlags.skipRegions(); + if (!getTarget()) { - llvm::outs() << "top-level ]]]\n" << *state.getTopLevel() << "\n"; + llvm::outs() << "top-level ]]]\n"; + state.getTopLevel()->print(llvm::outs(), printFlags); + llvm::outs() << "\n"; return DiagnosedSilenceableFailure::success(); } llvm::outs() << "]]]\n"; - for (Operation *target : state.getPayloadOps(getTarget())) - llvm::outs() << *target << "\n"; + for (Operation *target : state.getPayloadOps(getTarget())) { + target->print(llvm::outs(), printFlags); + llvm::outs() << "\n"; + } return DiagnosedSilenceableFailure::success(); } diff --git a/mlir/test/Dialect/Transform/ops.mlir b/mlir/test/Dialect/Transform/ops.mlir index a718d6a9e9fd90..ecef7e181e9039 100644 --- a/mlir/test/Dialect/Transform/ops.mlir +++ b/mlir/test/Dialect/Transform/ops.mlir @@ -86,16 +86,18 @@ transform.sequence failures(propagate) { } // CHECK: 
transform.sequence -// CHECK: print -// CHECK: print -// CHECK: print -// CHECK: print +// CHECK-COUNT-9: print transform.sequence failures(propagate) { ^bb0(%arg0: !transform.any_op): transform.print %arg0 : !transform.any_op transform.print transform.print %arg0 {name = "test"} : !transform.any_op transform.print {name = "test"} + transform.print {name = "test", assume_verified} + transform.print %arg0 {assume_verified} : !transform.any_op + transform.print %arg0 {use_local_scope} : !transform.any_op + transform.print %arg0 {skip_regions} : !transform.any_op + transform.print %arg0 {assume_verified, use_local_scope, skip_regions} : !transform.any_op } // CHECK: transform.sequence diff --git a/mlir/test/Dialect/Transform/test-interpreter-printing.mlir b/mlir/test/Dialect/Transform/test-interpreter-printing.mlir new file mode 100644 index 00000000000000..a54c83d2b249eb --- /dev/null +++ b/mlir/test/Dialect/Transform/test-interpreter-printing.mlir @@ -0,0 +1,56 @@ +// RUN: mlir-opt %s --transform-interpreter --allow-unregistered-dialect --verify-diagnostics | FileCheck %s + +// RUN: mlir-opt %s --transform-interpreter --allow-unregistered-dialect --verify-diagnostics \ +// RUN: --mlir-print-debuginfo | FileCheck %s --check-prefix=CHECK-LOC + +func.func @nested_ops() { + "test.qux"() ({ + // expected-error @below{{fail_to_verify is set}} + "test.baz"() ({ + "test.bar"() : () -> () + }) : () -> () + }) : () -> () +} + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg0: !transform.any_op) { + // CHECK-LABEL{LITERAL}: [[[ IR printer: START top-level ]]] + // CHECK-NEXT: module { + // CHECK-LOC-LABEL{LITERAL}: [[[ IR printer: START top-level ]]] + // CHECK-LOC-NEXT: #{{.+}} = loc( + // CHECK-LOC-NEXT: module { + transform.print {name = "START"} + + // CHECK{LITERAL}: [[[ IR printer: Local scope top-level ]]] + // CHECK-NEXT: module { + // CHECK-LOC{LITERAL}: [[[ IR printer: Local scope top-level ]]] + // 
CHECK-LOC-NEXT: module { + transform.print {name = "Local scope", use_local_scope} + + %baz = transform.structured.match ops{["test.baz"]} in %arg0 : (!transform.any_op) -> !transform.any_op + + // CHECK{LITERAL}: [[[ IR printer: ]]] + // CHECK-NEXT: "test.baz"() ({ + // CHECK-NEXT: "test.bar"() : () -> () + // CHECK-NEXT: }) : () -> () + transform.print %baz : !transform.any_op + + // CHECK{LITERAL}: [[[ IR printer: Baz ]]] + // CHECK-NEXT: "test.baz"() ({ + transform.print %baz {name = "Baz"} : !transform.any_op + + // CHECK{LITERAL}: [[[ IR printer: No region ]]] + // CHECK-NEXT: "test.baz"() ({...}) : () -> () + transform.print %baz {name = "No region", skip_regions} : !transform.any_op + + // CHECK{LITERAL}: [[[ IR printer: No verify ]]] + // CHECK-NEXT: "test.baz"() ({ + // CHECK-NEXT: transform.test_dummy_payload_op {fail_to_verify} : () -> () + transform.test_produce_invalid_ir %baz : !transform.any_op + transform.print %baz {name = "No verify", assume_verified} : !transform.any_op + + // CHECK-LABEL{LITERAL}: [[[ IR printer: END top-level ]]] + transform.print {name = "END"} + transform.yield + } +} From d83271b093ec206c2f47a9c636a5727cf63cad5e Mon Sep 17 00:00:00 2001 From: Caslyn Tonelli <6718161+Caslyn@users.noreply.github.com> Date: Mon, 1 Apr 2024 09:51:38 -0700 Subject: [PATCH 019/201] [libc] Include algorithm.h to parser.h (#87125) This includes algorithm.h directly to provide the definition for `cpp:max` in parser.h. This will define `max(...)` in the libc namespace for build systems that pull in parser.h explicitly. 
--- libc/src/stdio/printf_core/CMakeLists.txt | 1 + libc/src/stdio/printf_core/parser.h | 1 + utils/bazel/llvm-project-overlay/libc/BUILD.bazel | 1 + 3 files changed, 3 insertions(+) diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt index 02819ea25ea055..7db79c54beb0ae 100644 --- a/libc/src/stdio/printf_core/CMakeLists.txt +++ b/libc/src/stdio/printf_core/CMakeLists.txt @@ -42,6 +42,7 @@ add_header_library( libc.src.__support.arg_list libc.src.__support.ctype_utils libc.src.__support.str_to_integer + libc.src.__support.CPP.algorithm libc.src.__support.CPP.bit libc.src.__support.CPP.optional libc.src.__support.CPP.string_view diff --git a/libc/src/stdio/printf_core/parser.h b/libc/src/stdio/printf_core/parser.h index 8e8c77e219fa4d..eda978a83ea8af 100644 --- a/libc/src/stdio/printf_core/parser.h +++ b/libc/src/stdio/printf_core/parser.h @@ -10,6 +10,7 @@ #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_PARSER_H #include "include/llvm-libc-macros/stdfix-macros.h" +#include "src/__support/CPP/algorithm.h" // max #include "src/__support/CPP/optional.h" #include "src/__support/CPP/type_traits.h" #include "src/__support/str_to_integer.h" diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 9169f330baac3c..9dfe4c48184e3e 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -3041,6 +3041,7 @@ libc_support_library( deps = [ ":__support_arg_list", ":__support_common", + ":__support_cpp_algorithm", ":__support_cpp_bit", ":__support_cpp_optional", ":__support_cpp_string_view", From 5bbc640f64efb7d110559dab132c8d2fb7fbbf37 Mon Sep 17 00:00:00 2001 From: Mingming Liu Date: Mon, 1 Apr 2024 09:55:24 -0700 Subject: [PATCH 020/201] [nfc] Disable the a cpp compiler-rt test on ppc bigendian systems due to build errors (#87262) `Linux/instrprof-vtable-value-prof.cpp` needs to be built for the test to run. 
However, cpp compile & link failed with undefined-ABI error [1]. See original failure in https://lab.llvm.org/buildbot/#/builders/18/builds/16429 [1] ``` FAIL: Profile-powerpc64 :: Linux/instrprof-vtable-value-prof.cpp (2406 of 2414) ******************** TEST 'Profile-powerpc64 :: Linux/instrprof-vtable-value-prof.cpp' FAILED ******************** Exit Code: 1 Command Output (stderr): -- RUN: at line 3: /home/buildbots/llvm-external-buildbots/workers/ppc64be-sanitizer/sanitizer-ppc64be/build/build_debug/./bin/clang --driver-mode=g++ -m64 -ldl -fprofile-generate -fuse-ld=lld -O2 -g -fprofile-generate=. -mllvm -enable-vtable-value-profiling /home/buildbots/llvm-external-buildbots/workers/ppc64be-sanitizer/sanitizer-ppc64be/build/llvm-project/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp -o /home/buildbots/llvm-external-buildbots/workers/ppc64be-sanitizer/sanitizer-ppc64be/build/build_debug/runtimes/runtimes-bins/compiler-rt/test/profile/Profile-powerpc64/Linux/Output/instrprof-vtable-value-prof.cpp.tmp-test + /home/buildbots/llvm-external-buildbots/workers/ppc64be-sanitizer/sanitizer-ppc64be/build/build_debug/./bin/clang --driver-mode=g++ -m64 -ldl -fprofile-generate -fuse-ld=lld -O2 -g -fprofile-generate=. 
-mllvm -enable-vtable-value-profiling /home/buildbots/llvm-external-buildbots/workers/ppc64be-sanitizer/sanitizer-ppc64be/build/llvm-project/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp -o /home/buildbots/llvm-external-buildbots/workers/ppc64be-sanitizer/sanitizer-ppc64be/build/build_debug/runtimes/runtimes-bins/compiler-rt/test/profile/Profile-powerpc64/Linux/Output/instrprof-vtable-value-prof.cpp.tmp-test ld.lld: error: /lib/../lib64/Scrt1.o: ABI version 1 is not supported clang: error: linker command failed with exit code 1 (use -v to see invocation) ``` --- .../test/profile/Linux/instrprof-vtable-value-prof.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp b/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp index 5c8426b40892f6..e51805bdf923cb 100644 --- a/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp +++ b/compiler-rt/test/profile/Linux/instrprof-vtable-value-prof.cpp @@ -1,5 +1,10 @@ // REQUIRES: lld-available +// Building the instrumented binary will fail because lld doesn't support +// big-endian ELF for PPC (aka ABI 1). +// ld.lld: error: /lib/../lib64/Scrt1.o: ABI version 1 is not supported +// UNSUPPORTED: ppc && host-byteorder-big-endian + // RUN: %clangxx_pgogen -fuse-ld=lld -O2 -g -fprofile-generate=. 
-mllvm -enable-vtable-value-profiling %s -o %t-test // RUN: env LLVM_PROFILE_FILE=%t-test.profraw %t-test From 55b74030a4c75f25be901522fe595d7233fad76d Mon Sep 17 00:00:00 2001 From: Shourya Goel Date: Mon, 1 Apr 2024 22:39:28 +0530 Subject: [PATCH 021/201] [libc][POSIX] implement fseeko, ftello (#86928) Fixes: #85287 --- libc/config/linux/x86_64/entrypoints.txt | 2 ++ libc/src/stdio/CMakeLists.txt | 2 ++ libc/src/stdio/fseeko.h | 20 ++++++++++++++++++ libc/src/stdio/ftello.h | 20 ++++++++++++++++++ libc/src/stdio/generic/CMakeLists.txt | 26 ++++++++++++++++++++++-- libc/src/stdio/generic/fseek.cpp | 1 - libc/src/stdio/generic/fseeko.cpp | 26 ++++++++++++++++++++++++ libc/src/stdio/generic/ftell.cpp | 1 - libc/src/stdio/generic/ftello.cpp | 25 +++++++++++++++++++++++ libc/test/src/stdio/CMakeLists.txt | 2 ++ libc/test/src/stdio/ftell_test.cpp | 9 ++++++++ 11 files changed, 130 insertions(+), 4 deletions(-) create mode 100644 libc/src/stdio/fseeko.h create mode 100644 libc/src/stdio/ftello.h create mode 100644 libc/src/stdio/generic/fseeko.cpp create mode 100644 libc/src/stdio/generic/ftello.cpp diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 5b428e51aee620..a4d0da5e043d4f 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -681,6 +681,8 @@ if(LLVM_LIBC_FULL_BUILD) libc.src.stdio.fread_unlocked libc.src.stdio.fseek libc.src.stdio.ftell + libc.src.stdio.fseeko + libc.src.stdio.ftello libc.src.stdio.funlockfile libc.src.stdio.fwrite libc.src.stdio.fwrite_unlocked diff --git a/libc/src/stdio/CMakeLists.txt b/libc/src/stdio/CMakeLists.txt index 11e15c91735188..1056f38fc7513a 100644 --- a/libc/src/stdio/CMakeLists.txt +++ b/libc/src/stdio/CMakeLists.txt @@ -270,6 +270,8 @@ add_stdio_entrypoint_object(ferror) add_stdio_entrypoint_object(ferror_unlocked) add_stdio_entrypoint_object(fseek) add_stdio_entrypoint_object(ftell) +add_stdio_entrypoint_object(fseeko) 
+add_stdio_entrypoint_object(ftello) add_stdio_entrypoint_object(fflush) add_stdio_entrypoint_object(clearerr) add_stdio_entrypoint_object(clearerr_unlocked) diff --git a/libc/src/stdio/fseeko.h b/libc/src/stdio/fseeko.h new file mode 100644 index 00000000000000..77fb41215c318f --- /dev/null +++ b/libc/src/stdio/fseeko.h @@ -0,0 +1,20 @@ +//===-- Implementation header of fseeko -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_FSEEKO_H +#define LLVM_LIBC_SRC_STDIO_FSEEKO_H + +#include + +namespace LIBC_NAMESPACE { + +int fseeko(::FILE *stream, off_t offset, int whence); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDIO_FSEEKO_H diff --git a/libc/src/stdio/ftello.h b/libc/src/stdio/ftello.h new file mode 100644 index 00000000000000..5ab17f9244a5ad --- /dev/null +++ b/libc/src/stdio/ftello.h @@ -0,0 +1,20 @@ +//===-- Implementation header of ftello -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_STDIO_FTELLO_H +#define LLVM_LIBC_SRC_STDIO_FTELLO_H + +#include + +namespace LIBC_NAMESPACE { + +off_t ftello(::FILE *f); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_STDIO_FTELLO_H diff --git a/libc/src/stdio/generic/CMakeLists.txt b/libc/src/stdio/generic/CMakeLists.txt index 0aa213caba7b8a..ae255917adfe3a 100644 --- a/libc/src/stdio/generic/CMakeLists.txt +++ b/libc/src/stdio/generic/CMakeLists.txt @@ -103,7 +103,6 @@ add_entrypoint_object( ../fseek.h DEPENDS libc.src.errno.errno - libc.include.stdio libc.src.__support.File.file libc.src.__support.File.platform_file ) @@ -116,7 +115,30 @@ add_entrypoint_object( ../ftell.h DEPENDS libc.src.errno.errno - libc.include.stdio + libc.src.__support.File.file + libc.src.__support.File.platform_file +) + +add_entrypoint_object( + fseeko + SRCS + fseeko.cpp + HDRS + ../fseeko.h + DEPENDS + libc.src.errno.errno + libc.src.__support.File.file + libc.src.__support.File.platform_file +) + +add_entrypoint_object( + ftello + SRCS + ftello.cpp + HDRS + ../ftello.h + DEPENDS + libc.src.errno.errno libc.src.__support.File.file libc.src.__support.File.platform_file ) diff --git a/libc/src/stdio/generic/fseek.cpp b/libc/src/stdio/generic/fseek.cpp index 7666e71e699d56..c5edc8d4198c74 100644 --- a/libc/src/stdio/generic/fseek.cpp +++ b/libc/src/stdio/generic/fseek.cpp @@ -10,7 +10,6 @@ #include "src/__support/File/file.h" #include "src/errno/libc_errno.h" -#include namespace LIBC_NAMESPACE { diff --git a/libc/src/stdio/generic/fseeko.cpp b/libc/src/stdio/generic/fseeko.cpp new file mode 100644 index 00000000000000..215da759937f69 --- /dev/null +++ b/libc/src/stdio/generic/fseeko.cpp @@ -0,0 +1,26 @@ +//===-- Implementation of fseeko ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with 
LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/fseeko.h" +#include "src/__support/File/file.h" + +#include "src/errno/libc_errno.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(int, fseeko, (::FILE * stream, off_t offset, int whence)) { + auto result = + reinterpret_cast(stream)->seek(offset, whence); + if (!result.has_value()) { + libc_errno = result.error(); + return -1; + } + return 0; +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/stdio/generic/ftell.cpp b/libc/src/stdio/generic/ftell.cpp index 5f7803150534b3..d55bad2828541b 100644 --- a/libc/src/stdio/generic/ftell.cpp +++ b/libc/src/stdio/generic/ftell.cpp @@ -10,7 +10,6 @@ #include "src/__support/File/file.h" #include "src/errno/libc_errno.h" -#include namespace LIBC_NAMESPACE { diff --git a/libc/src/stdio/generic/ftello.cpp b/libc/src/stdio/generic/ftello.cpp new file mode 100644 index 00000000000000..c72e56ea6eb1af --- /dev/null +++ b/libc/src/stdio/generic/ftello.cpp @@ -0,0 +1,25 @@ +//===-- Implementation of ftello ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/stdio/ftello.h" +#include "src/__support/File/file.h" + +#include "src/errno/libc_errno.h" + +namespace LIBC_NAMESPACE { + +LLVM_LIBC_FUNCTION(off_t, ftello, (::FILE * stream)) { + auto result = reinterpret_cast(stream)->tell(); + if (!result.has_value()) { + libc_errno = result.error(); + return -1; + } + return result.value(); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/test/src/stdio/CMakeLists.txt b/libc/test/src/stdio/CMakeLists.txt index 4c38e8aba7d7f2..03c43eaefebe5e 100644 --- a/libc/test/src/stdio/CMakeLists.txt +++ b/libc/test/src/stdio/CMakeLists.txt @@ -442,6 +442,8 @@ add_libc_test( libc.src.stdio.fread libc.src.stdio.fseek libc.src.stdio.ftell + libc.src.stdio.fseeko + libc.src.stdio.ftello libc.src.stdio.fwrite libc.src.stdio.setvbuf ) diff --git a/libc/test/src/stdio/ftell_test.cpp b/libc/test/src/stdio/ftell_test.cpp index 61b626f53cd26e..68a969ed0c30dd 100644 --- a/libc/test/src/stdio/ftell_test.cpp +++ b/libc/test/src/stdio/ftell_test.cpp @@ -10,7 +10,9 @@ #include "src/stdio/fopen.h" #include "src/stdio/fread.h" #include "src/stdio/fseek.h" +#include "src/stdio/fseeko.h" #include "src/stdio/ftell.h" +#include "src/stdio/ftello.h" #include "src/stdio/fwrite.h" #include "src/stdio/setvbuf.h" #include "test/UnitTest/Test.h" @@ -37,6 +39,13 @@ class LlvmLibcFTellTest : public LIBC_NAMESPACE::testing::Test { // still return the correct effective offset. 
ASSERT_EQ(size_t(LIBC_NAMESPACE::ftell(file)), WRITE_SIZE); + off_t offseto = 42; + ASSERT_EQ(0, LIBC_NAMESPACE::fseeko(file, offseto, SEEK_SET)); + ASSERT_EQ(LIBC_NAMESPACE::ftello(file), offseto); + ASSERT_EQ(0, LIBC_NAMESPACE::fseeko(file, -offseto, SEEK_END)); + ASSERT_EQ(size_t(LIBC_NAMESPACE::ftello(file)), + size_t(WRITE_SIZE - offseto)); + long offset = 5; ASSERT_EQ(0, LIBC_NAMESPACE::fseek(file, offset, SEEK_SET)); ASSERT_EQ(LIBC_NAMESPACE::ftell(file), offset); From 92d0d6f6cb4099e651d066cd88dc0abfa6e612cf Mon Sep 17 00:00:00 2001 From: ChiaHungDuan Date: Mon, 1 Apr 2024 10:09:40 -0700 Subject: [PATCH 022/201] [scudo] Do a M_PURGE call before changing release interval on Android (#87110) --- compiler-rt/lib/scudo/standalone/wrappers_c.inc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c.inc b/compiler-rt/lib/scudo/standalone/wrappers_c.inc index 56d8ef20156e28..21d5b7add51275 100644 --- a/compiler-rt/lib/scudo/standalone/wrappers_c.inc +++ b/compiler-rt/lib/scudo/standalone/wrappers_c.inc @@ -247,6 +247,11 @@ void SCUDO_PREFIX(malloc_postinit)() { INTERFACE WEAK int SCUDO_PREFIX(mallopt)(int param, int value) { if (param == M_DECAY_TIME) { if (SCUDO_ANDROID) { + // Before changing the interval, reset the memory usage status by doing a + // M_PURGE call so that we can minimize the impact of any unreleased pages + // introduced by interval transition. + SCUDO_ALLOCATOR.releaseToOS(scudo::ReleaseToOS::Force); + if (value == 0) { // Will set the release values to their minimum values. value = INT32_MIN; From 9434c083475e42f47383f3067fe2a155db5c6a30 Mon Sep 17 00:00:00 2001 From: Chris B Date: Mon, 1 Apr 2024 12:10:10 -0500 Subject: [PATCH 023/201] [HLSL] Implement array temporary support (#79382) HLSL constant sized array function parameters do not decay to pointers. Instead constant sized array types are preserved as unique types for overload resolution, template instantiation and name mangling. 
This implements the change by adding a new `ArrayParameterType` which represents a non-decaying `ConstantArrayType`. The new type behaves the same as `ConstantArrayType` except that it does not decay to a pointer. Values of `ConstantArrayType` in HLSL decay during overload resolution via a new `HLSLArrayRValue` cast to `ArrayParameterType`. `ArrayParamterType` values are passed indirectly by-value to functions in IR generation resulting in callee generated memcpy instructions. The behavior of HLSL function calls is documented in the [draft language specification](https://microsoft.github.io/hlsl-specs/specs/hlsl.pdf) under the Expr.Post.Call heading. Additionally the design of this implementation approach is documented in [Clang's documentation](https://clang.llvm.org/docs/HLSL/FunctionCalls.html) Resolves #70123 --- clang/docs/HLSL/FunctionCalls.rst | 31 +++--- clang/include/clang/AST/ASTContext.h | 7 ++ clang/include/clang/AST/OperationKinds.def | 3 + clang/include/clang/AST/RecursiveASTVisitor.h | 11 ++ clang/include/clang/AST/Type.h | 45 +++++++- clang/include/clang/AST/TypeLoc.h | 5 + clang/include/clang/AST/TypeProperties.td | 7 ++ clang/include/clang/Basic/TypeNodes.td | 1 + clang/include/clang/Sema/Overload.h | 3 + .../clang/Serialization/TypeBitCodes.def | 1 + clang/lib/AST/ASTContext.cpp | 62 ++++++++++- clang/lib/AST/ASTImporter.cpp | 9 ++ clang/lib/AST/ASTStructuralEquivalence.cpp | 1 + clang/lib/AST/Expr.cpp | 1 + clang/lib/AST/ExprConstant.cpp | 3 + clang/lib/AST/ItaniumMangle.cpp | 5 + clang/lib/AST/MicrosoftMangle.cpp | 5 + clang/lib/AST/ODRHash.cpp | 4 + clang/lib/AST/Type.cpp | 11 ++ clang/lib/AST/TypePrinter.cpp | 11 ++ clang/lib/CodeGen/CGCall.cpp | 3 +- clang/lib/CodeGen/CGDebugInfo.cpp | 1 + clang/lib/CodeGen/CGExpr.cpp | 1 + clang/lib/CodeGen/CGExprAgg.cpp | 4 + clang/lib/CodeGen/CGExprComplex.cpp | 1 + clang/lib/CodeGen/CGExprConstant.cpp | 1 + clang/lib/CodeGen/CGExprScalar.cpp | 1 + clang/lib/CodeGen/CodeGenFunction.cpp | 2 + 
clang/lib/CodeGen/CodeGenTypes.cpp | 1 + clang/lib/CodeGen/ItaniumCXXABI.cpp | 4 + clang/lib/Edit/RewriteObjCFoundationAPI.cpp | 1 + clang/lib/Sema/Sema.cpp | 1 + clang/lib/Sema/SemaExpr.cpp | 10 +- clang/lib/Sema/SemaExprCXX.cpp | 8 ++ clang/lib/Sema/SemaInit.cpp | 5 +- clang/lib/Sema/SemaLookup.cpp | 4 + clang/lib/Sema/SemaOverload.cpp | 19 +++- clang/lib/Sema/SemaTemplate.cpp | 5 + clang/lib/Sema/SemaTemplateDeduction.cpp | 3 +- clang/lib/Sema/SemaType.cpp | 3 + clang/lib/Sema/TreeTransform.h | 17 +++ clang/lib/Serialization/ASTReader.cpp | 4 + clang/lib/Serialization/ASTWriter.cpp | 4 + clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp | 3 +- clang/test/CodeGenHLSL/ArrayTemporary.hlsl | 104 ++++++++++++++++++ clang/test/SemaHLSL/ArrayParams.hlsl | 29 +++++ clang/test/SemaHLSL/ArrayTemporary.hlsl | 95 ++++++++++++++++ clang/test/SemaHLSL/ArrayTemporary.ll | 76 +++++++++++++ clang/tools/libclang/CIndex.cpp | 1 + 49 files changed, 606 insertions(+), 31 deletions(-) create mode 100644 clang/test/CodeGenHLSL/ArrayTemporary.hlsl create mode 100644 clang/test/SemaHLSL/ArrayParams.hlsl create mode 100644 clang/test/SemaHLSL/ArrayTemporary.hlsl create mode 100644 clang/test/SemaHLSL/ArrayTemporary.ll diff --git a/clang/docs/HLSL/FunctionCalls.rst b/clang/docs/HLSL/FunctionCalls.rst index 7317de2163f897..6d65fe6e3fb20b 100644 --- a/clang/docs/HLSL/FunctionCalls.rst +++ b/clang/docs/HLSL/FunctionCalls.rst @@ -157,22 +157,23 @@ Clang Implementation of the changes in the prototype implementation are restoring Clang-3.7 code that was previously modified to its original state. -The implementation in clang depends on two new AST nodes and minor extensions to -Clang's existing support for Objective-C write-back arguments. The goal of this -design is to capture the semantic details of HLSL function calls in the AST, and -minimize the amount of magic that needs to occur during IR generation. 
- -The two new AST nodes are ``HLSLArrayTemporaryExpr`` and ``HLSLOutParamExpr``, -which respectively represent the temporaries used for passing arrays by value -and the temporaries created for function outputs. +The implementation in clang adds a new non-decaying array type, a new AST node +to represent output parameters, and minor extensions to Clang's existing support +for Objective-C write-back arguments. The goal of this design is to capture the +semantic details of HLSL function calls in the AST, and minimize the amount of +magic that needs to occur during IR generation. Array Temporaries ----------------- -The ``HLSLArrayTemporaryExpr`` represents temporary values for input -constant-sized array arguments. This applies for all constant-sized array -arguments regardless of whether or not the parameter is constant-sized or -unsized. +The new ``ArrayParameterType`` is a sub-class of ``ConstantArrayType`` +inheriting all the behaviors and methods of the parent except that it does not +decay to a pointer during overload resolution or template type deduction. + +An argument of ``ConstantArrayType`` can be implicitly converted to an +equivalent non-decayed ``ArrayParameterType`` if the underlying canonical +``ConstantArrayType`` is the same. This occurs during overload resolution +instead of array to pointer decay. .. 
code-block:: c++ @@ -193,7 +194,7 @@ In the example above, the following AST is generated for the call to CallExpr 'void' |-ImplicitCastExpr 'void (*)(float [4])' | `-DeclRefExpr 'void (float [4])' lvalue Function 'SizedArray' 'void (float [4])' - `-HLSLArrayTemporaryExpr 'float [4]' + `-ImplicitCastExpr 'float [4]' `-DeclRefExpr 'float [4]' lvalue Var 'arr' 'float [4]' In the example above, the following AST is generated for the call to @@ -204,7 +205,7 @@ In the example above, the following AST is generated for the call to CallExpr 'void' |-ImplicitCastExpr 'void (*)(float [])' | `-DeclRefExpr 'void (float [])' lvalue Function 'UnsizedArray' 'void (float [])' - `-HLSLArrayTemporaryExpr 'float [4]' + `-ImplicitCastExpr 'float [4]' `-DeclRefExpr 'float [4]' lvalue Var 'arr' 'float [4]' In both of these cases the argument expression is of known array size so we can @@ -236,7 +237,7 @@ An expected AST should be something like: CallExpr 'void' |-ImplicitCastExpr 'void (*)(float [])' | `-DeclRefExpr 'void (float [])' lvalue Function 'UnsizedArray' 'void (float [])' - `-HLSLArrayTemporaryExpr 'float [4]' + `-ImplicitCastExpr 'float [4]' `-DeclRefExpr 'float [4]' lvalue Var 'arr' 'float [4]' Out Parameter Temporaries diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index 002f36ecbbaa3f..08f71051e6cbf3 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -260,6 +260,9 @@ class ASTContext : public RefCountedBase { ASTContext&> SubstTemplateTemplateParmPacks; + mutable llvm::ContextualFoldingSet + ArrayParameterTypes; + /// The set of nested name specifiers. /// /// This set is managed by the NestedNameSpecifier class. @@ -1367,6 +1370,10 @@ class ASTContext : public RefCountedBase { /// type to the decayed type. QualType getDecayedType(QualType Orig, QualType Decayed) const; + /// Return the uniqued reference to a specified array parameter type from the + /// original array type. 
+ QualType getArrayParameterType(QualType Ty) const; + /// Return the uniqued reference to the atomic type for the specified /// type. QualType getAtomicType(QualType T) const; diff --git a/clang/include/clang/AST/OperationKinds.def b/clang/include/clang/AST/OperationKinds.def index ef05072800f11a..8788b8ff0ef0a4 100644 --- a/clang/include/clang/AST/OperationKinds.def +++ b/clang/include/clang/AST/OperationKinds.def @@ -364,6 +364,9 @@ CAST_OPERATION(IntToOCLSampler) // Truncate a vector type by dropping elements from the end (HLSL only). CAST_OPERATION(HLSLVectorTruncation) +// Non-decaying array RValue cast (HLSL only). +CAST_OPERATION(HLSLArrayRValue) + //===- Binary Operations -------------------------------------------------===// // Operators listed in order of precedence. // Note that additions to this should also update the StmtVisitor class, diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 4a1ff222ecadcd..8630317795a9ad 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -993,6 +993,12 @@ DEF_TRAVERSE_TYPE(ConstantArrayType, { TRY_TO(TraverseStmt(const_cast(T->getSizeExpr()))); }) +DEF_TRAVERSE_TYPE(ArrayParameterType, { + TRY_TO(TraverseType(T->getElementType())); + if (T->getSizeExpr()) + TRY_TO(TraverseStmt(const_cast(T->getSizeExpr()))); +}) + DEF_TRAVERSE_TYPE(IncompleteArrayType, { TRY_TO(TraverseType(T->getElementType())); }) @@ -1260,6 +1266,11 @@ DEF_TRAVERSE_TYPELOC(ConstantArrayType, { TRY_TO(TraverseArrayTypeLocHelper(TL)); }) +DEF_TRAVERSE_TYPELOC(ArrayParameterType, { + TRY_TO(TraverseTypeLoc(TL.getElementLoc())); + TRY_TO(TraverseArrayTypeLocHelper(TL)); +}) + DEF_TRAVERSE_TYPELOC(IncompleteArrayType, { TRY_TO(TraverseTypeLoc(TL.getElementLoc())); TRY_TO(TraverseArrayTypeLocHelper(TL)); diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index 5d8dde37e76969..99f45d518c7960 100644 --- 
a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -2300,6 +2300,7 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase { bool isConstantArrayType() const; bool isIncompleteArrayType() const; bool isVariableArrayType() const; + bool isArrayParameterType() const; bool isDependentSizedArrayType() const; bool isRecordType() const; bool isClassType() const; @@ -3334,14 +3335,15 @@ class ArrayType : public Type, public llvm::FoldingSetNode { return T->getTypeClass() == ConstantArray || T->getTypeClass() == VariableArray || T->getTypeClass() == IncompleteArray || - T->getTypeClass() == DependentSizedArray; + T->getTypeClass() == DependentSizedArray || + T->getTypeClass() == ArrayParameter; } }; /// Represents the canonical version of C arrays with a specified constant size. /// For example, the canonical type for 'int A[4 + 4*100]' is a /// ConstantArrayType where the element type is 'int' and the size is 404. -class ConstantArrayType final : public ArrayType { +class ConstantArrayType : public ArrayType { friend class ASTContext; // ASTContext creates these. struct ExternalSize { @@ -3382,6 +3384,19 @@ class ConstantArrayType final : public ArrayType { const Expr *SzExpr, ArraySizeModifier SzMod, unsigned Qual); +protected: + ConstantArrayType(TypeClass Tc, const ConstantArrayType *ATy, QualType Can) + : ArrayType(Tc, ATy->getElementType(), Can, ATy->getSizeModifier(), + ATy->getIndexTypeQualifiers().getAsOpaqueValue(), nullptr) { + ConstantArrayTypeBits.HasExternalSize = + ATy->ConstantArrayTypeBits.HasExternalSize; + if (!ConstantArrayTypeBits.HasExternalSize) { + ConstantArrayTypeBits.SizeWidth = ATy->ConstantArrayTypeBits.SizeWidth; + Size = ATy->Size; + } else + SizePtr = ATy->SizePtr; + } + public: /// Return the constant array size as an APInt. 
llvm::APInt getSize() const { @@ -3453,7 +3468,22 @@ class ConstantArrayType final : public ArrayType { ArraySizeModifier SizeMod, unsigned TypeQuals); static bool classof(const Type *T) { - return T->getTypeClass() == ConstantArray; + return T->getTypeClass() == ConstantArray || + T->getTypeClass() == ArrayParameter; + } +}; + +/// Represents a constant array type that does not decay to a pointer when used +/// as a function parameter. +class ArrayParameterType : public ConstantArrayType { + friend class ASTContext; // ASTContext creates these. + + ArrayParameterType(const ConstantArrayType *ATy, QualType CanTy) + : ConstantArrayType(ArrayParameter, ATy, CanTy) {} + +public: + static bool classof(const Type *T) { + return T->getTypeClass() == ArrayParameter; } }; @@ -7185,7 +7215,8 @@ inline bool QualType::isCanonicalAsParam() const { if (T->isVariablyModifiedType() && T->hasSizedVLAType()) return false; - return !isa(T) && !isa(T); + return !isa(T) && + (!isa(T) || isa(T)); } inline bool QualType::isConstQualified() const { @@ -7450,6 +7481,10 @@ inline bool Type::isVariableArrayType() const { return isa(CanonicalType); } +inline bool Type::isArrayParameterType() const { + return isa(CanonicalType); +} + inline bool Type::isDependentSizedArrayType() const { return isa(CanonicalType); } @@ -7813,7 +7848,7 @@ inline bool Type::isTypedefNameType() const { /// Determines whether this type can decay to a pointer type. inline bool Type::canDecayToPointerType() const { - return isFunctionType() || isArrayType(); + return isFunctionType() || (isArrayType() && !isArrayParameterType()); } inline bool Type::hasPointerRepresentation() const { diff --git a/clang/include/clang/AST/TypeLoc.h b/clang/include/clang/AST/TypeLoc.h index b09eb3539a4bad..9f2dff7a782cb3 100644 --- a/clang/include/clang/AST/TypeLoc.h +++ b/clang/include/clang/AST/TypeLoc.h @@ -1611,6 +1611,11 @@ class ConstantArrayTypeLoc : ConstantArrayType> { }; +/// Wrapper for source info for array parameter types. 
+class ArrayParameterTypeLoc + : public InheritingConcreteTypeLoc< + ConstantArrayTypeLoc, ArrayParameterTypeLoc, ArrayParameterType> {}; + class IncompleteArrayTypeLoc : public InheritingConcreteTypeLoc; } +let Class = ArrayParameterType in { + def : Creator<[{ return ctx.getAdjustedParameterType( + ctx.getConstantArrayType(elementType,sizeValue, + size,sizeModifier, + indexQualifiers.getCVRQualifiers())); }]>; +} + let Class = IncompleteArrayType in { def : Creator<[{ return ctx.getIncompleteArrayType(elementType, sizeModifier, diff --git a/clang/include/clang/Basic/TypeNodes.td b/clang/include/clang/Basic/TypeNodes.td index 3625f063758915..fee49cf4326dfc 100644 --- a/clang/include/clang/Basic/TypeNodes.td +++ b/clang/include/clang/Basic/TypeNodes.td @@ -64,6 +64,7 @@ def ConstantArrayType : TypeNode; def IncompleteArrayType : TypeNode; def VariableArrayType : TypeNode; def DependentSizedArrayType : TypeNode, AlwaysDependent; +def ArrayParameterType : TypeNode; def DependentSizedExtVectorType : TypeNode, AlwaysDependent; def DependentAddressSpaceType : TypeNode, AlwaysDependent; def VectorType : TypeNode; diff --git a/clang/include/clang/Sema/Overload.h b/clang/include/clang/Sema/Overload.h index e4717dd5baf1e8..76311b00d2fc58 100644 --- a/clang/include/clang/Sema/Overload.h +++ b/clang/include/clang/Sema/Overload.h @@ -198,6 +198,9 @@ class Sema; /// HLSL vector truncation. ICK_HLSL_Vector_Truncation, + /// HLSL non-decaying array rvalue cast. 
+ ICK_HLSL_Array_RValue, + /// The number of conversion kinds ICK_Num_Conversion_Kinds, }; diff --git a/clang/include/clang/Serialization/TypeBitCodes.def b/clang/include/clang/Serialization/TypeBitCodes.def index 3c82dfed9497d5..82b053d4caca63 100644 --- a/clang/include/clang/Serialization/TypeBitCodes.def +++ b/clang/include/clang/Serialization/TypeBitCodes.def @@ -66,5 +66,6 @@ TYPE_BIT_CODE(Using, USING, 54) TYPE_BIT_CODE(BTFTagAttributed, BTFTAG_ATTRIBUTED, 55) TYPE_BIT_CODE(PackIndexing, PACK_INDEXING, 56) TYPE_BIT_CODE(CountAttributed, COUNT_ATTRIBUTED, 57) +TYPE_BIT_CODE(ArrayParameter, ARRAY_PARAMETER, 58) #undef TYPE_BIT_CODE diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index c90fafb6f653d0..f7f55dc4e7a9f4 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -879,7 +879,8 @@ ASTContext::ASTContext(LangOptions &LOpts, SourceManager &SM, TemplateSpecializationTypes(this_()), DependentTemplateSpecializationTypes(this_()), AutoTypes(this_()), DependentBitIntTypes(this_()), SubstTemplateTemplateParmPacks(this_()), - CanonTemplateTemplateParms(this_()), SourceMgr(SM), LangOpts(LOpts), + ArrayParameterTypes(this_()), CanonTemplateTemplateParms(this_()), + SourceMgr(SM), LangOpts(LOpts), NoSanitizeL(new NoSanitizeList(LangOpts.NoSanitizeFiles, SM)), XRayFilter(new XRayFunctionFilter(LangOpts.XRayAlwaysInstrumentFiles, LangOpts.XRayNeverInstrumentFiles, @@ -1906,7 +1907,8 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const { case Type::IncompleteArray: case Type::VariableArray: - case Type::ConstantArray: { + case Type::ConstantArray: + case Type::ArrayParameter: { // Model non-constant sized arrays as size zero, but track the alignment. 
uint64_t Size = 0; if (const auto *CAT = dyn_cast(T)) @@ -3396,6 +3398,37 @@ QualType ASTContext::getDecayedType(QualType T) const { return getDecayedType(T, Decayed); } +QualType ASTContext::getArrayParameterType(QualType Ty) const { + if (Ty->isArrayParameterType()) + return Ty; + assert(Ty->isConstantArrayType() && "Ty must be an array type."); + const auto *ATy = cast(Ty); + llvm::FoldingSetNodeID ID; + ATy->Profile(ID, *this, ATy->getElementType(), ATy->getZExtSize(), + ATy->getSizeExpr(), ATy->getSizeModifier(), + ATy->getIndexTypeQualifiers().getAsOpaqueValue()); + void *InsertPos = nullptr; + ArrayParameterType *AT = + ArrayParameterTypes.FindNodeOrInsertPos(ID, InsertPos); + if (AT) + return QualType(AT, 0); + + QualType Canonical; + if (!Ty.isCanonical()) { + Canonical = getArrayParameterType(getCanonicalType(Ty)); + + // Get the new insert position for the node we care about. + AT = ArrayParameterTypes.FindNodeOrInsertPos(ID, InsertPos); + assert(!AT && "Shouldn't be in the map!"); + } + + AT = new (*this, alignof(ArrayParameterType)) + ArrayParameterType(ATy, Canonical); + Types.push_back(AT); + ArrayParameterTypes.InsertNode(AT, InsertPos); + return QualType(AT, 0); +} + /// getBlockPointerType - Return the uniqued reference to the type for /// a pointer to the specified block. 
QualType ASTContext::getBlockPointerType(QualType T) const { @@ -3642,6 +3675,7 @@ QualType ASTContext::getVariableArrayDecayedType(QualType type) const { case Type::PackIndexing: case Type::BitInt: case Type::DependentBitInt: + case Type::ArrayParameter: llvm_unreachable("type should never be variably-modified"); // These types can be variably-modified but should never need to @@ -6051,7 +6085,9 @@ CanQualType ASTContext::getCanonicalParamType(QualType T) const { T = getVariableArrayDecayedType(T); const Type *Ty = T.getTypePtr(); QualType Result; - if (isa(Ty)) { + if (getLangOpts().HLSL && isa(Ty)) { + Result = getArrayParameterType(QualType(Ty, 0)); + } else if (isa(Ty)) { Result = getArrayDecayedType(QualType(Ty,0)); } else if (isa(Ty)) { Result = getPointerType(QualType(Ty, 0)); @@ -6973,6 +7009,8 @@ const ArrayType *ASTContext::getAsArrayType(QualType T) const { } QualType ASTContext::getAdjustedParameterType(QualType T) const { + if (getLangOpts().HLSL && T->isConstantArrayType()) + return getArrayParameterType(T); if (T->isArrayType() || T->isFunctionType()) return getDecayedType(T); return T; @@ -8583,6 +8621,7 @@ void ASTContext::getObjCEncodingForTypeImpl(QualType T, std::string &S, case Type::DeducedTemplateSpecialization: return; + case Type::ArrayParameter: case Type::Pipe: #define ABSTRACT_TYPE(KIND, BASE) #define TYPE(KIND, BASE) @@ -10926,6 +10965,10 @@ QualType ASTContext::mergeTypes(QualType LHS, QualType RHS, bool OfBlockPointer, assert(LHS != RHS && "Equivalent pipe types should have already been handled!"); return {}; + case Type::ArrayParameter: + assert(LHS != RHS && + "Equivalent ArrayParameter types should have already been handled!"); + return {}; case Type::BitInt: { // Merge two bit-precise int types, while trying to preserve typedef info. 
bool LHSUnsigned = LHS->castAs()->isUnsigned(); @@ -12817,6 +12860,18 @@ static QualType getCommonNonSugarTypeNode(ASTContext &Ctx, const Type *X, getCommonArrayElementType(Ctx, AX, QX, AY, QY), AX->getSize(), SizeExpr, getCommonSizeModifier(AX, AY), getCommonIndexTypeCVRQualifiers(AX, AY)); } + case Type::ArrayParameter: { + const auto *AX = cast(X), + *AY = cast(Y); + assert(AX->getSize() == AY->getSize()); + const Expr *SizeExpr = Ctx.hasSameExpr(AX->getSizeExpr(), AY->getSizeExpr()) + ? AX->getSizeExpr() + : nullptr; + auto ArrayTy = Ctx.getConstantArrayType( + getCommonArrayElementType(Ctx, AX, QX, AY, QY), AX->getSize(), SizeExpr, + getCommonSizeModifier(AX, AY), getCommonIndexTypeCVRQualifiers(AX, AY)); + return Ctx.getArrayParameterType(ArrayTy); + } case Type::Atomic: { const auto *AX = cast(X), *AY = cast(Y); return Ctx.getAtomicType( @@ -13078,6 +13133,7 @@ static QualType getCommonSugarTypeNode(ASTContext &Ctx, const Type *X, CANONICAL_TYPE(Builtin) CANONICAL_TYPE(Complex) CANONICAL_TYPE(ConstantArray) + CANONICAL_TYPE(ArrayParameter) CANONICAL_TYPE(ConstantMatrix) CANONICAL_TYPE(Enum) CANONICAL_TYPE(ExtVector) diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 786695f00fadcc..94a47a8f619018 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -1218,6 +1218,15 @@ ASTNodeImporter::VisitConstantArrayType(const ConstantArrayType *T) { T->getIndexTypeCVRQualifiers()); } +ExpectedType +ASTNodeImporter::VisitArrayParameterType(const ArrayParameterType *T) { + ExpectedType ToArrayTypeOrErr = VisitConstantArrayType(T); + if (!ToArrayTypeOrErr) + return ToArrayTypeOrErr.takeError(); + + return Importer.getToContext().getArrayParameterType(*ToArrayTypeOrErr); +} + ExpectedType ASTNodeImporter::VisitIncompleteArrayType(const IncompleteArrayType *T) { ExpectedType ToElementTypeOrErr = import(T->getElementType()); diff --git a/clang/lib/AST/ASTStructuralEquivalence.cpp 
b/clang/lib/AST/ASTStructuralEquivalence.cpp index 226e0aa38ece70..d56bf21b459e03 100644 --- a/clang/lib/AST/ASTStructuralEquivalence.cpp +++ b/clang/lib/AST/ASTStructuralEquivalence.cpp @@ -840,6 +840,7 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, case Type::Adjusted: case Type::Decayed: + case Type::ArrayParameter: if (!IsStructurallyEquivalent(Context, cast(T1)->getOriginalType(), cast(T2)->getOriginalType())) diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index 6221ebd5c9b4e9..26efaa4624056b 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -1948,6 +1948,7 @@ bool CastExpr::CastConsistency() const { case CK_UserDefinedConversion: // operator bool() case CK_BuiltinFnToFnPtr: case CK_FixedPointToBoolean: + case CK_HLSLArrayRValue: CheckNoBasePath: assert(path_empty() && "Cast kind should not have a base path!"); break; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index dae8f32fc02951..0058e86519985e 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11699,6 +11699,7 @@ GCCTypeClass EvaluateBuiltinClassifyType(QualType T, case Type::IncompleteArray: case Type::FunctionNoProto: case Type::FunctionProto: + case Type::ArrayParameter: return GCCTypeClass::Pointer; case Type::MemberPointer: @@ -14085,6 +14086,7 @@ bool IntExprEvaluator::VisitCastExpr(const CastExpr *E) { case CK_AtomicToNonAtomic: case CK_NoOp: case CK_LValueToRValueBitCast: + case CK_HLSLArrayRValue: return ExprEvaluatorBaseTy::VisitCastExpr(E); case CK_MemberPointerToBoolean: @@ -14913,6 +14915,7 @@ bool ComplexExprEvaluator::VisitCastExpr(const CastExpr *E) { case CK_AtomicToNonAtomic: case CK_NoOp: case CK_LValueToRValueBitCast: + case CK_HLSLArrayRValue: return ExprEvaluatorBaseTy::VisitCastExpr(E); case CK_Dependent: diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 425f84e8af1fe7..d632c697fa20db 100644 --- 
a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -2398,6 +2398,7 @@ bool CXXNameMangler::mangleUnresolvedTypeOrSimpleId(QualType Ty, case Type::Complex: case Type::Adjusted: case Type::Decayed: + case Type::ArrayParameter: case Type::Pointer: case Type::BlockPointer: case Type::LValueReference: @@ -4446,6 +4447,10 @@ void CXXNameMangler::mangleType(const DependentBitIntType *T) { Out << "_"; } +void CXXNameMangler::mangleType(const ArrayParameterType *T) { + mangleType(cast(T)); +} + void CXXNameMangler::mangleIntegerLiteral(QualType T, const llvm::APSInt &Value) { // ::= L E # integer literal diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index addc3140546a46..a0bb04e69c9be8 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -3079,6 +3079,11 @@ void MicrosoftCXXNameMangler::mangleArrayType(const ArrayType *T) { mangleType(ElementTy, SourceRange(), QMM_Escape); } +void MicrosoftCXXNameMangler::mangleType(const ArrayParameterType *T, + Qualifiers, SourceRange) { + mangleArrayType(cast(T)); +} + // ::= // ::= // diff --git a/clang/lib/AST/ODRHash.cpp b/clang/lib/AST/ODRHash.cpp index 2dbc259138a897..e159a1b00be552 100644 --- a/clang/lib/AST/ODRHash.cpp +++ b/clang/lib/AST/ODRHash.cpp @@ -944,6 +944,10 @@ class ODRTypeVisitor : public TypeVisitor { VisitArrayType(T); } + void VisitArrayParameterType(const ArrayParameterType *T) { + VisitConstantArrayType(T); + } + void VisitDependentSizedArrayType(const DependentSizedArrayType *T) { AddStmt(T->getSizeExpr()); VisitArrayType(T); diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index 8f3e26d4601921..779d8a810820d2 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -1197,6 +1197,14 @@ struct SimpleTransformVisitor : public TypeVisitor { return Ctx.getDecayedType(originalType); } + QualType VisitArrayParameterType(const ArrayParameterType *T) { + QualType ArrTy = VisitConstantArrayType(T); + if 
(ArrTy.isNull()) + return {}; + + return Ctx.getArrayParameterType(ArrTy); + } + SUGARED_TYPE_CLASS(TypeOfExpr) SUGARED_TYPE_CLASS(TypeOf) SUGARED_TYPE_CLASS(Decltype) @@ -4454,6 +4462,7 @@ static CachedProperties computeCachedProperties(const Type *T) { case Type::ConstantArray: case Type::IncompleteArray: case Type::VariableArray: + case Type::ArrayParameter: return Cache::get(cast(T)->getElementType()); case Type::Vector: case Type::ExtVector: @@ -4542,6 +4551,7 @@ LinkageInfo LinkageComputer::computeTypeLinkageInfo(const Type *T) { case Type::ConstantArray: case Type::IncompleteArray: case Type::VariableArray: + case Type::ArrayParameter: return computeTypeLinkageInfo(cast(T)->getElementType()); case Type::Vector: case Type::ExtVector: @@ -4736,6 +4746,7 @@ bool Type::canHaveNullability(bool ResultIfUnknown) const { case Type::Pipe: case Type::BitInt: case Type::DependentBitInt: + case Type::ArrayParameter: return false; } llvm_unreachable("bad type kind!"); diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index 0aa1d9327d7707..9d551ff83151fd 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -268,6 +268,7 @@ bool TypePrinter::canPrefixQualifiers(const Type *T, case Type::Adjusted: case Type::Decayed: + case Type::ArrayParameter: case Type::Pointer: case Type::BlockPointer: case Type::LValueReference: @@ -595,6 +596,16 @@ void TypePrinter::printDecayedBefore(const DecayedType *T, raw_ostream &OS) { printAdjustedBefore(T, OS); } +void TypePrinter::printArrayParameterAfter(const ArrayParameterType *T, + raw_ostream &OS) { + printConstantArrayAfter(T, OS); +} + +void TypePrinter::printArrayParameterBefore(const ArrayParameterType *T, + raw_ostream &OS) { + printConstantArrayBefore(T, OS); +} + void TypePrinter::printDecayedAfter(const DecayedType *T, raw_ostream &OS) { printAdjustedAfter(T, OS); } diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index a5fe39633679b9..9308528ac93823 
100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -4719,7 +4719,8 @@ void CodeGenFunction::EmitCallArg(CallArgList &args, const Expr *E, } if (HasAggregateEvalKind && isa(E) && - cast(E)->getCastKind() == CK_LValueToRValue) { + cast(E)->getCastKind() == CK_LValueToRValue && + !type->isArrayParameterType()) { LValue L = EmitLValue(cast(E)->getSubExpr()); assert(L.isSimple()); args.addUncopiedAggregate(L, type); diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 691fde8b0d8b82..8c284c332171a1 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -3641,6 +3641,7 @@ llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit) { case Type::ConstantArray: case Type::VariableArray: case Type::IncompleteArray: + case Type::ArrayParameter: return CreateType(cast(Ty), Unit); case Type::LValueReference: diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 36872c0fedb76e..e0d5575d57d02d 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -5190,6 +5190,7 @@ LValue CodeGenFunction::EmitCastLValue(const CastExpr *E) { case CK_IntegralToFixedPoint: case CK_MatrixCast: case CK_HLSLVectorTruncation: + case CK_HLSLArrayRValue: return EmitUnsupportedLValue(E, "unexpected cast lvalue"); case CK_Dependent: diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index 143855aa84ca3f..1b9287ea239347 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -883,6 +883,9 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { [[fallthrough]]; + case CK_HLSLArrayRValue: + Visit(E->getSubExpr()); + break; case CK_NoOp: case CK_UserDefinedConversion: @@ -1524,6 +1527,7 @@ static bool castPreservesZero(const CastExpr *CE) { case CK_LValueToRValue: case CK_LValueToRValueBitCast: case CK_UncheckedDerivedToBase: + case CK_HLSLArrayRValue: return false; } 
llvm_unreachable("Unhandled clang::CastKind enum"); diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp index c3774d0cb75edc..a793b214645cb3 100644 --- a/clang/lib/CodeGen/CGExprComplex.cpp +++ b/clang/lib/CodeGen/CGExprComplex.cpp @@ -616,6 +616,7 @@ ComplexPairTy ComplexExprEmitter::EmitCast(CastKind CK, Expr *Op, case CK_IntegralToFixedPoint: case CK_MatrixCast: case CK_HLSLVectorTruncation: + case CK_HLSLArrayRValue: llvm_unreachable("invalid cast kind for complex value"); case CK_FloatingRealToComplex: diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp index 36d7493d9a6baf..9f1b06eebf9ed0 100644 --- a/clang/lib/CodeGen/CGExprConstant.cpp +++ b/clang/lib/CodeGen/CGExprConstant.cpp @@ -1226,6 +1226,7 @@ class ConstExprEmitter : case CK_ZeroToOCLOpaqueType: case CK_MatrixCast: case CK_HLSLVectorTruncation: + case CK_HLSLArrayRValue: return nullptr; } llvm_unreachable("Invalid CastKind"); diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 83247aa48f8609..397b4977acc3e9 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -2329,6 +2329,7 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { case CK_FloatingComplexToIntegralComplex: case CK_ConstructorConversion: case CK_ToUnion: + case CK_HLSLArrayRValue: llvm_unreachable("scalar cast to non-scalar value"); case CK_LValueToRValue: diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 44103884940fd9..90324de7268ebe 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -276,6 +276,7 @@ TypeEvaluationKind CodeGenFunction::getEvaluationKind(QualType type) { case Type::Record: case Type::ObjCObject: case Type::ObjCInterface: + case Type::ArrayParameter: return TEK_Aggregate; // We operate on atomic values according to their underlying type. 
@@ -2361,6 +2362,7 @@ void CodeGenFunction::EmitVariablyModifiedType(QualType type) { type = cast(ty)->getPointeeType(); break; + case Type::ArrayParameter: case Type::ConstantArray: case Type::IncompleteArray: // Losing element qualification here is fine. diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index afadc29ab1b027..1568b6e6275b9d 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -590,6 +590,7 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { ResultType = llvm::ArrayType::get(ResultType, 0); break; } + case Type::ArrayParameter: case Type::ConstantArray: { const ConstantArrayType *A = cast(Ty); llvm::Type *EltTy = ConvertTypeForMem(A->getElementType()); diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index fd71317572f0c9..18acf7784f714b 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -3584,6 +3584,9 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) { case Type::Pipe: llvm_unreachable("Pipe types shouldn't get here"); + case Type::ArrayParameter: + llvm_unreachable("Array Parameter types should not get here."); + case Type::Builtin: case Type::BitInt: // GCC treats vector and complex types as fundamental types. @@ -3868,6 +3871,7 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo( case Type::ConstantArray: case Type::IncompleteArray: case Type::VariableArray: + case Type::ArrayParameter: // Itanium C++ ABI 2.9.5p5: // abi::__array_type_info adds no data members to std::type_info. 
break; diff --git a/clang/lib/Edit/RewriteObjCFoundationAPI.cpp b/clang/lib/Edit/RewriteObjCFoundationAPI.cpp index 22f2c47e1d6a13..81797c8c4dc75a 100644 --- a/clang/lib/Edit/RewriteObjCFoundationAPI.cpp +++ b/clang/lib/Edit/RewriteObjCFoundationAPI.cpp @@ -1000,6 +1000,7 @@ static bool rewriteToNumericBoxedExpression(const ObjCMessageExpr *Msg, case CK_LValueToRValue: case CK_NoOp: case CK_UserDefinedConversion: + case CK_HLSLArrayRValue: break; case CK_IntegralCast: { diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 72393bea620526..c9dbac0dfc339d 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -653,6 +653,7 @@ ExprResult Sema::ImpCastExprToType(Expr *E, QualType Ty, case CK_FunctionToPointerDecay: case CK_ToVoid: case CK_NonAtomicToAtomic: + case CK_HLSLArrayRValue: break; } } diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 091fc3e4836b63..80b4257d9d83ed 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -658,8 +658,9 @@ ExprResult Sema::DefaultLvalueConversion(Expr *E) { QualType T = E->getType(); assert(!T.isNull() && "r-value conversion on typeless expression?"); - // lvalue-to-rvalue conversion cannot be applied to function or array types. - if (T->isFunctionType() || T->isArrayType()) + // lvalue-to-rvalue conversion cannot be applied to types that decay to + // pointers (i.e. function or array types). 
+ if (T->canDecayToPointerType()) return E; // We don't want to throw lvalue-to-rvalue casts on top of @@ -4686,6 +4687,9 @@ static void captureVariablyModifiedType(ASTContext &Context, QualType T, case Type::Decayed: T = cast(Ty)->getPointeeType(); break; + case Type::ArrayParameter: + T = cast(Ty)->getElementType(); + break; case Type::Pointer: T = cast(Ty)->getPointeeType(); break; @@ -12908,6 +12912,8 @@ static ImplicitConversionKind castKindToImplicitConversionKind(CastKind CK) { case CK_IntegralComplexToReal: case CK_IntegralRealToComplex: return ICK_Complex_Real; + case CK_HLSLArrayRValue: + return ICK_HLSL_Array_RValue; } } diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index 51c8e04bee8c31..76bb78aa8b5458 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -4416,6 +4416,13 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType, .get(); break; + case ICK_HLSL_Array_RValue: + FromType = Context.getArrayParameterType(FromType); + From = ImpCastExprToType(From, FromType, CK_HLSLArrayRValue, VK_PRValue, + /*BasePath=*/nullptr, CCK) + .get(); + break; + case ICK_Function_To_Pointer: FromType = Context.getPointerType(FromType); From = ImpCastExprToType(From, FromType, CK_FunctionToPointerDecay, @@ -4793,6 +4800,7 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType, case ICK_Num_Conversion_Kinds: case ICK_C_Only_Conversion: case ICK_Incompatible_Pointer_Conversion: + case ICK_HLSL_Array_RValue: llvm_unreachable("Improper second standard conversion"); } diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index dce225a7204da8..777f89c70f87c2 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -6269,7 +6269,10 @@ void InitializationSequence::InitializeFrom(Sema &S, // initializer is a string literal, see 8.5.2. // - Otherwise, if the destination type is an array, the program is // ill-formed. 
- if (const ArrayType *DestAT = Context.getAsArrayType(DestType)) { + // - Except in HLSL, where non-decaying array parameters behave like + // non-array types for initialization. + if (DestType->isArrayType() && !DestType->isArrayParameterType()) { + const ArrayType *DestAT = Context.getAsArrayType(DestType); if (Initializer && isa(DestAT)) { SetFailed(FK_VariableLengthArrayHasInitializer); return; diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp index d3a9c7abd0e944..38237ee578079d 100644 --- a/clang/lib/Sema/SemaLookup.cpp +++ b/clang/lib/Sema/SemaLookup.cpp @@ -3243,6 +3243,10 @@ addAssociatedClassesAndNamespaces(AssociatedLookup &Result, QualType Ty) { case Type::Pipe: T = cast(T)->getElementType().getTypePtr(); continue; + + // Array parameter types are treated as fundamental types. + case Type::ArrayParameter: + break; } if (Queue.empty()) diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 51450e486eaeb4..16d54c1ffe5fd9 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -160,6 +160,7 @@ ImplicitConversionRank clang::GetConversionRank(ImplicitConversionKind Kind) { ICR_C_Conversion_Extension, ICR_Conversion, ICR_Conversion, + ICR_Conversion, }; static_assert(std::size(Rank) == (int)ICK_Num_Conversion_Kinds); return Rank[(int)Kind]; @@ -201,6 +202,7 @@ static const char *GetImplicitConversionName(ImplicitConversionKind Kind) { "Incompatible pointer conversion", "Fixed point conversion", "HLSL vector truncation", + "Non-decaying array conversion", }; static_assert(std::size(Name) == (int)ICK_Num_Conversion_Kinds); return Name[Kind]; @@ -2131,8 +2133,7 @@ static bool IsStandardConversion(Sema &S, Expr* From, QualType ToType, // A glvalue (3.10) of a non-function, non-array type T can // be converted to a prvalue. 
bool argIsLValue = From->isGLValue(); - if (argIsLValue && - !FromType->isFunctionType() && !FromType->isArrayType() && + if (argIsLValue && !FromType->canDecayToPointerType() && S.Context.getCanonicalType(FromType) != S.Context.OverloadTy) { SCS.First = ICK_Lvalue_To_Rvalue; @@ -2147,6 +2148,19 @@ static bool IsStandardConversion(Sema &S, Expr* From, QualType ToType, // is T (C++ 4.1p1). C++ can't get here with class types; in C, we // just strip the qualifiers because they don't matter. FromType = FromType.getUnqualifiedType(); + } else if (S.getLangOpts().HLSL && FromType->isConstantArrayType() && + ToType->isArrayParameterType()) { + // HLSL constant array parameters do not decay, so if the argument is a + // constant array and the parameter is an ArrayParameterType we have special + // handling here. + FromType = S.Context.getArrayParameterType(FromType); + if (S.Context.getCanonicalType(FromType) != + S.Context.getCanonicalType(ToType)) + return false; + + SCS.First = ICK_HLSL_Array_RValue; + SCS.setAllToTypes(ToType); + return true; } else if (FromType->isArrayType()) { // Array-to-pointer conversion (C++ 4.2) SCS.First = ICK_Array_To_Pointer; @@ -6100,6 +6114,7 @@ static bool CheckConvertedConstantConversions(Sema &S, case ICK_Lvalue_To_Rvalue: case ICK_Array_To_Pointer: case ICK_Function_To_Pointer: + case ICK_HLSL_Array_RValue: llvm_unreachable("found a first conversion kind in Second"); case ICK_Function_Conversion: diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index 9cd19d711af4dc..1a2d5e9310dbe1 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -6898,6 +6898,11 @@ bool UnnamedLocalNoLinkageFinder::VisitBitIntType(const BitIntType *T) { return false; } +bool UnnamedLocalNoLinkageFinder::VisitArrayParameterType( + const ArrayParameterType *T) { + return VisitConstantArrayType(T); +} + bool UnnamedLocalNoLinkageFinder::VisitDependentBitIntType( const DependentBitIntType *T) { return 
false; diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp index 9a55881f644254..716660244537b8 100644 --- a/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -2277,6 +2277,7 @@ static TemplateDeductionResult DeduceTemplateArgumentsByTypeMatch( case Type::DependentTemplateSpecialization: case Type::PackExpansion: case Type::Pipe: + case Type::ArrayParameter: // No template argument deduction for these types return TemplateDeductionResult::Success; @@ -6355,11 +6356,11 @@ MarkUsedTemplateParameters(ASTContext &Ctx, QualType T, case Type::ConstantArray: case Type::IncompleteArray: + case Type::ArrayParameter: MarkUsedTemplateParameters(Ctx, cast(T)->getElementType(), OnlyDeduced, Depth, Used); break; - case Type::Vector: case Type::ExtVector: MarkUsedTemplateParameters(Ctx, diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index fd94caa4e1d449..d88895d3529458 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -6505,6 +6505,9 @@ namespace { void VisitDecayedTypeLoc(DecayedTypeLoc TL) { llvm_unreachable("decayed type locs not expected here!"); } + void VisitArrayParameterTypeLoc(ArrayParameterTypeLoc TL) { + llvm_unreachable("array parameter type locs not expected here!"); + } void VisitAttributedTypeLoc(AttributedTypeLoc TL) { fillAttributedTypeLoc(TL, State); diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 80a10647ca5d33..eace1bfdff5aa0 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -5243,6 +5243,23 @@ QualType TreeTransform::TransformDecayedType(TypeLocBuilder &TLB, return Result; } +template +QualType +TreeTransform::TransformArrayParameterType(TypeLocBuilder &TLB, + ArrayParameterTypeLoc TL) { + QualType OriginalType = getDerived().TransformType(TLB, TL.getElementLoc()); + if (OriginalType.isNull()) + return QualType(); + + QualType Result = TL.getType(); + if 
(getDerived().AlwaysRebuild() || + OriginalType != TL.getElementLoc().getType()) + Result = SemaRef.Context.getArrayParameterType(OriginalType); + TLB.push(Result); + // Nothing to set for ArrayParameterTypeLoc. + return Result; +} + template QualType TreeTransform::TransformPointerType(TypeLocBuilder &TLB, PointerTypeLoc TL) { diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 28e8d27fef08c6..004859ed22bf16 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -6810,6 +6810,10 @@ void TypeLocReader::VisitAdjustedTypeLoc(AdjustedTypeLoc TL) { // nothing to do } +void TypeLocReader::VisitArrayParameterTypeLoc(ArrayParameterTypeLoc TL) { + // nothing to do +} + void TypeLocReader::VisitMacroQualifiedTypeLoc(MacroQualifiedTypeLoc TL) { TL.setExpansionLoc(readSourceLocation()); } diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 2438fbc166062f..a2668e61c51d12 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -318,6 +318,10 @@ void TypeLocWriter::VisitAdjustedTypeLoc(AdjustedTypeLoc TL) { // nothing to do } +void TypeLocWriter::VisitArrayParameterTypeLoc(ArrayParameterTypeLoc TL) { + // nothing to do +} + void TypeLocWriter::VisitBlockPointerTypeLoc(BlockPointerTypeLoc TL) { addSourceLocation(TL.getCaretLoc()); } diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp index c3fc56ac30ee9f..7a900780384a91 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngineC.cpp @@ -330,7 +330,8 @@ void ExprEngine::VisitCast(const CastExpr *CastE, const Expr *Ex, case CK_ConstructorConversion: case CK_UserDefinedConversion: case CK_FunctionToPointerDecay: - case CK_BuiltinFnToFnPtr: { + case CK_BuiltinFnToFnPtr: + case CK_HLSLArrayRValue: { // Copy the SVal of Ex to CastE. 
ProgramStateRef state = Pred->getState(); const LocationContext *LCtx = Pred->getLocationContext(); diff --git a/clang/test/CodeGenHLSL/ArrayTemporary.hlsl b/clang/test/CodeGenHLSL/ArrayTemporary.hlsl new file mode 100644 index 00000000000000..63a30b61440eb5 --- /dev/null +++ b/clang/test/CodeGenHLSL/ArrayTemporary.hlsl @@ -0,0 +1,104 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s + +void fn(float x[2]) { } + +// CHECK-LABEL: define void {{.*}}call{{.*}} +// CHECK: [[Arr:%.*]] = alloca [2 x float] +// CHECK: [[Tmp:%.*]] = alloca [2 x float] +// CHECK: call void @llvm.memset.p0.i32(ptr align 4 [[Arr]], i8 0, i32 8, i1 false) +// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[Arr]], i32 8, i1 false) +// CHECK: call void {{.*}}fn{{.*}}(ptr noundef byval([2 x float]) align 4 [[Tmp]]) +void call() { + float Arr[2] = {0, 0}; + fn(Arr); +} + +struct Obj { + float V; + int X; +}; + +void fn2(Obj O[4]) { } + +// CHECK-LABEL: define void {{.*}}call2{{.*}} +// CHECK: [[Arr:%.*]] = alloca [4 x %struct.Obj] +// CHECK: [[Tmp:%.*]] = alloca [4 x %struct.Obj] +// CHECK: call void @llvm.memset.p0.i32(ptr align 4 [[Arr]], i8 0, i32 32, i1 false) +// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[Arr]], i32 32, i1 false) +// CHECK: call void {{.*}}fn2{{.*}}(ptr noundef byval([4 x %struct.Obj]) align 4 [[Tmp]]) +void call2() { + Obj Arr[4] = {}; + fn2(Arr); +} + + +void fn3(float x[2][2]) { } + +// CHECK-LABEL: define void {{.*}}call3{{.*}} +// CHECK: [[Arr:%.*]] = alloca [2 x [2 x float]] +// CHECK: [[Tmp:%.*]] = alloca [2 x [2 x float]] +// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Arr]], ptr align 4 {{.*}}, i32 16, i1 false) +// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[Arr]], i32 16, i1 false) +// CHECK: call void {{.*}}fn3{{.*}}(ptr noundef byval([2 x [2 x float]]) align 4 [[Tmp]]) +void call3() { + float 
Arr[2][2] = {{0, 0}, {1,1}}; + fn3(Arr); +} + +// CHECK-LABEL: define void {{.*}}call4{{.*}}(ptr +// CHECK-SAME: noundef byval([2 x [2 x float]]) align 4 [[Arr:%.*]]) +// CHECK: [[Tmp:%.*]] = alloca [2 x [2 x float]] +// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp]], ptr align 4 [[Arr]], i32 16, i1 false) +// CHECK: call void {{.*}}fn3{{.*}}(ptr noundef byval([2 x [2 x float]]) align 4 [[Tmp]]) + +void call4(float Arr[2][2]) { + fn3(Arr); +} + +// Verify that each template instantiation codegens to a unique and correctly +// mangled function name. + +// CHECK-LABEL: define void {{.*}}template_call{{.*}}(ptr + +// CHECK-SAME: noundef byval([2 x float]) align 4 [[FA2:%[0-9A-Z]+]], +// CHECK-SAME: ptr noundef byval([4 x float]) align 4 [[FA4:%[0-9A-Z]+]], +// CHECK-SAME: ptr noundef byval([3 x i32]) align 4 [[IA3:%[0-9A-Z]+]] + +// CHECK: [[Tmp1:%.*]] = alloca [2 x float] +// CHECK: [[Tmp2:%.*]] = alloca [4 x float] +// CHECK: [[Tmp3:%.*]] = alloca [3 x i32] +// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp1]], ptr align 4 [[FA2]], i32 8, i1 false) +// CHECK: call void @"??$template_fn@$$BY01M@@YAXY01M@Z"(ptr noundef byval([2 x float]) align 4 [[Tmp1]]) +// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp2]], ptr align 4 [[FA4]], i32 16, i1 false) +// CHECK: call void @"??$template_fn@$$BY03M@@YAXY03M@Z"(ptr noundef byval([4 x float]) align 4 [[Tmp2]]) +// CHECK: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[Tmp3]], ptr align 4 [[IA3]], i32 12, i1 false) +// CHECK: call void @"??$template_fn@$$BY02H@@YAXY02H@Z"(ptr noundef byval([3 x i32]) align 4 [[Tmp3]]) + +template +void template_fn(T Val) {} + +void template_call(float FA2[2], float FA4[4], int IA3[3]) { + template_fn(FA2); + template_fn(FA4); + template_fn(IA3); +} + + +// Verify that Array parameter element access correctly codegens. 
+// CHECK-LABEL: define void {{.*}}element_access{{.*}}(ptr +// CHECK-SAME: noundef byval([2 x float]) align 4 [[FA2:%[0-9A-Z]+]] + +// CHECK: [[Addr:%.*]] = getelementptr inbounds [2 x float], ptr [[FA2]], i32 0, i32 0 +// CHECK: [[Tmp:%.*]] = load float, ptr [[Addr]] +// CHECK: call void @"??$template_fn@M@@YAXM@Z"(float noundef [[Tmp]]) + +// CHECK: [[Idx0:%.*]] = getelementptr inbounds [2 x float], ptr [[FA2]], i32 0, i32 0 +// CHECK: [[Val0:%.*]] = load float, ptr [[Idx0]] +// CHECK: [[Sum:%.*]] = fadd float [[Val0]], 5.000000e+00 +// CHECK: [[Idx1:%.*]] = getelementptr inbounds [2 x float], ptr [[FA2]], i32 0, i32 1 +// CHECK: store float [[Sum]], ptr [[Idx1]] + +void element_access(float FA2[2]) { + template_fn(FA2[0]); + FA2[1] = FA2[0] + 5; +} diff --git a/clang/test/SemaHLSL/ArrayParams.hlsl b/clang/test/SemaHLSL/ArrayParams.hlsl new file mode 100644 index 00000000000000..75af9d775206e6 --- /dev/null +++ b/clang/test/SemaHLSL/ArrayParams.hlsl @@ -0,0 +1,29 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -verify + +void fn(int I[5]); // #fn +void fn2(int I[3][3]); // #fn2 + +void call() { + float F[5]; + double D[4]; + int Long[9]; + int Short[4]; + int Same[5]; + + fn(F); // expected-error{{no matching function for call to 'fn'}} + // expected-note@#fn{{candidate function not viable: no known conversion from 'float[5]' to 'int[5]' for 1st argument}} + + fn(D); // expected-error{{no matching function for call to 'fn'}} + // expected-note@#fn{{candidate function not viable: no known conversion from 'double[4]' to 'int[5]' for 1st argument}} + + fn(Long); // expected-error{{no matching function for call to 'fn'}} + // expected-note@#fn{{candidate function not viable: no known conversion from 'int[9]' to 'int[5]' for 1st argument}} + + fn(Short); // expected-error{{no matching function for call to 'fn'}} + // expected-note@#fn{{candidate function not viable: no known conversion from 'int[4]' to 'int[5]' for 1st argument}} + + fn(Same); // 
totally fine, nothing to see here. + + fn2(Long); // expected-error{{no matching function for call to 'fn2'}} + // expected-note@#fn2{{candidate function not viable: no known conversion from 'int[9]' to 'int[3][3]' for 1st argument}} +} diff --git a/clang/test/SemaHLSL/ArrayTemporary.hlsl b/clang/test/SemaHLSL/ArrayTemporary.hlsl new file mode 100644 index 00000000000000..dff9aff7d9b299 --- /dev/null +++ b/clang/test/SemaHLSL/ArrayTemporary.hlsl @@ -0,0 +1,95 @@ +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -ast-dump %s | FileCheck %s + +void fn(float x[2]) { } + +// CHECK: CallExpr {{.*}} 'void' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(float[2])' +// CHECK-NEXT: DeclRefExpr {{.*}} 'void (float[2])' lvalue Function {{.*}} 'fn' 'void (float[2])' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float[2]' + +void call() { + float Arr[2] = {0, 0}; + fn(Arr); +} + +struct Obj { + float V; + int X; +}; + +void fn2(Obj O[4]) { } + +// CHECK: CallExpr {{.*}} 'void' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(Obj[4])' +// CHECK-NEXT: DeclRefExpr {{.*}} 'void (Obj[4])' lvalue Function {{.*}} 'fn2' 'void (Obj[4])' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'Obj[4]' + +void call2() { + Obj Arr[4] = {}; + fn2(Arr); +} + + +void fn3(float x[2][2]) { } + +// CHECK: CallExpr {{.*}} 'void' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(float[2][2])' +// CHECK-NEXT: DeclRefExpr {{.*}} 'void (float[2][2])' lvalue Function {{.*}} 'fn3' 'void (float[2][2])' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float[2][2]' + +void call3() { + float Arr[2][2] = {{0, 0}, {1,1}}; + fn3(Arr); +} + +// This template function should be instantiated 3 times for the different array +// types and lengths. 
+ +// CHECK: FunctionTemplateDecl {{.*}} template_fn +// CHECK-NEXT: TemplateTypeParmDecl {{.*}} referenced typename depth 0 index 0 T +// CHECK-NEXT: FunctionDecl {{.*}} template_fn 'void (T)' +// CHECK-NEXT: ParmVarDecl {{.*}} Val 'T' + +// CHECK: FunctionDecl {{.*}} used template_fn 'void (float[2])' implicit_instantiation +// CHECK-NEXT: TemplateArgument type 'float[2]' +// CHECK-NEXT: ArrayParameterType {{.*}} 'float[2]' 2 +// CHECK-NEXT: BuiltinType {{.*}} 'float' +// CHECK-NEXT: ParmVarDecl {{.*}} Val 'float[2]' + +// CHECK: FunctionDecl {{.*}} used template_fn 'void (float[4])' implicit_instantiation +// CHECK-NEXT: TemplateArgument type 'float[4]' +// CHECK-NEXT: ArrayParameterType {{.*}} 'float[4]' 4 +// CHECK-NEXT: BuiltinType {{.*}} 'float' +// CHECK-NEXT: ParmVarDecl {{.*}} Val 'float[4]' + +// CHECK: FunctionDecl {{.*}} used template_fn 'void (int[3])' implicit_instantiation +// CHECK-NEXT: TemplateArgument type 'int[3]' +// CHECK-NEXT: ArrayParameterType {{.*}} 'int[3]' 3 +// CHECK-NEXT: BuiltinType {{.*}} 'int' +// CHECK-NEXT: ParmVarDecl {{.*}} Val 'int[3]' + +template +void template_fn(T Val) {} + +// CHECK: FunctionDecl {{.*}} call 'void (float[2], float[4], int[3])' +// CHECK: CallExpr {{.*}} 'void' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(float[2])' +// CHECK-NEXT: DeclRefExpr {{.*}} 'void (float[2])' lvalue Function {{.*}} 'template_fn' 'void (float[2])' (FunctionTemplate {{.*}} 'template_fn') +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float[2]' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float[2]' lvalue ParmVar {{.*}} 'FA2' 'float[2]' +// CHECK-NEXT: CallExpr {{.*}} 'void' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(float[4])' +// CHECK-NEXT: DeclRefExpr {{.*}} 'void (float[4])' lvalue Function {{.*}} 'template_fn' 'void (float[4])' (FunctionTemplate {{.*}} 'template_fn') +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'float[4]' +// CHECK-NEXT: DeclRefExpr {{.*}} 'float[4]' lvalue ParmVar {{.*}} 'FA4' 'float[4]' +// CHECK-NEXT: CallExpr {{.*}} 
'void' +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'void (*)(int[3])' +// CHECK-NEXT: DeclRefExpr {{.*}} 'void (int[3])' lvalue Function {{.*}} 'template_fn' 'void (int[3])' (FunctionTemplate {{.*}} 'template_fn') +// CHECK-NEXT: ImplicitCastExpr {{.*}} 'int[3]' +// CHECK-NEXT: DeclRefExpr {{.*}} 'int[3]' lvalue ParmVar {{.*}} 'IA3' 'int[3]' + +void call(float FA2[2], float FA4[4], int IA3[3]) { + template_fn(FA2); + template_fn(FA4); + template_fn(IA3); +} diff --git a/clang/test/SemaHLSL/ArrayTemporary.ll b/clang/test/SemaHLSL/ArrayTemporary.ll new file mode 100644 index 00000000000000..5eec0094865bae --- /dev/null +++ b/clang/test/SemaHLSL/ArrayTemporary.ll @@ -0,0 +1,76 @@ +; ModuleID = '/Users/cbieneman/dev/llvm-project/clang/test/SemaHLSL/ArrayTemporary.hlsl' +source_filename = "/Users/cbieneman/dev/llvm-project/clang/test/SemaHLSL/ArrayTemporary.hlsl" +target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-pc-shadermodel6.3-library" + +%struct.Obj = type { float, i32 } + +@"__const.?call3@@YAXXZ.Arr" = private unnamed_addr constant [2 x [2 x float]] [[2 x float] zeroinitializer, [2 x float] [float 1.000000e+00, float 1.000000e+00]], align 4 + +; Function Attrs: noinline nounwind optnone +define void @"?fn@@YAXY01M@Z"(ptr noundef byval([2 x float]) align 4 %x) #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone +define void @"?call@@YAXXZ"() #0 { +entry: + %Arr = alloca [2 x float], align 4 + %agg.tmp = alloca [2 x float], align 4 + call void @llvm.memset.p0.i32(ptr align 4 %Arr, i8 0, i32 8, i1 false) + call void @llvm.memcpy.p0.p0.i32(ptr align 4 %agg.tmp, ptr align 4 %Arr, i32 8, i1 false) + call void @"?fn@@YAXY01M@Z"(ptr noundef byval([2 x float]) align 4 %agg.tmp) + ret void +} + +; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) +declare void @llvm.memset.p0.i32(ptr nocapture writeonly, i8, i32, i1 immarg) #1 + +; Function Attrs: 
nocallback nofree nounwind willreturn memory(argmem: readwrite) +declare void @llvm.memcpy.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg) #2 + +; Function Attrs: noinline nounwind optnone +define void @"?fn2@@YAXY03UObj@@@Z"(ptr noundef byval([4 x %struct.Obj]) align 4 %O) #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone +define void @"?call2@@YAXXZ"() #0 { +entry: + %Arr = alloca [4 x %struct.Obj], align 4 + %agg.tmp = alloca [4 x %struct.Obj], align 4 + call void @llvm.memset.p0.i32(ptr align 4 %Arr, i8 0, i32 32, i1 false) + call void @llvm.memcpy.p0.p0.i32(ptr align 4 %agg.tmp, ptr align 4 %Arr, i32 32, i1 false) + call void @"?fn2@@YAXY03UObj@@@Z"(ptr noundef byval([4 x %struct.Obj]) align 4 %agg.tmp) + ret void +} + +; Function Attrs: noinline nounwind optnone +define void @"?fn3@@YAXY111M@Z"(ptr noundef byval([2 x [2 x float]]) align 4 %x) #0 { +entry: + ret void +} + +; Function Attrs: noinline nounwind optnone +define void @"?call3@@YAXXZ"() #0 { +entry: + %Arr = alloca [2 x [2 x float]], align 4 + %agg.tmp = alloca [2 x [2 x float]], align 4 + call void @llvm.memcpy.p0.p0.i32(ptr align 4 %Arr, ptr align 4 @"__const.?call3@@YAXXZ.Arr", i32 16, i1 false) + call void @llvm.memcpy.p0.p0.i32(ptr align 4 %agg.tmp, ptr align 4 %Arr, i32 16, i1 false) + call void @"?fn3@@YAXY111M@Z"(ptr noundef byval([2 x [2 x float]]) align 4 %agg.tmp) + ret void +} + +attributes #0 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } +attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: write) } +attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } + +!llvm.module.flags = !{!0, !1} +!llvm.ident = !{!2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 4, !"dx.disable_optimizations", i32 1} +!2 = !{!"clang version 19.0.0git (git@github.com:llvm/llvm-project.git 64e1c15c520cf11114ef2ddd887e76560903db2b)"} diff --git 
a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index 454ee1e42aed1d..00ddc4f24500fd 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -1923,6 +1923,7 @@ DEFAULT_TYPELOC_IMPL(ConstantArray, ArrayType) DEFAULT_TYPELOC_IMPL(IncompleteArray, ArrayType) DEFAULT_TYPELOC_IMPL(VariableArray, ArrayType) DEFAULT_TYPELOC_IMPL(DependentSizedArray, ArrayType) +DEFAULT_TYPELOC_IMPL(ArrayParameter, ConstantArrayType) DEFAULT_TYPELOC_IMPL(DependentAddressSpace, Type) DEFAULT_TYPELOC_IMPL(DependentVector, Type) DEFAULT_TYPELOC_IMPL(DependentSizedExtVector, Type) From 53d256bb2790a0e5300f275345c864930b7e6f82 Mon Sep 17 00:00:00 2001 From: Eric Date: Mon, 1 Apr 2024 13:25:26 -0400 Subject: [PATCH 024/201] Update the "Current Status" section of the website to be current. (#84507) The section discusses the reasons for the library's inception more than a decade ago. Now it discusses the progress libc++ has made, and the many impressive accomplishments our contributors have brought to it. The initial section remains below. --------- Co-authored-by: Louis Dionne --- libcxx/docs/index.rst | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/libcxx/docs/index.rst b/libcxx/docs/index.rst index 2a7e47dfe6d88b..db55c6f02a3dc3 100644 --- a/libcxx/docs/index.rst +++ b/libcxx/docs/index.rst @@ -66,6 +66,25 @@ Getting Started with libc++ Current Status ============== +libc++ has become the default C++ Standard Library implementation for many major platforms, including Apple's macOS, +iOS, watchOS, and tvOS, Google Search, the Android operating system, and FreeBSD. As a result, libc++ has an estimated +user base of over 1 billion daily active users. + +Since its inception, libc++ has focused on delivering high performance, standards-conformance, and portability. It has +been extensively tested and optimized, making it robust and production ready. 
libc++ fully implements C++11 and C++14, +with C++17, C++20, C++23, and C++26 features being actively developed and making steady progress. + +libc++ is continuously integrated and tested on a wide range of platforms and configurations, ensuring its reliability +and compatibility across various systems. The library's extensive test suite and rigorous quality assurance process have +made it a top choice for platform providers looking to offer their users a robust and efficient C++ Standard Library. + +As an open-source project, libc++ benefits from a vibrant community of contributors who work together to improve the +library and add new features. This ongoing development and support ensure that libc++ remains at the forefront of +C++ standardization efforts and continues to meet the evolving needs of C++ developers worldwide. + + +History +------- After its initial introduction, many people have asked "why start a new library instead of contributing to an existing library?" (like Apache's libstdcxx, GNU's libstdc++, STLport, etc). 
There are many contributing From 2cfd7d433be0831c6e2a248a4b828f7aedcaeaa0 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 1 Apr 2024 10:26:33 -0700 Subject: [PATCH 025/201] [libc] fixup missing include for fullbuild (#87266) Fixes #86928 --- libc/src/stdio/fseeko.h | 1 + libc/src/stdio/ftello.h | 1 + 2 files changed, 2 insertions(+) diff --git a/libc/src/stdio/fseeko.h b/libc/src/stdio/fseeko.h index 77fb41215c318f..3202ed2f97d0ef 100644 --- a/libc/src/stdio/fseeko.h +++ b/libc/src/stdio/fseeko.h @@ -10,6 +10,7 @@ #define LLVM_LIBC_SRC_STDIO_FSEEKO_H #include +#include namespace LIBC_NAMESPACE { diff --git a/libc/src/stdio/ftello.h b/libc/src/stdio/ftello.h index 5ab17f9244a5ad..0fdf13ab6bdbcd 100644 --- a/libc/src/stdio/ftello.h +++ b/libc/src/stdio/ftello.h @@ -10,6 +10,7 @@ #define LLVM_LIBC_SRC_STDIO_FTELLO_H #include +#include namespace LIBC_NAMESPACE { From a54930e696a275ac3947484f44d770cd587ce147 Mon Sep 17 00:00:00 2001 From: Peiming Liu Date: Mon, 1 Apr 2024 10:30:36 -0700 Subject: [PATCH 026/201] [mlir][sparse] allow YieldOp to yield multiple values. 
(#87261) --- .../SparseTensor/IR/SparseTensorOps.td | 25 +++++++++++++++---- .../SparseTensor/IR/SparseTensorDialect.cpp | 18 +++---------- .../Transforms/SparseReinterpretMap.cpp | 5 ++-- .../lib/Dialect/SparseTensor/Utils/Merger.cpp | 4 +-- 4 files changed, 29 insertions(+), 23 deletions(-) diff --git a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td index 29cf8c32447ecf..5df8a176459b7c 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td +++ b/mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensorOps.td @@ -1278,8 +1278,10 @@ def SparseTensor_SelectOp : SparseTensor_Op<"select", [Pure, SameOperandsAndResu let hasVerifier = 1; } -def SparseTensor_YieldOp : SparseTensor_Op<"yield", [Pure, Terminator]>, - Arguments<(ins Optional:$result)> { +def SparseTensor_YieldOp : SparseTensor_Op<"yield", [Pure, Terminator, + ParentOneOf<["BinaryOp", "UnaryOp", "ReduceOp", "SelectOp", + "ForeachOp"]>]>, + Arguments<(ins Variadic:$results)> { let summary = "Yield from sparse_tensor set-like operations"; let description = [{ Yields a value from within a `binary`, `unary`, `reduce`, @@ -1302,14 +1304,27 @@ def SparseTensor_YieldOp : SparseTensor_Op<"yield", [Pure, Terminator]>, let builders = [ OpBuilder<(ins), [{ - build($_builder, $_state, Value()); + build($_builder, $_state, ValueRange()); + }]>, + OpBuilder<(ins "Value":$yieldVal), + [{ + build($_builder, $_state, ValueRange(yieldVal)); }]> ]; + let extraClassDeclaration = [{ + Value getSingleResult() { + assert(hasSingleResult()); + return getResults().front(); + } + bool hasSingleResult() { + return getResults().size() == 1; + } + }]; + let assemblyFormat = [{ - $result attr-dict `:` type($result) + $results attr-dict `:` type($results) }]; - let hasVerifier = 1; } def SparseTensor_ForeachOp : SparseTensor_Op<"foreach", diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp 
b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp index 6da51bb6b9cacf..e4d93c5623b9c4 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp +++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp @@ -1591,7 +1591,8 @@ static LogicalResult verifyNumBlockArgs(T *op, Region ®ion, if (!yield) return op->emitError() << regionName << " region must end with sparse_tensor.yield"; - if (!yield.getResult() || yield.getResult().getType() != outputType) + if (!yield.hasSingleResult() || + yield.getSingleResult().getType() != outputType) return op->emitError() << regionName << " region yield type mismatch"; return success(); @@ -1654,7 +1655,8 @@ LogicalResult UnaryOp::verify() { // Absent branch can only yield invariant values. Block *absentBlock = &absent.front(); Block *parent = getOperation()->getBlock(); - Value absentVal = cast(absentBlock->getTerminator()).getResult(); + Value absentVal = + cast(absentBlock->getTerminator()).getSingleResult(); if (auto arg = dyn_cast(absentVal)) { if (arg.getOwner() == parent) return emitError("absent region cannot yield linalg argument"); @@ -1907,18 +1909,6 @@ LogicalResult SortOp::verify() { return success(); } -LogicalResult YieldOp::verify() { - // Check for compatible parent. - auto *parentOp = (*this)->getParentOp(); - if (isa(parentOp) || isa(parentOp) || - isa(parentOp) || isa(parentOp) || - isa(parentOp)) - return success(); - - return emitOpError("expected parent op to be sparse_tensor unary, binary, " - "reduce, select or foreach"); -} - /// Materialize a single constant operation from a given attribute value with /// the desired resultant type. 
Operation *SparseTensorDialect::materializeConstant(OpBuilder &builder, diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseReinterpretMap.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseReinterpretMap.cpp index 14ea07f0b54b82..9c0fc60877d8a3 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseReinterpretMap.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseReinterpretMap.cpp @@ -764,9 +764,10 @@ struct ForeachOpDemapper if (numInitArgs != 0) { rewriter.setInsertionPointToEnd(body); auto yield = llvm::cast(body->getTerminator()); - if (auto stt = tryGetSparseTensorType(yield.getResult()); + if (auto stt = tryGetSparseTensorType(yield.getSingleResult()); stt && !stt->isIdentity()) { - Value y = genDemap(rewriter, stt->getEncoding(), yield.getResult()); + Value y = + genDemap(rewriter, stt->getEncoding(), yield.getSingleResult()); rewriter.create(loc, y); rewriter.eraseOp(yield); } diff --git a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp index 72b722c69ae34b..9c0aed3c18eff2 100644 --- a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp +++ b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp @@ -1031,7 +1031,7 @@ LatSetId Merger::buildLattices(ExprId e, LoopId i) { // invariant on the right. Block &absentBlock = absentRegion.front(); YieldOp absentYield = cast(absentBlock.getTerminator()); - const Value absentVal = absentYield.getResult(); + const Value absentVal = absentYield.getSingleResult(); const ExprId rhs = addInvariantExp(absentVal); return disjSet(e, child0, buildLattices(rhs, i), unop); } @@ -1500,7 +1500,7 @@ static Value insertYieldOp(RewriterBase &rewriter, Location loc, Region ®ion, // Merge cloned block and return yield value. 
Operation *placeholder = rewriter.create(loc, 0); rewriter.inlineBlockBefore(&tmpRegion.front(), placeholder, vals); - Value val = clonedYield.getResult(); + Value val = clonedYield.getSingleResult(); rewriter.eraseOp(clonedYield); rewriter.eraseOp(placeholder); return val; From 2be722587f5987891ed8b2904a03f983e987f226 Mon Sep 17 00:00:00 2001 From: lntue <35648136+lntue@users.noreply.github.com> Date: Mon, 1 Apr 2024 13:31:07 -0400 Subject: [PATCH 027/201] [libc][math] Implement atan2f correctly rounded to all rounding modes. (#86716) We compute atan2f(y, x) in 2 stages: - Fast step: perform computations in double precision , with relative errors < 2^-50 - Accurate step: if the result from the Fast step fails Ziv's rounding test, then we perform computations in double-double precision, with relative errors < 2^-100. On Ryzen 5900X, worst-case latency is ~ 200 clocks, compared to average latency ~ 60 clocks, and average reciprocal throughput ~ 20 clocks. --- libc/config/baremetal/arm/entrypoints.txt | 1 + libc/config/baremetal/riscv/entrypoints.txt | 1 + libc/config/darwin/arm/entrypoints.txt | 1 + libc/config/linux/aarch64/entrypoints.txt | 1 + libc/config/linux/arm/entrypoints.txt | 1 + libc/config/linux/riscv/entrypoints.txt | 1 + libc/config/linux/x86_64/entrypoints.txt | 1 + libc/config/windows/entrypoints.txt | 1 + libc/docs/math/index.rst | 3 +- libc/spec/stdc.td | 4 + libc/src/math/generic/CMakeLists.txt | 18 ++ libc/src/math/generic/atan2f.cpp | 306 ++++++++++++++++++++ libc/src/math/generic/atanf.cpp | 22 +- libc/src/math/generic/inv_trigf_utils.cpp | 117 ++++---- libc/src/math/generic/inv_trigf_utils.h | 7 +- libc/test/src/math/CMakeLists.txt | 13 + libc/test/src/math/atan2f_test.cpp | 133 +++++++++ libc/test/src/math/atanf_test.cpp | 5 +- libc/test/src/math/smoke/CMakeLists.txt | 12 + libc/test/src/math/smoke/atan2f_test.cpp | 50 ++++ libc/utils/MPFRWrapper/MPFRUtils.cpp | 8 + libc/utils/MPFRWrapper/MPFRUtils.h | 1 + 22 files changed, 634 insertions(+), 
73 deletions(-) create mode 100644 libc/src/math/generic/atan2f.cpp create mode 100644 libc/test/src/math/atan2f_test.cpp create mode 100644 libc/test/src/math/smoke/atan2f_test.cpp diff --git a/libc/config/baremetal/arm/entrypoints.txt b/libc/config/baremetal/arm/entrypoints.txt index 17ce56e228a6ac..9e21f5c20d9207 100644 --- a/libc/config/baremetal/arm/entrypoints.txt +++ b/libc/config/baremetal/arm/entrypoints.txt @@ -207,6 +207,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.acoshf libc.src.math.asinf libc.src.math.asinhf + libc.src.math.atan2f libc.src.math.atanf libc.src.math.atanhf libc.src.math.ceil diff --git a/libc/config/baremetal/riscv/entrypoints.txt b/libc/config/baremetal/riscv/entrypoints.txt index 39756e1ee29f54..7664937da0f6e0 100644 --- a/libc/config/baremetal/riscv/entrypoints.txt +++ b/libc/config/baremetal/riscv/entrypoints.txt @@ -207,6 +207,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.acoshf libc.src.math.asinf libc.src.math.asinhf + libc.src.math.atan2f libc.src.math.atanf libc.src.math.atanhf libc.src.math.ceil diff --git a/libc/config/darwin/arm/entrypoints.txt b/libc/config/darwin/arm/entrypoints.txt index 02a09256606956..6b89ce55d72b65 100644 --- a/libc/config/darwin/arm/entrypoints.txt +++ b/libc/config/darwin/arm/entrypoints.txt @@ -118,6 +118,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.acoshf libc.src.math.asinf libc.src.math.asinhf + libc.src.math.atan2f libc.src.math.atanf libc.src.math.atanhf libc.src.math.copysign diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index 78da7f0b334b1f..4ba2d8387b07eb 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -330,6 +330,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.acoshf libc.src.math.asinf libc.src.math.asinhf + libc.src.math.atan2f libc.src.math.atanf libc.src.math.atanhf libc.src.math.copysign diff --git a/libc/config/linux/arm/entrypoints.txt 
b/libc/config/linux/arm/entrypoints.txt index 6e63e270280e7a..04baa4c1cf93ab 100644 --- a/libc/config/linux/arm/entrypoints.txt +++ b/libc/config/linux/arm/entrypoints.txt @@ -198,6 +198,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.acoshf libc.src.math.asinf libc.src.math.asinhf + libc.src.math.atan2f libc.src.math.atanf libc.src.math.atanhf libc.src.math.ceil diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt index 5aae4e246cfb3c..25745513b920ba 100644 --- a/libc/config/linux/riscv/entrypoints.txt +++ b/libc/config/linux/riscv/entrypoints.txt @@ -338,6 +338,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.acoshf libc.src.math.asinf libc.src.math.asinhf + libc.src.math.atan2f libc.src.math.atanf libc.src.math.atanhf libc.src.math.copysign diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index a4d0da5e043d4f..c1d2bfa848df5a 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -348,6 +348,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.acoshf libc.src.math.asinf libc.src.math.asinhf + libc.src.math.atan2f libc.src.math.atanf libc.src.math.atanhf libc.src.math.canonicalize diff --git a/libc/config/windows/entrypoints.txt b/libc/config/windows/entrypoints.txt index f4456f561ec017..c38125a6462272 100644 --- a/libc/config/windows/entrypoints.txt +++ b/libc/config/windows/entrypoints.txt @@ -116,6 +116,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.acoshf libc.src.math.asinf libc.src.math.asinhf + libc.src.math.atan2f libc.src.math.atanf libc.src.math.atanhf libc.src.math.copysign diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst index 080b6a4427f511..b7f1b8739648ca 100644 --- a/libc/docs/math/index.rst +++ b/libc/docs/math/index.rst @@ -422,7 +422,7 @@ Higher Math Functions +------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ | atan2 | 
| | | | | | | | | | | | +------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| atan2f | | | | | | | | | | | | | +| atan2f | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | +------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ | atan2l | | | | | | | | | | | | | +------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ @@ -591,6 +591,7 @@ acosh |check| asin |check| asinh |check| atan |check| +atan2 |check| atanh |check| cos |check| large cosh |check| diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index ac6e1d1801ba55..719bb9aa18cb0a 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -620,10 +620,14 @@ def StdC : StandardSpec<"stdc"> { FunctionSpec<"tanhf", RetValSpec, [ArgSpec]>, FunctionSpec<"acosf", RetValSpec, [ArgSpec]>, + FunctionSpec<"asinf", RetValSpec, [ArgSpec]>, FunctionSpec<"asin", RetValSpec, [ArgSpec]>, + FunctionSpec<"atanf", RetValSpec, [ArgSpec]>, + FunctionSpec<"atan2f", RetValSpec, [ArgSpec, ArgSpec]>, + FunctionSpec<"acoshf", RetValSpec, [ArgSpec]>, FunctionSpec<"asinhf", RetValSpec, [ArgSpec]>, FunctionSpec<"atanhf", RetValSpec, [ArgSpec]>, diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index 4d9b91150d0200..b164d33e204b1a 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -2854,6 +2854,24 @@ add_entrypoint_object( -O3 ) +add_entrypoint_object( + atan2f + SRCS + atan2f.cpp + HDRS + ../atan2f.h + COMPILE_OPTIONS + -O3 + DEPENDS + .inv_trigf_utils + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.nearest_integer + libc.src.__support.FPUtil.polyeval + libc.src.__support.FPUtil.rounding_mode + 
libc.src.__support.macros.optimization +) + add_entrypoint_object( scalbn SRCS diff --git a/libc/src/math/generic/atan2f.cpp b/libc/src/math/generic/atan2f.cpp new file mode 100644 index 00000000000000..b79410dbf66ee6 --- /dev/null +++ b/libc/src/math/generic/atan2f.cpp @@ -0,0 +1,306 @@ +//===-- Single-precision atan2f function ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/atan2f.h" +#include "inv_trigf_utils.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/double_double.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/nearest_integer.h" +#include "src/__support/FPUtil/rounding_mode.h" +#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY + +namespace LIBC_NAMESPACE { + +namespace { + +// Look up tables for accurate pass: + +// atan(i/16) with i = 0..16, generated by Sollya with: +// > for i from 0 to 16 do { +// a = round(atan(i/16), D, RN); +// b = round(atan(i/16) - a, D, RN); +// print("{", b, ",", a, "},"); +// }; +constexpr fputil::DoubleDouble ATAN_I[17] = { + {0.0, 0.0}, + {-0x1.c934d86d23f1dp-60, 0x1.ff55bb72cfdeap-5}, + {-0x1.cd37686760c17p-59, 0x1.fd5ba9aac2f6ep-4}, + {0x1.347b0b4f881cap-58, 0x1.7b97b4bce5b02p-3}, + {0x1.8ab6e3cf7afbdp-57, 0x1.f5b75f92c80ddp-3}, + {-0x1.963a544b672d8p-57, 0x1.362773707ebccp-2}, + {-0x1.c63aae6f6e918p-56, 0x1.6f61941e4def1p-2}, + {-0x1.24dec1b50b7ffp-56, 0x1.a64eec3cc23fdp-2}, + {0x1.a2b7f222f65e2p-56, 0x1.dac670561bb4fp-2}, + {-0x1.d5b495f6349e6p-56, 0x1.0657e94db30dp-1}, + {-0x1.928df287a668fp-58, 0x1.1e00babdefeb4p-1}, + {0x1.1021137c71102p-55, 0x1.345f01cce37bbp-1}, + {0x1.2419a87f2a458p-56, 
0x1.4978fa3269ee1p-1}, + {0x1.0028e4bc5e7cap-57, 0x1.5d58987169b18p-1}, + {-0x1.8c34d25aadef6p-56, 0x1.700a7c5784634p-1}, + {-0x1.bf76229d3b917p-56, 0x1.819d0b7158a4dp-1}, + {0x1.1a62633145c07p-55, 0x1.921fb54442d18p-1}, +}; + +// Taylor polynomial, generated by Sollya with: +// > for i from 0 to 8 do { +// j = (-1)^(i + 1)/(2*i + 1); +// a = round(j, D, RN); +// b = round(j - a, D, RN); +// print("{", b, ",", a, "},"); +// }; +constexpr fputil::DoubleDouble COEFFS[9] = { + {0.0, 1.0}, // 1 + {-0x1.5555555555555p-56, -0x1.5555555555555p-2}, // -1/3 + {-0x1.999999999999ap-57, 0x1.999999999999ap-3}, // 1/5 + {-0x1.2492492492492p-57, -0x1.2492492492492p-3}, // -1/7 + {0x1.c71c71c71c71cp-58, 0x1.c71c71c71c71cp-4}, // 1/9 + {0x1.745d1745d1746p-59, -0x1.745d1745d1746p-4}, // -1/11 + {-0x1.3b13b13b13b14p-58, 0x1.3b13b13b13b14p-4}, // 1/13 + {-0x1.1111111111111p-60, -0x1.1111111111111p-4}, // -1/15 + {0x1.e1e1e1e1e1e1ep-61, 0x1.e1e1e1e1e1e1ep-5}, // 1/17 +}; + +// Veltkamp's splitting of a double precision into hi + lo, where the hi part is +// slightly smaller than an even split, so that the product of +// hi * (s1 * k + s2) is exact, +// where: +// s1, s2 are single precsion, +// 1/16 <= s1/s2 <= 1 +// 1/16 <= k <= 1 is an integer. +// So the maximal precision of (s1 * k + s2) is: +// prec(s1 * k + s2) = 2 + log2(msb(s2)) - log2(lsb(k_d * s1)) +// = 2 + log2(msb(s1)) + 4 - log2(lsb(k_d)) - log2(lsb(s1)) +// = 2 + log2(lsb(s1)) + 23 + 4 - (-4) - log2(lsb(s1)) +// = 33. +// Thus, the Veltkamp splitting constant is C = 2^33 + 1. +// This is used when FMA instruction is not available. +[[maybe_unused]] constexpr fputil::DoubleDouble split_d(double a) { + fputil::DoubleDouble r{0.0, 0.0}; + constexpr double C = 0x1.0p33 + 1.0; + double t1 = C * a; + double t2 = a - t1; + r.hi = t1 + t2; + r.lo = a - r.hi; + return r; +} + +// Compute atan( num_d / den_d ) in double-double precision. 
+// num_d = min(|x|, |y|) +// den_d = max(|x|, |y|) +// q_d = num_d / den_d +// idx, k_d = round( 2^4 * num_d / den_d ) +// final_sign = sign of the final result +// const_term = the constant term in the final expression. +float atan2f_double_double(double num_d, double den_d, double q_d, int idx, + double k_d, double final_sign, + const fputil::DoubleDouble &const_term) { + fputil::DoubleDouble q; + double num_r, den_r; + + if (idx != 0) { + // The following range reduction is accurate even without fma for + // 1/16 <= n/d <= 1. + // atan(n/d) - atan(idx/16) = atan((n/d - idx/16) / (1 + (n/d) * (idx/16))) + // = atan((n - d*(idx/16)) / (d + n*idx/16)) + k_d *= 0x1.0p-4; + num_r = fputil::multiply_add(k_d, -den_d, num_d); // Exact + den_r = fputil::multiply_add(k_d, num_d, den_d); // Exact + q.hi = num_r / den_r; + } else { + // For 0 < n/d < 1/16, we just need to calculate the lower part of their + // quotient. + q.hi = q_d; + num_r = num_d; + den_r = den_d; + } +#ifdef LIBC_TARGET_CPU_HAS_FMA + q.lo = fputil::multiply_add(q.hi, -den_r, num_r) / den_r; +#else + // Compute `(num_r - q.hi * den_r) / den_r` accurately without FMA + // instructions. 
+ fputil::DoubleDouble q_hi_dd = split_d(q.hi); + double t1 = fputil::multiply_add(q_hi_dd.hi, -den_r, num_r); // Exact + double t2 = fputil::multiply_add(q_hi_dd.lo, -den_r, t1); + q.lo = t2 / den_r; +#endif // LIBC_TARGET_CPU_HAS_FMA + + // Taylor polynomial, evaluating using Horner's scheme: + // P = x - x^3/3 + x^5/5 -x^7/7 + x^9/9 - x^11/11 + x^13/13 - x^15/15 + // + x^17/17 + // = x*(1 + x^2*(-1/3 + x^2*(1/5 + x^2*(-1/7 + x^2*(1/9 + x^2* + // *(-1/11 + x^2*(1/13 + x^2*(-1/15 + x^2 * 1/17)))))))) + fputil::DoubleDouble q2 = fputil::quick_mult(q, q); + fputil::DoubleDouble p_dd = + fputil::polyeval(q2, COEFFS[0], COEFFS[1], COEFFS[2], COEFFS[3], + COEFFS[4], COEFFS[5], COEFFS[6], COEFFS[7], COEFFS[8]); + fputil::DoubleDouble r_dd = + fputil::add(const_term, fputil::multiply_add(q, p_dd, ATAN_I[idx])); + r_dd.hi *= final_sign; + r_dd.lo *= final_sign; + + // Make sure the sum is normalized: + fputil::DoubleDouble rr = fputil::exact_add(r_dd.hi, r_dd.lo); + // Round to odd. + uint64_t rr_bits = cpp::bit_cast(rr.hi); + if (LIBC_UNLIKELY(((rr_bits & 0xfff'ffff) == 0) && (rr.lo != 0.0))) { + Sign hi_sign = fputil::FPBits(rr.hi).sign(); + Sign lo_sign = fputil::FPBits(rr.lo).sign(); + if (hi_sign == lo_sign) { + ++rr_bits; + } else if ((rr_bits & fputil::FPBits::FRACTION_MASK) > 0) { + --rr_bits; + } + } + + return static_cast(cpp::bit_cast(rr_bits)); +} + +} // anonymous namespace + +// There are several range reduction steps we can take for atan2(y, x) as +// follow: + +// * Range reduction 1: signness +// atan2(y, x) will return a number between -PI and PI representing the angle +// forming by the 0x axis and the vector (x, y) on the 0xy-plane. 
+// In particular, we have that: +// atan2(y, x) = atan( y/x ) if x >= 0 and y >= 0 (I-quadrant) +// = pi + atan( y/x ) if x < 0 and y >= 0 (II-quadrant) +// = -pi + atan( y/x ) if x < 0 and y < 0 (III-quadrant) +// = atan( y/x ) if x >= 0 and y < 0 (IV-quadrant) +// Since atan function is odd, we can use the formula: +// atan(-u) = -atan(u) +// to adjust the above conditions a bit further: +// atan2(y, x) = atan( |y|/|x| ) if x >= 0 and y >= 0 (I-quadrant) +// = pi - atan( |y|/|x| ) if x < 0 and y >= 0 (II-quadrant) +// = -pi + atan( |y|/|x| ) if x < 0 and y < 0 (III-quadrant) +// = -atan( |y|/|x| ) if x >= 0 and y < 0 (IV-quadrant) +// Which can be simplified to: +// atan2(y, x) = sign(y) * atan( |y|/|x| ) if x >= 0 +// = sign(y) * (pi - atan( |y|/|x| )) if x < 0 + +// * Range reduction 2: reciprocal +// Now that the argument inside atan is positive, we can use the formula: +// atan(1/x) = pi/2 - atan(x) +// to make the argument inside atan <= 1 as follow: +// atan2(y, x) = sign(y) * atan( |y|/|x|) if 0 <= |y| <= x +// = sign(y) * (pi/2 - atan( |x|/|y| ) if 0 <= x < |y| +// = sign(y) * (pi - atan( |y|/|x| )) if 0 <= |y| <= -x +// = sign(y) * (pi/2 + atan( |x|/|y| )) if 0 <= -x < |y| + +// * Range reduction 3: look up table. +// After the previous two range reduction steps, we reduce the problem to +// compute atan(u) with 0 <= u <= 1, or to be precise: +// atan( n / d ) where n = min(|x|, |y|) and d = max(|x|, |y|). +// An accurate polynomial approximation for the whole [0, 1] input range will +// require a very large degree. To make it more efficient, we reduce the input +// range further by finding an integer idx such that: +// | n/d - idx/16 | <= 1/32. 
+// In particular, +// idx := 2^-4 * round(2^4 * n/d) +// Then for the fast pass, we find a polynomial approximation for: +// atan( n/d ) ~ atan( idx/16 ) + (n/d - idx/16) * Q(n/d - idx/16) +// For the accurate pass, we use the addition formula: +// atan( n/d ) - atan( idx/16 ) = atan( (n/d - idx/16)/(1 + (n*idx)/(16*d)) ) +// = atan( (n - d * idx/16)/(d + n * idx/16) ) +// And finally we use Taylor polynomial to compute the RHS in the accurate pass: +// atan(u) ~ P(u) = u - u^3/3 + u^5/5 - u^7/7 + u^9/9 - u^11/11 + u^13/13 - +// - u^15/15 + u^17/17 +// It's error in double-double precision is estimated in Sollya to be: +// > P = x - x^3/3 + x^5/5 -x^7/7 + x^9/9 - x^11/11 + x^13/13 - x^15/15 +// + x^17/17; +// > dirtyinfnorm(atan(x) - P, [-2^-5, 2^-5]); +// 0x1.aec6f...p-100 +// which is about rounding errors of double-double (2^-104). + +LLVM_LIBC_FUNCTION(float, atan2f, (float y, float x)) { + using FPBits = typename fputil::FPBits; + constexpr double IS_NEG[2] = {1.0, -1.0}; + constexpr double PI = 0x1.921fb54442d18p1; + constexpr double PI_LO = 0x1.1a62633145c07p-53; + constexpr double PI_OVER_4 = 0x1.921fb54442d18p-1; + constexpr double PI_OVER_2 = 0x1.921fb54442d18p0; + constexpr double THREE_PI_OVER_4 = 0x1.2d97c7f3321d2p+1; + // Adjustment for constant term: + // CONST_ADJ[x_sign][y_sign][recip] + constexpr fputil::DoubleDouble CONST_ADJ[2][2][2] = { + {{{0.0, 0.0}, {-PI_LO / 2, -PI_OVER_2}}, + {{-0.0, -0.0}, {-PI_LO / 2, -PI_OVER_2}}}, + {{{-PI_LO, -PI}, {PI_LO / 2, PI_OVER_2}}, + {{-PI_LO, -PI}, {PI_LO / 2, PI_OVER_2}}}}; + + FPBits x_bits(x), y_bits(y); + bool x_sign = x_bits.sign().is_neg(); + bool y_sign = y_bits.sign().is_neg(); + x_bits.set_sign(Sign::POS); + y_bits.set_sign(Sign::POS); + uint32_t x_abs = x_bits.uintval(); + uint32_t y_abs = y_bits.uintval(); + uint32_t max_abs = x_abs > y_abs ? x_abs : y_abs; + uint32_t min_abs = x_abs <= y_abs ? 
x_abs : y_abs; + + if (LIBC_UNLIKELY(max_abs >= 0x7f80'0000U || min_abs == 0U)) { + if (x_bits.is_nan() || y_bits.is_nan()) + return FPBits::quiet_nan().get_val(); + size_t x_except = x_abs == 0 ? 0 : (x_abs == 0x7f80'0000 ? 2 : 1); + size_t y_except = y_abs == 0 ? 0 : (y_abs == 0x7f80'0000 ? 2 : 1); + + // Exceptional cases: + // EXCEPT[y_except][x_except][x_is_neg] + // with x_except & y_except: + // 0: zero + // 1: finite, non-zero + // 2: infinity + constexpr double EXCEPTS[3][3][2] = { + {{0.0, PI}, {0.0, PI}, {0.0, PI}}, + {{PI_OVER_2, PI_OVER_2}, {0.0, 0.0}, {0.0, PI}}, + {{PI_OVER_2, PI_OVER_2}, + {PI_OVER_2, PI_OVER_2}, + {PI_OVER_4, THREE_PI_OVER_4}}, + }; + + double r = IS_NEG[y_sign] * EXCEPTS[y_except][x_except][x_sign]; + + return static_cast(r); + } + + bool recip = x_abs < y_abs; + double final_sign = IS_NEG[(x_sign != y_sign) != recip]; + fputil::DoubleDouble const_term = CONST_ADJ[x_sign][y_sign][recip]; + double num_d = static_cast(FPBits(min_abs).get_val()); + double den_d = static_cast(FPBits(max_abs).get_val()); + double q_d = num_d / den_d; + + double k_d = fputil::nearest_integer(q_d * 0x1.0p4f); + int idx = static_cast(k_d); + q_d = fputil::multiply_add(k_d, -0x1.0p-4, q_d); + + double p = atan_eval(q_d, idx); + double r = final_sign * + fputil::multiply_add(q_d, p, const_term.hi + ATAN_COEFFS[idx][0]); + + constexpr uint32_t LOWER_ERR = 4; + // Mask sticky bits in double precision before rounding to single precision. + constexpr uint32_t MASK = + mask_trailing_ones::SIG_LEN - + FPBits::SIG_LEN - 1>(); + constexpr uint32_t UPPER_ERR = MASK - LOWER_ERR; + + uint32_t r_bits = static_cast(cpp::bit_cast(r)) & MASK; + + // Ziv's rounding test. 
+ if (LIBC_LIKELY(r_bits > LOWER_ERR && r_bits < UPPER_ERR)) + return static_cast(r); + + return atan2f_double_double(num_d, den_d, q_d, idx, k_d, final_sign, + const_term); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/math/generic/atanf.cpp b/libc/src/math/generic/atanf.cpp index 4adda429cc041c..9fa1a331c9c038 100644 --- a/libc/src/math/generic/atanf.cpp +++ b/libc/src/math/generic/atanf.cpp @@ -60,11 +60,15 @@ LLVM_LIBC_FUNCTION(float, atanf, (float x)) { } // Use Taylor polynomial: // atan(x) ~ x * (1 - x^2 / 3 + x^4 / 5 - x^6 / 7 + x^8 / 9 - x^10 / 11). + constexpr double ATAN_TAYLOR[6] = { + 0x1.0000000000000p+0, -0x1.5555555555555p-2, 0x1.999999999999ap-3, + -0x1.2492492492492p-3, 0x1.c71c71c71c71cp-4, -0x1.745d1745d1746p-4, + }; double x2 = x_d * x_d; double x4 = x2 * x2; - double c0 = fputil::multiply_add(x2, ATAN_COEFFS[0][1], ATAN_COEFFS[0][0]); - double c1 = fputil::multiply_add(x2, ATAN_COEFFS[0][3], ATAN_COEFFS[0][2]); - double c2 = fputil::multiply_add(x2, ATAN_COEFFS[0][5], ATAN_COEFFS[0][4]); + double c0 = fputil::multiply_add(x2, ATAN_TAYLOR[1], ATAN_TAYLOR[0]); + double c1 = fputil::multiply_add(x2, ATAN_TAYLOR[3], ATAN_TAYLOR[2]); + double c2 = fputil::multiply_add(x2, ATAN_TAYLOR[5], ATAN_TAYLOR[4]); double p = fputil::polyeval(x4, c0, c1, c2); double r = fputil::multiply_add(x_d, p, const_term); return static_cast(r); @@ -81,11 +85,6 @@ LLVM_LIBC_FUNCTION(float, atanf, (float x)) { int idx; if (x_abs > 0x3f80'0000U) { - // Exceptional value: - if (LIBC_UNLIKELY(x_abs == 0x3ffe'2ec1U)) { // |x| = 0x1.fc5d82p+0 - return sign.is_pos() ? fputil::round_result_slightly_up(0x1.1ab2fp0f) - : fputil::round_result_slightly_down(-0x1.1ab2fp0f); - } // |x| > 1, we need to invert x, so we will perform range reduction in // double precision. 
x_d = 1.0 / static_cast(x_bits.get_val()); @@ -98,10 +97,9 @@ LLVM_LIBC_FUNCTION(float, atanf, (float x)) { SIGNED_PI_OVER_2[sign.is_neg()]); } else { // Exceptional value: - if (LIBC_UNLIKELY(x_abs == 0x3dbb'6ac7U)) { // |x| = 0x1.76d58ep-4 - return sign.is_pos() - ? fputil::round_result_slightly_up(0x1.75cb06p-4f) - : fputil::round_result_slightly_down(-0x1.75cb06p-4f); + if (LIBC_UNLIKELY(x_abs == 0x3d8d'6b23U)) { // |x| = 0x1.1ad646p-4 + return sign.is_pos() ? fputil::round_result_slightly_down(0x1.1a6386p-4f) + : fputil::round_result_slightly_up(-0x1.1a6386p-4f); } // Perform range reduction in single precision. float x_f = x_bits.get_val(); diff --git a/libc/src/math/generic/inv_trigf_utils.cpp b/libc/src/math/generic/inv_trigf_utils.cpp index 93d5bcbf7b567d..19c8b997dc4ed5 100644 --- a/libc/src/math/generic/inv_trigf_utils.cpp +++ b/libc/src/math/generic/inv_trigf_utils.cpp @@ -16,65 +16,70 @@ namespace LIBC_NAMESPACE { // Generated by Sollya with: // > for i from 1 to 16 do { // mid_point = i/16; -// P = fpminimax(atan(mid_point + x), 7, [|D...|], [-1/32, 1/32]); +// P = fpminimax(atan(mid_point + x), 8, [|D...|], [-1/32, 1/32]); // print("{", coeff(P, 0), ",", coeff(P, 1), ",", coeff(P, 2), ",", // coeff(P, 3), ",", coeff(P, 4), ",", coeff(P, 5), ",", coeff(P, 6), -// ",", coeff(P, 7), "},"); +// ",", coeff(P, 7), ",", coeff(P, 8), "},"); // }; -// For i = 0, ATAN_COEFFS[0][j] = (-1)^j * (1/(2*j + 1)) is the odd coefficients -// of the Taylor polynomial of atan(x). 
-double ATAN_COEFFS[17][8] = { - {0x1.0000000000000p+0, -0x1.5555555555555p-2, 0x1.999999999999ap-3, - -0x1.2492492492492p-3, 0x1.c71c71c71c71cp-4, -0x1.745d1745d1746p-4, - 0x1.3b13b13b13b14p-4, -0x1.1111111111111p-4}, - {0x1.ff55bb72cfdb1p-5, 0x1.fe01fe01fe1bp-1, -0x1.fc05f80821d1ap-5, - -0x1.4d6930419fc5fp-2, 0x1.f61b9f6d69313p-5, 0x1.8208a32f4346cp-3, - -0x1.ecb8fc53d04efp-5, -0x1.060710cb59cbcp-3}, - {0x1.fd5ba9aac2f3cp-4, 0x1.f81f81f81f96ap-1, -0x1.f05e09cf4c1b2p-4, - -0x1.368c3aac7543ep-2, 0x1.d9b14bddfac55p-4, 0x1.4048e55ec725ep-3, - -0x1.b98ca3c1594b5p-4, -0x1.664eabaabbc16p-4}, - {0x1.7b97b4bce5ae7p-3, 0x1.ee9c7f8458f06p-1, -0x1.665c226c8dc69p-3, - -0x1.1344bb77961b7p-2, 0x1.42ac97745d3ccp-3, 0x1.c32e142047ec1p-4, - -0x1.137ae41ab96cbp-3, -0x1.1a6ae8c09a4b6p-5}, - {0x1.f5b75f92c80c6p-3, 0x1.e1e1e1e1e1ed4p-1, -0x1.c5894d101ad4p-3, - -0x1.ce6de02b38c38p-3, 0x1.78a3920c336b9p-3, 0x1.dd5ff94a9d499p-5, - -0x1.1ac2d3f9d072ep-3, 0x1.0af9735dff373p-6}, - {0x1.362773707ebc5p-2, 0x1.d272ca3fc5b8bp-1, -0x1.0997e8ae90cb6p-2, - -0x1.6cf6667146798p-3, 0x1.8dd1dff17f3d3p-3, 0x1.24860eced656fp-7, - -0x1.f4220e8f18ed5p-4, 0x1.b700aed7cdc34p-5}, - {0x1.6f61941e4deeep-2, 0x1.c0e070381c115p-1, -0x1.2726dd1347c7ep-2, - -0x1.09f37b3ad010dp-3, 0x1.85eaca5196f5cp-3, -0x1.04d640117852ap-5, - -0x1.802c2956871c7p-4, 0x1.2992b45df0ee7p-4}, - {0x1.a64eec3cc23fep-2, 0x1.adbe87f94906bp-1, -0x1.3b9d8eab5eae5p-2, - -0x1.57c09646faabbp-4, 0x1.6795330e73aep-3, -0x1.f2d89a702a652p-5, - -0x1.f3afd90a9d4d7p-5, 0x1.3261723d3f153p-4}, - {0x1.dac670561bb53p-2, 0x1.999999999998fp-1, -0x1.47ae147afd8cap-2, - -0x1.5d867c3dfd72ap-5, 0x1.3a92a76cba833p-3, -0x1.3ec460286928ap-4, - -0x1.ed02ff86892acp-6, 0x1.0a674c8f05727p-4}, - {0x1.0657e94db30d2p-1, 0x1.84f00c27805ffp-1, -0x1.4c62cb564f677p-2, - -0x1.e6495b262dfe7p-8, 0x1.063c34eca262bp-3, -0x1.58b78dc79b5aep-4, - -0x1.4623815233be1p-8, 0x1.93afe94328089p-5}, - {0x1.1e00babdefeb6p-1, 0x1.702e05c0b8159p-1, -0x1.4af2b78236bd6p-2, - 0x1.5d0b7ea46ed08p-6, 
0x1.a124870236935p-4, -0x1.519e1ec133a88p-4, - 0x1.a54632a3f48c7p-7, 0x1.099ca0945096dp-5}, - {0x1.345f01cce37bdp-1, 0x1.5babcc647fa7ep-1, -0x1.449db09443a67p-2, - 0x1.655caac78a0fcp-5, 0x1.3bbbdb0d09efap-4, -0x1.34a306c27e021p-4, - 0x1.83fe749c7966p-6, 0x1.2057cc96d9edcp-6}, - {0x1.4978fa3269ee2p-1, 0x1.47ae147ae146bp-1, -0x1.3a92a305652e1p-2, - 0x1.ec21b5172657fp-5, 0x1.c2f8c45d2f4eep-5, -0x1.0ba99c4aeb8acp-4, - 0x1.d716a4af4d1d6p-6, 0x1.97fba0a9696dep-8}, - {0x1.5d58987169b19p-1, 0x1.34679ace0133cp-1, -0x1.2ddfb03920e2fp-2, - 0x1.2491307c0fa0bp-4, 0x1.29c7eca0136fp-5, -0x1.bca792caa6f1cp-5, - 0x1.e5d92545576bcp-6, -0x1.8ca76fcf5ccd2p-10}, - {0x1.700a7c5784634p-1, 0x1.21fb78121fb71p-1, -0x1.1f6a8499ea541p-2, - 0x1.41b15e5e77bcfp-4, 0x1.59bc9bf54fb02p-6, -0x1.63b54ff058e0fp-5, - 0x1.c8da01221306fp-6, -0x1.906b2c274c39cp-8}, - {0x1.819d0b7158a4dp-1, 0x1.107fbbe01107cp-1, -0x1.0feeb40897d4ep-2, - 0x1.50e5afb95f5d6p-4, 0x1.2a7c2f0c7495dp-7, -0x1.12bd2bb5062cdp-5, - 0x1.93e8ceb89afebp-6, -0x1.10da9b8c6b731p-7}, - {0x1.921fb54442d18p-1, 0x1.fffffffffffebp-2, -0x1.fffffffffcbbcp-3, - 0x1.555555564e2fep-4, -0x1.20b17d5dd89dcp-30, -0x1.9999c5ad71711p-6, - 0x1.5558b76e7aaf9p-6, -0x1.236e803c6c1f6p-7}, +// For i = 0, the polynomial is generated by: +// > P = fpminimax(atan(x)/x, 7, [|1, D...|], [0, 1/32]); +// > dirtyinfnorm((atan(x) - x*P)/x, [0, 1/32]); +// 0x1.feb2fcdba66447ccbe28a1a0f935b51678a718fb1p-59 +// Notice that degree-7 is good enough for atanf, but degree-8 helps reduce the +// error bounds for atan2f's fast pass 16 times, and it does not affect the +// performance of atanf much. 
+double ATAN_COEFFS[17][9] = { + {0.0, 1.0, 0x1.3f8d76d26d61bp-47, -0x1.5555555574cd8p-2, + 0x1.0dde5d06878eap-29, 0x1.99997738acc77p-3, 0x1.2c43eac9797cap-16, + -0x1.25fb020007dbdp-3, 0x1.c1b6c31d7b0aep-7}, + {0x1.ff55bb72cfde9p-5, 0x1.fe01fe01fe007p-1, -0x1.fc05f809ed8dap-5, + -0x1.4d69303afe04ep-2, 0x1.f61bc3e8349cp-5, 0x1.820839278756bp-3, + -0x1.eda4de1c6bf3fp-5, -0x1.0514d42d64a63p-3, 0x1.db3746a442dcbp-5}, + {0x1.fd5ba9aac2f6ep-4, 0x1.f81f81f81f813p-1, -0x1.f05e09d0dc378p-4, + -0x1.368c3aa719215p-2, 0x1.d9b16b33ff9c9p-4, 0x1.40488f9c6262ap-3, + -0x1.ba55933e62ea5p-4, -0x1.64c6a15cd9116p-4, 0x1.9273d5939a75ap-4}, + {0x1.7b97b4bce5b02p-3, 0x1.ee9c7f8458e05p-1, -0x1.665c226d6961p-3, + -0x1.1344bb7391703p-2, 0x1.42aca8b0081b9p-3, 0x1.c32d9381d7c03p-4, + -0x1.13e970672e246p-3, -0x1.181ed934dd733p-5, 0x1.bad81ea190c08p-4}, + {0x1.f5b75f92c80ddp-3, 0x1.e1e1e1e1e1e2cp-1, -0x1.c5894d10d363dp-3, + -0x1.ce6de025f9f5ep-3, 0x1.78a3a07c8dd7fp-3, 0x1.dd5f5180f386ep-5, + -0x1.1b1f513c4536bp-3, 0x1.0df852e58c43cp-6, 0x1.722e7a7e42505p-4}, + {0x1.362773707ebccp-2, 0x1.d272ca3fc5b2ep-1, -0x1.0997e8aeca8fbp-2, + -0x1.6cf6666e5e693p-3, 0x1.8dd1e907e88adp-3, 0x1.24849ac0caa5dp-7, + -0x1.f496be486229dp-4, 0x1.b7d54b8e759ecp-5, 0x1.d39c0d39c3922p-5}, + {0x1.6f61941e4def1p-2, 0x1.c0e070381c0f2p-1, -0x1.2726dd135d9eep-2, + -0x1.09f37b39b70e4p-3, 0x1.85eacdaadd712p-3, -0x1.04d66340d5b9p-5, + -0x1.8056b15a22b98p-4, 0x1.29baf494ad3ddp-4, 0x1.52d5881322a7ap-6}, + {0x1.a64eec3cc23fdp-2, 0x1.adbe87f94906ap-1, -0x1.3b9d8eab55addp-2, + -0x1.57c09646eb7p-4, 0x1.6795319e3b8dfp-3, -0x1.f2d89b5ef31bep-5, + -0x1.f38aac26203cap-5, 0x1.3262802235e3fp-4, -0x1.2afd6b9a57d66p-7}, + {0x1.dac670561bb4fp-2, 0x1.99999999999ap-1, -0x1.47ae147adff11p-2, + -0x1.5d867c40188b7p-5, 0x1.3a92a2df85e7ap-3, -0x1.3ec457c46e851p-4, + -0x1.ec1b9777e2e5bp-6, 0x1.0a542992a821ep-4, -0x1.ccffbe2f0d945p-6}, + {0x1.0657e94db30dp-1, 0x1.84f00c2780615p-1, -0x1.4c62cb562defap-2, + -0x1.e6495b3c14e03p-8, 0x1.063c2fa617bfcp-3, 
-0x1.58b782d9907aap-4, + -0x1.41e6ff524b7fp-8, 0x1.937dfff3205a7p-5, -0x1.0fb1fd1c729dp-5}, + {0x1.1e00babdefeb4p-1, 0x1.702e05c0b816ep-1, -0x1.4af2b78215fbep-2, + 0x1.5d0b7e9f36997p-6, 0x1.a1247cb978debp-4, -0x1.519e1457734cap-4, + 0x1.a755cf86b5bfbp-7, 0x1.096d174284564p-5, -0x1.081adf539ad58p-5}, + {0x1.345f01cce37bbp-1, 0x1.5babcc647fa8ep-1, -0x1.449db09426a6dp-2, + 0x1.655caac5896dap-5, 0x1.3bbbd22d05a61p-4, -0x1.34a2febee042fp-4, + 0x1.84df9c8269e34p-6, 0x1.200e8176c899ap-6, -0x1.c00b23c3ce222p-6}, + {0x1.4978fa3269ee1p-1, 0x1.47ae147ae1477p-1, -0x1.3a92a3055231ap-2, + 0x1.ec21b515a4a2p-5, 0x1.c2f8b81f9a0d2p-5, -0x1.0ba9964125453p-4, + 0x1.d7b5614777a05p-6, 0x1.971e91ed73595p-8, -0x1.3fc375a78dc74p-6}, + {0x1.5d58987169b18p-1, 0x1.34679ace01343p-1, -0x1.2ddfb039136e5p-2, + 0x1.2491307b9fb73p-4, 0x1.29c7e4886dc22p-5, -0x1.bca78bcca83ap-5, + 0x1.e63efd7cbe1ddp-6, -0x1.8ea6c4f03b42dp-10, -0x1.9385b5c3a6997p-7}, + {0x1.700a7c5784634p-1, 0x1.21fb78121fb76p-1, -0x1.1f6a8499e5d1ap-2, + 0x1.41b15e5e29423p-4, 0x1.59bc953163345p-6, -0x1.63b54b13184ddp-5, + 0x1.c9086666d213p-6, -0x1.90c3b4ad8d4bcp-8, -0x1.80f08ed9f6f57p-8}, + {0x1.819d0b7158a4dp-1, 0x1.107fbbe01107ep-1, -0x1.0feeb4089670ep-2, + 0x1.50e5afb93f5cbp-4, 0x1.2a7c2adffeffbp-7, -0x1.12bd29b4f1b43p-5, + 0x1.93f71f0eb00eap-6, -0x1.10ece5ad30e28p-7, -0x1.db1a76bcd2b9cp-10}, + {0x1.921fb54442d18p-1, 0x1.ffffffffffffep-2, -0x1.fffffffffc51cp-3, + 0x1.555555557002ep-4, -0x1.a88260c338e75p-30, -0x1.99999f9a7614fp-6, + 0x1.555e31a1e15e9p-6, -0x1.245240d65e629p-7, -0x1.fa9ba66478903p-11}, }; } // namespace LIBC_NAMESPACE diff --git a/libc/src/math/generic/inv_trigf_utils.h b/libc/src/math/generic/inv_trigf_utils.h index c621f27e101460..e60c367d7b46b8 100644 --- a/libc/src/math/generic/inv_trigf_utils.h +++ b/libc/src/math/generic/inv_trigf_utils.h @@ -19,9 +19,9 @@ namespace LIBC_NAMESPACE { constexpr double M_MATH_PI = 0x1.921fb54442d18p+1; constexpr double M_MATH_PI_2 = 0x1.921fb54442d18p+0; -extern double 
ATAN_COEFFS[17][8]; +extern double ATAN_COEFFS[17][9]; -// For |x| <= 1/32 and 1 <= i <= 16, return Q(x) such that: +// For |x| <= 1/32 and 0 <= i <= 16, return Q(x) such that: // Q(x) ~ (atan(x + i/16) - atan(i/16)) / x. LIBC_INLINE double atan_eval(double x, int i) { double x2 = x * x; @@ -29,10 +29,11 @@ LIBC_INLINE double atan_eval(double x, int i) { double c0 = fputil::multiply_add(x, ATAN_COEFFS[i][2], ATAN_COEFFS[i][1]); double c1 = fputil::multiply_add(x, ATAN_COEFFS[i][4], ATAN_COEFFS[i][3]); double c2 = fputil::multiply_add(x, ATAN_COEFFS[i][6], ATAN_COEFFS[i][5]); + double c3 = fputil::multiply_add(x, ATAN_COEFFS[i][8], ATAN_COEFFS[i][7]); double x4 = x2 * x2; double d1 = fputil::multiply_add(x2, c1, c0); - double d2 = fputil::multiply_add(x2, ATAN_COEFFS[i][7], c2); + double d2 = fputil::multiply_add(x2, c3, c2); double p = fputil::multiply_add(x4, d2, d1); return p; } diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt index 026bcd12928bdf..f8f0f8ba7b6f63 100644 --- a/libc/test/src/math/CMakeLists.txt +++ b/libc/test/src/math/CMakeLists.txt @@ -1719,6 +1719,19 @@ add_fp_unittest( libc.src.__support.FPUtil.fp_bits ) +add_fp_unittest( + atan2f_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + atan2f_test.cpp + DEPENDS + libc.include.math + libc.src.math.atan2f + libc.src.__support.FPUtil.fp_bits +) + add_subdirectory(generic) add_subdirectory(smoke) diff --git a/libc/test/src/math/atan2f_test.cpp b/libc/test/src/math/atan2f_test.cpp new file mode 100644 index 00000000000000..343e7601b0392b --- /dev/null +++ b/libc/test/src/math/atan2f_test.cpp @@ -0,0 +1,133 @@ +//===-- Unittests for atan2f ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "include/llvm-libc-macros/math-macros.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/math/atan2f.h" +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" +#include "utils/MPFRWrapper/MPFRUtils.h" + +using LlvmLibcAtan2fTest = LIBC_NAMESPACE::testing::FPTest; +using LIBC_NAMESPACE::testing::tlog; + +namespace mpfr = LIBC_NAMESPACE::testing::mpfr; + +TEST_F(LlvmLibcAtan2fTest, TrickyInputs) { + constexpr int N = 17; + mpfr::BinaryInput INPUTS[N] = { + {0x1.0cb3a4p+20f, 0x1.4ebacp+22f}, {0x1.12215p+1f, 0x1.4fabfcp+22f}, + {-0x1.13baaep+41f, 0x1.5bd22ep+23f}, {0x1.1ff7dcp+41f, 0x1.aec0a6p+23f}, + {0x1.2bc794p+23f, 0x1.0bc0c6p+23f}, {0x1.2fba3ap+42f, 0x1.f99456p+23f}, + {0x1.5ea1f8p+27f, 0x1.f2a1aep+23f}, {0x1.7a931p+44f, 0x1.352ac4p+22f}, + {0x1.8802bcp+21f, 0x1.8f130ap+23f}, {0x1.658ef8p+17f, 0x1.3c00f4p+22f}, + {0x1.69fb0cp+21f, 0x1.39e4c4p+23f}, {0x1.8eb24cp+11f, 0x1.36518p+23f}, + {0x1.9e7ebp+30f, 0x1.d80522p+23f}, {0x1.b4bdeep+19f, 0x1.c19b4p+23f}, + {0x1.bc201p+43f, 0x1.617346p+23f}, {0x1.c96c3cp+20f, 0x1.c01d1ep+23f}, + {0x1.781fcp+28f, 0x1.dcb3cap+23f}, + }; + + for (int i = 0; i < N; ++i) { + float x = INPUTS[i].x; + float y = INPUTS[i].y; + ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Atan2, INPUTS[i], + LIBC_NAMESPACE::atan2f(x, y), 0.5); + INPUTS[i].x = -INPUTS[i].x; + ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Atan2, INPUTS[i], + LIBC_NAMESPACE::atan2f(-x, y), 0.5); + INPUTS[i].y = -INPUTS[i].y; + ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Atan2, INPUTS[i], + LIBC_NAMESPACE::atan2f(-x, -y), 0.5); + INPUTS[i].x = -INPUTS[i].x; + ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Atan2, INPUTS[i], + LIBC_NAMESPACE::atan2f(x, -y), 0.5); + } +} + +TEST_F(LlvmLibcAtan2fTest, InFloatRange) { + constexpr uint32_t X_COUNT = 1'23; + constexpr uint32_t X_START = 
FPBits(0.25f).uintval(); + constexpr uint32_t X_STOP = FPBits(4.0f).uintval(); + constexpr uint32_t X_STEP = (X_STOP - X_START) / X_COUNT; + + constexpr uint32_t Y_COUNT = 1'37; + constexpr uint32_t Y_START = FPBits(0.25f).uintval(); + constexpr uint32_t Y_STOP = FPBits(4.0f).uintval(); + constexpr uint32_t Y_STEP = (Y_STOP - Y_START) / Y_COUNT; + + auto test = [&](mpfr::RoundingMode rounding_mode) { + mpfr::ForceRoundingMode __r(rounding_mode); + if (!__r.success) + return; + + uint64_t fails = 0; + uint64_t finite_count = 0; + uint64_t total_count = 0; + float failed_x, failed_y, failed_r = 0.0; + double tol = 0.5; + + for (uint32_t i = 0, v = X_START; i <= X_COUNT; ++i, v += X_STEP) { + float x = FPBits(v).get_val(); + if (isnan(x) || isinf(x) || x < 0.0) + continue; + + for (uint32_t j = 0, w = Y_START; j <= Y_COUNT; ++j, w += Y_STEP) { + float y = FPBits(w).get_val(); + if (isnan(y) || isinf(y)) + continue; + + LIBC_NAMESPACE::libc_errno = 0; + float result = LIBC_NAMESPACE::atan2f(x, y); + ++total_count; + if (isnan(result) || isinf(result)) + continue; + + ++finite_count; + mpfr::BinaryInput inputs{x, y}; + + if (!TEST_MPFR_MATCH_ROUNDING_SILENTLY(mpfr::Operation::Atan2, inputs, + result, 0.5, rounding_mode)) { + ++fails; + while (!TEST_MPFR_MATCH_ROUNDING_SILENTLY( + mpfr::Operation::Atan2, inputs, result, tol, rounding_mode)) { + failed_x = x; + failed_y = y; + failed_r = result; + + if (tol > 1000.0) + break; + + tol *= 2.0; + } + } + } + } + if (fails || (finite_count < total_count)) { + tlog << " Atan2f failed: " << fails << "/" << finite_count << "/" + << total_count << " tests.\n" + << " Max ULPs is at most: " << static_cast(tol) << ".\n"; + } + if (fails) { + mpfr::BinaryInput inputs{failed_x, failed_y}; + EXPECT_MPFR_MATCH(mpfr::Operation::Atan2, inputs, failed_r, 0.5, + rounding_mode); + } + }; + + tlog << " Test Rounding To Nearest...\n"; + test(mpfr::RoundingMode::Nearest); + + tlog << " Test Rounding Downward...\n"; + 
test(mpfr::RoundingMode::Downward); + + tlog << " Test Rounding Upward...\n"; + test(mpfr::RoundingMode::Upward); + + tlog << " Test Rounding Toward Zero...\n"; + test(mpfr::RoundingMode::TowardZero); +} diff --git a/libc/test/src/math/atanf_test.cpp b/libc/test/src/math/atanf_test.cpp index e51932fc495525..58b0eadd63f8d6 100644 --- a/libc/test/src/math/atanf_test.cpp +++ b/libc/test/src/math/atanf_test.cpp @@ -55,12 +55,15 @@ TEST_F(LlvmLibcAtanfTest, InFloatRange) { TEST_F(LlvmLibcAtanfTest, SpecialValues) { uint32_t val_arr[] = { 0x3d8d6b23U, // x = 0x1.1ad646p-4f + 0x3dbb6ac7U, // x = 0x1.76d58ep-4f 0x3feefcfbU, // x = 0x1.ddf9f6p+0f + 0x3ffe2ec1U, // x = 0x1.fc5d82p+0f 0xbd8d6b23U, // x = -0x1.1ad646p-4f + 0xbdbb6ac7U, // x = -0x1.76d58ep-4f 0xbfeefcfbU, // x = -0x1.ddf9f6p+0f + 0xbffe2ec1U, // x = -0x1.fc5d82p+0 0x7F800000U, // x = +Inf 0xFF800000U, // x = -Inf - 0xbffe2ec1U, // x = -0x1.fc5d82p+0f }; for (uint32_t v : val_arr) { float x = FPBits(v).get_val(); diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt index ae2cbad7d5a7d9..5d269ddb229cfc 100644 --- a/libc/test/src/math/smoke/CMakeLists.txt +++ b/libc/test/src/math/smoke/CMakeLists.txt @@ -2755,6 +2755,18 @@ add_fp_unittest( libc.src.__support.FPUtil.fp_bits ) +add_fp_unittest( + atan2f_test + SUITE + libc-math-smoke-tests + SRCS + atan2f_test.cpp + DEPENDS + libc.src.errno.errno + libc.src.math.atan2f + libc.src.__support.FPUtil.fp_bits +) + add_fp_unittest( scalbn_test SUITE diff --git a/libc/test/src/math/smoke/atan2f_test.cpp b/libc/test/src/math/smoke/atan2f_test.cpp new file mode 100644 index 00000000000000..ecac36b3a8c01f --- /dev/null +++ b/libc/test/src/math/smoke/atan2f_test.cpp @@ -0,0 +1,50 @@ +//===-- Unittests for atan2f ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "include/llvm-libc-macros/math-macros.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/errno/libc_errno.h" +#include "src/math/atan2f.h" +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" + +using LlvmLibcAtan2fTest = LIBC_NAMESPACE::testing::FPTest; + +TEST_F(LlvmLibcAtan2fTest, SpecialNumbers) { + LIBC_NAMESPACE::libc_errno = 0; + + LIBC_NAMESPACE::fputil::clear_except(FE_ALL_EXCEPT); + EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::atan2f(aNaN, zero)); + EXPECT_FP_EXCEPTION(0); + EXPECT_MATH_ERRNO(0); + + LIBC_NAMESPACE::fputil::clear_except(FE_ALL_EXCEPT); + EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::atan2f(1.0f, aNaN)); + EXPECT_FP_EXCEPTION(0); + EXPECT_MATH_ERRNO(0); + + LIBC_NAMESPACE::fputil::clear_except(FE_ALL_EXCEPT); + EXPECT_FP_EQ_ALL_ROUNDING(0.0f, LIBC_NAMESPACE::atan2f(zero, zero)); + EXPECT_FP_EXCEPTION(0); + EXPECT_MATH_ERRNO(0); + + LIBC_NAMESPACE::fputil::clear_except(FE_ALL_EXCEPT); + EXPECT_FP_EQ_ALL_ROUNDING(-0.0f, LIBC_NAMESPACE::atan2f(-0.0f, zero)); + EXPECT_FP_EXCEPTION(0); + EXPECT_MATH_ERRNO(0); + + LIBC_NAMESPACE::fputil::clear_except(FE_ALL_EXCEPT); + EXPECT_FP_EQ_ALL_ROUNDING(0.0f, LIBC_NAMESPACE::atan2f(1.0f, inf)); + EXPECT_FP_EXCEPTION(0); + EXPECT_MATH_ERRNO(0); + + LIBC_NAMESPACE::fputil::clear_except(FE_ALL_EXCEPT); + EXPECT_FP_EQ_ALL_ROUNDING(-0.0f, LIBC_NAMESPACE::atan2f(-1.0f, inf)); + EXPECT_FP_EXCEPTION(0); + EXPECT_MATH_ERRNO(0); +} diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp index 2e1c44e6fd5da9..a83f7a7ceb922c 100644 --- a/libc/utils/MPFRWrapper/MPFRUtils.cpp +++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp @@ -181,6 +181,12 @@ class MPFRNumber { return result; } + MPFRNumber atan2(const MPFRNumber &b) { + MPFRNumber result(*this); + mpfr_atan2(result.value, value, b.value, mpfr_rounding); 
+ return result; + } + MPFRNumber atanh() const { MPFRNumber result(*this); mpfr_atanh(result.value, value, mpfr_rounding); @@ -623,6 +629,8 @@ binary_operation_one_output(Operation op, InputType x, InputType y, MPFRNumber inputX(x, precision, rounding); MPFRNumber inputY(y, precision, rounding); switch (op) { + case Operation::Atan2: + return inputX.atan2(inputY); case Operation::Fmod: return inputX.fmod(inputY); case Operation::Hypot: diff --git a/libc/utils/MPFRWrapper/MPFRUtils.h b/libc/utils/MPFRWrapper/MPFRUtils.h index 6164d78fa5adc4..d5ff590cd7bb69 100644 --- a/libc/utils/MPFRWrapper/MPFRUtils.h +++ b/libc/utils/MPFRWrapper/MPFRUtils.h @@ -68,6 +68,7 @@ enum class Operation : int { // input and produce a single floating point number of the same type as // output. BeginBinaryOperationsSingleOutput, + Atan2, Fmod, Hypot, Pow, From 4746877c2716224dc87c69750bdd0df95b6d5b16 Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Mon, 1 Apr 2024 13:51:47 -0400 Subject: [PATCH 028/201] [C99] Claim conformance to WG14 N570 --- clang/test/C/C99/n570.c | 31 +++++++++++++++++++++++++++++++ clang/www/c_status.html | 2 +- 2 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 clang/test/C/C99/n570.c diff --git a/clang/test/C/C99/n570.c b/clang/test/C/C99/n570.c new file mode 100644 index 00000000000000..31c09224e618b5 --- /dev/null +++ b/clang/test/C/C99/n570.c @@ -0,0 +1,31 @@ +// RUN: %clang_cc1 -verify -std=c99 %s +// RUN: %clang_cc1 -E -std=c99 %s | FileCheck %s +// expected-no-diagnostics + +/* WG14 N570: Yes + * Empty macro arguments + * + * NB: the original paper is not available online anywhere, so the test + * coverage is coming from what could be gleaned from the C99 rationale + * document. In C89, it was UB to pass no arguments to a function-like macro, + * and that's now supported in C99. 
+ */ + +#define TEN 10 +#define U u +#define I // expands into no preprocessing tokens +#define L L +#define glue(a, b) a ## b +#define xglue(a, b) glue(a, b) + +const unsigned u = xglue(TEN, U); +const int i = xglue(TEN, I); +const long l = xglue(TEN, L); + +// CHECK: const unsigned u = 10u; +// CHECK-NEXT: const int i = 10; +// CHECK-NEXT: const long l = 10L; + +_Static_assert(u == 10U, ""); +_Static_assert(i == 10, ""); +_Static_assert(l == 10L, ""); diff --git a/clang/www/c_status.html b/clang/www/c_status.html index 028234a8961db2..123897593e5d84 100644 --- a/clang/www/c_status.html +++ b/clang/www/c_status.html @@ -300,7 +300,7 @@

C99 implementation status

empty macro arguments N570 - Unknown + Yes new structure type compatibility (tag compatibility) From 5ff2773d4e606ac57750f1fc2aa4dc49b8dbede1 Mon Sep 17 00:00:00 2001 From: Russell Greene Date: Mon, 1 Apr 2024 12:15:24 -0600 Subject: [PATCH 029/201] [clang-cl] Allow a colon after /Fo option (#87209) Modeled after https://github.com/llvm/llvm-project/commit/8513a681f7d8d1188706762e712168aebc3119dd# According to https://learn.microsoft.com/en-us/cpp/build/reference/fo-object-file-name?view=msvc-170, `/Fo` accepts a trailing-colon variant. This is also tested in practice. This allows clang-cl to parse this. I just copied one of the existing tests, let me know if this is not the best way to do this. I tested that the test does not pass beofre the Options.td change, and that it does after. See also #46065 --- clang/include/clang/Driver/Options.td | 1 + clang/test/Driver/cl-outputs.c | 3 +++ 2 files changed, 4 insertions(+) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 04eb87f0d5d1aa..f5289fb00c895e 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -8313,6 +8313,7 @@ def _SLASH_Fi : CLCompileJoined<"Fi">, def _SLASH_Fo : CLCompileJoined<"Fo">, HelpText<"Set output object file (with /c)">, MetaVarName<"">; +def _SLASH_Fo_COLON : CLCompileJoined<"Fo:">, Alias<_SLASH_Fo>; def _SLASH_guard : CLJoined<"guard:">, HelpText<"Enable Control Flow Guard with /guard:cf, or only the table with /guard:cf,nochecks. " "Enable EH Continuation Guard with /guard:ehcont">; diff --git a/clang/test/Driver/cl-outputs.c b/clang/test/Driver/cl-outputs.c index 07ff43642a62be..4d58f0fb548b57 100644 --- a/clang/test/Driver/cl-outputs.c +++ b/clang/test/Driver/cl-outputs.c @@ -301,5 +301,8 @@ // RUN: %clang_cl -fdebug-compilation-dir=. /Z7 /Foa.obj -### -- %s 2>&1 | FileCheck -check-prefix=RELATIVE_OBJPATH1 %s // RELATIVE_OBJPATH1: "-object-file-name=a.obj" +// RUN: %clang_cl -fdebug-compilation-dir=. 
/Z7 /Fo:a.obj -### -- %s 2>&1 | FileCheck -check-prefix=RELATIVE_OBJPATH1_COLON %s +// RELATIVE_OBJPATH1_COLON: "-object-file-name=a.obj" + // RUN: %clang_cl -fdebug-compilation-dir=. /Z7 /Fofoo/a.obj -### -- %s 2>&1 | FileCheck -check-prefix=RELATIVE_OBJPATH2 %s // RELATIVE_OBJPATH2: "-object-file-name=foo\\a.obj" From 6634c3e9377abf88c08bb065fb55aa15cda4c248 Mon Sep 17 00:00:00 2001 From: Kai Nacke Date: Mon, 1 Apr 2024 14:20:41 -0400 Subject: [PATCH 030/201] [GOFF] Wrap debug output with LLVM_DEBUG (#87252) The content of a GOFF record is always dumped if NDEBUG is not defined, which produces rather confusing output. This changes wrap the dumping code in LLVM_DEBUG, so the dump is only done when debug output of this module is requested. --- llvm/lib/Object/GOFFObjectFile.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Object/GOFFObjectFile.cpp b/llvm/lib/Object/GOFFObjectFile.cpp index 76a13559ebfe35..d3dfd5d1540cfe 100644 --- a/llvm/lib/Object/GOFFObjectFile.cpp +++ b/llvm/lib/Object/GOFFObjectFile.cpp @@ -104,16 +104,13 @@ GOFFObjectFile::GOFFObjectFile(MemoryBufferRef Object, Error &Err) PrevContinuationBits = I[1] & 0x03; continue; } - -#ifndef NDEBUG - for (size_t J = 0; J < GOFF::RecordLength; ++J) { + LLVM_DEBUG(for (size_t J = 0; J < GOFF::RecordLength; ++J) { const uint8_t *P = I + J; if (J % 8 == 0) dbgs() << " "; - dbgs() << format("%02hhX", *P); - } -#endif + }); + switch (RecordType) { case GOFF::RT_ESD: { // Save ESD record. 
From b8ead2198f27924f91b90b6c104c1234ccc8972e Mon Sep 17 00:00:00 2001 From: Gulfem Savrun Yeniceri Date: Mon, 1 Apr 2024 18:25:02 +0000 Subject: [PATCH 031/201] Revert "[CodeGen] Fix register pressure computation in MachinePipeliner (#87030)" This reverts commit a4dec9d6bc67c4d8fbd4a4f54ffaa0399def9627 because the test failed in the following builder: https://luci-milo.appspot.com/ui/p/fuchsia/builders/prod/clang-linux-x64/b8751864477467126481/overview --- llvm/lib/CodeGen/MachinePipeliner.cpp | 2 +- llvm/test/CodeGen/AArch64/sms-regpress.mir | 158 ----------------- llvm/test/CodeGen/PowerPC/sms-regpress.mir | 186 ++++++++++++++++++--- 3 files changed, 166 insertions(+), 180 deletions(-) delete mode 100644 llvm/test/CodeGen/AArch64/sms-regpress.mir diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index b9c6765be445a0..eb42a78603d407 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -1268,7 +1268,7 @@ class HighRegisterPressureDetector { // Calculate the upper limit of each pressure set void computePressureSetLimit(const RegisterClassInfo &RCI) { for (unsigned PSet = 0; PSet < PSetNum; PSet++) - PressureSetLimit[PSet] = TRI->getRegPressureSetLimit(MF, PSet); + PressureSetLimit[PSet] = RCI.getRegPressureSetLimit(PSet); // We assume fixed registers, such as stack pointer, are already in use. 
// Therefore subtracting the weight of the fixed registers from the limit of diff --git a/llvm/test/CodeGen/AArch64/sms-regpress.mir b/llvm/test/CodeGen/AArch64/sms-regpress.mir deleted file mode 100644 index ad98d5c6124fcf..00000000000000 --- a/llvm/test/CodeGen/AArch64/sms-regpress.mir +++ /dev/null @@ -1,158 +0,0 @@ -# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -pipeliner-max-mii=40 -pipeliner-register-pressure -pipeliner-ii-search-range=30 -debug-only=pipeliner 2>&1 | FileCheck %s - -# Check that if the register pressure is too high, the schedule is rejected, II is incremented, and scheduling continues. -# The specific value of II is not important. - -# CHECK: {{^ *}}Try to schedule with {{[0-9]+$}} -# CHECK: {{^ *}}Rejected the schedule because of too high register pressure{{$}} -# CHECK: {{^ *}}Try to schedule with {{[0-9]+$}} -# CHECK: {{^ *}}Schedule Found? 1 (II={{[0-9]+}}){{$}} - ---- | - target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" - - define dso_local double @kernel(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b, i32 noundef %n) local_unnamed_addr { - entry: - %0 = load double, ptr %a, align 8 - %arrayidx1 = getelementptr inbounds i8, ptr %a, i64 8 - %1 = load double, ptr %arrayidx1, align 8 - %cmp133 = icmp sgt i32 %n, 0 - br i1 %cmp133, label %for.body.preheader, label %for.cond.cleanup - - for.body.preheader: ; preds = %entry - %wide.trip.count = zext nneg i32 %n to i64 - br label %for.body - - for.cond.cleanup: ; preds = %for.body, %entry - %res.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add54, %for.body ] - ret double %res.0.lcssa - - for.body: ; preds = %for.body.preheader, %for.body - %lsr.iv137 = phi i64 [ %wide.trip.count, %for.body.preheader ], [ %lsr.iv.next, %for.body ] - %lsr.iv = phi ptr [ %b, %for.body.preheader ], [ %scevgep, %for.body ] - %res.0135 = phi double [ 0.000000e+00, %for.body.preheader ], [ %add54, %for.body ] 
- %2 = load double, ptr %lsr.iv, align 8 - %3 = tail call double @llvm.fmuladd.f64(double %0, double %2, double %0) - %4 = tail call double @llvm.fmuladd.f64(double %3, double %2, double %3) - %5 = tail call double @llvm.fmuladd.f64(double %4, double %2, double %4) - %6 = tail call double @llvm.fmuladd.f64(double %5, double %2, double %5) - %7 = tail call double @llvm.fmuladd.f64(double %6, double %2, double %6) - %8 = tail call double @llvm.fmuladd.f64(double %7, double %2, double %7) - %9 = tail call double @llvm.fmuladd.f64(double %8, double %2, double %8) - %10 = tail call double @llvm.fmuladd.f64(double %9, double %2, double %9) - %11 = tail call double @llvm.fmuladd.f64(double %10, double %2, double %10) - %12 = tail call double @llvm.fmuladd.f64(double %11, double %2, double %11) - %13 = tail call double @llvm.fmuladd.f64(double %12, double %2, double %12) - %14 = tail call double @llvm.fmuladd.f64(double %13, double %2, double %13) - %15 = tail call double @llvm.fmuladd.f64(double %14, double %2, double %14) - %16 = tail call double @llvm.fmuladd.f64(double %15, double %2, double %15) - %17 = tail call double @llvm.fmuladd.f64(double %16, double %2, double %16) - %18 = tail call double @llvm.fmuladd.f64(double %17, double %2, double %17) - %add = fadd double %17, %18 - %19 = tail call double @llvm.fmuladd.f64(double %18, double %2, double %add) - %add35 = fadd double %10, %19 - %20 = tail call double @llvm.fmuladd.f64(double %3, double %2, double %add35) - %add38 = fadd double %11, %20 - %21 = tail call double @llvm.fmuladd.f64(double %4, double %2, double %add38) - %add41 = fadd double %12, %21 - %22 = tail call double @llvm.fmuladd.f64(double %5, double %2, double %add41) - %add44 = fadd double %14, %15 - %add45 = fadd double %13, %add44 - %add46 = fadd double %add45, %22 - %23 = tail call double @llvm.fmuladd.f64(double %6, double %2, double %add46) - %mul = fmul double %2, %7 - %mul51 = fmul double %1, %mul - %24 = tail call double 
@llvm.fmuladd.f64(double %mul51, double %9, double %23) - %25 = tail call double @llvm.fmuladd.f64(double %8, double %1, double %24) - %add54 = fadd double %res.0135, %25 - %scevgep = getelementptr i8, ptr %lsr.iv, i64 8 - %lsr.iv.next = add nsw i64 %lsr.iv137, -1 - %exitcond.not = icmp eq i64 %lsr.iv.next, 0 - br i1 %exitcond.not, label %for.cond.cleanup, label %for.body - } - - declare double @llvm.fmuladd.f64(double, double, double) - -... ---- -name: kernel -tracksRegLiveness: true -liveins: - - { reg: '$x0', virtual-reg: '%10' } - - { reg: '$x1', virtual-reg: '%11' } - - { reg: '$w2', virtual-reg: '%12' } -body: | - bb.0.entry: - successors: %bb.1, %bb.4 - liveins: $x0, $x1, $w2 - - %12:gpr32common = COPY $w2 - %11:gpr64 = COPY $x1 - %10:gpr64common = COPY $x0 - dead $wzr = SUBSWri %12, 1, 0, implicit-def $nzcv - Bcc 10, %bb.1, implicit $nzcv - - bb.4: - %13:fpr64 = FMOVD0 - B %bb.2 - - bb.1.for.body.preheader: - %0:fpr64 = LDRDui %10, 0 :: (load (s64) from %ir.a) - %1:fpr64 = LDRDui %10, 1 :: (load (s64) from %ir.arrayidx1) - %16:gpr32 = ORRWrs $wzr, %12, 0 - %2:gpr64all = SUBREG_TO_REG 0, killed %16, %subreg.sub_32 - %15:fpr64 = FMOVD0 - B %bb.3 - - bb.2.for.cond.cleanup: - %3:fpr64 = PHI %13, %bb.4, %7, %bb.3 - $d0 = COPY %3 - RET_ReallyLR implicit $d0 - - bb.3.for.body: - successors: %bb.2, %bb.3 - - %4:gpr64sp = PHI %2, %bb.1, %9, %bb.3 - %5:gpr64sp = PHI %11, %bb.1, %8, %bb.3 - %6:fpr64 = PHI %15, %bb.1, %7, %bb.3 - early-clobber %17:gpr64sp, %18:fpr64 = LDRDpost %5, 8 :: (load (s64) from %ir.lsr.iv) - %19:fpr64 = nofpexcept FMADDDrrr %0, %18, %0, implicit $fpcr - %20:fpr64 = nofpexcept FMADDDrrr %19, %18, %19, implicit $fpcr - %21:fpr64 = nofpexcept FMADDDrrr %20, %18, %20, implicit $fpcr - %22:fpr64 = nofpexcept FMADDDrrr %21, %18, %21, implicit $fpcr - %23:fpr64 = nofpexcept FMADDDrrr %22, %18, %22, implicit $fpcr - %24:fpr64 = nofpexcept FMADDDrrr %23, %18, %23, implicit $fpcr - %25:fpr64 = nofpexcept FMADDDrrr %24, %18, %24, implicit $fpcr - 
%26:fpr64 = nofpexcept FMADDDrrr %25, %18, %25, implicit $fpcr - %27:fpr64 = nofpexcept FMADDDrrr %26, %18, %26, implicit $fpcr - %28:fpr64 = nofpexcept FMADDDrrr %27, %18, %27, implicit $fpcr - %29:fpr64 = nofpexcept FMADDDrrr %28, %18, %28, implicit $fpcr - %30:fpr64 = nofpexcept FMADDDrrr %29, %18, %29, implicit $fpcr - %31:fpr64 = nofpexcept FMADDDrrr %30, %18, %30, implicit $fpcr - %32:fpr64 = nofpexcept FMADDDrrr %31, %18, %31, implicit $fpcr - %33:fpr64 = nofpexcept FMADDDrrr %32, %18, %32, implicit $fpcr - %34:fpr64 = nofpexcept FMADDDrrr %33, %18, %33, implicit $fpcr - %35:fpr64 = nofpexcept FADDDrr %33, %34, implicit $fpcr - %36:fpr64 = nofpexcept FMADDDrrr %34, %18, killed %35, implicit $fpcr - %37:fpr64 = nofpexcept FADDDrr %26, killed %36, implicit $fpcr - %38:fpr64 = nofpexcept FMADDDrrr %19, %18, killed %37, implicit $fpcr - %39:fpr64 = nofpexcept FADDDrr %27, killed %38, implicit $fpcr - %40:fpr64 = nofpexcept FMADDDrrr %20, %18, killed %39, implicit $fpcr - %41:fpr64 = nofpexcept FADDDrr %28, killed %40, implicit $fpcr - %42:fpr64 = nofpexcept FMADDDrrr %21, %18, killed %41, implicit $fpcr - %43:fpr64 = nofpexcept FADDDrr %30, %31, implicit $fpcr - %44:fpr64 = nofpexcept FADDDrr %29, killed %43, implicit $fpcr - %45:fpr64 = nofpexcept FADDDrr killed %44, killed %42, implicit $fpcr - %46:fpr64 = nofpexcept FMADDDrrr %22, %18, killed %45, implicit $fpcr - %47:fpr64 = nofpexcept FMULDrr %18, %23, implicit $fpcr - %48:fpr64 = nofpexcept FMULDrr %1, killed %47, implicit $fpcr - %49:fpr64 = nofpexcept FMADDDrrr killed %48, %25, killed %46, implicit $fpcr - %50:fpr64 = nofpexcept FMADDDrrr %24, %1, killed %49, implicit $fpcr - %7:fpr64 = nofpexcept FADDDrr %6, killed %50, implicit $fpcr - %8:gpr64all = COPY %17 - %51:gpr64 = nsw SUBSXri %4, 1, 0, implicit-def $nzcv - %9:gpr64all = COPY %51 - Bcc 0, %bb.2, implicit $nzcv - B %bb.3 - -... 
diff --git a/llvm/test/CodeGen/PowerPC/sms-regpress.mir b/llvm/test/CodeGen/PowerPC/sms-regpress.mir index b01115c49fd8d5..cebd78af882dfd 100644 --- a/llvm/test/CodeGen/PowerPC/sms-regpress.mir +++ b/llvm/test/CodeGen/PowerPC/sms-regpress.mir @@ -1,30 +1,41 @@ -# RUN: llc --verify-machineinstrs -mcpu=pwr9 -o - %s -run-pass=pipeliner -ppc-enable-pipeliner -pipeliner-register-pressure -pipeliner-max-mii=50 -pipeliner-ii-search-range=30 -pipeliner-max-stages=10 -debug-only=pipeliner 2>&1 | FileCheck %s +# RUN: llc --verify-machineinstrs -mcpu=pwr9 -o - %s -run-pass=pipeliner -ppc-enable-pipeliner -pipeliner-register-pressure -pipeliner-max-mii=50 -pipeliner-ii-search-range=30 -pipeliner-max-stages=10 -debug-only=pipeliner 2>&1 | FileCheck %s # REQUIRES: asserts # Check that if the register pressure is too high, the schedule is rejected, II is incremented, and scheduling continues. # The specific value of II is not important. -# CHECK: {{^ *}}Try to schedule with {{[0-9]+$}} -# CHECK: {{^ *}}Rejected the schedule because of too high register pressure{{$}} -# CHECK: {{^ *}}Try to schedule with {{[0-9]+$}} -# CHECK: {{^ *}}Schedule Found? 1 (II={{[0-9]+}}){{$}} +# CHECK: Try to schedule with 21 +# CHECK: Can't schedule +# CHECK: Try to schedule with 22 +# CHECK: Can't schedule +# CHECK: Try to schedule with 23 +# CHECK: Rejected the schedule because of too high register pressure +# CHECK: Try to schedule with 24 +# CHECK: Rejected the schedule because of too high register pressure +# CHECK: Try to schedule with 25 +# CHECK: Rejected the schedule because of too high register pressure +# CHECK: Try to schedule with 26 +# CHECK: Schedule Found? 
1 (II=26) --- | + ; ModuleID = 'a.ll' + source_filename = "a.c" target datalayout = "e-m:e-Fn32-i64:64-n32:64" target triple = "ppc64le" - define dso_local double @kernel(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b, i32 noundef signext %n) local_unnamed_addr { + ; Function Attrs: nofree nosync nounwind memory(argmem: read) uwtable + define dso_local double @kernel(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b, i32 noundef signext %n) local_unnamed_addr #0 { entry: - %0 = load double, ptr %a, align 8 - %arrayidx1 = getelementptr inbounds i8, ptr %a, i64 8 - %1 = load double, ptr %arrayidx1, align 8 + %0 = load double, ptr %a, align 8, !tbaa !3 + %arrayidx1 = getelementptr inbounds double, ptr %a, i64 1 + %1 = load double, ptr %arrayidx1, align 8, !tbaa !3 %cmp163 = icmp sgt i32 %n, 0 br i1 %cmp163, label %for.body.preheader, label %for.cond.cleanup for.body.preheader: ; preds = %entry - %wide.trip.count = zext nneg i32 %n to i64 - %scevgep167 = getelementptr i8, ptr %b, i64 -8 + %wide.trip.count = zext i32 %n to i64 + %scevgep1 = getelementptr i8, ptr %b, i64 -8 call void @llvm.set.loop.iterations.i64(i64 %wide.trip.count) br label %for.body @@ -32,11 +43,11 @@ %res.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %30, %for.body ] ret double %res.0.lcssa - for.body: ; preds = %for.body.preheader, %for.body + for.body: ; preds = %for.body, %for.body.preheader %res.0165 = phi double [ 0.000000e+00, %for.body.preheader ], [ %30, %for.body ] - %2 = phi ptr [ %scevgep167, %for.body.preheader ], [ %3, %for.body ] + %2 = phi ptr [ %scevgep1, %for.body.preheader ], [ %3, %for.body ] %3 = getelementptr i8, ptr %2, i64 8 - %4 = load double, ptr %3, align 8 + %4 = load double, ptr %3, align 8, !tbaa !3 %5 = tail call double @llvm.fmuladd.f64(double %0, double %4, double %0) %6 = tail call double @llvm.fmuladd.f64(double %5, double %4, double %5) %7 = tail call double @llvm.fmuladd.f64(double %6, double %4, double %6) @@ -81,23 
+92,152 @@ %mul66 = fmul double %12, %mul65 %30 = tail call double @llvm.fmuladd.f64(double %mul66, double %10, double %res.0165) %31 = call i1 @llvm.loop.decrement.i64(i64 1) - br i1 %31, label %for.body, label %for.cond.cleanup + br i1 %31, label %for.body, label %for.cond.cleanup, !llvm.loop !7 } - declare double @llvm.fmuladd.f64(double, double, double) + ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) + declare double @llvm.fmuladd.f64(double, double, double) #1 - declare void @llvm.set.loop.iterations.i64(i64) + ; Function Attrs: nocallback noduplicate nofree nosync nounwind willreturn + declare void @llvm.set.loop.iterations.i64(i64) #2 - declare i1 @llvm.loop.decrement.i64(i64) + ; Function Attrs: nocallback noduplicate nofree nosync nounwind willreturn + declare i1 @llvm.loop.decrement.i64(i64) #2 + attributes #0 = { nofree nosync nounwind memory(argmem: read) uwtable "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crbits,+crypto,+direct-move,+extdiv,+htm,+isa-v206-instructions,+isa-v207-instructions,+isa-v30-instructions,+power8-vector,+power9-vector,+quadword-atomics,+vsx,-aix-small-local-exec-tls,-privileged,-rop-protect,-spe" } + attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } + attributes #2 = { nocallback noduplicate nofree nosync nounwind willreturn } + + !llvm.module.flags = !{!0, !1} + !llvm.ident = !{!2} + + !0 = !{i32 1, !"wchar_size", i32 4} + !1 = !{i32 7, !"uwtable", i32 2} + !2 = !{!"clang version 18.0.0 (https://miratech-soft@dev.azure.com/miratech-soft/llvm/_git/llvm c8d01fb665fc5d9378100a6d92ebcd3be49be655)"} + !3 = !{!4, !4, i64 0} + !4 = !{!"double", !5, i64 0} + !5 = !{!"omnipotent char", !6, i64 0} + !6 = !{!"Simple C/C++ TBAA"} + !7 = distinct !{!7, !8, !9} + !8 = !{!"llvm.loop.mustprogress"} + !9 = !{!"llvm.loop.unroll.disable"} + ... 
--- name: kernel +alignment: 16 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false tracksRegLiveness: true +hasWinCFI: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: false +failsVerification: false +tracksDebugUserValues: false +registers: + - { id: 0, class: vsfrc, preferred-register: '' } + - { id: 1, class: vsfrc, preferred-register: '' } + - { id: 2, class: g8rc, preferred-register: '' } + - { id: 3, class: vsfrc, preferred-register: '' } + - { id: 4, class: vsfrc, preferred-register: '' } + - { id: 5, class: g8rc_and_g8rc_nox0, preferred-register: '' } + - { id: 6, class: g8rc, preferred-register: '' } + - { id: 7, class: vsfrc, preferred-register: '' } + - { id: 8, class: g8rc_and_g8rc_nox0, preferred-register: '' } + - { id: 9, class: g8rc_and_g8rc_nox0, preferred-register: '' } + - { id: 10, class: g8rc, preferred-register: '' } + - { id: 11, class: gprc, preferred-register: '' } + - { id: 12, class: vsfrc, preferred-register: '' } + - { id: 13, class: crrc, preferred-register: '' } + - { id: 14, class: vsfrc, preferred-register: '' } + - { id: 15, class: g8rc, preferred-register: '' } + - { id: 16, class: g8rc, preferred-register: '' } + - { id: 17, class: g8rc, preferred-register: '' } + - { id: 18, class: f8rc, preferred-register: '' } + - { id: 19, class: g8rc_and_g8rc_nox0, preferred-register: '' } + - { id: 20, class: vsfrc, preferred-register: '' } + - { id: 21, class: vsfrc, preferred-register: '' } + - { id: 22, class: vsfrc, preferred-register: '' } + - { id: 23, class: vsfrc, preferred-register: '' } + - { id: 24, class: vsfrc, preferred-register: '' } + - { id: 25, class: vsfrc, preferred-register: '' } + - { id: 26, class: vsfrc, preferred-register: '' } + - { id: 27, class: vsfrc, preferred-register: '' } + - { id: 28, class: vsfrc, preferred-register: '' } + - { id: 29, class: vsfrc, 
preferred-register: '' } + - { id: 30, class: vsfrc, preferred-register: '' } + - { id: 31, class: vsfrc, preferred-register: '' } + - { id: 32, class: vsfrc, preferred-register: '' } + - { id: 33, class: vsfrc, preferred-register: '' } + - { id: 34, class: vsfrc, preferred-register: '' } + - { id: 35, class: vsfrc, preferred-register: '' } + - { id: 36, class: vsfrc, preferred-register: '' } + - { id: 37, class: vsfrc, preferred-register: '' } + - { id: 38, class: vsfrc, preferred-register: '' } + - { id: 39, class: vsfrc, preferred-register: '' } + - { id: 40, class: vsfrc, preferred-register: '' } + - { id: 41, class: vsfrc, preferred-register: '' } + - { id: 42, class: vsfrc, preferred-register: '' } + - { id: 43, class: vsfrc, preferred-register: '' } + - { id: 44, class: vsfrc, preferred-register: '' } + - { id: 45, class: vsfrc, preferred-register: '' } + - { id: 46, class: vsfrc, preferred-register: '' } + - { id: 47, class: vsfrc, preferred-register: '' } + - { id: 48, class: vsfrc, preferred-register: '' } + - { id: 49, class: vsfrc, preferred-register: '' } + - { id: 50, class: vsfrc, preferred-register: '' } + - { id: 51, class: vsfrc, preferred-register: '' } + - { id: 52, class: vsfrc, preferred-register: '' } + - { id: 53, class: vsfrc, preferred-register: '' } + - { id: 54, class: vsfrc, preferred-register: '' } + - { id: 55, class: vsfrc, preferred-register: '' } + - { id: 56, class: vsfrc, preferred-register: '' } + - { id: 57, class: vsfrc, preferred-register: '' } + - { id: 58, class: vsfrc, preferred-register: '' } + - { id: 59, class: vsfrc, preferred-register: '' } + - { id: 60, class: vsfrc, preferred-register: '' } + - { id: 61, class: vsfrc, preferred-register: '' } + - { id: 62, class: crbitrc, preferred-register: '' } liveins: - { reg: '$x3', virtual-reg: '%8' } - { reg: '$x4', virtual-reg: '%9' } - { reg: '$x5', virtual-reg: '%10' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + 
hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' + maxCallFrameSize: 4294967295 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +entry_values: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} body: | bb.0.entry: successors: %bb.2(0x50000000), %bb.1(0x30000000) @@ -111,12 +251,16 @@ body: | BCC 44, killed %13, %bb.2 bb.1: + successors: %bb.3(0x80000000) + %12:vsfrc = XXLXORdpz B %bb.3 bb.2.for.body.preheader: - %0:vsfrc = DFLOADf64 0, %8 :: (load (s64) from %ir.a) - %1:vsfrc = DFLOADf64 8, killed %8 :: (load (s64) from %ir.arrayidx1) + successors: %bb.4(0x80000000) + + %0:vsfrc = DFLOADf64 0, %8 :: (load (s64) from %ir.a, !tbaa !3) + %1:vsfrc = DFLOADf64 8, killed %8 :: (load (s64) from %ir.arrayidx1, !tbaa !3) %16:g8rc = IMPLICIT_DEF %15:g8rc = INSERT_SUBREG killed %16, killed %11, %subreg.sub_32 %17:g8rc = RLDICL killed %15, 0, 32 @@ -135,7 +279,7 @@ body: | %4:vsfrc = PHI %14, %bb.2, %7, %bb.4 %5:g8rc_and_g8rc_nox0 = PHI %2, %bb.2, %6, %bb.4 - %18:f8rc, %19:g8rc_and_g8rc_nox0 = LFDU 8, killed %5 :: (load (s64) from %ir.3) + %18:f8rc, %19:g8rc_and_g8rc_nox0 = LFDU 8, killed %5 :: (load (s64) from %ir.3, !tbaa !3) %6:g8rc = COPY killed %19 %20:vsfrc = nofpexcept XSMADDADP %0, %0, %18, implicit $rm %21:vsfrc = nofpexcept XSMADDADP %20, %20, %18, implicit $rm From e45f6e569dafd4033f86d276065d77799b5f6226 Mon Sep 17 00:00:00 2001 From: Vlad Serebrennikov Date: Mon, 1 Apr 2024 22:37:37 +0400 Subject: [PATCH 032/201] [clang] Factor out OpenACC part of `Sema` (#84184) This patch moves OpenACC parts of `Sema` into a separate class `SemaOpenACC` that is placed in a separate header `Sema/SemaOpenACC.h`. 
This patch is intended to be a model of factoring things out of `Sema`, so I picked a small OpenACC part. Goals are the following: 1) Split `Sema` into manageable parts. 2) Make dependencies between parts visible. 3) Improve Clang development cycle by avoiding recompiling unrelated parts of the compiler. 4) Avoid compile-time regressions. 5) Avoid notational regressions in the code that uses Sema. --- clang/include/clang/Sema/Sema.h | 65 ++++------------------ clang/include/clang/Sema/SemaOpenACC.h | 74 ++++++++++++++++++++++++++ clang/lib/Parse/ParseOpenACC.cpp | 21 ++++---- clang/lib/Sema/JumpDiagnostics.cpp | 1 + clang/lib/Sema/Sema.cpp | 3 +- clang/lib/Sema/SemaOpenACC.cpp | 33 +++++++----- clang/lib/Sema/TreeTransform.h | 11 ++-- 7 files changed, 125 insertions(+), 83 deletions(-) create mode 100644 clang/include/clang/Sema/SemaOpenACC.h diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 3a1abd4c7892b8..a02b684f2c77e2 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -33,7 +33,6 @@ #include "clang/AST/NSAPI.h" #include "clang/AST/PrettyPrinter.h" #include "clang/AST/StmtCXX.h" -#include "clang/AST/StmtOpenACC.h" #include "clang/AST/StmtOpenMP.h" #include "clang/AST/TypeLoc.h" #include "clang/AST/TypeOrdering.h" @@ -42,7 +41,6 @@ #include "clang/Basic/DarwinSDKInfo.h" #include "clang/Basic/ExpressionTraits.h" #include "clang/Basic/Module.h" -#include "clang/Basic/OpenACCKinds.h" #include "clang/Basic/OpenCLOptions.h" #include "clang/Basic/OpenMPKinds.h" #include "clang/Basic/PragmaKinds.h" @@ -183,6 +181,7 @@ class Preprocessor; class PseudoDestructorTypeStorage; class PseudoObjectExpr; class QualType; +class SemaOpenACC; class StandardConversionSequence; class Stmt; class StringLiteral; @@ -466,9 +465,8 @@ class Sema final { // 37. Name Lookup for RISC-V Vector Intrinsic (SemaRISCVVectorLookup.cpp) // 38. CUDA (SemaCUDA.cpp) // 39. HLSL Constructs (SemaHLSL.cpp) - // 40. 
OpenACC Constructs (SemaOpenACC.cpp) - // 41. OpenMP Directives and Clauses (SemaOpenMP.cpp) - // 42. SYCL Constructs (SemaSYCL.cpp) + // 40. OpenMP Directives and Clauses (SemaOpenMP.cpp) + // 41. SYCL Constructs (SemaSYCL.cpp) /// \name Semantic Analysis /// Implementations are in Sema.cpp @@ -1162,6 +1160,11 @@ class Sema final { /// CurContext - This is the current declaration context of parsing. DeclContext *CurContext; + SemaOpenACC &OpenACC() { + assert(OpenACCPtr); + return *OpenACCPtr; + } + protected: friend class Parser; friend class InitializationSequence; @@ -1192,6 +1195,8 @@ class Sema final { mutable IdentifierInfo *Ident_super; + std::unique_ptr OpenACCPtr; + ///@} // @@ -13351,56 +13356,6 @@ class Sema final { // // - /// \name OpenACC Constructs - /// Implementations are in SemaOpenACC.cpp - ///@{ - -public: - /// Called after parsing an OpenACC Clause so that it can be checked. - bool ActOnOpenACCClause(OpenACCClauseKind ClauseKind, - SourceLocation StartLoc); - - /// Called after the construct has been parsed, but clauses haven't been - /// parsed. This allows us to diagnose not-implemented, as well as set up any - /// state required for parsing the clauses. - void ActOnOpenACCConstruct(OpenACCDirectiveKind K, SourceLocation StartLoc); - - /// Called after the directive, including its clauses, have been parsed and - /// parsing has consumed the 'annot_pragma_openacc_end' token. This DOES - /// happen before any associated declarations or statements have been parsed. - /// This function is only called when we are parsing a 'statement' context. - bool ActOnStartOpenACCStmtDirective(OpenACCDirectiveKind K, - SourceLocation StartLoc); - - /// Called after the directive, including its clauses, have been parsed and - /// parsing has consumed the 'annot_pragma_openacc_end' token. This DOES - /// happen before any associated declarations or statements have been parsed. - /// This function is only called when we are parsing a 'Decl' context. 
- bool ActOnStartOpenACCDeclDirective(OpenACCDirectiveKind K, - SourceLocation StartLoc); - /// Called when we encounter an associated statement for our construct, this - /// should check legality of the statement as it appertains to this Construct. - StmtResult ActOnOpenACCAssociatedStmt(OpenACCDirectiveKind K, - StmtResult AssocStmt); - - /// Called after the directive has been completely parsed, including the - /// declaration group or associated statement. - StmtResult ActOnEndOpenACCStmtDirective(OpenACCDirectiveKind K, - SourceLocation StartLoc, - SourceLocation EndLoc, - StmtResult AssocStmt); - /// Called after the directive has been completely parsed, including the - /// declaration group or associated statement. - DeclGroupRef ActOnEndOpenACCDeclDirective(); - - ///@} - - // - // - // ------------------------------------------------------------------------- - // - // - /// \name OpenMP Directives and Clauses /// Implementations are in SemaOpenMP.cpp ///@{ diff --git a/clang/include/clang/Sema/SemaOpenACC.h b/clang/include/clang/Sema/SemaOpenACC.h new file mode 100644 index 00000000000000..7f50d7889ad79b --- /dev/null +++ b/clang/include/clang/Sema/SemaOpenACC.h @@ -0,0 +1,74 @@ +//===----- SemaOpenACC.h - Semantic Analysis for OpenACC constructs -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file declares semantic analysis for OpenACC constructs and +/// clauses. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_SEMA_SEMAOPENACC_H +#define LLVM_CLANG_SEMA_SEMAOPENACC_H + +#include "clang/AST/DeclGroup.h" +#include "clang/Basic/OpenACCKinds.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Sema/Ownership.h" + +namespace clang { + +class ASTContext; +class DiagnosticEngine; +class LangOptions; +class Sema; + +class SemaOpenACC { +public: + SemaOpenACC(Sema &S); + + ASTContext &getASTContext() const; + DiagnosticsEngine &getDiagnostics() const; + const LangOptions &getLangOpts() const; + + Sema &SemaRef; + + /// Called after parsing an OpenACC Clause so that it can be checked. + bool ActOnClause(OpenACCClauseKind ClauseKind, SourceLocation StartLoc); + + /// Called after the construct has been parsed, but clauses haven't been + /// parsed. This allows us to diagnose not-implemented, as well as set up any + /// state required for parsing the clauses. + void ActOnConstruct(OpenACCDirectiveKind K, SourceLocation StartLoc); + + /// Called after the directive, including its clauses, have been parsed and + /// parsing has consumed the 'annot_pragma_openacc_end' token. This DOES + /// happen before any associated declarations or statements have been parsed. + /// This function is only called when we are parsing a 'statement' context. + bool ActOnStartStmtDirective(OpenACCDirectiveKind K, SourceLocation StartLoc); + + /// Called after the directive, including its clauses, have been parsed and + /// parsing has consumed the 'annot_pragma_openacc_end' token. This DOES + /// happen before any associated declarations or statements have been parsed. + /// This function is only called when we are parsing a 'Decl' context. + bool ActOnStartDeclDirective(OpenACCDirectiveKind K, SourceLocation StartLoc); + /// Called when we encounter an associated statement for our construct, this + /// should check legality of the statement as it appertains to this Construct. 
+ StmtResult ActOnAssociatedStmt(OpenACCDirectiveKind K, StmtResult AssocStmt); + + /// Called after the directive has been completely parsed, including the + /// declaration group or associated statement. + StmtResult ActOnEndStmtDirective(OpenACCDirectiveKind K, + SourceLocation StartLoc, + SourceLocation EndLoc, StmtResult AssocStmt); + /// Called after the directive has been completely parsed, including the + /// declaration group or associated statement. + DeclGroupRef ActOnEndDeclDirective(); +}; + +} // namespace clang + +#endif // LLVM_CLANG_SEMA_SEMAOPENACC_H diff --git a/clang/lib/Parse/ParseOpenACC.cpp b/clang/lib/Parse/ParseOpenACC.cpp index 50e3c39f60919b..07dd2ba0106a4e 100644 --- a/clang/lib/Parse/ParseOpenACC.cpp +++ b/clang/lib/Parse/ParseOpenACC.cpp @@ -14,6 +14,7 @@ #include "clang/Parse/ParseDiagnostic.h" #include "clang/Parse/Parser.h" #include "clang/Parse/RAIIObjectsForParser.h" +#include "clang/Sema/SemaOpenACC.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" @@ -777,7 +778,7 @@ bool Parser::ParseOpenACCClause(OpenACCDirectiveKind DirKind) { SourceLocation ClauseLoc = ConsumeToken(); bool Result = ParseOpenACCClauseParams(DirKind, Kind); - getActions().ActOnOpenACCClause(Kind, ClauseLoc); + getActions().OpenACC().ActOnClause(Kind, ClauseLoc); return Result; } @@ -1151,7 +1152,7 @@ Parser::OpenACCDirectiveParseInfo Parser::ParseOpenACCDirective() { SourceLocation StartLoc = getCurToken().getLocation(); OpenACCDirectiveKind DirKind = ParseOpenACCDirectiveKind(*this); - getActions().ActOnOpenACCConstruct(DirKind, StartLoc); + getActions().OpenACC().ActOnConstruct(DirKind, StartLoc); // Once we've parsed the construct/directive name, some have additional // specifiers that need to be taken care of. 
Atomic has an 'atomic-clause' @@ -1223,12 +1224,12 @@ Parser::DeclGroupPtrTy Parser::ParseOpenACCDirectiveDecl() { OpenACCDirectiveParseInfo DirInfo = ParseOpenACCDirective(); - if (getActions().ActOnStartOpenACCDeclDirective(DirInfo.DirKind, - DirInfo.StartLoc)) + if (getActions().OpenACC().ActOnStartDeclDirective(DirInfo.DirKind, + DirInfo.StartLoc)) return nullptr; // TODO OpenACC: Do whatever decl parsing is required here. - return DeclGroupPtrTy::make(getActions().ActOnEndOpenACCDeclDirective()); + return DeclGroupPtrTy::make(getActions().OpenACC().ActOnEndDeclDirective()); } // Parse OpenACC Directive on a Statement. @@ -1239,8 +1240,8 @@ StmtResult Parser::ParseOpenACCDirectiveStmt() { ConsumeAnnotationToken(); OpenACCDirectiveParseInfo DirInfo = ParseOpenACCDirective(); - if (getActions().ActOnStartOpenACCStmtDirective(DirInfo.DirKind, - DirInfo.StartLoc)) + if (getActions().OpenACC().ActOnStartStmtDirective(DirInfo.DirKind, + DirInfo.StartLoc)) return StmtError(); StmtResult AssocStmt; @@ -1249,10 +1250,10 @@ StmtResult Parser::ParseOpenACCDirectiveStmt() { ParsingOpenACCDirectiveRAII DirScope(*this, /*Value=*/false); ParseScope ACCScope(this, getOpenACCScopeFlags(DirInfo.DirKind)); - AssocStmt = getActions().ActOnOpenACCAssociatedStmt(DirInfo.DirKind, - ParseStatement()); + AssocStmt = getActions().OpenACC().ActOnAssociatedStmt(DirInfo.DirKind, + ParseStatement()); } - return getActions().ActOnEndOpenACCStmtDirective( + return getActions().OpenACC().ActOnEndStmtDirective( DirInfo.DirKind, DirInfo.StartLoc, DirInfo.EndLoc, AssocStmt); } diff --git a/clang/lib/Sema/JumpDiagnostics.cpp b/clang/lib/Sema/JumpDiagnostics.cpp index 6722878883be8e..ce6211c23218bb 100644 --- a/clang/lib/Sema/JumpDiagnostics.cpp +++ b/clang/lib/Sema/JumpDiagnostics.cpp @@ -16,6 +16,7 @@ #include "clang/AST/ExprCXX.h" #include "clang/AST/StmtCXX.h" #include "clang/AST/StmtObjC.h" +#include "clang/AST/StmtOpenACC.h" #include "clang/AST/StmtOpenMP.h" #include 
"clang/Basic/SourceLocation.h" #include "clang/Sema/SemaInternal.h" diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index c9dbac0dfc339d..b7e4fc0ac9b5b2 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -43,6 +43,7 @@ #include "clang/Sema/ScopeInfo.h" #include "clang/Sema/SemaConsumer.h" #include "clang/Sema/SemaInternal.h" +#include "clang/Sema/SemaOpenACC.h" #include "clang/Sema/TemplateDeduction.h" #include "clang/Sema/TemplateInstCallback.h" #include "clang/Sema/TypoCorrection.h" @@ -196,7 +197,7 @@ Sema::Sema(Preprocessor &pp, ASTContext &ctxt, ASTConsumer &consumer, ThreadSafetyDeclCache(nullptr), LateTemplateParser(nullptr), LateTemplateParserCleanup(nullptr), OpaqueParser(nullptr), CurContext(nullptr), ExternalSource(nullptr), CurScope(nullptr), - Ident_super(nullptr), + Ident_super(nullptr), OpenACCPtr(std::make_unique(*this)), MSPointerToMemberRepresentationMethod( LangOpts.getMSPointerToMemberRepresentationMethod()), MSStructPragmaOn(false), VtorDispStack(LangOpts.getVtorDispMode()), diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp index d3a602d1c382fa..2ac994cac71e19 100644 --- a/clang/lib/Sema/SemaOpenACC.cpp +++ b/clang/lib/Sema/SemaOpenACC.cpp @@ -11,14 +11,15 @@ /// //===----------------------------------------------------------------------===// +#include "clang/AST/StmtOpenACC.h" +#include "clang/Sema/SemaOpenACC.h" #include "clang/Basic/DiagnosticSema.h" -#include "clang/Basic/OpenACCKinds.h" #include "clang/Sema/Sema.h" using namespace clang; namespace { -bool diagnoseConstructAppertainment(Sema &S, OpenACCDirectiveKind K, +bool diagnoseConstructAppertainment(SemaOpenACC &S, OpenACCDirectiveKind K, SourceLocation StartLoc, bool IsStmt) { switch (K) { default: @@ -30,14 +31,21 @@ bool diagnoseConstructAppertainment(Sema &S, OpenACCDirectiveKind K, case OpenACCDirectiveKind::Serial: case OpenACCDirectiveKind::Kernels: if (!IsStmt) - return S.Diag(StartLoc, 
diag::err_acc_construct_appertainment) << K; + return S.SemaRef.Diag(StartLoc, diag::err_acc_construct_appertainment) + << K; break; } return false; } } // namespace -bool Sema::ActOnOpenACCClause(OpenACCClauseKind ClauseKind, +SemaOpenACC::SemaOpenACC(Sema &S) : SemaRef(S) {} + +ASTContext &SemaOpenACC::getASTContext() const { return SemaRef.Context; } +DiagnosticsEngine &SemaOpenACC::getDiagnostics() const { return SemaRef.Diags; } +const LangOptions &SemaOpenACC::getLangOpts() const { return SemaRef.LangOpts; } + +bool SemaOpenACC::ActOnClause(OpenACCClauseKind ClauseKind, SourceLocation StartLoc) { if (ClauseKind == OpenACCClauseKind::Invalid) return false; @@ -45,9 +53,10 @@ bool Sema::ActOnOpenACCClause(OpenACCClauseKind ClauseKind, // whatever it can do. This function will eventually need to start returning // some sort of Clause AST type, but for now just return true/false based on // success. - return Diag(StartLoc, diag::warn_acc_clause_unimplemented) << ClauseKind; + return SemaRef.Diag(StartLoc, diag::warn_acc_clause_unimplemented) + << ClauseKind; } -void Sema::ActOnOpenACCConstruct(OpenACCDirectiveKind K, +void SemaOpenACC::ActOnConstruct(OpenACCDirectiveKind K, SourceLocation StartLoc) { switch (K) { case OpenACCDirectiveKind::Invalid: @@ -63,17 +72,17 @@ void Sema::ActOnOpenACCConstruct(OpenACCDirectiveKind K, // here as these constructs do not take any arguments. 
break; default: - Diag(StartLoc, diag::warn_acc_construct_unimplemented) << K; + SemaRef.Diag(StartLoc, diag::warn_acc_construct_unimplemented) << K; break; } } -bool Sema::ActOnStartOpenACCStmtDirective(OpenACCDirectiveKind K, +bool SemaOpenACC::ActOnStartStmtDirective(OpenACCDirectiveKind K, SourceLocation StartLoc) { return diagnoseConstructAppertainment(*this, K, StartLoc, /*IsStmt=*/true); } -StmtResult Sema::ActOnEndOpenACCStmtDirective(OpenACCDirectiveKind K, +StmtResult SemaOpenACC::ActOnEndStmtDirective(OpenACCDirectiveKind K, SourceLocation StartLoc, SourceLocation EndLoc, StmtResult AssocStmt) { @@ -92,7 +101,7 @@ StmtResult Sema::ActOnEndOpenACCStmtDirective(OpenACCDirectiveKind K, llvm_unreachable("Unhandled case in directive handling?"); } -StmtResult Sema::ActOnOpenACCAssociatedStmt(OpenACCDirectiveKind K, +StmtResult SemaOpenACC::ActOnAssociatedStmt(OpenACCDirectiveKind K, StmtResult AssocStmt) { switch (K) { default: @@ -114,9 +123,9 @@ StmtResult Sema::ActOnOpenACCAssociatedStmt(OpenACCDirectiveKind K, llvm_unreachable("Invalid associated statement application"); } -bool Sema::ActOnStartOpenACCDeclDirective(OpenACCDirectiveKind K, +bool SemaOpenACC::ActOnStartDeclDirective(OpenACCDirectiveKind K, SourceLocation StartLoc) { return diagnoseConstructAppertainment(*this, K, StartLoc, /*IsStmt=*/false); } -DeclGroupRef Sema::ActOnEndOpenACCDeclDirective() { return DeclGroupRef{}; } +DeclGroupRef SemaOpenACC::ActOnEndDeclDirective() { return DeclGroupRef{}; } diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index eace1bfdff5aa0..a2568ad0f82cc2 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -39,6 +39,7 @@ #include "clang/Sema/ScopeInfo.h" #include "clang/Sema/SemaDiagnostic.h" #include "clang/Sema/SemaInternal.h" +#include "clang/Sema/SemaOpenACC.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/Support/ErrorHandling.h" #include @@ -4000,16 +4001,16 @@ class TreeTransform { SourceLocation 
BeginLoc, SourceLocation EndLoc, StmtResult StrBlock) { - getSema().ActOnOpenACCConstruct(K, BeginLoc); + getSema().OpenACC().ActOnConstruct(K, BeginLoc); // TODO OpenACC: Include clauses. - if (getSema().ActOnStartOpenACCStmtDirective(K, BeginLoc)) + if (getSema().OpenACC().ActOnStartStmtDirective(K, BeginLoc)) return StmtError(); - StrBlock = getSema().ActOnOpenACCAssociatedStmt(K, StrBlock); + StrBlock = getSema().OpenACC().ActOnAssociatedStmt(K, StrBlock); - return getSema().ActOnEndOpenACCStmtDirective(K, BeginLoc, EndLoc, - StrBlock); + return getSema().OpenACC().ActOnEndStmtDirective(K, BeginLoc, EndLoc, + StrBlock); } private: From f3ec73fca492124b15c3eb9a3ae12b7d86470d27 Mon Sep 17 00:00:00 2001 From: Mingming Liu Date: Mon, 1 Apr 2024 11:47:11 -0700 Subject: [PATCH 033/201] [NFC]Precommit test for vtable import (#79363) A precommit test case to show function summary and global values when a function has instructions annotated with vtable profiles and indirect call profiles. - This is a precommit test for https://github.com/llvm/llvm-project/pull/79381 --- .../thinlto-func-summary-vtableref-pgo.ll | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll diff --git a/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll b/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll new file mode 100644 index 00000000000000..78b175caca85f0 --- /dev/null +++ b/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll @@ -0,0 +1,61 @@ +; RUN: opt -module-summary %s -o %t.o + +; RUN: llvm-bcanalyzer -dump %t.o | FileCheck %s + +; RUN: llvm-dis -o - %t.o | FileCheck %s --check-prefix=DIS + + +; CHECK: +; CHECK-NEXT: +; The `VALUE_GUID` below represents the "_ZN4Base4funcEv" referenced by the +; indirect call instruction. 
+; CHECK-NEXT: +; has the format [valueid, flags, instcount, funcflags, +; numrefs, rorefcnt, worefcnt, +; n x (valueid, hotness+tailcall)] +; CHECK-NEXT: +; CHECK-NEXT: + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function has one BB and an entry count of 150, so the BB is hot according to +; ProfileSummary and reflected so in the bitcode (see llvm-dis output). +define i32 @_Z4testP4Base(ptr %0) !prof !15 { + %2 = load ptr, ptr %0, !prof !16 + %3 = load ptr, ptr %2 + %4 = tail call i32 %3(ptr %0), !prof !17 + ret i32 %4 +} + +!llvm.module.flags = !{!1} + + +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 200} +!6 = !{!"MaxInternalCount", i64 200} +!7 = !{!"MaxFunctionCount", i64 200} +!8 = !{!"NumCounts", i64 3} +!9 = !{!"NumFunctions", i64 3} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 100, i32 1} +!13 = !{i32 990000, i64 100, i32 1} +!14 = !{i32 999999, i64 1, i32 2} + +!15 = !{!"function_entry_count", i32 150} +; 1960855528937986108 is the MD5 hash of _ZTV4Base +!16 = !{!"VP", i32 2, i64 1600, i64 1960855528937986108, i64 1600} +; 5459407273543877811 is the MD5 hash of _ZN4Base4funcEv +!17 = !{!"VP", i32 0, i64 1600, i64 5459407273543877811, i64 1600} + +; ModuleSummaryIndex stores map in std::map; so +; global value summares are printed out in the order that gv's guid increases. 
+; DIS: ^0 = module: (path: "{{.*}}", hash: (0, 0, 0, 0, 0)) +; DIS: ^1 = gv: (guid: 5459407273543877811) +; DIS: ^2 = gv: (name: "_Z4testP4Base", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 4, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1, mustBeUnreachable: 0), calls: ((callee: ^1, hotness: hot))))) ; guid = 15857150948103218965 +; DIS: ^3 = blockcount: 0 From cbd48b184eca1ca73e6f20575501d94ad30fbd58 Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Mon, 1 Apr 2024 14:49:56 -0400 Subject: [PATCH 034/201] [C99] Claim conformance to "conversion of array to pointer not limited to lvalues" We don't have a document number for this, but the change was called out explicitly in the editor's comments in the C99 foreword. --- clang/test/C/C99/array-lvalue.c | 38 +++++++++++++++++++++++++++++++++ clang/www/c_status.html | 2 +- 2 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 clang/test/C/C99/array-lvalue.c diff --git a/clang/test/C/C99/array-lvalue.c b/clang/test/C/C99/array-lvalue.c new file mode 100644 index 00000000000000..4e963b4f74fc32 --- /dev/null +++ b/clang/test/C/C99/array-lvalue.c @@ -0,0 +1,38 @@ +/* RUN: %clang_cc1 -verify -pedantic -std=c99 %s + RUN: %clang_cc1 -verify=c89 -pedantic -std=c89 %s + expected-no-diagnostics + */ + +/* WG14 ???: Yes + * Conversion of array to pointer not limited to lvalues + * + * NB: the original paper number is unknown, this was gleaned from the editor's report + * in the C99 foreword. The C99 rationale document did not shed much light on + * the situation either, mostly talking about user confusion between lvalue and + * modifiable lvalue. 
However, the crux of the change was C89 changing: + * + * C89 3.2.2.1: Except when it is the operand of ..., an lvalue that has type + * 'array of type' is converted to an expression that has type 'pointer to + * type' that points to the initial element of the array object and is not an + * lvalue. + * + * C99 6.3.2.1p3: Except when it is the operand of ..., an expression that has + * type 'array of type' is converted to an expression with type 'pointer to + * type' that points to the initial element of the array object and is not an + * lvalue. + */ + +struct S { + char arr[100]; +}; + +struct S f(void); + +void func(void) { + char c; + /* The return from f() is an rvalue, so this code is not valid in C89, but is + * valid in C99. + */ + c = f().arr[10]; /* c89-warning {{ISO C90 does not allow subscripting non-lvalue array}} */ +} + diff --git a/clang/www/c_status.html b/clang/www/c_status.html index 123897593e5d84..803dce8e29fc04 100644 --- a/clang/www/c_status.html +++ b/clang/www/c_status.html @@ -360,7 +360,7 @@

C99 implementation status

conversion of array to pointer not limited to lvalues Unknown - Unknown + Yes relaxed constraints on aggregate and union initialization From 4cd7bb07c7540bf83a7a60a67aa282e99461ca2f Mon Sep 17 00:00:00 2001 From: Kirill Podoprigora Date: Mon, 1 Apr 2024 22:07:10 +0300 Subject: [PATCH 035/201] [mlir] Remove ``dataclasses`` package from mlir ``requirements.txt`` (#87223) The ``dataclasses`` package makes sense for Python 3.6, becauses ``dataclasses`` is only included in the standard library with 3.7 version. Now, 3.6 has reached EOL, so all current supported versions of Python (3.8, 3.9, 3.10, 3.11, 3.12) have this feature in their standard libraries. Therefore there's no need to install the ``dataclasses`` package now. --- mlir/python/requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mlir/python/requirements.txt b/mlir/python/requirements.txt index a596f8747ebe76..acd6dbb25edaf5 100644 --- a/mlir/python/requirements.txt +++ b/mlir/python/requirements.txt @@ -1,4 +1,3 @@ numpy>=1.19.5, <=1.26 pybind11>=2.9.0, <=2.10.3 -PyYAML>=5.3.1, <=6.0.1 -dataclasses>=0.6, <=0.8 \ No newline at end of file +PyYAML>=5.3.1, <=6.0.1 \ No newline at end of file From ee99475068523de185dce0a449b65e684a1e6b73 Mon Sep 17 00:00:00 2001 From: Nathan Sidwell Date: Mon, 1 Apr 2024 15:41:38 -0400 Subject: [PATCH 036/201] [clang] Fix bitfield access unit for vbase corner case (#87238) This fixes #87227, a vbase can be placed below nvsize when empty members and/or bases are in play. We must account for that. 
--- clang/lib/CodeGen/CGRecordLayoutBuilder.cpp | 57 +++++++--- .../test/CodeGenCXX/bitfield-access-tail.cpp | 104 ++++++++++++------ 2 files changed, 113 insertions(+), 48 deletions(-) diff --git a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp index e32023aeac1e6f..634a55fec5182e 100644 --- a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp +++ b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp @@ -185,9 +185,10 @@ struct CGRecordLowering { /// Lowers an ASTRecordLayout to a llvm type. void lower(bool NonVirtualBaseType); void lowerUnion(bool isNoUniqueAddress); - void accumulateFields(); + void accumulateFields(bool isNonVirtualBaseType); RecordDecl::field_iterator - accumulateBitFields(RecordDecl::field_iterator Field, + accumulateBitFields(bool isNonVirtualBaseType, + RecordDecl::field_iterator Field, RecordDecl::field_iterator FieldEnd); void computeVolatileBitfields(); void accumulateBases(); @@ -195,8 +196,10 @@ struct CGRecordLowering { void accumulateVBases(); /// Recursively searches all of the bases to find out if a vbase is /// not the primary vbase of some base class. - bool hasOwnStorage(const CXXRecordDecl *Decl, const CXXRecordDecl *Query); + bool hasOwnStorage(const CXXRecordDecl *Decl, + const CXXRecordDecl *Query) const; void calculateZeroInit(); + CharUnits calculateTailClippingOffset(bool isNonVirtualBaseType) const; /// Lowers bitfield storage types to I8 arrays for bitfields with tail /// padding that is or can potentially be used. void clipTailPadding(); @@ -287,7 +290,7 @@ void CGRecordLowering::lower(bool NVBaseType) { computeVolatileBitfields(); return; } - accumulateFields(); + accumulateFields(NVBaseType); // RD implies C++. 
if (RD) { accumulateVPtrs(); @@ -378,12 +381,12 @@ void CGRecordLowering::lowerUnion(bool isNoUniqueAddress) { Packed = true; } -void CGRecordLowering::accumulateFields() { +void CGRecordLowering::accumulateFields(bool isNonVirtualBaseType) { for (RecordDecl::field_iterator Field = D->field_begin(), FieldEnd = D->field_end(); Field != FieldEnd;) { if (Field->isBitField()) { - Field = accumulateBitFields(Field, FieldEnd); + Field = accumulateBitFields(isNonVirtualBaseType, Field, FieldEnd); assert((Field == FieldEnd || !Field->isBitField()) && "Failed to accumulate all the bitfields"); } else if (Field->isZeroSize(Context)) { @@ -404,9 +407,12 @@ void CGRecordLowering::accumulateFields() { } // Create members for bitfields. Field is a bitfield, and FieldEnd is the end -// iterator of the record. Return the first non-bitfield encountered. +// iterator of the record. Return the first non-bitfield encountered. We need +// to know whether this is the base or complete layout, as virtual bases could +// affect the upper bound of bitfield access unit allocation. RecordDecl::field_iterator -CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field, +CGRecordLowering::accumulateBitFields(bool isNonVirtualBaseType, + RecordDecl::field_iterator Field, RecordDecl::field_iterator FieldEnd) { if (isDiscreteBitFieldABI()) { // Run stores the first element of the current run of bitfields. FieldEnd is @@ -505,6 +511,10 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field, bitsToCharUnits(Context.getTargetInfo().getRegisterWidth()); unsigned CharBits = Context.getCharWidth(); + // Limit of useable tail padding at end of the record. Computed lazily and + // cached here. + CharUnits ScissorOffset = CharUnits::Zero(); + // Data about the start of the span we're accumulating to create an access // unit from. Begin is the first bitfield of the span. If Begin is FieldEnd, // we've not got a current span. 
The span starts at the BeginOffset character @@ -630,10 +640,14 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field, LimitOffset = bitsToCharUnits(getFieldBitOffset(*Probe)); goto FoundLimit; } - // We reached the end of the fields. We can't necessarily use tail - // padding in C++ structs, so the NonVirtual size is what we must - // use there. - LimitOffset = RD ? Layout.getNonVirtualSize() : Layout.getDataSize(); + // We reached the end of the fields, determine the bounds of useable + // tail padding. As this can be complex for C++, we cache the result. + if (ScissorOffset.isZero()) { + ScissorOffset = calculateTailClippingOffset(isNonVirtualBaseType); + assert(!ScissorOffset.isZero() && "Tail clipping at zero"); + } + + LimitOffset = ScissorOffset; FoundLimit:; CharUnits TypeSize = getSize(Type); @@ -838,13 +852,17 @@ void CGRecordLowering::accumulateVPtrs() { llvm::PointerType::getUnqual(Types.getLLVMContext()))); } -void CGRecordLowering::accumulateVBases() { +CharUnits +CGRecordLowering::calculateTailClippingOffset(bool isNonVirtualBaseType) const { + if (!RD) + return Layout.getDataSize(); + CharUnits ScissorOffset = Layout.getNonVirtualSize(); // In the itanium ABI, it's possible to place a vbase at a dsize that is // smaller than the nvsize. Here we check to see if such a base is placed // before the nvsize and set the scissor offset to that, instead of the // nvsize. 
- if (isOverlappingVBaseABI()) + if (!isNonVirtualBaseType && isOverlappingVBaseABI()) for (const auto &Base : RD->vbases()) { const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl(); if (BaseDecl->isEmpty()) @@ -856,8 +874,13 @@ void CGRecordLowering::accumulateVBases() { ScissorOffset = std::min(ScissorOffset, Layout.getVBaseClassOffset(BaseDecl)); } - Members.push_back(MemberInfo(ScissorOffset, MemberInfo::Scissor, nullptr, - RD)); + + return ScissorOffset; +} + +void CGRecordLowering::accumulateVBases() { + Members.push_back(MemberInfo(calculateTailClippingOffset(false), + MemberInfo::Scissor, nullptr, RD)); for (const auto &Base : RD->vbases()) { const CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl(); if (BaseDecl->isEmpty()) @@ -882,7 +905,7 @@ void CGRecordLowering::accumulateVBases() { } bool CGRecordLowering::hasOwnStorage(const CXXRecordDecl *Decl, - const CXXRecordDecl *Query) { + const CXXRecordDecl *Query) const { const ASTRecordLayout &DeclLayout = Context.getASTRecordLayout(Decl); if (DeclLayout.isPrimaryBaseVirtual() && DeclLayout.getPrimaryBase() == Query) return false; diff --git a/clang/test/CodeGenCXX/bitfield-access-tail.cpp b/clang/test/CodeGenCXX/bitfield-access-tail.cpp index 68716fdf3b1daa..1539e17cad4369 100644 --- a/clang/test/CodeGenCXX/bitfield-access-tail.cpp +++ b/clang/test/CodeGenCXX/bitfield-access-tail.cpp @@ -2,45 +2,45 @@ // Configs that have cheap unaligned access // Little Endian -// RUN: %clang_cc1 -triple=aarch64-apple-darwin %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT %s -// RUN: %clang_cc1 -triple=aarch64-linux-gnu %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT %s +// RUN: %clang_cc1 -triple=aarch64-apple-darwin %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT,LAYOUT64 %s +// RUN: %clang_cc1 -triple=aarch64-linux-gnu %s -emit-llvm -o /dev/null 
-fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT,LAYOUT64 %s // RUN: %clang_cc1 -triple=arm-apple-darwin %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT-DWN32 %s -// RUN: %clang_cc1 -triple=arm-none-eabi %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT %s -// RUN: %clang_cc1 -triple=i686-linux-gnu %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT %s -// RUN: %clang_cc1 -triple=loongarch64-elf %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT %s -// RUN: %clang_cc1 -triple=powerpcle-linux-gnu %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT %s -// RUN: %clang_cc1 -triple=ve-elf %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT %s -// RUN: %clang_cc1 -triple=wasm32 %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT %s -// RUN: %clang_cc1 -triple=wasm64 %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT %s -// RUN: %clang_cc1 -triple=x86_64-linux-gnu %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT %s +// RUN: %clang_cc1 -triple=arm-none-eabi %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT,LAYOUT32 %s +// RUN: %clang_cc1 -triple=i686-linux-gnu %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT,LAYOUT32 %s +// RUN: %clang_cc1 -triple=loongarch64-elf %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT,LAYOUT64 %s +// RUN: %clang_cc1 -triple=powerpcle-linux-gnu %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT,LAYOUT32 %s +// RUN: 
%clang_cc1 -triple=ve-elf %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT,LAYOUT64 %s +// RUN: %clang_cc1 -triple=wasm32 %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT,LAYOUT32 %s +// RUN: %clang_cc1 -triple=wasm64 %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT,LAYOUT64 %s +// RUN: %clang_cc1 -triple=x86_64-linux-gnu %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT,LAYOUT64 %s // Big Endian -// RUN: %clang_cc1 -triple=powerpc-linux-gnu %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT %s -// RUN: %clang_cc1 -triple=powerpc64-linux-gnu %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT %s -// RUN: %clang_cc1 -triple=systemz %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT %s +// RUN: %clang_cc1 -triple=powerpc-linux-gnu %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT,LAYOUT32 %s +// RUN: %clang_cc1 -triple=powerpc64-linux-gnu %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT,LAYOUT64 %s +// RUN: %clang_cc1 -triple=systemz %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT,LAYOUT64 %s // Configs that have expensive unaligned access // Little Endian -// RUN: %clang_cc1 -triple=amdgcn-elf %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT %s -// RUN: %clang_cc1 -triple=arc-elf %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT %s -// RUN: %clang_cc1 -triple=bpf %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT %s -// RUN: %clang_cc1 -triple=csky %s 
-emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT %s -// RUN: %clang_cc1 -triple=hexagon-elf %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT %s -// RUN: %clang_cc1 -triple=le64-elf %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT %s -// RUN: %clang_cc1 -triple=loongarch32-elf %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT %s -// RUN: %clang_cc1 -triple=nvptx-elf %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT %s -// RUN: %clang_cc1 -triple=riscv32 %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT %s -// RUN: %clang_cc1 -triple=riscv64 %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT %s -// RUN: %clang_cc1 -triple=spir-elf %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT %s -// RUN: %clang_cc1 -triple=xcore-none-elf %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT %s +// RUN: %clang_cc1 -triple=amdgcn-elf %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT,LAYOUT64 %s +// RUN: %clang_cc1 -triple=arc-elf %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT,LAYOUT32 %s +// RUN: %clang_cc1 -triple=bpf %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT,LAYOUT64 %s +// RUN: %clang_cc1 -triple=csky %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT,LAYOUT32 %s +// RUN: %clang_cc1 -triple=hexagon-elf %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT,LAYOUT32 %s +// RUN: %clang_cc1 -triple=le64-elf %s -emit-llvm 
-o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT,LAYOUT64 %s +// RUN: %clang_cc1 -triple=loongarch32-elf %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT,LAYOUT32 %s +// RUN: %clang_cc1 -triple=nvptx-elf %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT,LAYOUT32 %s +// RUN: %clang_cc1 -triple=riscv32 %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT,LAYOUT32 %s +// RUN: %clang_cc1 -triple=riscv64 %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT,LAYOUT64 %s +// RUN: %clang_cc1 -triple=spir-elf %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT,LAYOUT32 %s +// RUN: %clang_cc1 -triple=xcore-none-elf %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT,LAYOUT32 %s // Big endian -// RUN: %clang_cc1 -triple=lanai-elf %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT %s -// RUN: %clang_cc1 -triple=m68k-elf %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT %s -// RUN: %clang_cc1 -triple=mips-elf %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT %s -// RUN: %clang_cc1 -triple=mips64-elf %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT %s -// RUN: %clang_cc1 -triple=sparc-elf %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT %s -// RUN: %clang_cc1 -triple=tce-elf %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT %s +// RUN: %clang_cc1 -triple=lanai-elf %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT,LAYOUT32 %s +// RUN: 
%clang_cc1 -triple=m68k-elf %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT,LAYOUT32 %s +// RUN: %clang_cc1 -triple=mips-elf %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT,LAYOUT32 %s +// RUN: %clang_cc1 -triple=mips64-elf %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT,LAYOUT64 %s +// RUN: %clang_cc1 -triple=sparc-elf %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT,LAYOUT32 %s +// RUN: %clang_cc1 -triple=tce-elf %s -emit-llvm -o /dev/null -fdump-record-layouts-simple | FileCheck --check-prefixes CHECK,LAYOUT,LAYOUT32 %s // Can use tail padding struct Pod { @@ -113,3 +113,45 @@ struct __attribute__((packed)) PNonPod { // LAYOUT-DWN32-NEXT: + +struct __attribute__((aligned(4))) Empty {} empty; + +struct Char { char a; } cbase; +struct D : virtual Char { + [[no_unique_address]] Empty e0; + [[no_unique_address]] Empty e1; + unsigned a : 24; // keep as 24bits +} d; +// CHECK-LABEL: LLVMType:%struct.D = +// LAYOUT64-SAME: type <{ ptr, [3 x i8], %struct.Char, [4 x i8] }> +// LAYOUT32-SAME: type { ptr, [3 x i8], %struct.Char } +// LAYOUT-DWN32-SAME: type { ptr, [3 x i8], %struct.Char } +// CHECK-NEXT: NonVirtualBaseLLVMType: +// LAYOUT64-SAME: %struct.D.base = type <{ ptr, i32 }> +// LAYOUT32-SAME: %struct.D = type { ptr, [3 x i8], %struct.Char } +// LAYOUT-DWN32-SAME: %struct.D = type { ptr, [3 x i8], %struct.Char } +// CHECK: BitFields:[ +// LAYOUT-NEXT: + +struct Int { int a; } ibase; +struct E : virtual Int { + [[no_unique_address]] Empty e0; + [[no_unique_address]] Empty e1; + unsigned a : 24; // expand to 32 +} e; +// CHECK-LABEL: LLVMType:%struct.E = +// LAYOUT64-SAME: type <{ ptr, i32, %struct.Int }> +// LAYOUT32-SAME: type { ptr, i32, %struct.Int } +// LAYOUT-DWN32-SAME: type { ptr, i32, %struct.Int } +// CHECK-NEXT: NonVirtualBaseLLVMType:%struct.E.base = +// 
LAYOUT64-SAME: type <{ ptr, i32 }> +// LAYOUT32-SAME: type { ptr, i32 } +// LAYOUT-DWN32-SAME: type { ptr, i32 } +// CHECK: BitFields:[ +// LAYOUT-NEXT: From ed6edf262d9061ce3c024754c4981299b5184ee2 Mon Sep 17 00:00:00 2001 From: Christopher Ferris Date: Mon, 1 Apr 2024 13:35:29 -0700 Subject: [PATCH 037/201] [scudo] Change isPowerOfTwo macro to return false for zero. (#87120) Clean-up all of the calls and remove the redundant == 0 checks. There is only one small visible change. For non-Android, the memalign function will now fail if alignment is zero. Before this would have passed. --- compiler-rt/lib/scudo/standalone/common.h | 6 +++++- compiler-rt/lib/scudo/standalone/stack_depot.h | 4 ++-- compiler-rt/lib/scudo/standalone/wrappers_c_checks.h | 6 ++---- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/compiler-rt/lib/scudo/standalone/common.h b/compiler-rt/lib/scudo/standalone/common.h index ae45683f1ee381..151fbd317e74d3 100644 --- a/compiler-rt/lib/scudo/standalone/common.h +++ b/compiler-rt/lib/scudo/standalone/common.h @@ -28,7 +28,11 @@ template inline Dest bit_cast(const Source &S) { return D; } -inline constexpr bool isPowerOfTwo(uptr X) { return (X & (X - 1)) == 0; } +inline constexpr bool isPowerOfTwo(uptr X) { + if (X == 0) + return false; + return (X & (X - 1)) == 0; +} inline constexpr uptr roundUp(uptr X, uptr Boundary) { DCHECK(isPowerOfTwo(Boundary)); diff --git a/compiler-rt/lib/scudo/standalone/stack_depot.h b/compiler-rt/lib/scudo/standalone/stack_depot.h index 98cd9707a64613..0176c40aa899df 100644 --- a/compiler-rt/lib/scudo/standalone/stack_depot.h +++ b/compiler-rt/lib/scudo/standalone/stack_depot.h @@ -103,7 +103,7 @@ class alignas(atomic_u64) StackDepot { // Ensure that RingSize, RingMask and TabMask are set up in a way that // all accesses are within range of BufSize. 
bool isValid(uptr BufSize) const { - if (RingSize == 0 || !isPowerOfTwo(RingSize)) + if (!isPowerOfTwo(RingSize)) return false; uptr RingBytes = sizeof(atomic_u64) * RingSize; if (RingMask + 1 != RingSize) @@ -112,7 +112,7 @@ class alignas(atomic_u64) StackDepot { if (TabMask == 0) return false; uptr TabSize = TabMask + 1; - if (TabSize == 0 || !isPowerOfTwo(TabSize)) + if (!isPowerOfTwo(TabSize)) return false; uptr TabBytes = sizeof(atomic_u32) * TabSize; diff --git a/compiler-rt/lib/scudo/standalone/wrappers_c_checks.h b/compiler-rt/lib/scudo/standalone/wrappers_c_checks.h index 9cd48e82792e0d..d0288699cf1b1f 100644 --- a/compiler-rt/lib/scudo/standalone/wrappers_c_checks.h +++ b/compiler-rt/lib/scudo/standalone/wrappers_c_checks.h @@ -31,15 +31,13 @@ inline void *setErrnoOnNull(void *Ptr) { // Checks aligned_alloc() parameters, verifies that the alignment is a power of // two and that the size is a multiple of alignment. inline bool checkAlignedAllocAlignmentAndSize(uptr Alignment, uptr Size) { - return Alignment == 0 || !isPowerOfTwo(Alignment) || - !isAligned(Size, Alignment); + return !isPowerOfTwo(Alignment) || !isAligned(Size, Alignment); } // Checks posix_memalign() parameters, verifies that alignment is a power of two // and a multiple of sizeof(void *). inline bool checkPosixMemalignAlignment(uptr Alignment) { - return Alignment == 0 || !isPowerOfTwo(Alignment) || - !isAligned(Alignment, sizeof(void *)); + return !isPowerOfTwo(Alignment) || !isAligned(Alignment, sizeof(void *)); } // Returns true if calloc(Size, N) overflows on Size*N calculation. 
Use a From e93b5f5a4776ffea12d03652559dfdf8d421184c Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Mon, 1 Apr 2024 13:05:34 -0700 Subject: [PATCH 038/201] [ubsan][NFC] Remove recently added `cl::init(false)` Extracted from #84858 --- clang/lib/CodeGen/BackendUtil.cpp | 7 +++---- clang/lib/CodeGen/CGExpr.cpp | 3 +-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 82b30b8d815629..1220c575d1df9f 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -101,20 +101,19 @@ namespace llvm { extern cl::opt PrintPipelinePasses; cl::opt ClRemoveTraps("clang-remove-traps", cl::Optional, - cl::desc("Insert remove-traps pass."), - cl::init(false)); + cl::desc("Insert remove-traps pass.")); // Experiment to move sanitizers earlier. static cl::opt ClSanitizeOnOptimizerEarlyEP( "sanitizer-early-opt-ep", cl::Optional, - cl::desc("Insert sanitizers on OptimizerEarlyEP."), cl::init(false)); + cl::desc("Insert sanitizers on OptimizerEarlyEP.")); extern cl::opt ProfileCorrelate; // Re-link builtin bitcodes after optimization cl::opt ClRelinkBuiltinBitcodePostop( "relink-builtin-bitcode-postop", cl::Optional, - cl::desc("Re-link builtin bitcodes after optimization."), cl::init(false)); + cl::desc("Re-link builtin bitcodes after optimization.")); } // namespace llvm namespace { diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index e0d5575d57d02d..54432353e7420d 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -56,8 +56,7 @@ using namespace CodeGen; // Experiment to make sanitizers easier to debug static llvm::cl::opt ClSanitizeDebugDeoptimization( "ubsan-unique-traps", llvm::cl::Optional, - llvm::cl::desc("Deoptimize traps for UBSAN so there is 1 trap per check"), - llvm::cl::init(false)); + llvm::cl::desc("Deoptimize traps for UBSAN so there is 1 trap per check")); 
//===--------------------------------------------------------------------===// // Miscellaneous Helper Methods From b8cc3ba409dc850776f37e27613bf74f5a80d66a Mon Sep 17 00:00:00 2001 From: Lei Wang Date: Mon, 1 Apr 2024 13:54:54 -0700 Subject: [PATCH 039/201] [PseudoProbe] Extend to skip instrumenting probe into the dests of invoke (#79919) As before we only skip instrumenting probe of `unwind`(`KnownColdBlock`) block, this PR extends to skip the both EH flow from `invoke`, i.e. also skip the `normal` dest. For more contexts: when doing call-to-invoke conversion, the block is split by the `invoke` and two extra blocks(`normal` and `unwind`) are added. With this PR, the instrumentation is the same as the one before the call-to-invoke conversion. One significant benefit is this can help mitigate the "unstable IR" issue(https://discourse.llvm.org/t/ipo-for-linkonce-odr-functions/69404), the two versions now are on the same probe instrumentation, expected to be the same checksum. To achieve the same checksum, some tweaks is needed: - Now it also skips incrementing the probe ID for the skipped probe. - The checksum is also computed based on the CFG that skips the EH edges. We observed this fixes ~5% mismatched samples. 
--- llvm/include/llvm/Analysis/EHUtils.h | 1 - .../llvm/Transforms/IPO/SampleProfileProbe.h | 13 +- .../lib/Transforms/IPO/SampleProfileProbe.cpp | 121 ++++++++++++-- .../ThinLTO/X86/pseudo-probe-desc-import.ll | 4 +- .../SampleProfile/pseudo-probe-eh.ll | 2 +- .../SampleProfile/pseudo-probe-invoke.ll | 155 ++++++++++++++++++ 6 files changed, 276 insertions(+), 20 deletions(-) create mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-invoke.ll diff --git a/llvm/include/llvm/Analysis/EHUtils.h b/llvm/include/llvm/Analysis/EHUtils.h index f2ff6cbd2e9036..3ad0878bd64f88 100644 --- a/llvm/include/llvm/Analysis/EHUtils.h +++ b/llvm/include/llvm/Analysis/EHUtils.h @@ -79,7 +79,6 @@ static void computeEHOnlyBlocks(FunctionT &F, DenseSet &EHBlocks) { } } - EHBlocks.clear(); for (auto Entry : Statuses) { if (Entry.second == EH) EHBlocks.insert(Entry.first); diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h index 0f2729a9462de2..03aa93ce6bd387 100644 --- a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h +++ b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h @@ -81,8 +81,17 @@ class SampleProfileProber { uint64_t getFunctionHash() const { return FunctionHash; } uint32_t getBlockId(const BasicBlock *BB) const; uint32_t getCallsiteId(const Instruction *Call) const; - void computeCFGHash(); - void computeProbeIdForBlocks(); + void findUnreachableBlocks(DenseSet &BlocksToIgnore); + void findInvokeNormalDests(DenseSet &InvokeNormalDests); + void computeBlocksToIgnore(DenseSet &BlocksToIgnore, + DenseSet &BlocksAndCallsToIgnore); + void computeProbeIdForCallsites( + const DenseSet &BlocksAndCallsToIgnore); + const Instruction * + getOriginalTerminator(const BasicBlock *Head, + const DenseSet &BlocksToIgnore); + void computeCFGHash(const DenseSet &BlocksToIgnore); + void computeProbeIdForBlocks(const DenseSet &BlocksToIgnore); void computeProbeIdForCallsites(); Function *F; diff --git 
a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp index 090e5560483edb..4d0fa24bd57ca4 100644 --- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp @@ -173,21 +173,114 @@ SampleProfileProber::SampleProfileProber(Function &Func, BlockProbeIds.clear(); CallProbeIds.clear(); LastProbeId = (uint32_t)PseudoProbeReservedId::Last; - computeProbeIdForBlocks(); - computeProbeIdForCallsites(); - computeCFGHash(); + + DenseSet BlocksToIgnore; + DenseSet BlocksAndCallsToIgnore; + computeBlocksToIgnore(BlocksToIgnore, BlocksAndCallsToIgnore); + + computeProbeIdForBlocks(BlocksToIgnore); + computeProbeIdForCallsites(BlocksAndCallsToIgnore); + computeCFGHash(BlocksToIgnore); +} + +// Two purposes to compute the blocks to ignore: +// 1. Reduce the IR size. +// 2. Make the instrumentation(checksum) stable. e.g. the frontend may +// generate unstable IR while optimizing nounwind attribute, some versions are +// optimized with the call-to-invoke conversion, while other versions do not. +// This discrepancy in probe ID could cause profile mismatching issues. +// Note that those ignored blocks are either cold blocks or new split blocks +// whose original blocks are instrumented, so it shouldn't degrade the profile +// quality. +void SampleProfileProber::computeBlocksToIgnore( + DenseSet &BlocksToIgnore, + DenseSet &BlocksAndCallsToIgnore) { + // Ignore the cold EH and unreachable blocks and calls. + computeEHOnlyBlocks(*F, BlocksAndCallsToIgnore); + findUnreachableBlocks(BlocksAndCallsToIgnore); + + BlocksToIgnore.insert(BlocksAndCallsToIgnore.begin(), + BlocksAndCallsToIgnore.end()); + + // Handle the call-to-invoke conversion case: make sure that the probe id and + // callsite id are consistent before and after the block split. For block + // probe, we only keep the head block probe id and ignore the block ids of the + // normal dests. 
For callsite probe, it's different from block probe, there is + // no additional callsite in the normal dests, so we don't ignore the + // callsites. + findInvokeNormalDests(BlocksToIgnore); +} + +// Unreachable blocks and calls are always cold, ignore them. +void SampleProfileProber::findUnreachableBlocks( + DenseSet &BlocksToIgnore) { + for (auto &BB : *F) { + if (&BB != &F->getEntryBlock() && pred_size(&BB) == 0) + BlocksToIgnore.insert(&BB); + } +} + +// In call-to-invoke conversion, basic block can be split into multiple blocks, +// only instrument probe in the head block, ignore the normal dests. +void SampleProfileProber::findInvokeNormalDests( + DenseSet &InvokeNormalDests) { + for (auto &BB : *F) { + auto *TI = BB.getTerminator(); + if (auto *II = dyn_cast(TI)) { + auto *ND = II->getNormalDest(); + InvokeNormalDests.insert(ND); + + // The normal dest and the try/catch block are connected by an + // unconditional branch. + while (pred_size(ND) == 1) { + auto *Pred = *pred_begin(ND); + if (succ_size(Pred) == 1) { + InvokeNormalDests.insert(Pred); + ND = Pred; + } else + break; + } + } + } +} + +// The call-to-invoke conversion splits the original block into a list of blocks, +// we need to compute the hash using the original block's successors to keep the +// CFG Hash consistent. For a given head block, we keep searching the +// successor(normal dest or unconditional branch dest) to find the tail block, +// the tail block's successors are the original block's successors. +const Instruction *SampleProfileProber::getOriginalTerminator( + const BasicBlock *Head, const DenseSet &BlocksToIgnore) { + auto *TI = Head->getTerminator(); + if (auto *II = dyn_cast(TI)) { + return getOriginalTerminator(II->getNormalDest(), BlocksToIgnore); + } else if (succ_size(Head) == 1 && + BlocksToIgnore.contains(*succ_begin(Head))) { + // Go to the unconditional branch dest. 
+ return getOriginalTerminator(*succ_begin(Head), BlocksToIgnore); + } + return TI; } // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index // value of each BB in the CFG. The higher 32 bits record the number of edges // preceded by the number of indirect calls. // This is derived from FuncPGOInstrumentation::computeCFGHash(). -void SampleProfileProber::computeCFGHash() { +void SampleProfileProber::computeCFGHash( + const DenseSet &BlocksToIgnore) { std::vector Indexes; JamCRC JC; for (auto &BB : *F) { - for (BasicBlock *Succ : successors(&BB)) { + if (BlocksToIgnore.contains(&BB)) + continue; + + auto *TI = getOriginalTerminator(&BB, BlocksToIgnore); + for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { + auto *Succ = TI->getSuccessor(I); auto Index = getBlockId(Succ); + // Ignore ignored-block(zero ID) to avoid unstable checksum. + if (Index == 0) + continue; for (int J = 0; J < 4; J++) Indexes.push_back((uint8_t)(Index >> (J * 8))); } @@ -207,23 +300,23 @@ void SampleProfileProber::computeCFGHash() { << ", Hash = " << FunctionHash << "\n"); } -void SampleProfileProber::computeProbeIdForBlocks() { - DenseSet KnownColdBlocks; - computeEHOnlyBlocks(*F, KnownColdBlocks); - // Insert pseudo probe to non-cold blocks only. This will reduce IR size as - // well as the binary size while retaining the profile quality. 
+void SampleProfileProber::computeProbeIdForBlocks( + const DenseSet &BlocksToIgnore) { for (auto &BB : *F) { - ++LastProbeId; - if (!KnownColdBlocks.contains(&BB)) - BlockProbeIds[&BB] = LastProbeId; + if (BlocksToIgnore.contains(&BB)) + continue; + BlockProbeIds[&BB] = ++LastProbeId; } } -void SampleProfileProber::computeProbeIdForCallsites() { +void SampleProfileProber::computeProbeIdForCallsites( + const DenseSet &BlocksAndCallsToIgnore) { LLVMContext &Ctx = F->getContext(); Module *M = F->getParent(); for (auto &BB : *F) { + if (BlocksAndCallsToIgnore.contains(&BB)) + continue; for (auto &I : BB) { if (!isa(I)) continue; diff --git a/llvm/test/ThinLTO/X86/pseudo-probe-desc-import.ll b/llvm/test/ThinLTO/X86/pseudo-probe-desc-import.ll index 21dd8c0fe92414..f915aaccc06e17 100644 --- a/llvm/test/ThinLTO/X86/pseudo-probe-desc-import.ll +++ b/llvm/test/ThinLTO/X86/pseudo-probe-desc-import.ll @@ -12,8 +12,8 @@ ; RUN: llvm-lto -thinlto-action=import %t3.bc -thinlto-index=%t3.index.bc -o /dev/null 2>&1 | FileCheck %s --check-prefix=WARN -; CHECK-NOT: {i64 6699318081062747564, i64 4294967295, !"foo" -; CHECK: !{i64 -2624081020897602054, i64 281479271677951, !"main" +; CHECK-NOT: {i64 6699318081062747564, i64 [[#]], !"foo" +; CHECK: !{i64 -2624081020897602054, i64 [[#]], !"main" ; WARN: warning: Pseudo-probe ignored: source module '{{.*}}' is compiled with -fpseudo-probe-for-profiling while destination module '{{.*}}' is not diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-eh.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-eh.ll index 697ef44fb7ed71..9954914bca4380 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-eh.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-eh.ll @@ -18,7 +18,7 @@ entry: to label %ret unwind label %lpad ret: -; CHECK: call void @llvm.pseudoprobe +; CHECK-NOT: call void @llvm.pseudoprobe ret void lpad: ; preds = %entry diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-invoke.ll 
b/llvm/test/Transforms/SampleProfile/pseudo-probe-invoke.ll new file mode 100644 index 00000000000000..822ab403dee297 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-invoke.ll @@ -0,0 +1,155 @@ +; REQUIRES: x86_64-linux +; RUN: opt < %s -passes=pseudo-probe -S -o - | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +$__clang_call_terminate = comdat any + +@x = dso_local global i32 0, align 4, !dbg !0 + +; Function Attrs: mustprogress noinline nounwind uwtable +define dso_local void @_Z3barv() #0 personality ptr @__gxx_personality_v0 !dbg !14 { +entry: +; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 1 + %0 = load volatile i32, ptr @x, align 4, !dbg !17, !tbaa !19 + %tobool = icmp ne i32 %0, 0, !dbg !17 + br i1 %tobool, label %if.then, label %if.else, !dbg !23 + +if.then: ; preds = %entry +; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 2 + invoke void @_Z3foov() + to label %invoke.cont unwind label %terminate.lpad, !dbg !24 + +invoke.cont: ; preds = %if.then +; CHECK-NOT: call void @llvm.pseudoprobe(i64 -1069303473483922844, + invoke void @_Z3bazv() + to label %invoke.cont1 unwind label %terminate.lpad, !dbg !26 + +invoke.cont1: ; preds = %invoke.cont +; CHECK-NOT: call void @llvm.pseudoprobe(i64 -1069303473483922844, + br label %if.end, !dbg !27 + +if.else: ; preds = %entry +; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 3 + invoke void @_Z3foov() + to label %invoke.cont2 unwind label %terminate.lpad, !dbg !28 + +invoke.cont2: ; preds = %if.else +; CHECK-NOT: call void @llvm.pseudoprobe(i64 -1069303473483922844, + br label %if.end + +if.end: ; preds = %invoke.cont2, %invoke.cont1 +; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 4 + invoke void @_Z3foov() + to label %invoke.cont3 unwind label %terminate.lpad, !dbg !29 + +invoke.cont3: ; preds = %if.end +; 
CHECK-NOT: call void @llvm.pseudoprobe(i64 -1069303473483922844, + %1 = load volatile i32, ptr @x, align 4, !dbg !30, !tbaa !19 + %tobool4 = icmp ne i32 %1, 0, !dbg !30 + br i1 %tobool4, label %if.then5, label %if.end6, !dbg !32 + +if.then5: ; preds = %invoke.cont3 +; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 5 + %2 = load volatile i32, ptr @x, align 4, !dbg !33, !tbaa !19 + %inc = add nsw i32 %2, 1, !dbg !33 + store volatile i32 %inc, ptr @x, align 4, !dbg !33, !tbaa !19 + br label %if.end6, !dbg !35 + +if.end6: ; preds = %if.then5, %invoke.cont3 +; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 6 + ret void, !dbg !36 + +terminate.lpad: ; preds = %if.end, %if.else, %invoke.cont, %if.then +; CHECK-NOT: call void @llvm.pseudoprobe(i64 -1069303473483922844, + %3 = landingpad { ptr, i32 } + catch ptr null, !dbg !24 + %4 = extractvalue { ptr, i32 } %3, 0, !dbg !24 + call void @__clang_call_terminate(ptr %4) #3, !dbg !24 + unreachable, !dbg !24 +} + +; Function Attrs: mustprogress noinline nounwind uwtable +define dso_local void @_Z3foov() #0 !dbg !37 { +entry: + ret void, !dbg !38 +} + +declare i32 @__gxx_personality_v0(...) 
+ +; Function Attrs: noinline noreturn nounwind uwtable +define linkonce_odr hidden void @__clang_call_terminate(ptr noundef %0) #1 comdat { + %2 = call ptr @__cxa_begin_catch(ptr %0) #4 + call void @_ZSt9terminatev() #3 + unreachable +} + +declare ptr @__cxa_begin_catch(ptr) + +declare void @_ZSt9terminatev() + +; Function Attrs: mustprogress noinline nounwind uwtable +define dso_local void @_Z3bazv() #0 !dbg !39 { +entry: + ret void, !dbg !40 +} + +; CHECK: ![[#]] = !{i64 -3270123626113159616, i64 4294967295, !"_Z3bazv"} + +attributes #0 = { mustprogress noinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { noinline noreturn nounwind uwtable "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #2 = { mustprogress noinline norecurse nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #3 = { noreturn nounwind } +attributes #4 = { nounwind } + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!7, !8, !9, !10, !11, !12} +!llvm.ident = !{!13} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 19.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "test.cpp", directory: "/home", checksumkind: CSK_MD5, checksum: "a4c7b0392f3fd9c8ebb85065159dbb02") +!4 = !{!0} +!5 = 
!DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6) +!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!7 = !{i32 7, !"Dwarf Version", i32 5} +!8 = !{i32 2, !"Debug Info Version", i32 3} +!9 = !{i32 1, !"wchar_size", i32 4} +!10 = !{i32 8, !"PIC Level", i32 2} +!11 = !{i32 7, !"PIE Level", i32 2} +!12 = !{i32 7, !"uwtable", i32 2} +!13 = !{!"clang version 19.0.0"} +!14 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", scope: !3, file: !3, line: 4, type: !15, scopeLine: 4, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!15 = !DISubroutineType(types: !16) +!16 = !{null} +!17 = !DILocation(line: 5, column: 6, scope: !18) +!18 = distinct !DILexicalBlock(scope: !14, file: !3, line: 5, column: 6) +!19 = !{!20, !20, i64 0} +!20 = !{!"int", !21, i64 0} +!21 = !{!"omnipotent char", !22, i64 0} +!22 = !{!"Simple C++ TBAA"} +!23 = !DILocation(line: 5, column: 6, scope: !14) +!24 = !DILocation(line: 6, column: 5, scope: !25) +!25 = distinct !DILexicalBlock(scope: !18, file: !3, line: 5, column: 9) +!26 = !DILocation(line: 7, column: 5, scope: !25) +!27 = !DILocation(line: 8, column: 3, scope: !25) +!28 = !DILocation(line: 9, column: 5, scope: !18) +!29 = !DILocation(line: 11, column: 3, scope: !14) +!30 = !DILocation(line: 12, column: 6, scope: !31) +!31 = distinct !DILexicalBlock(scope: !14, file: !3, line: 12, column: 6) +!32 = !DILocation(line: 12, column: 6, scope: !14) +!33 = !DILocation(line: 13, column: 5, scope: !34) +!34 = distinct !DILexicalBlock(scope: !31, file: !3, line: 12, column: 9) +!35 = !DILocation(line: 14, column: 5, scope: !34) +!36 = !DILocation(line: 17, column: 1, scope: !14) +!37 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !3, file: !3, line: 19, type: !15, scopeLine: 19, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!38 = !DILocation(line: 19, column: 13, scope: !37) 
+!39 = distinct !DISubprogram(name: "baz", linkageName: "_Z3bazv", scope: !3, file: !3, line: 18, type: !15, scopeLine: 18, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!40 = !DILocation(line: 18, column: 13, scope: !39) +!41 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 22, type: !42, scopeLine: 22, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2) +!42 = !DISubroutineType(types: !43) +!43 = !{!6} +!44 = !DILocation(line: 23, column: 3, scope: !41) +!45 = !DILocation(line: 24, column: 1, scope: !41) From f2f01f6b03aa81d5bdbf841a88f8853620c6902b Mon Sep 17 00:00:00 2001 From: Jeff Niu Date: Mon, 1 Apr 2024 13:59:53 -0700 Subject: [PATCH 040/201] [llvm][Support] Use `thread_local` caching for llvm::get_threadid() query on Apple systems (#87219) I was profiling our compiler and noticed that `llvm::get_threadid` was at the top of the hotlist, taking up a surprising 5% (7 seconds) in the profile trace. It seems that computing this on MacOS systems is non-trivial, so cache the result in a thread_local. Co-authored-by: Mehdi Amini --- llvm/lib/Support/Unix/Threading.inc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Support/Unix/Threading.inc b/llvm/lib/Support/Unix/Threading.inc index 55e7dcfa4678cf..839c00c5ebbf96 100644 --- a/llvm/lib/Support/Unix/Threading.inc +++ b/llvm/lib/Support/Unix/Threading.inc @@ -115,8 +115,11 @@ uint64_t llvm::get_threadid() { // Calling "mach_thread_self()" bumps the reference count on the thread // port, so we need to deallocate it. mach_task_self() doesn't bump the ref // count. 
- thread_port_t Self = mach_thread_self(); - mach_port_deallocate(mach_task_self(), Self); + static thread_local thread_port_t Self = [] { + thread_port_t InitSelf = mach_thread_self(); + mach_port_deallocate(mach_task_self(), InitSelf); + return InitSelf; + }(); return Self; #elif defined(__FreeBSD__) return uint64_t(pthread_getthreadid_np()); From a6caceed8d27d4ebd44c517c3114a36a64ebddfe Mon Sep 17 00:00:00 2001 From: Jordan Rupprecht Date: Mon, 1 Apr 2024 16:02:12 -0500 Subject: [PATCH 041/201] [lldb] Don't crash when attempting to parse breakpoint id `N.` as `N.*` (#87263) We check if the next character after `N.` is `*` before we check its length. Using `split` on the string is cleaner and less error prone than using indices with `find` and `substr`. Note: this does not make `N.` mean anything, it just prevents assertion failures. `N.` is treated the same as an unrecognized breakpoint name: ``` (lldb) breakpoint enable 1 1 breakpoints enabled. (lldb) breakpoint enable 1.* 1 breakpoints enabled. (lldb) breakpoint enable 1. 0 breakpoints enabled. (lldb) breakpoint enable xyz 0 breakpoints enabled. ``` Found via LLDB fuzzers. 
--- lldb/source/Breakpoint/BreakpointIDList.cpp | 48 +++++++++---------- .../TestBreakpointLocations.py | 6 +++ 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/lldb/source/Breakpoint/BreakpointIDList.cpp b/lldb/source/Breakpoint/BreakpointIDList.cpp index 851d074e753588..97af1d40eb7a58 100644 --- a/lldb/source/Breakpoint/BreakpointIDList.cpp +++ b/lldb/source/Breakpoint/BreakpointIDList.cpp @@ -16,6 +16,7 @@ #include "lldb/Utility/StreamString.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" using namespace lldb; using namespace lldb_private; @@ -111,32 +112,27 @@ llvm::Error BreakpointIDList::FindAndReplaceIDRanges( } else { // See if user has specified id.* llvm::StringRef tmp_str = old_args[i].ref(); - size_t pos = tmp_str.find('.'); - if (pos != llvm::StringRef::npos) { - llvm::StringRef bp_id_str = tmp_str.substr(0, pos); - if (BreakpointID::IsValidIDExpression(bp_id_str) && - tmp_str[pos + 1] == '*' && tmp_str.size() == (pos + 2)) { - - BreakpointSP breakpoint_sp; - auto bp_id = BreakpointID::ParseCanonicalReference(bp_id_str); - if (bp_id) - breakpoint_sp = target->GetBreakpointByID(bp_id->GetBreakpointID()); - if (!breakpoint_sp) { - new_args.Clear(); - return llvm::createStringError( - llvm::inconvertibleErrorCode(), - "'%d' is not a valid breakpoint ID.\n", - bp_id->GetBreakpointID()); - } - const size_t num_locations = breakpoint_sp->GetNumLocations(); - for (size_t j = 0; j < num_locations; ++j) { - BreakpointLocation *bp_loc = - breakpoint_sp->GetLocationAtIndex(j).get(); - StreamString canonical_id_str; - BreakpointID::GetCanonicalReference( - &canonical_id_str, bp_id->GetBreakpointID(), bp_loc->GetID()); - new_args.AppendArgument(canonical_id_str.GetString()); - } + auto [prefix, suffix] = tmp_str.split('.'); + if (suffix == "*" && BreakpointID::IsValidIDExpression(prefix)) { + + BreakpointSP breakpoint_sp; + auto bp_id = BreakpointID::ParseCanonicalReference(prefix); + if (bp_id) + breakpoint_sp = 
target->GetBreakpointByID(bp_id->GetBreakpointID()); + if (!breakpoint_sp) { + new_args.Clear(); + return llvm::createStringError(llvm::inconvertibleErrorCode(), + "'%d' is not a valid breakpoint ID.\n", + bp_id->GetBreakpointID()); + } + const size_t num_locations = breakpoint_sp->GetNumLocations(); + for (size_t j = 0; j < num_locations; ++j) { + BreakpointLocation *bp_loc = + breakpoint_sp->GetLocationAtIndex(j).get(); + StreamString canonical_id_str; + BreakpointID::GetCanonicalReference( + &canonical_id_str, bp_id->GetBreakpointID(), bp_loc->GetID()); + new_args.AppendArgument(canonical_id_str.GetString()); } } } diff --git a/lldb/test/API/functionalities/breakpoint/breakpoint_locations/TestBreakpointLocations.py b/lldb/test/API/functionalities/breakpoint/breakpoint_locations/TestBreakpointLocations.py index 8930bea619bb6e..d87e6275f7b51e 100644 --- a/lldb/test/API/functionalities/breakpoint/breakpoint_locations/TestBreakpointLocations.py +++ b/lldb/test/API/functionalities/breakpoint/breakpoint_locations/TestBreakpointLocations.py @@ -293,6 +293,12 @@ def breakpoint_locations_test(self): startstr="3 breakpoints enabled.", ) + # The 'breakpoint enable 1.' command should not crash. + self.expect( + "breakpoint enable 1.", + startstr="0 breakpoints enabled.", + ) + # The 'breakpoint disable 1.1' command should disable 1 location. 
self.expect( "breakpoint disable 1.1", From 03577ced1f55bf96224513f2414bf025d6877fac Mon Sep 17 00:00:00 2001 From: Maksim Panchenko Date: Mon, 1 Apr 2024 14:11:02 -0700 Subject: [PATCH 042/201] [BOLT][NFC] Fix typo --- bolt/include/bolt/Core/BinaryFunction.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index 5089f849128010..bc047fefa3151c 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -1168,7 +1168,7 @@ class BinaryFunction { /// Pass an offset of the entry point in the input binary and a corresponding /// global symbol to the callback function. /// - /// Return true of all callbacks returned true, false otherwise. + /// Return true if all callbacks returned true, false otherwise. bool forEachEntryPoint(EntryPointCallbackTy Callback) const; /// Return MC symbol associated with the end of the function. From 70e189fbc96909d3841dd2bca4a2909345cd826f Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 1 Apr 2024 14:13:56 -0700 Subject: [PATCH 043/201] [libc] fixup ftello test (#87282) Use a seek offset that fits within the file size. This was missed in presubmit because the FILE based stdio tests aren't run in overlay mode; fullbuild is not tested in presubmit. WRITE_SIZE == 11, so using a value of 42 for offseto would cause the expression `WRITE_SIZE - offseto` to evaluate to -31 as an unsigned 64b integer (18446744073709551585ULL). Fixes #86928 --- libc/test/src/stdio/ftell_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libc/test/src/stdio/ftell_test.cpp b/libc/test/src/stdio/ftell_test.cpp index 68a969ed0c30dd..62745e2194be6d 100644 --- a/libc/test/src/stdio/ftell_test.cpp +++ b/libc/test/src/stdio/ftell_test.cpp @@ -39,7 +39,7 @@ class LlvmLibcFTellTest : public LIBC_NAMESPACE::testing::Test { // still return the correct effective offset. 
ASSERT_EQ(size_t(LIBC_NAMESPACE::ftell(file)), WRITE_SIZE); - off_t offseto = 42; + off_t offseto = 5; ASSERT_EQ(0, LIBC_NAMESPACE::fseeko(file, offseto, SEEK_SET)); ASSERT_EQ(LIBC_NAMESPACE::ftello(file), offseto); ASSERT_EQ(0, LIBC_NAMESPACE::fseeko(file, -offseto, SEEK_END)); From 6b136ce738d1acc96d926d7999419867dea16961 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 1 Apr 2024 14:35:39 -0700 Subject: [PATCH 044/201] [workflows] issue-write: Exit early if there are no comments (#87114) This will eliminate some unnecessary REST API calls. --- .github/workflows/issue-write.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/issue-write.yml b/.github/workflows/issue-write.yml index 02a5f7c213e898..f5b84fec17a792 100644 --- a/.github/workflows/issue-write.yml +++ b/.github/workflows/issue-write.yml @@ -31,7 +31,7 @@ jobs: script: | var fs = require('fs'); const comments = JSON.parse(fs.readFileSync('./comments')); - if (!comments) { + if (!comments || comments.length == 0) { return; } From 0478adc97e1a4018d866520cb149b6e6c2a9101a Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 1 Apr 2024 14:58:28 -0700 Subject: [PATCH 045/201] [Object,ELFTypes] Remove TargetEndianness Finish the rename by #86604 --- llvm/include/llvm/Object/ELFTypes.h | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h index 4617b70a2f120a..4ab23e4ea81b1a 100644 --- a/llvm/include/llvm/Object/ELFTypes.h +++ b/llvm/include/llvm/Object/ELFTypes.h @@ -51,7 +51,6 @@ template struct ELFType { using packed = support::detail::packed_endian_specific_integral; public: - static const endianness TargetEndianness = E; static const endianness Endianness = E; static const bool Is64Bits = Is64; From 1d5e5f4d3c68e63ced47ee9b17d62fb995aa1e62 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Mon, 1 Apr 2024 15:06:10 -0700 Subject: [PATCH 046/201] [GISEL][NFC] Fix comment for 
widenScalarToNextPow2 The docstring for this function incorrectly specified when a widening is not performed. This patch adds the additional specification for what happens when the type size is a power of two but it is less than MinSize. --- llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h index 6afaea3f3fc5c6..82e713f30ea31c 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h @@ -879,7 +879,8 @@ class LegalizeRuleSet { } /// Widen the scalar to the next power of two that is at least MinSize. - /// No effect if the type is not a scalar or is a power of two. + /// No effect if the type is a power of two, except if the type is smaller + /// than MinSize, or if the type is a vector type. LegalizeRuleSet &widenScalarToNextPow2(unsigned TypeIdx, unsigned MinSize = 0) { using namespace LegalityPredicates; From 1e15371dd8843dfc52b9435afaa133997c1773d8 Mon Sep 17 00:00:00 2001 From: Mingming Liu Date: Mon, 1 Apr 2024 15:14:49 -0700 Subject: [PATCH 047/201] [ThinLTO][TypeProf] Implement vtable def import (#79381) Add annotated vtable GUID as referenced variables in per function summary, and update bitcode writer to create value-ids for these referenced vtables. - This is the part3 of type profiling work, and described in the "Virtual Table Definition Import" [1] section of the RFC. 
[1] https://github.com/llvm/llvm-project/pull/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 --- llvm/include/llvm/ProfileData/InstrProf.h | 12 +++- .../IndirectCallPromotionAnalysis.cpp | 4 ++ llvm/lib/Analysis/ModuleSummaryAnalysis.cpp | 20 ++++++ llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 13 +++- llvm/lib/ProfileData/InstrProf.cpp | 70 +++++++++++++------ .../thinlto-func-summary-vtableref-pgo.ll | 37 ++++++---- 6 files changed, 120 insertions(+), 36 deletions(-) diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h index fd66c4ed948f36..eb3c10bcba1ca7 100644 --- a/llvm/include/llvm/ProfileData/InstrProf.h +++ b/llvm/include/llvm/ProfileData/InstrProf.h @@ -283,7 +283,7 @@ void annotateValueSite(Module &M, Instruction &Inst, /// Extract the value profile data from \p Inst which is annotated with /// value profile meta data. Return false if there is no value data annotated, -/// otherwise return true. +/// otherwise return true. bool getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, uint32_t MaxNumValueData, @@ -291,6 +291,16 @@ bool getValueProfDataFromInst(const Instruction &Inst, uint32_t &ActualNumValueData, uint64_t &TotalC, bool GetNoICPValue = false); +/// Extract the value profile data from \p Inst and returns them if \p Inst is +/// annotated with value profile data. Returns nullptr otherwise. It's similar +/// to `getValueProfDataFromInst` above except that an array is allocated only +/// after a preliminary checking that the value profiles of kind `ValueKind` +/// exist. +std::unique_ptr +getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, + uint32_t MaxNumValueData, uint32_t &ActualNumValueData, + uint64_t &TotalC, bool GetNoICPValue = false); + inline StringRef getPGOFuncNameMetadataName() { return "PGOFuncName"; } /// Return the PGOFuncName meta data associated with a function. 
diff --git a/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp index ebfa1c8fc08e1c..ab53717eb889a0 100644 --- a/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp +++ b/llvm/lib/Analysis/IndirectCallPromotionAnalysis.cpp @@ -45,6 +45,10 @@ static cl::opt cl::desc("Max number of promotions for a single indirect " "call callsite")); +cl::opt MaxNumVTableAnnotations( + "icp-max-num-vtables", cl::init(6), cl::Hidden, + cl::desc("Max number of vtables annotated for a vtable load instruction.")); + ICallPromotionAnalysis::ICallPromotionAnalysis() { ValueDataArray = std::make_unique(MaxNumPromotions); } diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp index 1f15e94783240a..3ad0bab827a512 100644 --- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp +++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp @@ -82,6 +82,8 @@ static cl::opt ModuleSummaryDotFile( extern cl::opt ScalePartialSampleProfileWorkingSetSize; +extern cl::opt MaxNumVTableAnnotations; + // Walk through the operands of a given User via worklist iteration and populate // the set of GlobalValue references encountered. Invoked either on an // Instruction or a GlobalVariable (which walks its initializer). @@ -124,6 +126,24 @@ static bool findRefEdges(ModuleSummaryIndex &Index, const User *CurUser, Worklist.push_back(Operand); } } + + const Instruction *I = dyn_cast(CurUser); + if (I) { + uint32_t ActualNumValueData = 0; + uint64_t TotalCount = 0; + // MaxNumVTableAnnotations is the maximum number of vtables annotated on + // the instruction. 
+ auto ValueDataArray = + getValueProfDataFromInst(*I, IPVK_VTableTarget, MaxNumVTableAnnotations, + ActualNumValueData, TotalCount); + + if (ValueDataArray.get()) { + for (uint32_t j = 0; j < ActualNumValueData; j++) { + RefEdges.insert(Index.getOrInsertValueInfo(/* VTableGUID = */ + ValueDataArray[j].Value)); + } + } + } return HasBlockAddress; } diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 221eeaae6e2b83..dd554e422516f6 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -203,7 +203,7 @@ class ModuleBitcodeWriterBase : public BitcodeWriterBase { for (const auto &GUIDSummaryLists : *Index) // Examine all summaries for this GUID. for (auto &Summary : GUIDSummaryLists.second.SummaryList) - if (auto FS = dyn_cast(Summary.get())) + if (auto FS = dyn_cast(Summary.get())) { // For each call in the function summary, see if the call // is to a GUID (which means it is for an indirect call, // otherwise we would have a Value for it). If so, synthesize @@ -211,6 +211,15 @@ class ModuleBitcodeWriterBase : public BitcodeWriterBase { for (auto &CallEdge : FS->calls()) if (!CallEdge.first.haveGVs() || !CallEdge.first.getValue()) assignValueId(CallEdge.first.getGUID()); + + // For each referenced variables in the function summary, see if the + // variable is represented by a GUID (as opposed to a symbol to + // declarations or definitions in the module). If so, synthesize a + // value id. 
+ for (auto &RefEdge : FS->refs()) + if (!RefEdge.haveGVs() || !RefEdge.getValue()) + assignValueId(RefEdge.getGUID()); + } } protected: @@ -4188,7 +4197,7 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord( NameVals.push_back(SpecialRefCnts.second); // worefcnt for (auto &RI : FS->refs()) - NameVals.push_back(VE.getValueID(RI.getValue())); + NameVals.push_back(getValueId(RI)); const bool UseRelBFRecord = WriteRelBFToSummary && !F.hasProfileData() && diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp index 90c3cfc45b98ae..95f900d0fff1ca 100644 --- a/llvm/lib/ProfileData/InstrProf.cpp +++ b/llvm/lib/ProfileData/InstrProf.cpp @@ -1271,46 +1271,44 @@ void annotateValueSite(Module &M, Instruction &Inst, Inst.setMetadata(LLVMContext::MD_prof, MDNode::get(Ctx, Vals)); } -bool getValueProfDataFromInst(const Instruction &Inst, - InstrProfValueKind ValueKind, - uint32_t MaxNumValueData, - InstrProfValueData ValueData[], - uint32_t &ActualNumValueData, uint64_t &TotalC, - bool GetNoICPValue) { +MDNode *mayHaveValueProfileOfKind(const Instruction &Inst, + InstrProfValueKind ValueKind) { MDNode *MD = Inst.getMetadata(LLVMContext::MD_prof); if (!MD) - return false; + return nullptr; - unsigned NOps = MD->getNumOperands(); + if (MD->getNumOperands() < 5) + return nullptr; - if (NOps < 5) - return false; - - // Operand 0 is a string tag "VP": MDString *Tag = cast(MD->getOperand(0)); - if (!Tag) - return false; - - if (!Tag->getString().equals("VP")) - return false; + if (!Tag || !Tag->getString().equals("VP")) + return nullptr; // Now check kind: ConstantInt *KindInt = mdconst::dyn_extract(MD->getOperand(1)); if (!KindInt) - return false; + return nullptr; if (KindInt->getZExtValue() != ValueKind) - return false; + return nullptr; + + return MD; +} +static bool getValueProfDataFromInstImpl(const MDNode *const MD, + const uint32_t MaxNumDataWant, + InstrProfValueData ValueData[], + uint32_t &ActualNumValueData, + uint64_t 
&TotalC, bool GetNoICPValue) { + const unsigned NOps = MD->getNumOperands(); // Get total count ConstantInt *TotalCInt = mdconst::dyn_extract(MD->getOperand(2)); if (!TotalCInt) return false; TotalC = TotalCInt->getZExtValue(); - ActualNumValueData = 0; for (unsigned I = 3; I < NOps; I += 2) { - if (ActualNumValueData >= MaxNumValueData) + if (ActualNumValueData >= MaxNumDataWant) break; ConstantInt *Value = mdconst::dyn_extract(MD->getOperand(I)); ConstantInt *Count = @@ -1327,6 +1325,36 @@ bool getValueProfDataFromInst(const Instruction &Inst, return true; } +std::unique_ptr +getValueProfDataFromInst(const Instruction &Inst, InstrProfValueKind ValueKind, + uint32_t MaxNumValueData, uint32_t &ActualNumValueData, + uint64_t &TotalC, bool GetNoICPValue) { + MDNode *MD = mayHaveValueProfileOfKind(Inst, ValueKind); + if (!MD) + return nullptr; + auto ValueDataArray = std::make_unique(MaxNumValueData); + if (!getValueProfDataFromInstImpl(MD, MaxNumValueData, ValueDataArray.get(), + ActualNumValueData, TotalC, GetNoICPValue)) + return nullptr; + return ValueDataArray; +} + +// FIXME: Migrate existing callers to the function above that returns an +// array. 
+bool getValueProfDataFromInst(const Instruction &Inst, + InstrProfValueKind ValueKind, + uint32_t MaxNumValueData, + InstrProfValueData ValueData[], + uint32_t &ActualNumValueData, uint64_t &TotalC, + bool GetNoICPValue) { + MDNode *MD = mayHaveValueProfileOfKind(Inst, ValueKind); + if (!MD) + return false; + return getValueProfDataFromInstImpl(MD, MaxNumValueData, ValueData, + ActualNumValueData, TotalC, + GetNoICPValue); +} + MDNode *getPGOFuncNameMetadata(const Function &F) { return F.getMetadata(getPGOFuncNameMetadataName()); } diff --git a/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll b/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll index 78b175caca85f0..ba3ce9a75ee832 100644 --- a/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll +++ b/llvm/test/Bitcode/thinlto-func-summary-vtableref-pgo.ll @@ -1,20 +1,31 @@ -; RUN: opt -module-summary %s -o %t.o +; Promote at most one function and annotate at most one vtable. +; As a result, only one value (of each relevant kind) shows up in the function +; summary. + +; RUN: opt -module-summary -icp-max-num-vtables=1 -icp-max-prom=1 %s -o %t.o ; RUN: llvm-bcanalyzer -dump %t.o | FileCheck %s ; RUN: llvm-dis -o - %t.o | FileCheck %s --check-prefix=DIS - +; Round trip it through llvm-as +; RUN: llvm-dis -o - %t.o | llvm-as -o - | llvm-dis -o - | FileCheck %s --check-prefix=DIS ; CHECK: ; CHECK-NEXT: +; The `VALUE_GUID` below represents the "_ZTV4Base" referenced by the instruction +; that loads vtable pointers. +; CHECK-NEXT: ; The `VALUE_GUID` below represents the "_ZN4Base4funcEv" referenced by the ; indirect call instruction. -; CHECK-NEXT: +; CHECK-NEXT: +; NOTE vtables and functions from Derived class is dropped because +; `-icp-max-num-vtables` and `-icp-max-prom` are both set to one. 
; has the format [valueid, flags, instcount, funcflags, ; numrefs, rorefcnt, worefcnt, +; m x valueid, ; n x (valueid, hotness+tailcall)] -; CHECK-NEXT: +; CHECK-NEXT: ; CHECK-NEXT: target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" @@ -31,7 +42,6 @@ define i32 @_Z4testP4Base(ptr %0) !prof !15 { !llvm.module.flags = !{!1} - !1 = !{i32 1, !"ProfileSummary", !2} !2 = !{!3, !4, !5, !6, !7, !8, !9, !10} !3 = !{!"ProfileFormat", !"InstrProf"} @@ -48,14 +58,17 @@ define i32 @_Z4testP4Base(ptr %0) !prof !15 { !14 = !{i32 999999, i64 1, i32 2} !15 = !{!"function_entry_count", i32 150} -; 1960855528937986108 is the MD5 hash of _ZTV4Base -!16 = !{!"VP", i32 2, i64 1600, i64 1960855528937986108, i64 1600} -; 5459407273543877811 is the MD5 hash of _ZN4Base4funcEv -!17 = !{!"VP", i32 0, i64 1600, i64 5459407273543877811, i64 1600} +; 1960855528937986108 is the MD5 hash of _ZTV4Base, and +; 13870436605473471591 is the MD5 hash of _ZTV7Derived +!16 = !{!"VP", i32 2, i64 150, i64 1960855528937986108, i64 100, i64 13870436605473471591, i64 50} +; 5459407273543877811 is the MD5 hash of _ZN4Base4funcEv, and +; 6174874150489409711 is the MD5 hash of _ZN7Derived4funcEv +!17 = !{!"VP", i32 0, i64 150, i64 5459407273543877811, i64 100, i64 6174874150489409711, i64 50} ; ModuleSummaryIndex stores map in std::map; so ; global value summares are printed out in the order that gv's guid increases. 
; DIS: ^0 = module: (path: "{{.*}}", hash: (0, 0, 0, 0, 0)) -; DIS: ^1 = gv: (guid: 5459407273543877811) -; DIS: ^2 = gv: (name: "_Z4testP4Base", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 4, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1, mustBeUnreachable: 0), calls: ((callee: ^1, hotness: hot))))) ; guid = 15857150948103218965 -; DIS: ^3 = blockcount: 0 +; DIS: ^1 = gv: (guid: 1960855528937986108) +; DIS: ^2 = gv: (guid: 5459407273543877811) +; DIS: ^3 = gv: (name: "_Z4testP4Base", summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 0, canAutoHide: 0), insts: 4, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 0, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 1, mustBeUnreachable: 0), calls: ((callee: ^2, hotness: hot)), refs: (readonly ^1)))) ; guid = 15857150948103218965 +; DIS: ^4 = blockcount: 0 From 649f9603a2da82a32830ce1dc7ce5825d3766a1d Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Mon, 1 Apr 2024 15:17:24 -0700 Subject: [PATCH 048/201] [workflows] issue-write: Avoid race condition when PR branch is deleted (#87118) Fixes #87102 . --- .github/workflows/issue-write.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/issue-write.yml b/.github/workflows/issue-write.yml index f5b84fec17a792..4a564a5076bac9 100644 --- a/.github/workflows/issue-write.yml +++ b/.github/workflows/issue-write.yml @@ -77,6 +77,15 @@ jobs: } const gql_result = await github.graphql(gql_query, gql_variables); console.log(gql_result); + // If the branch for the PR was deleted before this job has a chance + // to run, then the ref will be null. This can happen if someone: + // 1. Rebase the PR, which triggers some workflow. 
+ // 2. Immediately merges the PR and deletes the branch. + // 3. The workflow finishes and triggers this job. + if (!gql_result.repository.ref) { + console.log("Ref has been deleted"); + return; + } console.log(gql_result.repository.ref.associatedPullRequests.nodes); var pr_number = 0; From f2a87b07e7fe1892a11ee9424d22dbaec5de5b5b Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 1 Apr 2024 17:26:20 -0500 Subject: [PATCH 049/201] [OpenMP] Use loaded offloading toolchains to add libraries (#87108) Summary: We want to pass these GPU libraries by default if a certain offloading toolchain is loaded for OpenMP. Previously I parsed this from the arguments because it's only available in the compilation. This doesn't really work for `native` and it's extra effort, so this patch just passes in the `Compilation` as an extr argument and uses that. Tests should be unaffected. --- clang/lib/Driver/ToolChains/CommonArgs.cpp | 58 ++++++++-------------- clang/lib/Driver/ToolChains/CommonArgs.h | 4 +- clang/lib/Driver/ToolChains/Darwin.cpp | 2 +- clang/lib/Driver/ToolChains/DragonFly.cpp | 2 +- clang/lib/Driver/ToolChains/FreeBSD.cpp | 2 +- clang/lib/Driver/ToolChains/Gnu.cpp | 2 +- clang/lib/Driver/ToolChains/Haiku.cpp | 2 +- clang/lib/Driver/ToolChains/NetBSD.cpp | 2 +- clang/lib/Driver/ToolChains/OpenBSD.cpp | 2 +- clang/lib/Driver/ToolChains/Solaris.cpp | 2 +- 10 files changed, 32 insertions(+), 46 deletions(-) diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index ace4fb99581e38..62a53b85ce098b 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -1075,14 +1075,14 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args, /// Adds the '-lcgpu' and '-lmgpu' libraries to the compilation to include the /// LLVM C library for GPUs. 
-static void addOpenMPDeviceLibC(const ToolChain &TC, const ArgList &Args, +static void addOpenMPDeviceLibC(const Compilation &C, const ArgList &Args, ArgStringList &CmdArgs) { if (Args.hasArg(options::OPT_nogpulib) || Args.hasArg(options::OPT_nolibc)) return; // Check the resource directory for the LLVM libc GPU declarations. If it's // found we can assume that LLVM was built with support for the GPU libc. - SmallString<256> LibCDecls(TC.getDriver().ResourceDir); + SmallString<256> LibCDecls(C.getDriver().ResourceDir); llvm::sys::path::append(LibCDecls, "include", "llvm_libc_wrappers", "llvm-libc-decls"); bool HasLibC = llvm::sys::fs::exists(LibCDecls) && @@ -1090,38 +1090,23 @@ static void addOpenMPDeviceLibC(const ToolChain &TC, const ArgList &Args, if (!Args.hasFlag(options::OPT_gpulibc, options::OPT_nogpulibc, HasLibC)) return; - // We don't have access to the offloading toolchains here, so determine from - // the arguments if we have any active NVPTX or AMDGPU toolchains. - llvm::DenseSet Libraries; - if (const Arg *Targets = Args.getLastArg(options::OPT_fopenmp_targets_EQ)) { - if (llvm::any_of(Targets->getValues(), - [](auto S) { return llvm::Triple(S).isAMDGPU(); })) { - Libraries.insert("-lcgpu-amdgpu"); - Libraries.insert("-lmgpu-amdgpu"); - } - if (llvm::any_of(Targets->getValues(), - [](auto S) { return llvm::Triple(S).isNVPTX(); })) { - Libraries.insert("-lcgpu-nvptx"); - Libraries.insert("-lmgpu-nvptx"); - } - } + SmallVector ToolChains; + auto TCRange = C.getOffloadToolChains(Action::OFK_OpenMP); + for (auto TI = TCRange.first, TE = TCRange.second; TI != TE; ++TI) + ToolChains.push_back(TI->second); - for (StringRef Arch : Args.getAllArgValues(options::OPT_offload_arch_EQ)) { - if (llvm::any_of(llvm::split(Arch, ","), [](StringRef Str) { - return IsAMDGpuArch(StringToCudaArch(Str)); - })) { - Libraries.insert("-lcgpu-amdgpu"); - Libraries.insert("-lmgpu-amdgpu"); - } - if (llvm::any_of(llvm::split(Arch, ","), [](StringRef Str) { - return 
IsNVIDIAGpuArch(StringToCudaArch(Str)); - })) { - Libraries.insert("-lcgpu-nvptx"); - Libraries.insert("-lmgpu-nvptx"); - } + if (llvm::any_of(ToolChains, [](const ToolChain *TC) { + return TC->getTriple().isAMDGPU(); + })) { + CmdArgs.push_back("-lcgpu-amdgpu"); + CmdArgs.push_back("-lmgpu-amdgpu"); + } + if (llvm::any_of(ToolChains, [](const ToolChain *TC) { + return TC->getTriple().isNVPTX(); + })) { + CmdArgs.push_back("-lcgpu-nvptx"); + CmdArgs.push_back("-lmgpu-nvptx"); } - - llvm::append_range(CmdArgs, Libraries); } void tools::addOpenMPRuntimeLibraryPath(const ToolChain &TC, @@ -1153,9 +1138,10 @@ void tools::addArchSpecificRPath(const ToolChain &TC, const ArgList &Args, } } -bool tools::addOpenMPRuntime(ArgStringList &CmdArgs, const ToolChain &TC, - const ArgList &Args, bool ForceStaticHostRuntime, - bool IsOffloadingHost, bool GompNeedsRT) { +bool tools::addOpenMPRuntime(const Compilation &C, ArgStringList &CmdArgs, + const ToolChain &TC, const ArgList &Args, + bool ForceStaticHostRuntime, bool IsOffloadingHost, + bool GompNeedsRT) { if (!Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, options::OPT_fno_openmp, false)) return false; @@ -1196,7 +1182,7 @@ bool tools::addOpenMPRuntime(ArgStringList &CmdArgs, const ToolChain &TC, CmdArgs.push_back("-lomptarget.devicertl"); if (IsOffloadingHost) - addOpenMPDeviceLibC(TC, Args, CmdArgs); + addOpenMPDeviceLibC(C, Args, CmdArgs); addArchSpecificRPath(TC, Args, CmdArgs); addOpenMPRuntimeLibraryPath(TC, Args, CmdArgs); diff --git a/clang/lib/Driver/ToolChains/CommonArgs.h b/clang/lib/Driver/ToolChains/CommonArgs.h index bb37be4bd6ea05..5581905db31144 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.h +++ b/clang/lib/Driver/ToolChains/CommonArgs.h @@ -111,8 +111,8 @@ void addOpenMPRuntimeLibraryPath(const ToolChain &TC, const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs); /// Returns true, if an OpenMP runtime has been added. 
-bool addOpenMPRuntime(llvm::opt::ArgStringList &CmdArgs, const ToolChain &TC, - const llvm::opt::ArgList &Args, +bool addOpenMPRuntime(const Compilation &C, llvm::opt::ArgStringList &CmdArgs, + const ToolChain &TC, const llvm::opt::ArgList &Args, bool ForceStaticHostRuntime = false, bool IsOffloadingHost = false, bool GompNeedsRT = false); diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index c7682c7f1d3379..caf6c4a444fdce 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -686,7 +686,7 @@ void darwin::Linker::ConstructJob(Compilation &C, const JobAction &JA, } if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) - addOpenMPRuntime(CmdArgs, getToolChain(), Args); + addOpenMPRuntime(C, CmdArgs, getToolChain(), Args); if (isObjCRuntimeLinked(Args) && !Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { diff --git a/clang/lib/Driver/ToolChains/DragonFly.cpp b/clang/lib/Driver/ToolChains/DragonFly.cpp index b59a172bd6ae86..1dbc46763c1156 100644 --- a/clang/lib/Driver/ToolChains/DragonFly.cpp +++ b/clang/lib/Driver/ToolChains/DragonFly.cpp @@ -136,7 +136,7 @@ void dragonfly::Linker::ConstructJob(Compilation &C, const JobAction &JA, // Use the static OpenMP runtime with -static-openmp bool StaticOpenMP = Args.hasArg(options::OPT_static_openmp) && !Static; - addOpenMPRuntime(CmdArgs, ToolChain, Args, StaticOpenMP); + addOpenMPRuntime(C, CmdArgs, ToolChain, Args, StaticOpenMP); if (D.CCCIsCXX()) { if (ToolChain.ShouldLinkCXXStdlib(Args)) diff --git a/clang/lib/Driver/ToolChains/FreeBSD.cpp b/clang/lib/Driver/ToolChains/FreeBSD.cpp index c5757ddebb0f3e..a8ee6540001ee4 100644 --- a/clang/lib/Driver/ToolChains/FreeBSD.cpp +++ b/clang/lib/Driver/ToolChains/FreeBSD.cpp @@ -295,7 +295,7 @@ void freebsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, // Use the static OpenMP runtime with -static-openmp bool StaticOpenMP = 
Args.hasArg(options::OPT_static_openmp) && !Args.hasArg(options::OPT_static); - addOpenMPRuntime(CmdArgs, ToolChain, Args, StaticOpenMP); + addOpenMPRuntime(C, CmdArgs, ToolChain, Args, StaticOpenMP); if (D.CCCIsCXX()) { if (ToolChain.ShouldLinkCXXStdlib(Args)) diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index a9c9d2475809d7..dedbfac6cb25d2 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -598,7 +598,7 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, // FIXME: Only pass GompNeedsRT = true for platforms with libgomp that // require librt. Most modern Linux platforms do, but some may not. - if (addOpenMPRuntime(CmdArgs, ToolChain, Args, StaticOpenMP, + if (addOpenMPRuntime(C, CmdArgs, ToolChain, Args, StaticOpenMP, JA.isHostOffloading(Action::OFK_OpenMP), /* GompNeedsRT= */ true)) // OpenMP runtimes implies pthreads when using the GNU toolchain. diff --git a/clang/lib/Driver/ToolChains/Haiku.cpp b/clang/lib/Driver/ToolChains/Haiku.cpp index 30464e2229e65b..346652a7e4bd8e 100644 --- a/clang/lib/Driver/ToolChains/Haiku.cpp +++ b/clang/lib/Driver/ToolChains/Haiku.cpp @@ -107,7 +107,7 @@ void haiku::Linker::ConstructJob(Compilation &C, const JobAction &JA, options::OPT_r)) { // Use the static OpenMP runtime with -static-openmp bool StaticOpenMP = Args.hasArg(options::OPT_static_openmp) && !Static; - addOpenMPRuntime(CmdArgs, ToolChain, Args, StaticOpenMP); + addOpenMPRuntime(C, CmdArgs, ToolChain, Args, StaticOpenMP); if (D.CCCIsCXX() && ToolChain.ShouldLinkCXXStdlib(Args)) ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs); diff --git a/clang/lib/Driver/ToolChains/NetBSD.cpp b/clang/lib/Driver/ToolChains/NetBSD.cpp index 0eec8fddabd5db..d54f2288294949 100644 --- a/clang/lib/Driver/ToolChains/NetBSD.cpp +++ b/clang/lib/Driver/ToolChains/NetBSD.cpp @@ -311,7 +311,7 @@ void netbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, options::OPT_r)) { 
// Use the static OpenMP runtime with -static-openmp bool StaticOpenMP = Args.hasArg(options::OPT_static_openmp) && !Static; - addOpenMPRuntime(CmdArgs, ToolChain, Args, StaticOpenMP); + addOpenMPRuntime(C, CmdArgs, ToolChain, Args, StaticOpenMP); if (D.CCCIsCXX()) { if (ToolChain.ShouldLinkCXXStdlib(Args)) diff --git a/clang/lib/Driver/ToolChains/OpenBSD.cpp b/clang/lib/Driver/ToolChains/OpenBSD.cpp index 6da6728585df93..e20d9fb1cfc417 100644 --- a/clang/lib/Driver/ToolChains/OpenBSD.cpp +++ b/clang/lib/Driver/ToolChains/OpenBSD.cpp @@ -221,7 +221,7 @@ void openbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, options::OPT_r)) { // Use the static OpenMP runtime with -static-openmp bool StaticOpenMP = Args.hasArg(options::OPT_static_openmp) && !Static; - addOpenMPRuntime(CmdArgs, ToolChain, Args, StaticOpenMP); + addOpenMPRuntime(C, CmdArgs, ToolChain, Args, StaticOpenMP); if (D.CCCIsCXX()) { if (ToolChain.ShouldLinkCXXStdlib(Args)) diff --git a/clang/lib/Driver/ToolChains/Solaris.cpp b/clang/lib/Driver/ToolChains/Solaris.cpp index 5d7f0ae2a392a6..7126e018ca5b6f 100644 --- a/clang/lib/Driver/ToolChains/Solaris.cpp +++ b/clang/lib/Driver/ToolChains/Solaris.cpp @@ -211,7 +211,7 @@ void solaris::Linker::ConstructJob(Compilation &C, const JobAction &JA, // Use the static OpenMP runtime with -static-openmp bool StaticOpenMP = Args.hasArg(options::OPT_static_openmp) && !Args.hasArg(options::OPT_static); - addOpenMPRuntime(CmdArgs, ToolChain, Args, StaticOpenMP); + addOpenMPRuntime(C, CmdArgs, ToolChain, Args, StaticOpenMP); if (D.CCCIsCXX()) { if (ToolChain.ShouldLinkCXXStdlib(Args)) From 9df19ce40281551bd348b262a131085cf98dadf5 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Mon, 1 Apr 2024 23:07:01 +0000 Subject: [PATCH 050/201] Add uncovered enums in switches caused by 9434c083475e42f47383f3067fe2a155db5c6a30 These are probably actually unreachable - perhaps an lldb developer would be interested in rephrasing this change to move the new cases into some 
unreachable/unsupported bucket, rather than my half-hearted guess at what the desired behavior would be (completely untested, because they're probably untestable/unreachable - maybe debugging from modules?) --- lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index ebcc3bc99a801f..4a1c8d57655215 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -4097,6 +4097,8 @@ TypeSystemClang::GetTypeClass(lldb::opaque_compiler_type_t type) { return lldb::eTypeClassArray; case clang::Type::DependentSizedArray: return lldb::eTypeClassArray; + case clang::Type::ArrayParameter: + return lldb::eTypeClassArray; case clang::Type::DependentSizedExtVector: return lldb::eTypeClassVector; case clang::Type::DependentVector: @@ -4776,6 +4778,7 @@ lldb::Encoding TypeSystemClang::GetEncoding(lldb::opaque_compiler_type_t type, case clang::Type::IncompleteArray: case clang::Type::VariableArray: + case clang::Type::ArrayParameter: break; case clang::Type::ConstantArray: @@ -5109,6 +5112,7 @@ lldb::Format TypeSystemClang::GetFormat(lldb::opaque_compiler_type_t type) { case clang::Type::IncompleteArray: case clang::Type::VariableArray: + case clang::Type::ArrayParameter: break; case clang::Type::ConstantArray: From 1079fc4f543c42bb09a33d2d79d90edd9c0bac91 Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Tue, 2 Apr 2024 02:43:04 +0300 Subject: [PATCH 051/201] [mlir][pass] Add `errorHandler` param to `Pass::initializeOptions` (#87289) There is no good way to report detailed errors from inside `Pass::initializeOptions` function as context may not be available at this point and writing directly to `llvm::errs()` is not composable. 
See https://github.com/llvm/llvm-project/pull/87166#discussion_r1546426763 * Add error handler callback to `Pass::initializeOptions` * Update `PassOptions::parseFromString` to support custom error stream instead of using `llvm::errs()` directly. * Update default `Pass::initializeOptions` implementation to propagate error string from `parseFromString` to new error handler. * Update `MapMemRefStorageClassPass` to report error details using new API. --- mlir/include/mlir/Pass/Pass.h | 4 +++- mlir/include/mlir/Pass/PassOptions.h | 3 ++- .../MemRefToSPIRV/MapMemRefStorageClassPass.cpp | 8 +++++--- mlir/lib/Pass/Pass.cpp | 12 ++++++++++-- mlir/lib/Pass/PassRegistry.cpp | 7 ++++--- mlir/lib/Transforms/InlinerPass.cpp | 10 +++++++--- .../Dialect/Transform/test-pass-application.mlir | 1 + 7 files changed, 32 insertions(+), 13 deletions(-) diff --git a/mlir/include/mlir/Pass/Pass.h b/mlir/include/mlir/Pass/Pass.h index 070e0cad38787c..0f50f3064f1780 100644 --- a/mlir/include/mlir/Pass/Pass.h +++ b/mlir/include/mlir/Pass/Pass.h @@ -114,7 +114,9 @@ class Pass { /// Derived classes may override this method to hook into the point at which /// options are initialized, but should generally always invoke this base /// class variant. - virtual LogicalResult initializeOptions(StringRef options); + virtual LogicalResult + initializeOptions(StringRef options, + function_ref errorHandler); /// Prints out the pass in the textual representation of pipelines. If this is /// an adaptor pass, print its pass managers. diff --git a/mlir/include/mlir/Pass/PassOptions.h b/mlir/include/mlir/Pass/PassOptions.h index 6717a3585d12a5..3a5e3224133e6f 100644 --- a/mlir/include/mlir/Pass/PassOptions.h +++ b/mlir/include/mlir/Pass/PassOptions.h @@ -293,7 +293,8 @@ class PassOptions : protected llvm::cl::SubCommand { /// Parse options out as key=value pairs that can then be handed off to the /// `llvm::cl` command line passing infrastructure. Everything is space /// separated. 
- LogicalResult parseFromString(StringRef options); + LogicalResult parseFromString(StringRef options, + raw_ostream &errorStream = llvm::errs()); /// Print the options held by this struct in a form that can be parsed via /// 'parseFromString'. diff --git a/mlir/lib/Conversion/MemRefToSPIRV/MapMemRefStorageClassPass.cpp b/mlir/lib/Conversion/MemRefToSPIRV/MapMemRefStorageClassPass.cpp index 76dab8ee4ac336..4cbc3dfdae223c 100644 --- a/mlir/lib/Conversion/MemRefToSPIRV/MapMemRefStorageClassPass.cpp +++ b/mlir/lib/Conversion/MemRefToSPIRV/MapMemRefStorageClassPass.cpp @@ -272,14 +272,16 @@ class MapMemRefStorageClassPass final const spirv::MemorySpaceToStorageClassMap &memorySpaceMap) : memorySpaceMap(memorySpaceMap) {} - LogicalResult initializeOptions(StringRef options) override { - if (failed(Pass::initializeOptions(options))) + LogicalResult initializeOptions( + StringRef options, + function_ref errorHandler) override { + if (failed(Pass::initializeOptions(options, errorHandler))) return failure(); if (clientAPI == "opencl") memorySpaceMap = spirv::mapMemorySpaceToOpenCLStorageClass; else if (clientAPI != "vulkan") - return failure(); + return errorHandler(llvm::Twine("Invalid clienAPI: ") + clientAPI); return success(); } diff --git a/mlir/lib/Pass/Pass.cpp b/mlir/lib/Pass/Pass.cpp index 3fb05e53866607..57a6c20141d2c1 100644 --- a/mlir/lib/Pass/Pass.cpp +++ b/mlir/lib/Pass/Pass.cpp @@ -60,8 +60,16 @@ Operation *PassExecutionAction::getOp() const { void Pass::anchor() {} /// Attempt to initialize the options of this pass from the given string. 
-LogicalResult Pass::initializeOptions(StringRef options) { - return passOptions.parseFromString(options); +LogicalResult Pass::initializeOptions( + StringRef options, + function_ref errorHandler) { + std::string errStr; + llvm::raw_string_ostream os(errStr); + if (failed(passOptions.parseFromString(options, os))) { + os.flush(); + return errorHandler(errStr); + } + return success(); } /// Copy the option values from 'other', which is another instance of this diff --git a/mlir/lib/Pass/PassRegistry.cpp b/mlir/lib/Pass/PassRegistry.cpp index b0c314369190a4..f8149673a40939 100644 --- a/mlir/lib/Pass/PassRegistry.cpp +++ b/mlir/lib/Pass/PassRegistry.cpp @@ -40,7 +40,7 @@ buildDefaultRegistryFn(const PassAllocatorFunction &allocator) { return [=](OpPassManager &pm, StringRef options, function_ref errorHandler) { std::unique_ptr pass = allocator(); - LogicalResult result = pass->initializeOptions(options); + LogicalResult result = pass->initializeOptions(options, errorHandler); std::optional pmOpName = pm.getOpName(); std::optional passOpName = pass->getOpName(); @@ -280,7 +280,8 @@ parseNextArg(StringRef options) { llvm_unreachable("unexpected control flow in pass option parsing"); } -LogicalResult detail::PassOptions::parseFromString(StringRef options) { +LogicalResult detail::PassOptions::parseFromString(StringRef options, + raw_ostream &errorStream) { // NOTE: `options` is modified in place to always refer to the unprocessed // part of the string. 
while (!options.empty()) { @@ -291,7 +292,7 @@ LogicalResult detail::PassOptions::parseFromString(StringRef options) { auto it = OptionsMap.find(key); if (it == OptionsMap.end()) { - llvm::errs() << ": no such option " << key << "\n"; + errorStream << ": no such option " << key << "\n"; return failure(); } if (llvm::cl::ProvidePositionalOption(it->second, value, 0)) diff --git a/mlir/lib/Transforms/InlinerPass.cpp b/mlir/lib/Transforms/InlinerPass.cpp index 9a7d5403a95dc5..43ca5cac8b76f3 100644 --- a/mlir/lib/Transforms/InlinerPass.cpp +++ b/mlir/lib/Transforms/InlinerPass.cpp @@ -64,7 +64,9 @@ class InlinerPass : public impl::InlinerBase { /// Derived classes may override this method to hook into the point at which /// options are initialized, but should generally always invoke this base /// class variant. - LogicalResult initializeOptions(StringRef options) override; + LogicalResult initializeOptions( + StringRef options, + function_ref errorHandler) override; /// Inliner configuration parameters created from the pass options. 
InlinerConfig config; @@ -153,8 +155,10 @@ void InlinerPass::runOnOperation() { return; } -LogicalResult InlinerPass::initializeOptions(StringRef options) { - if (failed(Pass::initializeOptions(options))) +LogicalResult InlinerPass::initializeOptions( + StringRef options, + function_ref errorHandler) { + if (failed(Pass::initializeOptions(options, errorHandler))) return failure(); // Initialize the pipeline builder for operations without the dedicated diff --git a/mlir/test/Dialect/Transform/test-pass-application.mlir b/mlir/test/Dialect/Transform/test-pass-application.mlir index 7cb5387b937d45..460ac3947f5c82 100644 --- a/mlir/test/Dialect/Transform/test-pass-application.mlir +++ b/mlir/test/Dialect/Transform/test-pass-application.mlir @@ -78,6 +78,7 @@ module attributes {transform.with_named_sequence} { transform.named_sequence @__transform_main(%arg1: !transform.any_op) { %1 = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.any_op // expected-error @below {{failed to add pass or pass pipeline to pipeline: canonicalize}} + // expected-error @below {{: no such option invalid-option}} transform.apply_registered_pass "canonicalize" to %1 {options = "invalid-option=1"} : (!transform.any_op) -> !transform.any_op transform.yield } From 6d0174e70641b1ea172ffed07c43604ef15e28ae Mon Sep 17 00:00:00 2001 From: Stephen Neuendorffer Date: Mon, 1 Apr 2024 17:04:29 -0700 Subject: [PATCH 052/201] [libc] allow libc-hdrgen to work on windows files (#87292) The code does some (overly simple?) checks on file syntax. These checks assume unix line endings and fail on windows. This commit updates the code to strip extra whitespace, making the checks more robust, particularly in the presence of windows line endings. 
Fixes #86023 --- libc/utils/HdrGen/Generator.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/libc/utils/HdrGen/Generator.cpp b/libc/utils/HdrGen/Generator.cpp index 3bcf005adda74f..d926d5d9ac3c8d 100644 --- a/libc/utils/HdrGen/Generator.cpp +++ b/libc/utils/HdrGen/Generator.cpp @@ -84,11 +84,19 @@ void Generator::generate(llvm::raw_ostream &OS, llvm::RecordKeeper &Records) { Line = Line.drop_front(CommandPrefixSize); P = Line.split("("); + // It's possible that we have windows line endings, so strip off the extra + // CR. + P.second = P.second.trim(); if (P.second.empty() || P.second[P.second.size() - 1] != ')') { SrcMgr.PrintMessage(llvm::SMLoc::getFromPointer(P.second.data()), llvm::SourceMgr::DK_Error, "Command argument list should begin with '(' " "and end with ')'."); + SrcMgr.PrintMessage(llvm::SMLoc::getFromPointer(P.second.data()), + llvm::SourceMgr::DK_Error, P.second.data()); + SrcMgr.PrintMessage(llvm::SMLoc::getFromPointer(P.second.data()), + llvm::SourceMgr::DK_Error, + std::to_string(P.second.size())); std::exit(1); } llvm::StringRef CommandName = P.first; From dd5797505ebc2dbfdd58927c4f0a11a1256696eb Mon Sep 17 00:00:00 2001 From: Abhinav Gunjal Date: Mon, 1 Apr 2024 17:36:09 -0700 Subject: [PATCH 053/201] lit_test : check if there is already a deps key in kwargs (#87290) This change checks if there is already a `deps` key in `kwargs` and concatenate it to avoid multiple values for `deps` key argument. background: https://github.com/llvm/llvm-project/pull/87022 recently added explicit `deps` to the lit_test. 
This is causing StableHLO bazel build failures at https://github.com/openxla/stablehlo/actions/runs/8511888283/job/23312383380?pr=2147 Tested: local build run is successful --- utils/bazel/llvm-project-overlay/llvm/lit_test.bzl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/utils/bazel/llvm-project-overlay/llvm/lit_test.bzl b/utils/bazel/llvm-project-overlay/llvm/lit_test.bzl index f754a9fc7d5e48..af7ae560768d6f 100644 --- a/utils/bazel/llvm-project-overlay/llvm/lit_test.bzl +++ b/utils/bazel/llvm-project-overlay/llvm/lit_test.bzl @@ -10,6 +10,7 @@ def lit_test( srcs, args = None, data = None, + deps = None, **kwargs): """Runs a single test file with LLVM's lit tool. @@ -27,6 +28,7 @@ def lit_test( args = args or [] data = data or [] + deps = deps or [] native.py_test( name = name, @@ -35,7 +37,7 @@ def lit_test( args = args + ["-v"] + ["$(execpath %s)" % src for src in srcs], data = data + srcs, legacy_create_init = False, - deps = [Label("//llvm:lit")], + deps = deps + [Label("//llvm:lit")], **kwargs ) From 9dbd364589883ae3343a291077804c564d4b3de5 Mon Sep 17 00:00:00 2001 From: Ben Shi <2283975856@qq.com> Date: Tue, 2 Apr 2024 08:38:02 +0800 Subject: [PATCH 054/201] [AVR][NFC] Improve format of target description files (#87212) --- llvm/lib/Target/AVR/AVRInstrInfo.td | 320 +++++++--------------------- 1 file changed, 75 insertions(+), 245 deletions(-) diff --git a/llvm/lib/Target/AVR/AVRInstrInfo.td b/llvm/lib/Target/AVR/AVRInstrInfo.td index fe0d3b6c8189b7..38ebfab64c618d 100644 --- a/llvm/lib/Target/AVR/AVRInstrInfo.td +++ b/llvm/lib/Target/AVR/AVRInstrInfo.td @@ -343,13 +343,9 @@ def AVR_COND_PL : PatLeaf<(i8 7)>; // Pessimistically assume ADJCALLSTACKDOWN / ADJCALLSTACKUP will become // sub / add which can clobber SREG. 
let Defs = [SP, SREG], Uses = [SP] in { - def ADJCALLSTACKDOWN : Pseudo<(outs), - (ins i16imm - : $amt, i16imm - : $amt2), - "#ADJCALLSTACKDOWN", [(AVRcallseq_start timm - : $amt, timm - : $amt2)]>; + def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i16imm:$amt, i16imm:$amt2), + "#ADJCALLSTACKDOWN", + [(AVRcallseq_start timm:$amt, timm:$amt2)]>; // R31R30 is used to update SP. It is normally free because it is a // call-clobbered register but it is necessary to set it as a def as the @@ -357,13 +353,8 @@ let Defs = [SP, SREG], Uses = [SP] in { // seems). hasSideEffects needs to be set to true so this instruction isn't // considered dead. let Defs = [R31R30], hasSideEffects = 1 in def ADJCALLSTACKUP - : Pseudo<(outs), - (ins i16imm - : $amt1, i16imm - : $amt2), - "#ADJCALLSTACKUP", [(AVRcallseq_end timm - : $amt1, timm - : $amt2)]>; + : Pseudo<(outs), (ins i16imm:$amt1, i16imm:$amt2), + "#ADJCALLSTACKUP", [(AVRcallseq_end timm:$amt1, timm:$amt2)]>; } //===----------------------------------------------------------------------===// @@ -372,19 +363,9 @@ let Defs = [SP, SREG], Uses = [SP] in { let isCommutable = 1, Constraints = "$src = $rd", Defs = [SREG] in { // ADD Rd, Rr // Adds two 8-bit registers. - def ADDRdRr - : FRdRr<0b0000, 0b11, - (outs GPR8 - : $rd), - (ins GPR8 - : $src, GPR8 - : $rr), - "add\t$rd, $rr", - [(set i8 - : $rd, (add i8 - : $src, i8 - : $rr)), - (implicit SREG)]>; + def ADDRdRr : FRdRr<0b0000, 0b11, (outs GPR8:$rd),(ins GPR8:$src, GPR8:$rr), + "add\t$rd, $rr", + [(set i8:$rd, (add i8:$src, i8:$rr)), (implicit SREG)]>; // ADDW Rd+1:Rd, Rr+1:Rr // Pseudo instruction to add four 8-bit registers as two 16-bit values. 
@@ -392,34 +373,17 @@ let isCommutable = 1, Constraints = "$src = $rd", Defs = [SREG] in { // Expands to: // add Rd, Rr // adc Rd+1, Rr+1 - def ADDWRdRr - : Pseudo<(outs DREGS - : $rd), - (ins DREGS - : $src, DREGS - : $rr), - "addw\t$rd, $rr", - [(set i16 - : $rd, (add i16 - : $src, i16 - : $rr)), - (implicit SREG)]>; + def ADDWRdRr : Pseudo<(outs DREGS:$rd), (ins DREGS:$src, DREGS:$rr), + "addw\t$rd, $rr", + [(set i16:$rd, (add i16:$src, i16:$rr)), + (implicit SREG)]>; // ADC Rd, Rr // Adds two 8-bit registers with carry. - let Uses = [SREG] in def ADCRdRr - : FRdRr<0b0001, 0b11, - (outs GPR8 - : $rd), - (ins GPR8 - : $src, GPR8 - : $rr), - "adc\t$rd, $rr", - [(set i8 - : $rd, (adde i8 - : $src, i8 - : $rr)), - (implicit SREG)]>; + let Uses = [SREG] in + def ADCRdRr : FRdRr<0b0001, 0b11, (outs GPR8:$rd), (ins GPR8:$src, GPR8:$rr), + "adc\t$rd, $rr", + [(set i8:$rd, (adde i8:$src, i8:$rr)), (implicit SREG)]>; // ADCW Rd+1:Rd, Rr+1:Rr // Pseudo instruction to add four 8-bit registers as two 16-bit values with @@ -428,56 +392,30 @@ let isCommutable = 1, Constraints = "$src = $rd", Defs = [SREG] in { // Expands to: // adc Rd, Rr // adc Rd+1, Rr+1 - let Uses = [SREG] in def ADCWRdRr : Pseudo<(outs DREGS - : $rd), - (ins DREGS - : $src, DREGS - : $rr), - "adcw\t$rd, $rr", [ - (set i16 - : $rd, (adde i16 - : $src, i16 - : $rr)), - (implicit SREG) - ]>; + let Uses = [SREG] in + def ADCWRdRr : Pseudo<(outs DREGS:$rd), (ins DREGS:$src, DREGS:$rr), + "adcw\t$rd, $rr", + [(set i16:$rd, (adde i16:$src, i16:$rr)), + (implicit SREG)]>; // AIDW Rd, k // Adds an immediate 6-bit value K to Rd, placing the result in Rd. 
- def ADIWRdK - : FWRdK<0b0, - (outs IWREGS - : $rd), - (ins IWREGS - : $src, imm_arith6 - : $k), - "adiw\t$rd, $k", - [(set i16 - : $rd, (add i16 - : $src, uimm6 - : $k)), - (implicit SREG)]>, - Requires<[HasADDSUBIW]>; + def ADIWRdK : FWRdK<0b0, (outs IWREGS:$rd), (ins IWREGS :$src, imm_arith6:$k), + "adiw\t$rd, $k", + [(set i16:$rd, (add i16:$src, uimm6:$k)), + (implicit SREG)]>, + Requires<[HasADDSUBIW]>; } //===----------------------------------------------------------------------===// // Subtraction //===----------------------------------------------------------------------===// -let Constraints = "$src = $rd", Defs = [SREG] in { +let Constraints = "$rs = $rd", Defs = [SREG] in { // SUB Rd, Rr // Subtracts the 8-bit value of Rr from Rd and places the value in Rd. - def SUBRdRr - : FRdRr<0b0001, 0b10, - (outs GPR8 - : $rd), - (ins GPR8 - : $src, GPR8 - : $rr), - "sub\t$rd, $rr", - [(set i8 - : $rd, (sub i8 - : $src, i8 - : $rr)), - (implicit SREG)]>; + def SUBRdRr : FRdRr<0b0001, 0b10, (outs GPR8:$rd), (ins GPR8:$rs, GPR8:$rr), + "sub\t$rd, $rr", + [(set i8:$rd, (sub i8:$rs, i8:$rr)), (implicit SREG)]>; // SUBW Rd+1:Rd, Rr+1:Rr // Subtracts two 16-bit values and places the result into Rd. 
@@ -485,129 +423,58 @@ let Constraints = "$src = $rd", Defs = [SREG] in { // Expands to: // sub Rd, Rr // sbc Rd+1, Rr+1 - def SUBWRdRr - : Pseudo<(outs DREGS - : $rd), - (ins DREGS - : $src, DREGS - : $rr), - "subw\t$rd, $rr", - [(set i16 - : $rd, (sub i16 - : $src, i16 - : $rr)), - (implicit SREG)]>; + def SUBWRdRr : Pseudo<(outs DREGS:$rd), (ins DREGS:$rs, DREGS:$rr), + "subw\t$rd, $rr", + [(set i16:$rd, (sub i16:$rs, i16:$rr)), + (implicit SREG)]>; - def SUBIRdK - : FRdK<0b0101, - (outs LD8 - : $rd), - (ins LD8 - : $src, imm_ldi8 - : $k), - "subi\t$rd, $k", - [(set i8 - : $rd, (sub i8 - : $src, imm - : $k)), - (implicit SREG)]>; + def SUBIRdK : FRdK<0b0101, (outs LD8:$rd), (ins LD8:$rs, imm_ldi8:$k), + "subi\t$rd, $k", + [(set i8:$rd, (sub i8:$rs, imm:$k)), (implicit SREG)]>; // SUBIW Rd+1:Rd, K+1:K // // Expands to: // subi Rd, K // sbci Rd+1, K+1 - def SUBIWRdK - : Pseudo<(outs DLDREGS - : $rd), - (ins DLDREGS - : $src, i16imm - : $rr), - "subiw\t$rd, $rr", - [(set i16 - : $rd, (sub i16 - : $src, imm - : $rr)), - (implicit SREG)]>; + def SUBIWRdK : Pseudo<(outs DLDREGS:$rd), (ins DLDREGS:$rs, i16imm:$rr), + "subiw\t$rd, $rr", + [(set i16:$rd, (sub i16:$rs, imm:$rr)), + (implicit SREG)]>; - def SBIWRdK - : FWRdK<0b1, - (outs IWREGS - : $rd), - (ins IWREGS - : $src, imm_arith6 - : $k), - "sbiw\t$rd, $k", - [(set i16 - : $rd, (sub i16 - : $src, uimm6 - : $k)), - (implicit SREG)]>, - Requires<[HasADDSUBIW]>; + def SBIWRdK : FWRdK<0b1, (outs IWREGS:$rd), (ins IWREGS:$rs, imm_arith6:$k), + "sbiw\t$rd, $k", + [(set i16:$rd, (sub i16:$rs, uimm6:$k)), + (implicit SREG)]>, + Requires<[HasADDSUBIW]>; // Subtract with carry operations which must read the carry flag in SREG. 
let Uses = [SREG] in { - def SBCRdRr - : FRdRr<0b0000, 0b10, - (outs GPR8 - : $rd), - (ins GPR8 - : $src, GPR8 - : $rr), - "sbc\t$rd, $rr", - [(set i8 - : $rd, (sube i8 - : $src, i8 - : $rr)), - (implicit SREG)]>; + def SBCRdRr : FRdRr<0b0000, 0b10, (outs GPR8:$rd), (ins GPR8:$rs, GPR8:$rr), + "sbc\t$rd, $rr", + [(set i8:$rd, (sube i8:$rs, i8:$rr)), (implicit SREG)]>; // SBCW Rd+1:Rd, Rr+1:Rr // // Expands to: // sbc Rd, Rr // sbc Rd+1, Rr+1 - def SBCWRdRr : Pseudo<(outs DREGS - : $rd), - (ins DREGS - : $src, DREGS - : $rr), - "sbcw\t$rd, $rr", [ - (set i16 - : $rd, (sube i16 - : $src, i16 - : $rr)), - (implicit SREG) - ]>; + def SBCWRdRr : Pseudo<(outs DREGS:$rd), (ins DREGS:$rs, DREGS:$rr), + "sbcw\t$rd, $rr", + [(set i16:$rd, (sube i16:$rs, i16:$rr)), + (implicit SREG)]>; - def SBCIRdK - : FRdK<0b0100, - (outs LD8 - : $rd), - (ins LD8 - : $src, imm_ldi8 - : $k), - "sbci\t$rd, $k", - [(set i8 - : $rd, (sube i8 - : $src, imm - : $k)), - (implicit SREG)]>; + def SBCIRdK : FRdK<0b0100, (outs LD8:$rd), (ins LD8:$rs, imm_ldi8:$k), + "sbci\t$rd, $k", + [(set i8:$rd, (sube i8:$rs, imm:$k)), (implicit SREG)]>; // SBCIW Rd+1:Rd, K+1:K // sbci Rd, K // sbci Rd+1, K+1 - def SBCIWRdK : Pseudo<(outs DLDREGS - : $rd), - (ins DLDREGS - : $src, i16imm - : $rr), - "sbciw\t$rd, $rr", [ - (set i16 - : $rd, (sube i16 - : $src, imm - : $rr)), - (implicit SREG) - ]>; + def SBCIWRdK : Pseudo<(outs DLDREGS:$rd), (ins DLDREGS:$rs, i16imm:$rr), + "sbciw\t$rd, $rr", + [(set i16:$rd, (sube i16:$rs, imm:$rr)), + (implicit SREG)]>; } } @@ -615,27 +482,13 @@ let Constraints = "$src = $rd", Defs = [SREG] in { // Increment and Decrement //===----------------------------------------------------------------------===// let Constraints = "$src = $rd", Defs = [SREG] in { - def INCRd - : FRd<0b1001, 0b0100011, - (outs GPR8 - : $rd), - (ins GPR8 - : $src), - "inc\t$rd", [(set i8 - : $rd, (add i8 - : $src, 1)), - (implicit SREG)]>; + def INCRd : FRd<0b1001, 0b0100011, (outs GPR8:$rd), (ins GPR8:$src), + 
"inc\t$rd", + [(set i8:$rd, (add i8:$src, 1)), (implicit SREG)]>; - def DECRd - : FRd<0b1001, 0b0101010, - (outs GPR8 - : $rd), - (ins GPR8 - : $src), - "dec\t$rd", [(set i8 - : $rd, (add i8 - : $src, -1)), - (implicit SREG)]>; + def DECRd : FRd<0b1001, 0b0101010, (outs GPR8:$rd), (ins GPR8:$src), + "dec\t$rd", + [(set i8:$rd, (add i8:$src, -1)), (implicit SREG)]>; } //===----------------------------------------------------------------------===// @@ -646,58 +499,35 @@ let isCommutable = 1, Defs = [R1, R0, SREG] in { // MUL Rd, Rr // Multiplies Rd by Rr and places the result into R1:R0. let usesCustomInserter = 1 in { - def MULRdRr : FRdRr<0b1001, 0b11, (outs), - (ins GPR8 - : $rd, GPR8 - : $rr), - "mul\t$rd, $rr", - [/*(set R1, R0, (smullohi i8:$rd, i8:$rr))*/]>, + def MULRdRr : FRdRr<0b1001, 0b11, (outs), (ins GPR8:$rd, GPR8:$rr), + "mul\t$rd, $rr", []>, Requires<[SupportsMultiplication]>; - def MULSRdRr : FMUL2RdRr<0, (outs), - (ins LD8 - : $rd, LD8 - : $rr), + def MULSRdRr : FMUL2RdRr<0, (outs), (ins LD8:$rd, LD8:$rr), "muls\t$rd, $rr", []>, Requires<[SupportsMultiplication]>; } - def MULSURdRr : FMUL2RdRr<1, (outs), - (ins LD8lo - : $rd, LD8lo - : $rr), + def MULSURdRr : FMUL2RdRr<1, (outs), (ins LD8lo:$rd, LD8lo:$rr), "mulsu\t$rd, $rr", []>, Requires<[SupportsMultiplication]>; - def FMUL : FFMULRdRr<0b01, (outs), - (ins LD8lo - : $rd, LD8lo - : $rr), + def FMUL : FFMULRdRr<0b01, (outs), (ins LD8lo:$rd, LD8lo:$rr), "fmul\t$rd, $rr", []>, Requires<[SupportsMultiplication]>; - def FMULS : FFMULRdRr<0b10, (outs), - (ins LD8lo - : $rd, LD8lo - : $rr), + def FMULS : FFMULRdRr<0b10, (outs), (ins LD8lo:$rd, LD8lo:$rr), "fmuls\t$rd, $rr", []>, Requires<[SupportsMultiplication]>; - def FMULSU : FFMULRdRr<0b11, (outs), - (ins LD8lo - : $rd, LD8lo - : $rr), + def FMULSU : FFMULRdRr<0b11, (outs), (ins LD8lo:$rd, LD8lo:$rr), "fmulsu\t$rd, $rr", []>, Requires<[SupportsMultiplication]>; } let Defs = - [R15, R14, R13, R12, R11, R10, R9, R8, R7, R6, R5, R4, R3, R2, R1, - R0] in 
def DESK : FDES<(outs), - (ins i8imm - : $k), - "des\t$k", []>, - Requires<[HasDES]>; + [R15, R14, R13, R12, R11, R10, R9, R8, R7, R6, R5, R4, R3, R2, R1, R0] in +def DESK : FDES<(outs), (ins i8imm:$k), "des\t$k", []>, Requires<[HasDES]>; //===----------------------------------------------------------------------===// // Logic From 372c275800140f35a697f12a2e83d94d5603eaf5 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Mon, 1 Apr 2024 17:28:44 -0700 Subject: [PATCH 055/201] [dfsan][test] Disable the test with internal_symbolizer After #87191 we had to add 8b135a7d1f59a5a7adccb162abf92d751209afe7, which makes the symbolizer call a global constructor with `realloc`. --- compiler-rt/test/dfsan/mmap_at_init.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/compiler-rt/test/dfsan/mmap_at_init.c b/compiler-rt/test/dfsan/mmap_at_init.c index a8d7535df4a6c4..9129dc7d39031c 100644 --- a/compiler-rt/test/dfsan/mmap_at_init.c +++ b/compiler-rt/test/dfsan/mmap_at_init.c @@ -4,6 +4,9 @@ // // Tests that calling mmap() during during dfsan initialization works. +// `internal_symbolizer` can not use `realloc` on memory from the test `calloc`. +// UNSUPPORTED: internal_symbolizer + #include #include #include From f33a6dcf959238e82f6ad45333e3547d8cfcfe38 Mon Sep 17 00:00:00 2001 From: Chen Zheng Date: Tue, 2 Apr 2024 08:40:28 +0800 Subject: [PATCH 056/201] [PPC][NFC] add an option for GatherAllAliasesMaxDepth (#87071) GatherAllAliases is time consuming. Add a debug option on PPC to control the complexity of the function. This is useful when debugging compile-time related issues.
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 7436b202fba0d9..43e4a34a9b3483 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -137,6 +137,10 @@ static cl::opt PPCMinimumJumpTableEntries( "ppc-min-jump-table-entries", cl::init(64), cl::Hidden, cl::desc("Set minimum number of entries to use a jump table on PPC")); +static cl::opt PPCGatherAllAliasesMaxDepth( + "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden, + cl::desc("max depth when checking alias info in GatherAllAliases()")); + STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumSiblingCalls, "Number of sibling calls"); STATISTIC(ShufflesHandledWithVPERM, @@ -1512,6 +1516,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, // than the corresponding branch. This information is used in CGP to decide // when to convert selects into branches. 
PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive(); + + GatherAllAliasesMaxDepth = PPCGatherAllAliasesMaxDepth; } // *********************************** NOTE ************************************ From 84f24c2daffc40fc10b4ea2ae69016ebdabfc0ed Mon Sep 17 00:00:00 2001 From: Shih-Po Hung Date: Tue, 2 Apr 2024 09:26:27 +0800 Subject: [PATCH 057/201] [RISCV][TTI] Scale the cost of intrinsic umin/umax/smin/smax with LMUL (#87245) Use the return type to measure the LMUL size for throughput/latency cost --- .../Target/RISCV/RISCVTargetTransformInfo.cpp | 22 +++- .../Analysis/CostModel/RISCV/int-min-max.ll | 120 +++++++++--------- 2 files changed, 80 insertions(+), 62 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index efcaa65605e034..ed4b0ca8c941e3 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -810,9 +810,27 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, case Intrinsic::smin: case Intrinsic::smax: { auto LT = getTypeLegalizationCost(RetTy); - if ((ST->hasVInstructions() && LT.second.isVector()) || - (LT.second.isScalarInteger() && ST->hasStdExtZbb())) + if (LT.second.isScalarInteger() && ST->hasStdExtZbb()) return LT.first; + + if (ST->hasVInstructions() && LT.second.isVector()) { + unsigned Op; + switch (ICA.getID()) { + case Intrinsic::umin: + Op = RISCV::VMINU_VV; + break; + case Intrinsic::umax: + Op = RISCV::VMAXU_VV; + break; + case Intrinsic::smin: + Op = RISCV::VMIN_VV; + break; + case Intrinsic::smax: + Op = RISCV::VMAX_VV; + break; + } + return LT.first * getRISCVInstructionCost(Op, LT.second, CostKind); + } break; } case Intrinsic::sadd_sat: diff --git a/llvm/test/Analysis/CostModel/RISCV/int-min-max.ll b/llvm/test/Analysis/CostModel/RISCV/int-min-max.ll index ec669c986c1503..79cf1c84ed494e 100644 --- a/llvm/test/Analysis/CostModel/RISCV/int-min-max.ll 
+++ b/llvm/test/Analysis/CostModel/RISCV/int-min-max.ll @@ -12,36 +12,36 @@ define void @smax() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.smax.nxv2i8( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.smax.nxv4i8( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.smax.nxv8i8( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = call @llvm.smax.nxv16i8( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call @llvm.smax.nxv16i8( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = call i16 @llvm.smax.i16(i16 undef, i16 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i16> @llvm.smax.v2i16(<2 x i16> undef, <2 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i16> @llvm.smax.v4i16(<4 x i16> undef, <4 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i16> @llvm.smax.v8i16(<8 x i16> undef, <8 x i16> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = call <16 x i16> @llvm.smax.v16i16(<16 x i16> undef, <16 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call @llvm.smax.nxv1i16( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call @llvm.smax.nxv2i16( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call @llvm.smax.nxv4i16( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = call @llvm.smax.nxv8i16( undef, 
undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = call @llvm.smax.nxv16i16( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = call @llvm.smax.nxv8i16( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = call @llvm.smax.nxv16i16( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %21 = call i32 @llvm.smax.i32(i32 undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call <2 x i32> @llvm.smax.v2i32(<2 x i32> undef, <2 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> undef, <4 x i32> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %24 = call <8 x i32> @llvm.smax.v8i32(<8 x i32> undef, <8 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %25 = call <16 x i32> @llvm.smax.v16i32(<16 x i32> undef, <16 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = call @llvm.smax.nxv1i32( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = call @llvm.smax.nxv2i32( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = call @llvm.smax.nxv4i32( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = call @llvm.smax.nxv8i32( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = call @llvm.smax.nxv16i32( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated 
cost of 2 for instruction: %28 = call @llvm.smax.nxv4i32( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %29 = call @llvm.smax.nxv8i32( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %30 = call @llvm.smax.nxv16i32( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %31 = call i64 @llvm.smax.i64(i64 undef, i64 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %32 = call <2 x i64> @llvm.smax.v2i64(<2 x i64> undef, <2 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %34 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %35 = call <16 x i64> @llvm.smax.v16i64(<16 x i64> undef, <16 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %33 = call <4 x i64> @llvm.smax.v4i64(<4 x i64> undef, <4 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %34 = call <8 x i64> @llvm.smax.v8i64(<8 x i64> undef, <8 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %35 = call <16 x i64> @llvm.smax.v16i64(<16 x i64> undef, <16 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = call @llvm.smax.nxv1i64( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = call @llvm.smax.nxv2i64( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = call @llvm.smax.nxv4i64( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %39 = call @llvm.smax.nxv8i64( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%37 = call @llvm.smax.nxv2i64( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %38 = call @llvm.smax.nxv4i64( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %39 = call @llvm.smax.nxv8i64( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; call i8 @llvm.smax.i8(i8 undef, i8 undef) @@ -97,36 +97,36 @@ define void @smin() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.smin.nxv2i8( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.smin.nxv4i8( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.smin.nxv8i8( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = call @llvm.smin.nxv16i8( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call @llvm.smin.nxv16i8( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = call i16 @llvm.smin.i16(i16 undef, i16 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i16> @llvm.smin.v2i16(<2 x i16> undef, <2 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i16> @llvm.smin.v4i16(<4 x i16> undef, <4 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i16> @llvm.smin.v8i16(<8 x i16> undef, <8 x i16> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = call <16 x i16> @llvm.smin.v16i16(<16 x i16> undef, <16 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call @llvm.smin.nxv1i16( undef, undef) 
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call @llvm.smin.nxv2i16( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call @llvm.smin.nxv4i16( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = call @llvm.smin.nxv8i16( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = call @llvm.smin.nxv16i16( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = call @llvm.smin.nxv8i16( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = call @llvm.smin.nxv16i16( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %21 = call i32 @llvm.smin.i32(i32 undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call <2 x i32> @llvm.smin.v2i32(<2 x i32> undef, <2 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %24 = call <8 x i32> @llvm.smin.v8i32(<8 x i32> undef, <8 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %25 = call <16 x i32> @llvm.smin.v16i32(<16 x i32> undef, <16 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = call @llvm.smin.nxv1i32( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = call @llvm.smin.nxv2i32( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: %28 = call @llvm.smin.nxv4i32( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = call @llvm.smin.nxv8i32( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = call @llvm.smin.nxv16i32( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = call @llvm.smin.nxv4i32( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %29 = call @llvm.smin.nxv8i32( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %30 = call @llvm.smin.nxv16i32( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %31 = call i64 @llvm.smin.i64(i64 undef, i64 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %32 = call <2 x i64> @llvm.smin.v2i64(<2 x i64> undef, <2 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %34 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %35 = call <16 x i64> @llvm.smin.v16i64(<16 x i64> undef, <16 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %33 = call <4 x i64> @llvm.smin.v4i64(<4 x i64> undef, <4 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %34 = call <8 x i64> @llvm.smin.v8i64(<8 x i64> undef, <8 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %35 = call <16 x i64> @llvm.smin.v16i64(<16 x i64> undef, <16 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = call @llvm.smin.nxv1i64( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = 
call @llvm.smin.nxv2i64( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = call @llvm.smin.nxv4i64( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %39 = call @llvm.smin.nxv8i64( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %37 = call @llvm.smin.nxv2i64( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %38 = call @llvm.smin.nxv4i64( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %39 = call @llvm.smin.nxv8i64( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; call i8 @llvm.smin.i8(i8 undef, i8 undef) @@ -182,36 +182,36 @@ define void @umax() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.umax.nxv2i8( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.umax.nxv4i8( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.umax.nxv8i8( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = call @llvm.umax.nxv16i8( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call @llvm.umax.nxv16i8( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = call i16 @llvm.umax.i16(i16 undef, i16 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i16> @llvm.umax.v2i16(<2 x i16> undef, <2 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i16> @llvm.umax.v4i16(<4 x i16> undef, <4 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i16> @llvm.umax.v8i16(<8 x i16> undef, <8 x i16> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %15 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = call <16 x i16> @llvm.umax.v16i16(<16 x i16> undef, <16 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call @llvm.umax.nxv1i16( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call @llvm.umax.nxv2i16( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call @llvm.umax.nxv4i16( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = call @llvm.umax.nxv8i16( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = call @llvm.umax.nxv16i16( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = call @llvm.umax.nxv8i16( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = call @llvm.umax.nxv16i16( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %21 = call i32 @llvm.umax.i32(i32 undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call <2 x i32> @llvm.umax.v2i32(<2 x i32> undef, <2 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> undef, <4 x i32> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = call <16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %24 = call <8 x i32> @llvm.umax.v8i32(<8 x i32> undef, <8 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %25 = call 
<16 x i32> @llvm.umax.v16i32(<16 x i32> undef, <16 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = call @llvm.umax.nxv1i32( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = call @llvm.umax.nxv2i32( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = call @llvm.umax.nxv4i32( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = call @llvm.umax.nxv8i32( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = call @llvm.umax.nxv16i32( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = call @llvm.umax.nxv4i32( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %29 = call @llvm.umax.nxv8i32( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %30 = call @llvm.umax.nxv16i32( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %31 = call i64 @llvm.umax.i64(i64 undef, i64 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %32 = call <2 x i64> @llvm.umax.v2i64(<2 x i64> undef, <2 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %34 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %35 = call <16 x i64> @llvm.umax.v16i64(<16 x i64> undef, <16 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %33 = call <4 x i64> @llvm.umax.v4i64(<4 x i64> undef, <4 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %34 = call <8 x i64> @llvm.umax.v8i64(<8 x i64> undef, <8 x i64> 
undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %35 = call <16 x i64> @llvm.umax.v16i64(<16 x i64> undef, <16 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = call @llvm.umax.nxv1i64( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = call @llvm.umax.nxv2i64( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = call @llvm.umax.nxv4i64( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %39 = call @llvm.umax.nxv8i64( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %37 = call @llvm.umax.nxv2i64( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %38 = call @llvm.umax.nxv4i64( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %39 = call @llvm.umax.nxv8i64( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; call i8 @llvm.umax.i8(i8 undef, i8 undef) @@ -267,36 +267,36 @@ define void @umin() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.umin.nxv2i8( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.umin.nxv4i8( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.umin.nxv8i8( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = call @llvm.umin.nxv16i8( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %10 = call @llvm.umin.nxv16i8( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = call i16 @llvm.umin.i16(i16 undef, i16 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i16> @llvm.umin.v2i16(<2 x i16> undef, <2 x i16> undef) 
; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i16> @llvm.umin.v4i16(<4 x i16> undef, <4 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i16> @llvm.umin.v8i16(<8 x i16> undef, <8 x i16> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %15 = call <16 x i16> @llvm.umin.v16i16(<16 x i16> undef, <16 x i16> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call @llvm.umin.nxv1i16( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call @llvm.umin.nxv2i16( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call @llvm.umin.nxv4i16( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = call @llvm.umin.nxv8i16( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = call @llvm.umin.nxv16i16( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %19 = call @llvm.umin.nxv8i16( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %20 = call @llvm.umin.nxv16i16( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %21 = call i32 @llvm.umin.i32(i32 undef, i32 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call <2 x i32> @llvm.umin.v2i32(<2 x i32> undef, <2 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> undef, <4 x i32> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef) -; CHECK-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %25 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %24 = call <8 x i32> @llvm.umin.v8i32(<8 x i32> undef, <8 x i32> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %25 = call <16 x i32> @llvm.umin.v16i32(<16 x i32> undef, <16 x i32> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = call @llvm.umin.nxv1i32( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = call @llvm.umin.nxv2i32( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = call @llvm.umin.nxv4i32( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = call @llvm.umin.nxv8i32( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = call @llvm.umin.nxv16i32( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %28 = call @llvm.umin.nxv4i32( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %29 = call @llvm.umin.nxv8i32( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %30 = call @llvm.umin.nxv16i32( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %31 = call i64 @llvm.umin.i64(i64 undef, i64 undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %32 = call <2 x i64> @llvm.umin.v2i64(<2 x i64> undef, <2 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %34 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %35 = 
call <16 x i64> @llvm.umin.v16i64(<16 x i64> undef, <16 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %33 = call <4 x i64> @llvm.umin.v4i64(<4 x i64> undef, <4 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %34 = call <8 x i64> @llvm.umin.v8i64(<8 x i64> undef, <8 x i64> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %35 = call <16 x i64> @llvm.umin.v16i64(<16 x i64> undef, <16 x i64> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = call @llvm.umin.nxv1i64( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = call @llvm.umin.nxv2i64( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %38 = call @llvm.umin.nxv4i64( undef, undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %39 = call @llvm.umin.nxv8i64( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %37 = call @llvm.umin.nxv2i64( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %38 = call @llvm.umin.nxv4i64( undef, undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %39 = call @llvm.umin.nxv8i64( undef, undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; call i8 @llvm.umin.i8(i8 undef, i8 undef) From 30fd099d5062638b5fe6b89135ad6433a888023a Mon Sep 17 00:00:00 2001 From: Cyndy Ishida Date: Mon, 1 Apr 2024 18:30:23 -0700 Subject: [PATCH 058/201] [InstallAPI] Fixup dsym test (#87299) Update the test to run when the compiler is built to support arm64-darwin targets. 
--- clang/test/InstallAPI/diagnostics-dsym.test | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/clang/test/InstallAPI/diagnostics-dsym.test b/clang/test/InstallAPI/diagnostics-dsym.test index 8a1b394f2f8683..c9cbeffef7bacc 100644 --- a/clang/test/InstallAPI/diagnostics-dsym.test +++ b/clang/test/InstallAPI/diagnostics-dsym.test @@ -1,23 +1,24 @@ -; REQUIRES: 86_64-darwin +; REQUIRES: system-darwin +; REQUIRES: target-aarch64 ; RUN: rm -rf %t ; RUN: split-file %s %t // Build a simple dylib with debug info. -; RUN: %clang --target=x86_64-apple-macos10.15 -g -dynamiclib %t/foo.c \ +; RUN: %clang --target=arm64-apple-macos11 -g -dynamiclib %t/foo.c \ ; RUN: -current_version 1 -compatibility_version 1 -L%t/usr/lib \ ; RUN: -save-temps \ ; RUN: -o %t/foo.dylib -install_name %t/foo.dylib ; RUN: dsymutil %t/foo.dylib -o %t/foo.dSYM -; RUN: not clang-installapi -x c++ --target=x86_64-apple-macos10.15 \ +; RUN: not clang-installapi -x c++ --target=arm64-apple-macos11 \ ; RUN: -install_name %t/foo.dylib \ ; RUN: -current_version 1 -compatibility_version 1 \ ; RUN: -o %t/output.tbd \ ; RUN: --verify-against=%t/foo.dylib --dsym=%t/foo.dSYM \ ; RUN: --verify-mode=Pedantic 2>&1 | FileCheck %s -; CHECK: violations found for x86_64 +; CHECK: violations found for arm64 ; CHECK: foo.c:5:0: error: no declaration found for exported symbol 'bar' in dynamic library ; CHECK: foo.c:1:0: error: no declaration found for exported symbol 'foo' in dynamic library @@ -31,9 +32,9 @@ char bar = 'a'; ;--- usr/lib/libSystem.tbd --- !tapi-tbd tbd-version: 4 -targets: [ x86_64-macos ] +targets: [ arm64-macos ] install-name: '/usr/lib/libSystem.B.dylib' exports: - - targets: [ x86_64-macos ] + - targets: [ arm64-macos ] symbols: [ dyld_stub_binder ] ... 
From d7a43a00fe80007de5d7614576b180d3d21d541b Mon Sep 17 00:00:00 2001 From: Shih-Po Hung Date: Tue, 2 Apr 2024 09:30:51 +0800 Subject: [PATCH 059/201] [RISCV][TTI] Scale the cost of trunc/fptrunc/fpext with LMUL (#87101) Use the destination data type to measure the LMUL size for latency/throughput cost --- .../Target/RISCV/RISCVTargetTransformInfo.cpp | 25 +- llvm/test/Analysis/CostModel/RISCV/cast.ll | 454 +++++++++--------- .../CostModel/RISCV/reduce-scalable-fp.ll | 12 +- .../CostModel/RISCV/rvv-insertelement.ll | 84 ++-- .../CostModel/RISCV/shuffle-broadcast.ll | 2 +- 5 files changed, 298 insertions(+), 279 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index ed4b0ca8c941e3..38304ff90252f0 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -927,6 +927,7 @@ InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, if (!IsTypeLegal) return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); + std::pair SrcLT = getTypeLegalizationCost(Src); std::pair DstLT = getTypeLegalizationCost(Dst); int ISD = TLI->InstructionOpcodeToISD(Opcode); @@ -961,13 +962,31 @@ InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, // Instead we use the following instructions to truncate to mask vector: // vand.vi v8, v8, 1 // vmsne.vi v0, v8, 0 - return 2; + return getRISCVInstructionCost({RISCV::VAND_VI, RISCV::VMSNE_VI}, + SrcLT.second, CostKind); } [[fallthrough]]; case ISD::FP_EXTEND: - case ISD::FP_ROUND: + case ISD::FP_ROUND: { // Counts of narrow/widen instructions. - return std::abs(PowDiff); + unsigned SrcEltSize = Src->getScalarSizeInBits(); + unsigned DstEltSize = Dst->getScalarSizeInBits(); + + unsigned Op = (ISD == ISD::TRUNCATE) ? RISCV::VNSRL_WI + : (ISD == ISD::FP_EXTEND) ? 
RISCV::VFWCVT_F_F_V + : RISCV::VFNCVT_F_F_W; + InstructionCost Cost = 0; + for (; SrcEltSize != DstEltSize;) { + MVT ElementMVT = (ISD == ISD::TRUNCATE) + ? MVT::getIntegerVT(DstEltSize) + : MVT::getFloatingPointVT(DstEltSize); + MVT DstMVT = DstLT.second.changeVectorElementType(ElementMVT); + DstEltSize = + (DstEltSize > SrcEltSize) ? DstEltSize >> 1 : DstEltSize << 1; + Cost += getRISCVInstructionCost(Op, DstMVT, CostKind); + } + return Cost; + } case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::SINT_TO_FP: diff --git a/llvm/test/Analysis/CostModel/RISCV/cast.ll b/llvm/test/Analysis/CostModel/RISCV/cast.ll index 14da9a3f79d771..6ddd57a24c51f5 100644 --- a/llvm/test/Analysis/CostModel/RISCV/cast.ll +++ b/llvm/test/Analysis/CostModel/RISCV/cast.ll @@ -1035,17 +1035,17 @@ define void @trunc() { ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4i1 = trunc <4 x i8> undef to <4 x i1> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_v4i1 = trunc <4 x i16> undef to <4 x i1> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_v4i1 = trunc <4 x i32> undef to <4 x i1> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_v4i1 = trunc <4 x i64> undef to <4 x i1> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i64_v4i1 = trunc <4 x i64> undef to <4 x i1> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8i8 = trunc <8 x i16> undef to <8 x i8> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_v8i8 = trunc <8 x i32> undef to <8 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i64_v8i8 = trunc <8 x i64> undef to <8 x i8> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64_v8i8 = trunc <8 x i64> undef to <8 x i8> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_v8i16 = trunc <8 x i32> undef to <8 x 
i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_v8i16 = trunc <8 x i64> undef to <8 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_v8i32 = trunc <8 x i64> undef to <8 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i64_v8i16 = trunc <8 x i64> undef to <8 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_v8i32 = trunc <8 x i64> undef to <8 x i32> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_v8i1 = trunc <8 x i8> undef to <8 x i1> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16_v8i1 = trunc <8 x i16> undef to <8 x i1> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_v8i1 = trunc <8 x i32> undef to <8 x i1> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_v8i1 = trunc <8 x i64> undef to <8 x i1> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i32_v8i1 = trunc <8 x i32> undef to <8 x i1> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i64_v8i1 = trunc <8 x i64> undef to <8 x i1> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16i8 = trunc <2 x i16> undef to <2 x i8> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_v16i8 = trunc <2 x i32> undef to <2 x i8> ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i64_v16i8 = trunc <2 x i64> undef to <2 x i8> @@ -1057,44 +1057,44 @@ define void @trunc() { ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_v16i1 = trunc <2 x i32> undef to <2 x i1> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i64_v16i1 = trunc <2 x i64> undef to <2 x i1> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_v32i8 = trunc <16 x i16> undef to <16 x i8> -; RV32-NEXT: 
Cost Model: Found an estimated cost of 2 for instruction: %v32i32_v32i8 = trunc <16 x i32> undef to <16 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i64_v32i8 = trunc <16 x i64> undef to <16 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i32_v32i16 = trunc <16 x i32> undef to <16 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i64_v32i16 = trunc <16 x i64> undef to <16 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i64_v32i32 = trunc <16 x i64> undef to <16 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i32_v32i8 = trunc <16 x i32> undef to <16 x i8> +; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32i64_v32i8 = trunc <16 x i64> undef to <16 x i8> +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i32_v32i16 = trunc <16 x i32> undef to <16 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v32i64_v32i16 = trunc <16 x i64> undef to <16 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i64_v32i32 = trunc <16 x i64> undef to <16 x i32> ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_v32i1 = trunc <16 x i8> undef to <16 x i1> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_v32i1 = trunc <16 x i16> undef to <16 x i1> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i32_v32i1 = trunc <16 x i32> undef to <16 x i1> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i64_v32i1 = trunc <16 x i64> undef to <16 x i1> -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i16_v64i8 = trunc <64 x i16> undef to <64 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v64i32_v64i8 = trunc <64 x i32> undef to <64 x i8> -; RV32-NEXT: Cost 
Model: Found an estimated cost of 15 for instruction: %v64i64_v64i8 = trunc <64 x i64> undef to <64 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i32_v64i16 = trunc <64 x i32> undef to <64 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i64_v64i16 = trunc <64 x i64> undef to <64 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v64i64_v64i32 = trunc <64 x i64> undef to <64 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_v64i1 = trunc <64 x i8> undef to <64 x i1> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i16_v64i1 = trunc <64 x i16> undef to <64 x i1> -; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v64i32_v64i1 = trunc <64 x i32> undef to <64 x i1> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i16_v32i1 = trunc <16 x i16> undef to <16 x i1> +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i32_v32i1 = trunc <16 x i32> undef to <16 x i1> +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i64_v32i1 = trunc <16 x i64> undef to <16 x i1> +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i16_v64i8 = trunc <64 x i16> undef to <64 x i8> +; RV32-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v64i32_v64i8 = trunc <64 x i32> undef to <64 x i8> +; RV32-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v64i64_v64i8 = trunc <64 x i64> undef to <64 x i8> +; RV32-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v64i32_v64i16 = trunc <64 x i32> undef to <64 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v64i64_v64i16 = trunc <64 x i64> undef to <64 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v64i64_v64i32 = trunc <64 x i64> undef to <64 x i32> +; RV32-NEXT: 
Cost Model: Found an estimated cost of 8 for instruction: %v64i8_v64i1 = trunc <64 x i8> undef to <64 x i1> +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i16_v64i1 = trunc <64 x i16> undef to <64 x i1> +; RV32-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v64i32_v64i1 = trunc <64 x i32> undef to <64 x i1> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i64_v64i1 = trunc <64 x i64> undef to <64 x i1> -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128i16_v128i8 = trunc <128 x i16> undef to <128 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v128i32_v128i8 = trunc <128 x i32> undef to <128 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v128i64_v128i8 = trunc <128 x i64> undef to <128 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128i32_v128i16 = trunc <128 x i32> undef to <128 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128i64_v128i16 = trunc <128 x i64> undef to <128 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v128i64_v128i32 = trunc <128 x i64> undef to <128 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128i8_v128i1 = trunc <128 x i8> undef to <128 x i1> -; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v128i16_v128i1 = trunc <128 x i16> undef to <128 x i1> -; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v128i32_v128i1 = trunc <128 x i32> undef to <128 x i1> +; RV32-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v128i16_v128i8 = trunc <128 x i16> undef to <128 x i8> +; RV32-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v128i32_v128i8 = trunc <128 x i32> undef to <128 x i8> +; RV32-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %v128i64_v128i8 = trunc 
<128 x i64> undef to <128 x i8> +; RV32-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v128i32_v128i16 = trunc <128 x i32> undef to <128 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v128i64_v128i16 = trunc <128 x i64> undef to <128 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v128i64_v128i32 = trunc <128 x i64> undef to <128 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v128i8_v128i1 = trunc <128 x i8> undef to <128 x i1> +; RV32-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v128i16_v128i1 = trunc <128 x i16> undef to <128 x i1> +; RV32-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %v128i32_v128i1 = trunc <128 x i32> undef to <128 x i1> ; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128i64_v128i1 = trunc <128 x i64> undef to <128 x i1> -; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v256i16_v256i8 = trunc <256 x i16> undef to <256 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v256i32_v256i8 = trunc <256 x i32> undef to <256 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %v256i64_v256i8 = trunc <256 x i64> undef to <256 x i8> -; RV32-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v256i32_v256i16 = trunc <256 x i32> undef to <256 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v256i64_v256i16 = trunc <256 x i64> undef to <256 x i16> -; RV32-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v256i64_v256i32 = trunc <256 x i64> undef to <256 x i32> -; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v256i8_v256i1 = trunc <256 x i8> undef to <256 x i1> -; RV32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v256i16_v256i1 = trunc <256 x i16> undef to <256 x i1> -; RV32-NEXT: Cost Model: 
Found an estimated cost of 22 for instruction: %v256i32_v256i1 = trunc <256 x i32> undef to <256 x i1> +; RV32-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v256i16_v256i8 = trunc <256 x i16> undef to <256 x i8> +; RV32-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v256i32_v256i8 = trunc <256 x i32> undef to <256 x i8> +; RV32-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %v256i64_v256i8 = trunc <256 x i64> undef to <256 x i8> +; RV32-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v256i32_v256i16 = trunc <256 x i32> undef to <256 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %v256i64_v256i16 = trunc <256 x i64> undef to <256 x i16> +; RV32-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v256i64_v256i32 = trunc <256 x i64> undef to <256 x i32> +; RV32-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v256i8_v256i1 = trunc <256 x i8> undef to <256 x i1> +; RV32-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v256i16_v256i1 = trunc <256 x i16> undef to <256 x i1> +; RV32-NEXT: Cost Model: Found an estimated cost of 134 for instruction: %v256i32_v256i1 = trunc <256 x i32> undef to <256 x i1> ; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v256i64_v256i1 = trunc <256 x i64> undef to <256 x i1> ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16_nxv1i8 = trunc undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1i32_nxv1i8 = trunc undef to @@ -1115,56 +1115,56 @@ define void @trunc() { ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_nxv2i1 = trunc undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i16_nxv2i1 = trunc undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i32_nxv2i1 = trunc undef to -; RV32-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %nxv2i64_nxv2i1 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv2i64_nxv2i1 = trunc undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4i8 = trunc undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i32_nxv4i8 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4i64_nxv4i8 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i64_nxv4i8 = trunc undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32_nxv4i16 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64_nxv4i16 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i64_nxv4i32 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4i64_nxv4i16 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64_nxv4i32 = trunc undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i8_nxv4i1 = trunc undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i16_nxv4i1 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i32_nxv4i1 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64_nxv4i1 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i32_nxv4i1 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv4i64_nxv4i1 = trunc undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8i8 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i32_nxv8i8 = trunc undef to -; RV32-NEXT: Cost Model: Found an 
estimated cost of 3 for instruction: %nxv8i64_nxv8i8 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32_nxv8i16 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i64_nxv8i16 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i64_nxv8i32 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8i32_nxv8i8 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv8i64_nxv8i8 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i32_nxv8i16 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv8i64_nxv8i16 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i64_nxv8i32 = trunc undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i8_nxv8i1 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i16_nxv8i1 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i32_nxv8i1 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i64_nxv8i1 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16_nxv16i8 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i32_nxv16i8 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv16i64_nxv16i8 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i32_nxv16i16 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i64_nxv16i16 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i64_nxv16i32 = trunc undef to -; RV32-NEXT: Cost 
Model: Found an estimated cost of 2 for instruction: %nxv16i8_nxv16i1 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i16_nxv16i1 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i32_nxv16i1 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i64_nxv16i1 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i16_nxv32i8 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv32i32_nxv32i8 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv32i64_nxv32i8 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i32_nxv32i16 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i64_nxv32i16 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32i64_nxv32i32 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv32i8_nxv32i1 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv32i16_nxv32i1 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv32i32_nxv32i1 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i64_nxv32i1 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv64i16_nxv64i8 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv64i32_nxv64i8 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i16_nxv8i1 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i32_nxv8i1 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv8i64_nxv8i1 = 
trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i16_nxv16i8 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv16i32_nxv16i8 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv16i64_nxv16i8 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i32_nxv16i16 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %nxv16i64_nxv16i16 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv16i64_nxv16i32 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i8_nxv16i1 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i16_nxv16i1 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv16i32_nxv16i1 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %nxv16i64_nxv16i1 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv32i16_nxv32i8 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %nxv32i32_nxv32i8 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %nxv32i64_nxv32i8 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv32i32_nxv32i16 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %nxv32i64_nxv32i16 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %nxv32i64_nxv32i32 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv32i8_nxv32i1 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv32i16_nxv32i1 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated 
cost of 33 for instruction: %nxv32i32_nxv32i1 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %nxv32i64_nxv32i1 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv64i16_nxv64i8 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %nxv64i32_nxv64i8 = trunc undef to ; RV32-NEXT: Cost Model: Invalid cost for instruction: %nxv64i64_nxv64i8 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64i32_nxv64i16 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %nxv64i64_nxv64i16 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %nxv64i64_nxv64i32 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv64i8_nxv64i1 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv64i16_nxv64i1 = trunc undef to -; RV32-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv64i32_nxv64i1 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %nxv64i32_nxv64i16 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %nxv64i64_nxv64i16 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 37 for instruction: %nxv64i64_nxv64i32 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv64i8_nxv64i1 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %nxv64i16_nxv64i1 = trunc undef to +; RV32-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %nxv64i32_nxv64i1 = trunc undef to ; RV32-NEXT: Cost Model: Invalid cost for instruction: %nxv64i64_nxv64i1 = trunc undef to ; RV32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; @@ -1188,17 +1188,17 @@ define void @trunc() { ; 
RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_v4i1 = trunc <4 x i8> undef to <4 x i1> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i16_v4i1 = trunc <4 x i16> undef to <4 x i1> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i32_v4i1 = trunc <4 x i32> undef to <4 x i1> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i64_v4i1 = trunc <4 x i64> undef to <4 x i1> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4i64_v4i1 = trunc <4 x i64> undef to <4 x i1> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i16_v8i8 = trunc <8 x i16> undef to <8 x i8> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_v8i8 = trunc <8 x i32> undef to <8 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i64_v8i8 = trunc <8 x i64> undef to <8 x i8> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i64_v8i8 = trunc <8 x i64> undef to <8 x i8> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i32_v8i16 = trunc <8 x i32> undef to <8 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_v8i16 = trunc <8 x i64> undef to <8 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i64_v8i32 = trunc <8 x i64> undef to <8 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i64_v8i16 = trunc <8 x i64> undef to <8 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_v8i32 = trunc <8 x i64> undef to <8 x i32> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_v8i1 = trunc <8 x i8> undef to <8 x i1> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i16_v8i1 = trunc <8 x i16> undef to <8 x i1> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i32_v8i1 
= trunc <8 x i32> undef to <8 x i1> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i64_v8i1 = trunc <8 x i64> undef to <8 x i1> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8i32_v8i1 = trunc <8 x i32> undef to <8 x i1> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8i64_v8i1 = trunc <8 x i64> undef to <8 x i1> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16i16_v16i8 = trunc <2 x i16> undef to <2 x i8> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_v16i8 = trunc <2 x i32> undef to <2 x i8> ; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16i64_v16i8 = trunc <2 x i64> undef to <2 x i8> @@ -1210,43 +1210,43 @@ define void @trunc() { ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i32_v16i1 = trunc <2 x i32> undef to <2 x i1> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16i64_v16i1 = trunc <2 x i64> undef to <2 x i1> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i16_v32i8 = trunc <16 x i16> undef to <16 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i32_v32i8 = trunc <16 x i32> undef to <16 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i64_v32i8 = trunc <16 x i64> undef to <16 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i32_v32i16 = trunc <16 x i32> undef to <16 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i64_v32i16 = trunc <16 x i64> undef to <16 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32i64_v32i32 = trunc <16 x i64> undef to <16 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32i32_v32i8 = trunc <16 x i32> undef to <16 x i8> +; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: 
%v32i64_v32i8 = trunc <16 x i64> undef to <16 x i8> +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i32_v32i16 = trunc <16 x i32> undef to <16 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v32i64_v32i16 = trunc <16 x i64> undef to <16 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32i64_v32i32 = trunc <16 x i64> undef to <16 x i32> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i8_v32i1 = trunc <16 x i8> undef to <16 x i1> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i16_v32i1 = trunc <16 x i16> undef to <16 x i1> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i32_v32i1 = trunc <16 x i32> undef to <16 x i1> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v32i64_v32i1 = trunc <16 x i64> undef to <16 x i1> -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v64i16_v64i8 = trunc <64 x i16> undef to <64 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v64i32_v64i8 = trunc <64 x i32> undef to <64 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v64i64_v64i8 = trunc <64 x i64> undef to <64 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64i32_v64i16 = trunc <64 x i32> undef to <64 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64i64_v64i16 = trunc <64 x i64> undef to <64 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v64i64_v64i32 = trunc <64 x i64> undef to <64 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i8_v64i1 = trunc <64 x i8> undef to <64 x i1> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v64i16_v64i1 = trunc <64 x i16> undef to <64 x i1> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: 
%v32i16_v32i1 = trunc <16 x i16> undef to <16 x i1> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i32_v32i1 = trunc <16 x i32> undef to <16 x i1> +; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v32i64_v32i1 = trunc <16 x i64> undef to <16 x i1> +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v64i16_v64i8 = trunc <64 x i16> undef to <64 x i8> +; RV64-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v64i32_v64i8 = trunc <64 x i32> undef to <64 x i8> +; RV64-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v64i64_v64i8 = trunc <64 x i64> undef to <64 x i8> +; RV64-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v64i32_v64i16 = trunc <64 x i32> undef to <64 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v64i64_v64i16 = trunc <64 x i64> undef to <64 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v64i64_v64i32 = trunc <64 x i64> undef to <64 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v64i8_v64i1 = trunc <64 x i8> undef to <64 x i1> +; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v64i16_v64i1 = trunc <64 x i16> undef to <64 x i1> ; RV64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v64i32_v64i1 = trunc <64 x i32> undef to <64 x i1> ; RV64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v64i64_v64i1 = trunc <64 x i64> undef to <64 x i1> -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v128i16_v128i8 = trunc <128 x i16> undef to <128 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v128i32_v128i8 = trunc <128 x i32> undef to <128 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %v128i64_v128i8 = trunc <128 x i64> undef to <128 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 6 for 
instruction: %v128i32_v128i16 = trunc <128 x i32> undef to <128 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128i64_v128i16 = trunc <128 x i64> undef to <128 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v128i64_v128i32 = trunc <128 x i64> undef to <128 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v128i8_v128i1 = trunc <128 x i8> undef to <128 x i1> -; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v128i16_v128i1 = trunc <128 x i16> undef to <128 x i1> +; RV64-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v128i16_v128i8 = trunc <128 x i16> undef to <128 x i8> +; RV64-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v128i32_v128i8 = trunc <128 x i32> undef to <128 x i8> +; RV64-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %v128i64_v128i8 = trunc <128 x i64> undef to <128 x i8> +; RV64-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v128i32_v128i16 = trunc <128 x i32> undef to <128 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v128i64_v128i16 = trunc <128 x i64> undef to <128 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v128i64_v128i32 = trunc <128 x i64> undef to <128 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v128i8_v128i1 = trunc <128 x i8> undef to <128 x i1> +; RV64-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %v128i16_v128i1 = trunc <128 x i16> undef to <128 x i1> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v128i32_v128i1 = trunc <128 x i32> undef to <128 x i1> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v128i64_v128i1 = trunc <128 x i64> undef to <128 x i1> -; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v256i16_v256i8 = trunc <256 x i16> undef to <256 x 
i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v256i32_v256i8 = trunc <256 x i32> undef to <256 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %v256i64_v256i8 = trunc <256 x i64> undef to <256 x i8> -; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v256i32_v256i16 = trunc <256 x i32> undef to <256 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v256i64_v256i16 = trunc <256 x i64> undef to <256 x i16> -; RV64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v256i64_v256i32 = trunc <256 x i64> undef to <256 x i32> -; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v256i8_v256i1 = trunc <256 x i8> undef to <256 x i1> -; RV64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v256i16_v256i1 = trunc <256 x i16> undef to <256 x i1> +; RV64-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v256i16_v256i8 = trunc <256 x i16> undef to <256 x i8> +; RV64-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v256i32_v256i8 = trunc <256 x i32> undef to <256 x i8> +; RV64-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %v256i64_v256i8 = trunc <256 x i64> undef to <256 x i8> +; RV64-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v256i32_v256i16 = trunc <256 x i32> undef to <256 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %v256i64_v256i16 = trunc <256 x i64> undef to <256 x i16> +; RV64-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v256i64_v256i32 = trunc <256 x i64> undef to <256 x i32> +; RV64-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v256i8_v256i1 = trunc <256 x i8> undef to <256 x i1> +; RV64-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %v256i16_v256i1 = trunc <256 x i16> undef to <256 x i1> ; RV64-NEXT: Cost Model: Found an estimated cost of 
2 for instruction: %v256i32_v256i1 = trunc <256 x i32> undef to <256 x i1> ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v256i64_v256i1 = trunc <256 x i64> undef to <256 x i1> ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1i16_nxv1i8 = trunc undef to @@ -1268,57 +1268,57 @@ define void @trunc() { ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i8_nxv2i1 = trunc undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i16_nxv2i1 = trunc undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i32_nxv2i1 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2i64_nxv2i1 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv2i64_nxv2i1 = trunc undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i16_nxv4i8 = trunc undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i32_nxv4i8 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4i64_nxv4i8 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i64_nxv4i8 = trunc undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i32_nxv4i16 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64_nxv4i16 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4i64_nxv4i32 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4i64_nxv4i16 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64_nxv4i32 = trunc undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i8_nxv4i1 = trunc undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%nxv4i16_nxv4i1 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i32_nxv4i1 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4i64_nxv4i1 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4i32_nxv4i1 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv4i64_nxv4i1 = trunc undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i16_nxv8i8 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i32_nxv8i8 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8i64_nxv8i8 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i32_nxv8i16 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i64_nxv8i16 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8i64_nxv8i32 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv8i32_nxv8i8 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv8i64_nxv8i8 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i32_nxv8i16 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv8i64_nxv8i16 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i64_nxv8i32 = trunc undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i8_nxv8i1 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i16_nxv8i1 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8i32_nxv8i1 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%nxv8i64_nxv8i1 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i16_nxv16i8 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i32_nxv16i8 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv16i64_nxv16i8 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16i32_nxv16i16 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i64_nxv16i16 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16i64_nxv16i32 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i8_nxv16i1 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i16_nxv16i1 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i32_nxv16i1 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16i64_nxv16i1 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv32i16_nxv32i8 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv32i32_nxv32i8 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv32i64_nxv32i8 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32i32_nxv32i16 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i64_nxv32i16 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32i64_nxv32i32 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv32i8_nxv32i1 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv32i16_nxv32i1 = trunc undef to -; RV64-NEXT: Cost Model: Found an 
estimated cost of 5 for instruction: %nxv32i32_nxv32i1 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i64_nxv32i1 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv64i16_nxv64i8 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv64i32_nxv64i8 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %nxv64i64_nxv64i8 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64i32_nxv64i16 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %nxv64i64_nxv64i16 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv64i64_nxv64i32 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv64i8_nxv64i1 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv64i16_nxv64i1 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv64i32_nxv64i1 = trunc undef to -; RV64-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %nxv64i64_nxv64i1 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8i16_nxv8i1 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8i32_nxv8i1 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv8i64_nxv8i1 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv16i16_nxv16i8 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv16i32_nxv16i8 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %nxv16i64_nxv16i8 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i32_nxv16i16 = trunc 
undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %nxv16i64_nxv16i16 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv16i64_nxv16i32 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16i8_nxv16i1 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i16_nxv16i1 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv16i32_nxv16i1 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %nxv16i64_nxv16i1 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv32i16_nxv32i8 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %nxv32i32_nxv32i8 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %nxv32i64_nxv32i8 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv32i32_nxv32i16 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %nxv32i64_nxv32i16 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %nxv32i64_nxv32i32 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv32i8_nxv32i1 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv32i16_nxv32i1 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %nxv32i32_nxv32i1 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %nxv32i64_nxv32i1 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv64i16_nxv64i8 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %nxv64i32_nxv64i8 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost 
of 63 for instruction: %nxv64i64_nxv64i8 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %nxv64i32_nxv64i16 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %nxv64i64_nxv64i16 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %nxv64i64_nxv64i32 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %nxv64i8_nxv64i1 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %nxv64i16_nxv64i1 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %nxv64i32_nxv64i1 = trunc undef to +; RV64-NEXT: Cost Model: Found an estimated cost of 135 for instruction: %nxv64i64_nxv64i1 = trunc undef to ; RV64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %v2i16_v2i8 = trunc <2 x i16> undef to <2 x i8> @@ -1495,44 +1495,44 @@ define void @fpext() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16_v2f64 = fpext <2 x half> undef to <2 x double> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_v2f64 = fpext <2 x float> undef to <2 x double> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16_v4f32 = fpext <4 x half> undef to <4 x float> -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16_v4f64 = fpext <4 x half> undef to <4 x double> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_v4f64 = fpext <4 x float> undef to <4 x double> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16_v8f32 = fpext <8 x half> undef to <8 x float> -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16_v8f64 = fpext <8 x half> undef to <8 x double> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_v8f64 = fpext <8 x float> undef to <8 x 
double> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f16_v16f32 = fpext <16 x half> undef to <16 x float> -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16_v16f64 = fpext <16 x half> undef to <16 x double> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_v16f64 = fpext <16 x float> undef to <16 x double> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f16_v32f32 = fpext <32 x half> undef to <32 x float> -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32f16_v32f64 = fpext <32 x half> undef to <32 x double> -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32f32_v32f64 = fpext <32 x float> undef to <32 x double> -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64f16_v64f32 = fpext <64 x half> undef to <64 x float> -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64f16_v64f64 = fpext <64 x half> undef to <64 x double> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v64f32_v64f64 = fpext <64 x float> undef to <64 x double> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v128f16_v128f32 = fpext <128 x half> undef to <128 x float> -; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128f16_v128f64 = fpext <128 x half> undef to <128 x double> -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v128f32_v128f64 = fpext <128 x float> undef to <128 x double> +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f16_v4f64 = fpext <4 x half> undef to <4 x double> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32_v4f64 = fpext <4 x float> undef to <4 x double> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f16_v8f32 = fpext <8 x half> undef to <8 x float> +; CHECK-NEXT: Cost 
Model: Found an estimated cost of 6 for instruction: %v8f16_v8f64 = fpext <8 x half> undef to <8 x double> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32_v8f64 = fpext <8 x float> undef to <8 x double> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16f16_v16f32 = fpext <16 x half> undef to <16 x float> +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16f16_v16f64 = fpext <16 x half> undef to <16 x double> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f32_v16f64 = fpext <16 x float> undef to <16 x double> +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32f16_v32f32 = fpext <32 x half> undef to <32 x float> +; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %v32f16_v32f64 = fpext <32 x half> undef to <32 x double> +; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v32f32_v32f64 = fpext <32 x float> undef to <32 x double> +; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v64f16_v64f32 = fpext <64 x half> undef to <64 x float> +; CHECK-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %v64f16_v64f64 = fpext <64 x half> undef to <64 x double> +; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v64f32_v64f64 = fpext <64 x float> undef to <64 x double> +; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v128f16_v128f32 = fpext <128 x half> undef to <128 x float> +; CHECK-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %v128f16_v128f64 = fpext <128 x half> undef to <128 x double> +; CHECK-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v128f32_v128f64 = fpext <128 x float> undef to <128 x double> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f16_nxv1f32 = fpext undef to ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %nxv1f16_nxv1f64 = fpext undef to ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f32_nxv1f64 = fpext undef to ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f16_nxv2f32 = fpext undef to -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2f16_nxv2f64 = fpext undef to -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f32_nxv2f64 = fpext undef to -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f16_nxv4f32 = fpext undef to -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f16_nxv4f64 = fpext undef to -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32_nxv4f64 = fpext undef to -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f16_nxv8f32 = fpext undef to -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8f16_nxv8f64 = fpext undef to -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f32_nxv8f64 = fpext undef to -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f16_nxv16f32 = fpext undef to -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16f16_nxv16f64 = fpext undef to -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16f32_nxv16f64 = fpext undef to -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32f16_nxv32f32 = fpext undef to -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32f16_nxv32f64 = fpext undef to -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32f32_nxv32f64 = fpext undef to -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64f16_nxv64f32 = fpext undef to -; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %nxv64f16_nxv64f64 = fpext undef to -; 
CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv64f32_nxv64f64 = fpext undef to +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv2f16_nxv2f64 = fpext undef to +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2f32_nxv2f64 = fpext undef to +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f16_nxv4f32 = fpext undef to +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv4f16_nxv4f64 = fpext undef to +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv4f32_nxv4f64 = fpext undef to +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8f16_nxv8f32 = fpext undef to +; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv8f16_nxv8f64 = fpext undef to +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv8f32_nxv8f64 = fpext undef to +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16f16_nxv16f32 = fpext undef to +; CHECK-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %nxv16f16_nxv16f64 = fpext undef to +; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv16f32_nxv16f64 = fpext undef to +; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv32f16_nxv32f32 = fpext undef to +; CHECK-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %nxv32f16_nxv32f64 = fpext undef to +; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %nxv32f32_nxv32f64 = fpext undef to +; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %nxv64f16_nxv64f32 = fpext undef to +; CHECK-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %nxv64f16_nxv64f64 = fpext undef to +; CHECK-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %nxv64f32_nxv64f64 = fpext undef to ; CHECK-NEXT: Cost Model: Found an estimated 
cost of 0 for instruction: ret void ; %v2f16_v2f32 = fpext <2 x half> undef to <2 x float> @@ -1603,20 +1603,20 @@ define void @fptrunc() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f64_v4f16 = fptrunc <4 x double> undef to <4 x half> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_v4f32 = fptrunc <4 x double> undef to <4 x float> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_v8f16 = fptrunc <8 x float> undef to <8 x half> -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f64_v8f16 = fptrunc <8 x double> undef to <8 x half> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_v8f32 = fptrunc <8 x double> undef to <8 x float> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_v16f16 = fptrunc <16 x float> undef to <16 x half> -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f64_v16f16 = fptrunc <16 x double> undef to <16 x half> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f64_v16f32 = fptrunc <16 x double> undef to <16 x float> -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v32f32_v32f16 = fptrunc <32 x float> undef to <32 x half> -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v32f64_v32f16 = fptrunc <32 x double> undef to <32 x half> -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v32f64_v32f32 = fptrunc <32 x double> undef to <32 x float> -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v64f32_v64f16 = fptrunc <64 x float> undef to <64 x half> -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v64f64_v64f16 = fptrunc <64 x double> undef to <64 x half> -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v64f64_v64f32 = fptrunc <64 x double> undef to <64 x float> -; CHECK-NEXT: 
Cost Model: Found an estimated cost of 6 for instruction: %v128f32_v128f16 = fptrunc <128 x float> undef to <128 x half> -; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v128f64_v128f16 = fptrunc <128 x double> undef to <128 x half> -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v128f64_v128f32 = fptrunc <128 x double> undef to <128 x float> +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f64_v8f16 = fptrunc <8 x double> undef to <8 x half> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f64_v8f32 = fptrunc <8 x double> undef to <8 x float> +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_v16f16 = fptrunc <16 x float> undef to <16 x half> +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16f64_v16f16 = fptrunc <16 x double> undef to <16 x half> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16f64_v16f32 = fptrunc <16 x double> undef to <16 x float> +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v32f32_v32f16 = fptrunc <32 x float> undef to <32 x half> +; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v32f64_v32f16 = fptrunc <32 x double> undef to <32 x half> +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v32f64_v32f32 = fptrunc <32 x double> undef to <32 x float> +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v64f32_v64f16 = fptrunc <64 x float> undef to <64 x half> +; CHECK-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v64f64_v64f16 = fptrunc <64 x double> undef to <64 x half> +; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v64f64_v64f32 = fptrunc <64 x double> undef to <64 x float> +; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v128f32_v128f16 = fptrunc <128 x float> undef to <128 x half> +; 
CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %v128f64_v128f16 = fptrunc <128 x double> undef to <128 x half> +; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v128f64_v128f32 = fptrunc <128 x double> undef to <128 x float> ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f32_nxv1f16 = fptrunc undef to ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv1f64_nxv1f16 = fptrunc undef to ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv1f64_nxv1f32 = fptrunc undef to @@ -1624,20 +1624,20 @@ define void @fptrunc() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv2f64_nxv1f16 = fptrunc undef to ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv2f64_nxv1f32 = fptrunc undef to ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f32_nxv4f16 = fptrunc undef to -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f64_nxv4f16 = fptrunc undef to -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv4f64_nxv4f32 = fptrunc undef to -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f32_nxv8f16 = fptrunc undef to -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8f64_nxv8f16 = fptrunc undef to -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv8f64_nxv8f32 = fptrunc undef to -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %nxv16f32_nxv16f16 = fptrunc undef to -; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv16f64_nxv16f16 = fptrunc undef to -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv16f64_nxv16f32 = fptrunc undef to -; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv32f32_nxv32f16 = fptrunc undef to -; CHECK-NEXT: Cost Model: Found an 
estimated cost of 11 for instruction: %nxv32f64_nxv32f16 = fptrunc undef to -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv32f64_nxv32f32 = fptrunc undef to -; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv64f32_nxv64f16 = fptrunc undef to -; CHECK-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %nxv64f64_nxv64f16 = fptrunc undef to -; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv64f64_nxv64f32 = fptrunc undef to +; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %nxv4f64_nxv4f16 = fptrunc undef to +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv4f64_nxv4f32 = fptrunc undef to +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %nxv8f32_nxv8f16 = fptrunc undef to +; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv8f64_nxv8f16 = fptrunc undef to +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv8f64_nxv8f32 = fptrunc undef to +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %nxv16f32_nxv16f16 = fptrunc undef to +; CHECK-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %nxv16f64_nxv16f16 = fptrunc undef to +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv16f64_nxv16f32 = fptrunc undef to +; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv32f32_nxv32f16 = fptrunc undef to +; CHECK-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %nxv32f64_nxv32f16 = fptrunc undef to +; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %nxv32f64_nxv32f32 = fptrunc undef to +; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %nxv64f32_nxv64f16 = fptrunc undef to +; CHECK-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %nxv64f64_nxv64f16 = fptrunc undef to +; CHECK-NEXT: Cost Model: Found an 
estimated cost of 36 for instruction: %nxv64f64_nxv64f32 = fptrunc undef to ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %v2f32_v2f16 = fptrunc <2 x float> undef to <2 x half> diff --git a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll index e42dc889f1ba02..a9a5f4d2de5419 100644 --- a/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll +++ b/llvm/test/Analysis/CostModel/RISCV/reduce-scalable-fp.ll @@ -238,7 +238,7 @@ define float @vreduce_ord_fadd_nxv4f32( %v, float %s) { define float @vreduce_fwadd_nxv4f32( %v, float %s) { ; CHECK-LABEL: 'vreduce_fwadd_nxv4f32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext %v to +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = fpext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc float @llvm.vector.reduce.fadd.nxv4f32(float %s, %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red ; @@ -254,7 +254,7 @@ define float @vreduce_fwadd_nxv4f32( %v, float %s) { define float @vreduce_ord_fwadd_nxv4f32( %v, float %s) { ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv4f32' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext %v to +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = fpext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red = call float @llvm.vector.reduce.fadd.nxv4f32(float %s, %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret float %red ; @@ -358,7 +358,7 @@ define double @vreduce_ord_fadd_nxv2f64( %v, double %s) { define double @vreduce_fwadd_nxv2f64( %v, double %s) { ; CHECK-LABEL: 'vreduce_fwadd_nxv2f64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext %v to +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: %e = fpext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv2f64(double %s, %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red ; @@ -374,7 +374,7 @@ define double @vreduce_fwadd_nxv2f64( %v, double %s) { define double @vreduce_ord_fwadd_nxv2f64( %v, double %s) { ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv2f64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext %v to +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %e = fpext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv2f64(double %s, %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red ; @@ -418,7 +418,7 @@ define double @vreduce_ord_fadd_nxv4f64( %v, double %s) { define double @vreduce_fwadd_nxv4f64( %v, double %s) { ; CHECK-LABEL: 'vreduce_fwadd_nxv4f64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext %v to +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e = fpext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call reassoc double @llvm.vector.reduce.fadd.nxv4f64(double %s, %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret double %red ; @@ -434,7 +434,7 @@ define double @vreduce_fwadd_nxv4f64( %v, double %s) { define double @vreduce_ord_fwadd_nxv4f64( %v, double %s) { ; CHECK-LABEL: 'vreduce_ord_fwadd_nxv4f64' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %e = fpext %v to +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %e = fpext %v to ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %red = call double @llvm.vector.reduce.fadd.nxv4f64(double %s, %e) ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for 
instruction: ret double %red ; diff --git a/llvm/test/Analysis/CostModel/RISCV/rvv-insertelement.ll b/llvm/test/Analysis/CostModel/RISCV/rvv-insertelement.ll index 6e1ae0216f7655..8b68480788f79e 100644 --- a/llvm/test/Analysis/CostModel/RISCV/rvv-insertelement.ll +++ b/llvm/test/Analysis/CostModel/RISCV/rvv-insertelement.ll @@ -12,12 +12,12 @@ define void @insertelement_int(i32 %x) { ; RV32V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0 ; RV32V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0 ; RV32V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0 -; RV32V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0 +; RV32V-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0 ; RV32V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv2i1_0 = insertelement undef, i1 undef, i32 0 ; RV32V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv4i1_0 = insertelement undef, i1 undef, i32 0 ; RV32V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv8i1_0 = insertelement undef, i1 undef, i32 0 -; RV32V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv16i1_0 = insertelement undef, i1 undef, i32 0 -; RV32V-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i1_0 = insertelement undef, i1 undef, i32 0 +; RV32V-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv16i1_0 = insertelement undef, i1 undef, i32 0 +; RV32V-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv32i1_0 = insertelement undef, i1 undef, i32 0 ; RV32V-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> 
undef, i8 undef, i32 0 ; RV32V-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0 ; RV32V-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0 @@ -66,12 +66,12 @@ define void @insertelement_int(i32 %x) { ; RV32V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i1_1 = insertelement <4 x i1> undef, i1 undef, i32 1 ; RV32V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i1_1 = insertelement <8 x i1> undef, i1 undef, i32 1 ; RV32V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16i1_1 = insertelement <16 x i1> undef, i1 undef, i32 1 -; RV32V-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i1_1 = insertelement <32 x i1> undef, i1 undef, i32 1 +; RV32V-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v32i1_1 = insertelement <32 x i1> undef, i1 undef, i32 1 ; RV32V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv2i1_1 = insertelement undef, i1 undef, i32 1 ; RV32V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv4i1_1 = insertelement undef, i1 undef, i32 1 ; RV32V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv8i1_1 = insertelement undef, i1 undef, i32 1 -; RV32V-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i1_1 = insertelement undef, i1 undef, i32 1 -; RV32V-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv32i1_1 = insertelement undef, i1 undef, i32 1 +; RV32V-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv16i1_1 = insertelement undef, i1 undef, i32 1 +; RV32V-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %nxv32i1_1 = insertelement undef, i1 undef, i32 1 ; RV32V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_1 = insertelement <2 x i8> undef, i8 undef, i32 1 ; RV32V-NEXT: Cost 
Model: Found an estimated cost of 2 for instruction: %v4i8_1 = insertelement <4 x i8> undef, i8 undef, i32 1 ; RV32V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_1 = insertelement <8 x i8> undef, i8 undef, i32 1 @@ -120,12 +120,12 @@ define void @insertelement_int(i32 %x) { ; RV32V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i1_x = insertelement <4 x i1> undef, i1 undef, i32 %x ; RV32V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8i1_x = insertelement <8 x i1> undef, i1 undef, i32 %x ; RV32V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v16i1_x = insertelement <16 x i1> undef, i1 undef, i32 %x -; RV32V-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v32i1_x = insertelement <32 x i1> undef, i1 undef, i32 %x +; RV32V-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v32i1_x = insertelement <32 x i1> undef, i1 undef, i32 %x ; RV32V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv2i1_x = insertelement undef, i1 undef, i32 %x ; RV32V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv4i1_x = insertelement undef, i1 undef, i32 %x ; RV32V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv8i1_x = insertelement undef, i1 undef, i32 %x -; RV32V-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv16i1_x = insertelement undef, i1 undef, i32 %x -; RV32V-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %nxv32i1_x = insertelement undef, i1 undef, i32 %x +; RV32V-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv16i1_x = insertelement undef, i1 undef, i32 %x +; RV32V-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %nxv32i1_x = insertelement undef, i1 undef, i32 %x ; RV32V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_x = insertelement <2 x i8> undef, i8 undef, i32 %x ; RV32V-NEXT: Cost Model: Found an estimated cost 
of 3 for instruction: %v4i8_x = insertelement <4 x i8> undef, i8 undef, i32 %x ; RV32V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i8_x = insertelement <8 x i8> undef, i8 undef, i32 %x @@ -177,12 +177,12 @@ define void @insertelement_int(i32 %x) { ; RV64V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0 ; RV64V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0 ; RV64V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0 -; RV64V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0 +; RV64V-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0 ; RV64V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv2i1_0 = insertelement undef, i1 undef, i32 0 ; RV64V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv4i1_0 = insertelement undef, i1 undef, i32 0 ; RV64V-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv8i1_0 = insertelement undef, i1 undef, i32 0 -; RV64V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv16i1_0 = insertelement undef, i1 undef, i32 0 -; RV64V-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i1_0 = insertelement undef, i1 undef, i32 0 +; RV64V-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv16i1_0 = insertelement undef, i1 undef, i32 0 +; RV64V-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv32i1_0 = insertelement undef, i1 undef, i32 0 ; RV64V-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0 ; RV64V-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement 
<4 x i8> undef, i8 undef, i32 0 ; RV64V-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0 @@ -231,12 +231,12 @@ define void @insertelement_int(i32 %x) { ; RV64V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i1_1 = insertelement <4 x i1> undef, i1 undef, i32 1 ; RV64V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i1_1 = insertelement <8 x i1> undef, i1 undef, i32 1 ; RV64V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16i1_1 = insertelement <16 x i1> undef, i1 undef, i32 1 -; RV64V-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v32i1_1 = insertelement <32 x i1> undef, i1 undef, i32 1 +; RV64V-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v32i1_1 = insertelement <32 x i1> undef, i1 undef, i32 1 ; RV64V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv2i1_1 = insertelement undef, i1 undef, i32 1 ; RV64V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv4i1_1 = insertelement undef, i1 undef, i32 1 ; RV64V-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv8i1_1 = insertelement undef, i1 undef, i32 1 -; RV64V-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i1_1 = insertelement undef, i1 undef, i32 1 -; RV64V-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv32i1_1 = insertelement undef, i1 undef, i32 1 +; RV64V-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv16i1_1 = insertelement undef, i1 undef, i32 1 +; RV64V-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %nxv32i1_1 = insertelement undef, i1 undef, i32 1 ; RV64V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_1 = insertelement <2 x i8> undef, i8 undef, i32 1 ; RV64V-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_1 = insertelement <4 x i8> undef, i8 undef, i32 1 ; RV64V-NEXT: 
Cost Model: Found an estimated cost of 2 for instruction: %v8i8_1 = insertelement <8 x i8> undef, i8 undef, i32 1 @@ -285,12 +285,12 @@ define void @insertelement_int(i32 %x) { ; RV64V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i1_x = insertelement <4 x i1> undef, i1 undef, i32 %x ; RV64V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8i1_x = insertelement <8 x i1> undef, i1 undef, i32 %x ; RV64V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v16i1_x = insertelement <16 x i1> undef, i1 undef, i32 %x -; RV64V-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v32i1_x = insertelement <32 x i1> undef, i1 undef, i32 %x +; RV64V-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v32i1_x = insertelement <32 x i1> undef, i1 undef, i32 %x ; RV64V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv2i1_x = insertelement undef, i1 undef, i32 %x ; RV64V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv4i1_x = insertelement undef, i1 undef, i32 %x ; RV64V-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv8i1_x = insertelement undef, i1 undef, i32 %x -; RV64V-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv16i1_x = insertelement undef, i1 undef, i32 %x -; RV64V-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %nxv32i1_x = insertelement undef, i1 undef, i32 %x +; RV64V-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv16i1_x = insertelement undef, i1 undef, i32 %x +; RV64V-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %nxv32i1_x = insertelement undef, i1 undef, i32 %x ; RV64V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_x = insertelement <2 x i8> undef, i8 undef, i32 %x ; RV64V-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i8_x = insertelement <4 x i8> undef, i8 undef, i32 %x ; RV64V-NEXT: Cost Model: Found an estimated 
cost of 3 for instruction: %v8i8_x = insertelement <8 x i8> undef, i8 undef, i32 %x @@ -341,13 +341,13 @@ define void @insertelement_int(i32 %x) { ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0 -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0 -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0 +; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0 +; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv2i1_0 = insertelement undef, i1 undef, i32 0 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv4i1_0 = insertelement undef, i1 undef, i32 0 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv8i1_0 = insertelement undef, i1 undef, i32 0 -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv16i1_0 = insertelement undef, i1 undef, i32 0 -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i1_0 = insertelement undef, i1 undef, i32 0 +; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv16i1_0 = insertelement undef, i1 undef, i32 0 +; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv32i1_0 = insertelement undef, i1 undef, i32 0 ; 
RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0 @@ -395,13 +395,13 @@ define void @insertelement_int(i32 %x) { ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i1_1 = insertelement <4 x i1> undef, i1 undef, i32 1 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i1_1 = insertelement <8 x i1> undef, i1 undef, i32 1 -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i1_1 = insertelement <16 x i1> undef, i1 undef, i32 1 -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v32i1_1 = insertelement <32 x i1> undef, i1 undef, i32 1 +; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v16i1_1 = insertelement <16 x i1> undef, i1 undef, i32 1 +; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v32i1_1 = insertelement <32 x i1> undef, i1 undef, i32 1 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv2i1_1 = insertelement undef, i1 undef, i32 1 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv4i1_1 = insertelement undef, i1 undef, i32 1 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv8i1_1 = insertelement undef, i1 undef, i32 1 -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i1_1 = insertelement undef, i1 undef, i32 1 -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 12 for instruction: 
%nxv32i1_1 = insertelement undef, i1 undef, i32 1 +; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv16i1_1 = insertelement undef, i1 undef, i32 1 +; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %nxv32i1_1 = insertelement undef, i1 undef, i32 1 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_1 = insertelement <2 x i8> undef, i8 undef, i32 1 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_1 = insertelement <4 x i8> undef, i8 undef, i32 1 ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_1 = insertelement <8 x i8> undef, i8 undef, i32 1 @@ -449,13 +449,13 @@ define void @insertelement_int(i32 %x) { ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v2i1_x = insertelement <2 x i1> undef, i1 undef, i32 %x ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i1_x = insertelement <4 x i1> undef, i1 undef, i32 %x ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8i1_x = insertelement <8 x i1> undef, i1 undef, i32 %x -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v16i1_x = insertelement <16 x i1> undef, i1 undef, i32 %x -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v32i1_x = insertelement <32 x i1> undef, i1 undef, i32 %x +; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v16i1_x = insertelement <16 x i1> undef, i1 undef, i32 %x +; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v32i1_x = insertelement <32 x i1> undef, i1 undef, i32 %x ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv2i1_x = insertelement undef, i1 undef, i32 %x ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv4i1_x = insertelement undef, i1 undef, i32 %x ; RV32ZVE64X-NEXT: 
Cost Model: Found an estimated cost of 7 for instruction: %nxv8i1_x = insertelement undef, i1 undef, i32 %x -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv16i1_x = insertelement undef, i1 undef, i32 %x -; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %nxv32i1_x = insertelement undef, i1 undef, i32 %x +; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv16i1_x = insertelement undef, i1 undef, i32 %x +; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %nxv32i1_x = insertelement undef, i1 undef, i32 %x ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_x = insertelement <2 x i8> undef, i8 undef, i32 %x ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i8_x = insertelement <4 x i8> undef, i8 undef, i32 %x ; RV32ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i8_x = insertelement <8 x i8> undef, i8 undef, i32 %x @@ -506,13 +506,13 @@ define void @insertelement_int(i32 %x) { ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v2i1_0 = insertelement <2 x i1> undef, i1 undef, i32 0 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4i1_0 = insertelement <4 x i1> undef, i1 undef, i32 0 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8i1_0 = insertelement <8 x i1> undef, i1 undef, i32 0 -; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0 -; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v32i1_0 = insertelement <32 x i1> undef, i1 undef, i32 0 +; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v16i1_0 = insertelement <16 x i1> undef, i1 undef, i32 0 +; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v32i1_0 = 
insertelement <32 x i1> undef, i1 undef, i32 0 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv2i1_0 = insertelement undef, i1 undef, i32 0 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv4i1_0 = insertelement undef, i1 undef, i32 0 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %nxv8i1_0 = insertelement undef, i1 undef, i32 0 -; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv16i1_0 = insertelement undef, i1 undef, i32 0 -; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv32i1_0 = insertelement undef, i1 undef, i32 0 +; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv16i1_0 = insertelement undef, i1 undef, i32 0 +; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %nxv32i1_0 = insertelement undef, i1 undef, i32 0 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2i8_0 = insertelement <2 x i8> undef, i8 undef, i32 0 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4i8_0 = insertelement <4 x i8> undef, i8 undef, i32 0 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8i8_0 = insertelement <8 x i8> undef, i8 undef, i32 0 @@ -560,13 +560,13 @@ define void @insertelement_int(i32 %x) { ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2i1_1 = insertelement <2 x i1> undef, i1 undef, i32 1 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4i1_1 = insertelement <4 x i1> undef, i1 undef, i32 1 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8i1_1 = insertelement <8 x i1> undef, i1 undef, i32 1 -; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16i1_1 = insertelement <16 x i1> undef, i1 undef, i32 1 -; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 12 
for instruction: %v32i1_1 = insertelement <32 x i1> undef, i1 undef, i32 1 +; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v16i1_1 = insertelement <16 x i1> undef, i1 undef, i32 1 +; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v32i1_1 = insertelement <32 x i1> undef, i1 undef, i32 1 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv2i1_1 = insertelement undef, i1 undef, i32 1 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv4i1_1 = insertelement undef, i1 undef, i32 1 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %nxv8i1_1 = insertelement undef, i1 undef, i32 1 -; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %nxv16i1_1 = insertelement undef, i1 undef, i32 1 -; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %nxv32i1_1 = insertelement undef, i1 undef, i32 1 +; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %nxv16i1_1 = insertelement undef, i1 undef, i32 1 +; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %nxv32i1_1 = insertelement undef, i1 undef, i32 1 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2i8_1 = insertelement <2 x i8> undef, i8 undef, i32 1 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4i8_1 = insertelement <4 x i8> undef, i8 undef, i32 1 ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8i8_1 = insertelement <8 x i8> undef, i8 undef, i32 1 @@ -614,13 +614,13 @@ define void @insertelement_int(i32 %x) { ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v2i1_x = insertelement <2 x i1> undef, i1 undef, i32 %x ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4i1_x = insertelement <4 x i1> undef, i1 undef, i32 %x ; RV64ZVE64X-NEXT: Cost 
Model: Found an estimated cost of 7 for instruction: %v8i1_x = insertelement <8 x i1> undef, i1 undef, i32 %x -; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v16i1_x = insertelement <16 x i1> undef, i1 undef, i32 %x -; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v32i1_x = insertelement <32 x i1> undef, i1 undef, i32 %x +; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v16i1_x = insertelement <16 x i1> undef, i1 undef, i32 %x +; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v32i1_x = insertelement <32 x i1> undef, i1 undef, i32 %x ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv2i1_x = insertelement undef, i1 undef, i32 %x ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv4i1_x = insertelement undef, i1 undef, i32 %x ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %nxv8i1_x = insertelement undef, i1 undef, i32 %x -; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %nxv16i1_x = insertelement undef, i1 undef, i32 %x -; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %nxv32i1_x = insertelement undef, i1 undef, i32 %x +; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %nxv16i1_x = insertelement undef, i1 undef, i32 %x +; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %nxv32i1_x = insertelement undef, i1 undef, i32 %x ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2i8_x = insertelement <2 x i8> undef, i8 undef, i32 %x ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4i8_x = insertelement <4 x i8> undef, i8 undef, i32 %x ; RV64ZVE64X-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8i8_x = insertelement <8 x i8> undef, i8 undef, i32 %x diff --git 
a/llvm/test/Analysis/CostModel/RISCV/shuffle-broadcast.ll b/llvm/test/Analysis/CostModel/RISCV/shuffle-broadcast.ll index b763198e98bacd..79ba1562d0f884 100644 --- a/llvm/test/Analysis/CostModel/RISCV/shuffle-broadcast.ll +++ b/llvm/test/Analysis/CostModel/RISCV/shuffle-broadcast.ll @@ -197,7 +197,7 @@ define void @broadcast_fixed() #0{ ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %41 = shufflevector <32 x i1> undef, <32 x i1> undef, <32 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %42 = shufflevector <64 x i1> undef, <64 x i1> undef, <64 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %43 = shufflevector <128 x i1> undef, <128 x i1> undef, <128 x i32> zeroinitializer -; CHECK-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %ins1 = insertelement <128 x i1> poison, i1 poison, i32 0 +; CHECK-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %ins1 = insertelement <128 x i1> poison, i1 poison, i32 0 ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %44 = shufflevector <128 x i1> %ins1, <128 x i1> poison, <128 x i32> zeroinitializer ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %ins2 = insertelement <2 x i8> poison, i8 3, i32 0 ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %45 = shufflevector <2 x i8> %ins2, <2 x i8> undef, <2 x i32> zeroinitializer From 38113a083283d2f30a677befaa5fb86dce731c8b Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Tue, 2 Apr 2024 10:53:57 +0900 Subject: [PATCH 060/201] [mlir][IR] Trigger `notifyOperationReplaced` on `replaceAllOpUsesWith` (#84721) Before this change: `notifyOperationReplaced` was triggered when calling `RewriteBase::replaceOp`. After this change: `notifyOperationReplaced` is triggered when `RewriterBase::replaceAllOpUsesWith` or `RewriterBase::replaceOp` is called. 
Until now, every `notifyOperationReplaced` was always sent together with a `notifyOperationErased`, which made that `notifyOperationErased` callback irrelevant. More importantly, when a user called `RewriterBase::replaceAllOpUsesWith`+`RewriterBase::eraseOp` instead of `RewriterBase::replaceOp`, no `notifyOperationReplaced` callback was sent, even though the two notations are semantically equivalent. As an example, this can be a problem when applying patterns with the transform dialect because the `TrackingListener` will only see the `notifyOperationErased` callback and the payload op is dropped from the mappings. Note: It is still possible to write semantically equivalent code that does not trigger a `notifyOperationReplaced` (e.g., when op results are replaced one-by-one), but this commit already improves the situation a lot. --- mlir/include/mlir/IR/PatternMatch.h | 29 ++++++++++++--------- mlir/lib/IR/PatternMatch.cpp | 24 +++++++++++------ mlir/test/lib/Dialect/Test/TestPatterns.cpp | 5 +++- 3 files changed, 37 insertions(+), 21 deletions(-) diff --git a/mlir/include/mlir/IR/PatternMatch.h b/mlir/include/mlir/IR/PatternMatch.h index 070e6ed702f86a..ac2b0d5a38375a 100644 --- a/mlir/include/mlir/IR/PatternMatch.h +++ b/mlir/include/mlir/IR/PatternMatch.h @@ -409,9 +409,9 @@ class RewriterBase : public OpBuilder { /// Notify the listener that the specified operation was modified in-place. virtual void notifyOperationModified(Operation *op) {} - /// Notify the listener that the specified operation is about to be replaced - /// with another operation. This is called before the uses of the old - /// operation have been changed. + /// Notify the listener that all uses of the specified operation's results + /// are about to be replaced with the results of another operation. This is + /// called before the uses of the old operation have been changed. /// /// By default, this function calls the "operation replaced with values" /// notification. 
@@ -420,9 +420,10 @@ class RewriterBase : public OpBuilder { notifyOperationReplaced(op, replacement->getResults()); } - /// Notify the listener that the specified operation is about to be replaced - /// with the a range of values, potentially produced by other operations. - /// This is called before the uses of the operation have been changed. + /// Notify the listener that all uses of the specified operation's results + /// are about to be replaced with a range of values, potentially + /// produced by other operations. This is called before the uses of the + /// operation have been changed. virtual void notifyOperationReplaced(Operation *op, ValueRange replacement) {} @@ -648,12 +649,16 @@ class RewriterBase : public OpBuilder { for (auto it : llvm::zip(from, to)) replaceAllUsesWith(std::get<0>(it), std::get<1>(it)); } - // Note: This function cannot be called `replaceAllUsesWith` because the - // overload resolution, when called with an op that can be implicitly - // converted to a Value, would be ambiguous. - void replaceAllOpUsesWith(Operation *from, ValueRange to) { - replaceAllUsesWith(from->getResults(), to); - } + + /// Find uses of `from` and replace them with `to`. Also notify the listener + /// about every in-place op modification (for every use that was replaced) + /// and that the `from` operation is about to be replaced. + /// + /// Note: This function cannot be called `replaceAllUsesWith` because the + /// overload resolution, when called with an op that can be implicitly + /// converted to a Value, would be ambiguous. + void replaceAllOpUsesWith(Operation *from, ValueRange to); + void replaceAllOpUsesWith(Operation *from, Operation *to); /// Find uses of `from` and replace them with `to` if the `functor` returns /// true. 
Also notify the listener about every in-place op modification (for diff --git a/mlir/lib/IR/PatternMatch.cpp b/mlir/lib/IR/PatternMatch.cpp index 4079ccc7567256..5944a0ea46a143 100644 --- a/mlir/lib/IR/PatternMatch.cpp +++ b/mlir/lib/IR/PatternMatch.cpp @@ -110,6 +110,22 @@ RewriterBase::~RewriterBase() { // Out of line to provide a vtable anchor for the class. } +void RewriterBase::replaceAllOpUsesWith(Operation *from, ValueRange to) { + // Notify the listener that we're about to replace this op. + if (auto *rewriteListener = dyn_cast_if_present(listener)) + rewriteListener->notifyOperationReplaced(from, to); + + replaceAllUsesWith(from->getResults(), to); +} + +void RewriterBase::replaceAllOpUsesWith(Operation *from, Operation *to) { + // Notify the listener that we're about to replace this op. + if (auto *rewriteListener = dyn_cast_if_present(listener)) + rewriteListener->notifyOperationReplaced(from, to); + + replaceAllUsesWith(from->getResults(), to->getResults()); +} + /// This method replaces the results of the operation with the specified list of /// values. The number of provided values must match the number of results of /// the operation. The replaced op is erased. @@ -117,10 +133,6 @@ void RewriterBase::replaceOp(Operation *op, ValueRange newValues) { assert(op->getNumResults() == newValues.size() && "incorrect # of replacement values"); - // Notify the listener that we're about to replace this op. - if (auto *rewriteListener = dyn_cast_if_present(listener)) - rewriteListener->notifyOperationReplaced(op, newValues); - // Replace all result uses. Also notifies the listener of modifications. replaceAllOpUsesWith(op, newValues); @@ -136,10 +148,6 @@ void RewriterBase::replaceOp(Operation *op, Operation *newOp) { assert(op->getNumResults() == newOp->getNumResults() && "ops have different number of results"); - // Notify the listener that we're about to replace this op. 
- if (auto *rewriteListener = dyn_cast_if_present(listener)) - rewriteListener->notifyOperationReplaced(op, newOp); - // Replace all result uses. Also notifies the listener of modifications. replaceAllOpUsesWith(op, newOp->getResults()); diff --git a/mlir/test/lib/Dialect/Test/TestPatterns.cpp b/mlir/test/lib/Dialect/Test/TestPatterns.cpp index 2da184bc3d85ba..76dc825fe44515 100644 --- a/mlir/test/lib/Dialect/Test/TestPatterns.cpp +++ b/mlir/test/lib/Dialect/Test/TestPatterns.cpp @@ -489,7 +489,10 @@ struct TestStrictPatternDriver OperationName("test.new_op", op->getContext()).getIdentifier(), op->getOperands(), op->getResultTypes()); } - rewriter.replaceOp(op, newOp->getResults()); + // "replaceOp" could be used instead of "replaceAllOpUsesWith"+"eraseOp". + // A "notifyOperationReplaced" callback is triggered in either case. + rewriter.replaceAllOpUsesWith(op, newOp->getResults()); + rewriter.eraseOp(op); return success(); } }; From 21f85e230056172cffcaec76352e5a2019b54b86 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Tue, 2 Apr 2024 09:52:52 +0800 Subject: [PATCH 061/201] [NFC] [C++20] [Modules] Pulling out getCXX20NamedModuleOutputPath into a separate function Required in the review process of https://github.com/llvm/llvm-project/pull/85050. 
--- clang/lib/Driver/Driver.cpp | 14 ++------------ clang/lib/Driver/ToolChains/Clang.cpp | 18 ++++++++++++++++++ clang/lib/Driver/ToolChains/Clang.h | 15 +++++++++++++++ 3 files changed, 35 insertions(+), 12 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 7a53764364ce4d..1a0f5f27eda2fc 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -5814,19 +5814,9 @@ static const char *GetModuleOutputPath(Compilation &C, const JobAction &JA, (C.getArgs().hasArg(options::OPT_fmodule_output) || C.getArgs().hasArg(options::OPT_fmodule_output_EQ))); - if (Arg *ModuleOutputEQ = - C.getArgs().getLastArg(options::OPT_fmodule_output_EQ)) - return C.addResultFile(ModuleOutputEQ->getValue(), &JA); + SmallString<256> OutputPath = + tools::getCXX20NamedModuleOutputPath(C.getArgs(), BaseInput); - SmallString<64> OutputPath; - Arg *FinalOutput = C.getArgs().getLastArg(options::OPT_o); - if (FinalOutput && C.getArgs().hasArg(options::OPT_c)) - OutputPath = FinalOutput->getValue(); - else - OutputPath = BaseInput; - - const char *Extension = types::getTypeTempSuffix(JA.getType()); - llvm::sys::path::replace_extension(OutputPath, Extension); return C.addResultFile(C.getArgs().MakeArgString(OutputPath.c_str()), &JA); } diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 3bcacff7724c7d..b03ac6018d2b80 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -3839,6 +3839,24 @@ bool Driver::getDefaultModuleCachePath(SmallVectorImpl &Result) { return false; } +llvm::SmallString<256> +clang::driver::tools::getCXX20NamedModuleOutputPath(const ArgList &Args, + const char *BaseInput) { + if (Arg *ModuleOutputEQ = Args.getLastArg(options::OPT_fmodule_output_EQ)) + return StringRef(ModuleOutputEQ->getValue()); + + SmallString<256> OutputPath; + if (Arg *FinalOutput = Args.getLastArg(options::OPT_o); + FinalOutput && Args.hasArg(options::OPT_c)) + 
OutputPath = FinalOutput->getValue(); + else + OutputPath = BaseInput; + + const char *Extension = types::getTypeTempSuffix(types::TY_ModuleFile); + llvm::sys::path::replace_extension(OutputPath, Extension); + return OutputPath; +} + static bool RenderModulesOptions(Compilation &C, const Driver &D, const ArgList &Args, const InputInfo &Input, const InputInfo &Output, bool HaveStd20, diff --git a/clang/lib/Driver/ToolChains/Clang.h b/clang/lib/Driver/ToolChains/Clang.h index 0f503c4bd1c4fe..18f6c5ed06a59a 100644 --- a/clang/lib/Driver/ToolChains/Clang.h +++ b/clang/lib/Driver/ToolChains/Clang.h @@ -193,6 +193,21 @@ DwarfFissionKind getDebugFissionKind(const Driver &D, const llvm::opt::ArgList &Args, llvm::opt::Arg *&Arg); +// Calculate the output path of the module file when compiling a module unit +// with the `-fmodule-output` option or `-fmodule-output=` option specified. +// The behavior is: +// - If `-fmodule-output=` is specified, then the module file is +// written to the value. +// - Otherwise if the output object file of the module unit is specified, the +// output path +// of the module file should be the same with the output object file except +// the corresponding suffix. This requires both `-o` and `-c` are specified. +// - Otherwise, the output path of the module file will be the same with the +// input with the corresponding suffix. +llvm::SmallString<256> +getCXX20NamedModuleOutputPath(const llvm::opt::ArgList &Args, + const char *BaseInput); + } // end namespace tools } // end namespace driver From 9067f5470573454ad33f2d1786cdfa77f7f9329c Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Tue, 2 Apr 2024 11:03:12 +0900 Subject: [PATCH 062/201] [mlir][IR][NFC] Make `replaceAllUsesWith` non-templatized (#84722) Turn `RewriterBase::replaceAllUsesWith` into a non-templatized implementation, so that it can be made virtual and be overridden in the `ConversionPatternRewriter` in a subsequent change. 
This change is in preparation of adding dialect conversion support for `replaceAllUsesWith`. --- mlir/include/mlir/IR/PatternMatch.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/mlir/include/mlir/IR/PatternMatch.h b/mlir/include/mlir/IR/PatternMatch.h index ac2b0d5a38375a..15b1c38929485e 100644 --- a/mlir/include/mlir/IR/PatternMatch.h +++ b/mlir/include/mlir/IR/PatternMatch.h @@ -635,11 +635,13 @@ class RewriterBase : public OpBuilder { /// Find uses of `from` and replace them with `to`. Also notify the listener /// about every in-place op modification (for every use that was replaced). void replaceAllUsesWith(Value from, Value to) { - return replaceAllUsesWith(from.getImpl(), to); + for (OpOperand &operand : llvm::make_early_inc_range(from.getUses())) { + Operation *op = operand.getOwner(); + modifyOpInPlace(op, [&]() { operand.set(to); }); + } } - template - void replaceAllUsesWith(IRObjectWithUseList *from, ValueT &&to) { - for (OperandType &operand : llvm::make_early_inc_range(from->getUses())) { + void replaceAllUsesWith(Block *from, Block *to) { + for (BlockOperand &operand : llvm::make_early_inc_range(from->getUses())) { Operation *op = operand.getOwner(); modifyOpInPlace(op, [&]() { operand.set(to); }); } From 49a4ec20a8be5888cbf225bab340dbaf204902c7 Mon Sep 17 00:00:00 2001 From: Rob Suderman Date: Mon, 1 Apr 2024 19:22:49 -0700 Subject: [PATCH 063/201] [mlir] Reland the dialect conversion hanging use fix (#87297) Dialect conversion sometimes can have a hanging use of an argument. Ensured that argument uses are dropped before removing the block. 
--- mlir/lib/Transforms/Utils/DialectConversion.cpp | 2 ++ .../TosaToLinalg/tosa-to-linalg-invalid.mlir | 13 +++++++++++++ 2 files changed, 15 insertions(+) diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index 8671c1008902a0..270ac0a0868960 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -279,6 +279,8 @@ class CreateBlockRewrite : public BlockRewrite { auto &blockOps = block->getOperations(); while (!blockOps.empty()) blockOps.remove(blockOps.begin()); + for (auto arg : block->getArguments()) + arg.dropAllUses(); block->dropAllUses(); if (block->getParent()) block->erase(); diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-invalid.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-invalid.mlir index 17eec593691860..6494e1b2719487 100644 --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-invalid.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-invalid.mlir @@ -15,3 +15,16 @@ func.func @tensor_with_unknown_rank(%arg0: tensor<*xi8>) -> tensor<*xi8> { %0 = "tosa.abs"(%arg0) : (tensor<*xi8>) -> tensor<*xi8> return %0 : tensor<*xi8> } + +// ----- + +// CHECK-LABEL: @unranked_add +func.func @unranked_add(%arg0 : tensor<10x10xf32> , %arg1 : tensor<10x10xf32>, %arg2 : tensor<*xf32>) -> (tensor<10x10xf32>) { + // expected-error@+3 {{failed to legalize operation 'tosa.add'}} + %reduce = tosa.reduce_max %arg0 {axis = 1 : i32} : (tensor<10x10xf32>) -> tensor<10x1xf32> + %1 = tosa.add %reduce, %arg1 : (tensor<10x1xf32>, tensor<10x10xf32>) -> tensor<10x10xf32> + %0 = tosa.add %1, %arg2 : (tensor<10x10xf32>, tensor<*xf32>) -> tensor<*xf32> + %2 = tosa.reshape %0 {new_shape = array} : (tensor<*xf32>) -> tensor<10x10xf32> + return %2 : tensor<10x10xf32> +} + From b932db08bb8e56c80380468698a6f75d5ea35577 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 1 Apr 2024 20:19:59 -0700 Subject: [PATCH 064/201] 
[llvm-objcopy,test] Prepend error: to some messages --- llvm/test/tools/llvm-objcopy/ELF/discard-locals-rel.test | 4 ++-- llvm/test/tools/llvm-objcopy/ELF/strip-reloc-symbol.test | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/test/tools/llvm-objcopy/ELF/discard-locals-rel.test b/llvm/test/tools/llvm-objcopy/ELF/discard-locals-rel.test index 3658eb376010a6..00bb8fcf18205b 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/discard-locals-rel.test +++ b/llvm/test/tools/llvm-objcopy/ELF/discard-locals-rel.test @@ -1,5 +1,5 @@ # RUN: yaml2obj %s -o %t -# RUN: not llvm-objcopy --discard-locals %t %t2 2>&1 | FileCheck %s +# RUN: not llvm-objcopy --discard-locals %t %t2 2>&1 | FileCheck %s -DFILE=%t !ELF FileHeader: @@ -23,4 +23,4 @@ Symbols: Type: STT_FUNC Section: .text -# CHECK: not stripping symbol '.L.rel' because it is named in a relocation +# CHECK: error: '[[FILE]]': not stripping symbol '.L.rel' because it is named in a relocation diff --git a/llvm/test/tools/llvm-objcopy/ELF/strip-reloc-symbol.test b/llvm/test/tools/llvm-objcopy/ELF/strip-reloc-symbol.test index 63c9e122d9a216..941dacce2edf29 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/strip-reloc-symbol.test +++ b/llvm/test/tools/llvm-objcopy/ELF/strip-reloc-symbol.test @@ -1,5 +1,5 @@ # RUN: yaml2obj %s -o %t -# RUN: not llvm-objcopy -N foo %t %t2 2>&1 | FileCheck %s +# RUN: not llvm-objcopy -N foo %t %t2 2>&1 | FileCheck %s -DFILE=%t !ELF FileHeader: @@ -28,4 +28,4 @@ Symbols: Value: 0x1000 Size: 8 -# CHECK: not stripping symbol 'foo' because it is named in a relocation +# CHECK: error: '[[FILE]]': not stripping symbol 'foo' because it is named in a relocation From 59dd10faf8c3bb9dbcecb60d932284b8762cebf8 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Tue, 2 Apr 2024 13:02:03 +0800 Subject: [PATCH 065/201] [RISCV] Add tests for fixed vector vwsll. NFC We are missing patterns for fixed vectors, where the sexts and zexts are legalized to _vl nodes. 
--- .../CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll | 920 ++++++++++++++++++ 1 file changed, 920 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll new file mode 100644 index 00000000000000..f5305a1c36de16 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll @@ -0,0 +1,920 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB +; RUN: llc -mtriple=riscv64 -mattr=+v,+zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB + +; ============================================================================== +; i32 -> i64 +; ============================================================================== + +define <4 x i64> @vwsll_vv_v4i64_sext(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: vwsll_vv_v4i64_sext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vsext.vf2 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vv_v4i64_sext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 +; CHECK-ZVBB-NEXT: vsext.vf2 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %x = zext <4 x i32> %a to <4 x i64> + %y = sext <4 x i32> %b to <4 x i64> + %z = shl <4 x i64> %x, %y + ret <4 x i64> %z +} + +define <4 x i64> @vwsll_vv_v4i64_zext(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: vwsll_vv_v4i64_zext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: 
vzext.vf2 v10, v8 +; CHECK-NEXT: vzext.vf2 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vv_v4i64_zext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 +; CHECK-ZVBB-NEXT: vzext.vf2 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %x = zext <4 x i32> %a to <4 x i64> + %y = zext <4 x i32> %b to <4 x i64> + %z = shl <4 x i64> %x, %y + ret <4 x i64> %z +} + +define <4 x i64> @vwsll_vx_i64_v4i64(<4 x i32> %a, i64 %b) { +; CHECK-LABEL: vwsll_vx_i64_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vsll.vx v8, v10, a0 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i64_v4i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 +; CHECK-ZVBB-NEXT: vsll.vx v8, v10, a0 +; CHECK-ZVBB-NEXT: ret + %head = insertelement <4 x i64> poison, i64 %b, i32 0 + %splat = shufflevector <4 x i64> %head, <4 x i64> poison, <4 x i32> zeroinitializer + %x = zext <4 x i32> %a to <4 x i64> + %z = shl <4 x i64> %x, %splat + ret <4 x i64> %z +} + +define <4 x i64> @vwsll_vx_i32_v4i64_sext(<4 x i32> %a, i32 %b) { +; CHECK-LABEL: vwsll_vx_i32_v4i64_sext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vsext.vf2 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i32_v4i64_sext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 +; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 +; CHECK-ZVBB-NEXT: vsext.vf2 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %head = insertelement <4 x i32> poison, i32 %b, 
i32 0 + %splat = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer + %x = zext <4 x i32> %a to <4 x i64> + %y = sext <4 x i32> %splat to <4 x i64> + %z = shl <4 x i64> %x, %y + ret <4 x i64> %z +} + +define <4 x i64> @vwsll_vx_i32_v4i64_zext(<4 x i32> %a, i32 %b) { +; CHECK-LABEL: vwsll_vx_i32_v4i64_zext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vzext.vf2 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i32_v4i64_zext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 +; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 +; CHECK-ZVBB-NEXT: vzext.vf2 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %head = insertelement <4 x i32> poison, i32 %b, i32 0 + %splat = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer + %x = zext <4 x i32> %a to <4 x i64> + %y = zext <4 x i32> %splat to <4 x i64> + %z = shl <4 x i64> %x, %y + ret <4 x i64> %z +} + +define <4 x i64> @vwsll_vx_i16_v4i64_sext(<4 x i32> %a, i16 %b) { +; CHECK-LABEL: vwsll_vx_i16_v4i64_sext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vsext.vf4 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i16_v4i64_sext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 +; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 +; CHECK-ZVBB-NEXT: vsext.vf4 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %head = insertelement 
<4 x i16> poison, i16 %b, i32 0 + %splat = shufflevector <4 x i16> %head, <4 x i16> poison, <4 x i32> zeroinitializer + %x = zext <4 x i32> %a to <4 x i64> + %y = sext <4 x i16> %splat to <4 x i64> + %z = shl <4 x i64> %x, %y + ret <4 x i64> %z +} + +define <4 x i64> @vwsll_vx_i16_v4i64_zext(<4 x i32> %a, i16 %b) { +; CHECK-LABEL: vwsll_vx_i16_v4i64_zext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vzext.vf4 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i16_v4i64_zext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 +; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 +; CHECK-ZVBB-NEXT: vzext.vf4 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %head = insertelement <4 x i16> poison, i16 %b, i32 0 + %splat = shufflevector <4 x i16> %head, <4 x i16> poison, <4 x i32> zeroinitializer + %x = zext <4 x i32> %a to <4 x i64> + %y = zext <4 x i16> %splat to <4 x i64> + %z = shl <4 x i64> %x, %y + ret <4 x i64> %z +} + +define <4 x i64> @vwsll_vx_i8_v4i64_sext(<4 x i32> %a, i8 %b) { +; CHECK-LABEL: vwsll_vx_i8_v4i64_sext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vsext.vf8 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i8_v4i64_sext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 +; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 +; CHECK-ZVBB-NEXT: vsext.vf8 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + 
%head = insertelement <4 x i8> poison, i8 %b, i32 0 + %splat = shufflevector <4 x i8> %head, <4 x i8> poison, <4 x i32> zeroinitializer + %x = zext <4 x i32> %a to <4 x i64> + %y = sext <4 x i8> %splat to <4 x i64> + %z = shl <4 x i64> %x, %y + ret <4 x i64> %z +} + +define <4 x i64> @vwsll_vx_i8_v4i64_zext(<4 x i32> %a, i8 %b) { +; CHECK-LABEL: vwsll_vx_i8_v4i64_zext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vzext.vf8 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i8_v4i64_zext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 +; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 +; CHECK-ZVBB-NEXT: vzext.vf8 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %head = insertelement <4 x i8> poison, i8 %b, i32 0 + %splat = shufflevector <4 x i8> %head, <4 x i8> poison, <4 x i32> zeroinitializer + %x = zext <4 x i32> %a to <4 x i64> + %y = zext <4 x i8> %splat to <4 x i64> + %z = shl <4 x i64> %x, %y + ret <4 x i64> %z +} + +define <4 x i64> @vwsll_vi_v4i64(<4 x i32> %a) { +; CHECK-LABEL: vwsll_vi_v4i64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vsll.vi v8, v10, 2 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vi_v4i64: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 +; CHECK-ZVBB-NEXT: vsll.vi v8, v10, 2 +; CHECK-ZVBB-NEXT: ret + %x = zext <4 x i32> %a to <4 x i64> + %z = shl <4 x i64> %x, splat (i64 2) + ret <4 x i64> %z +} + +; ============================================================================== +; i16 -> i32 +; 
============================================================================== + +define <8 x i32> @vwsll_vv_v8i32_sext(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vwsll_vv_v8i32_sext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vsext.vf2 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vv_v8i32_sext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 +; CHECK-ZVBB-NEXT: vsext.vf2 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %x = zext <8 x i16> %a to <8 x i32> + %y = sext <8 x i16> %b to <8 x i32> + %z = shl <8 x i32> %x, %y + ret <8 x i32> %z +} + +define <8 x i32> @vwsll_vv_v8i32_zext(<8 x i16> %a, <8 x i16> %b) { +; CHECK-LABEL: vwsll_vv_v8i32_zext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vzext.vf2 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vv_v8i32_zext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 +; CHECK-ZVBB-NEXT: vzext.vf2 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %x = zext <8 x i16> %a to <8 x i32> + %y = zext <8 x i16> %b to <8 x i32> + %z = shl <8 x i32> %x, %y + ret <8 x i32> %z +} + +define <8 x i32> @vwsll_vx_i64_v8i32(<8 x i16> %a, i64 %b) { +; CHECK-LABEL: vwsll_vx_i64_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vsll.vx v8, v10, a0 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i64_v8i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 +; CHECK-ZVBB-NEXT: vsll.vx v8, v10, a0 +; CHECK-ZVBB-NEXT: ret + %head = insertelement <8 x i64> poison, i64 %b, i32 0 + %splat = 
shufflevector <8 x i64> %head, <8 x i64> poison, <8 x i32> zeroinitializer + %x = zext <8 x i16> %a to <8 x i32> + %y = trunc <8 x i64> %splat to <8 x i32> + %z = shl <8 x i32> %x, %y + ret <8 x i32> %z +} + +define <8 x i32> @vwsll_vx_i32_v8i32(<8 x i16> %a, i32 %b) { +; CHECK-LABEL: vwsll_vx_i32_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vsll.vx v8, v10, a0 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i32_v8i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 +; CHECK-ZVBB-NEXT: vsll.vx v8, v10, a0 +; CHECK-ZVBB-NEXT: ret + %head = insertelement <8 x i32> poison, i32 %b, i32 0 + %splat = shufflevector <8 x i32> %head, <8 x i32> poison, <8 x i32> zeroinitializer + %x = zext <8 x i16> %a to <8 x i32> + %z = shl <8 x i32> %x, %splat + ret <8 x i32> %z +} + +define <8 x i32> @vwsll_vx_i16_v8i32_sext(<8 x i16> %a, i16 %b) { +; CHECK-LABEL: vwsll_vx_i16_v8i32_sext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vsext.vf2 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i16_v8i32_sext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 +; CHECK-ZVBB-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 +; CHECK-ZVBB-NEXT: vsext.vf2 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %head = insertelement <8 x i16> poison, i16 %b, i32 0 + %splat = shufflevector <8 x i16> %head, <8 x i16> poison, <8 x i32> zeroinitializer + %x = zext <8 x i16> %a to <8 x i32> + %y = sext <8 x i16> %splat to <8 x i32> + %z = shl <8 x i32> %x, %y + ret <8 x i32> %z +} + +define <8 x i32> @vwsll_vx_i16_v8i32_zext(<8 x i16> %a, i16 %b) { 
+; CHECK-LABEL: vwsll_vx_i16_v8i32_zext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vzext.vf2 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i16_v8i32_zext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 +; CHECK-ZVBB-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 +; CHECK-ZVBB-NEXT: vzext.vf2 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %head = insertelement <8 x i16> poison, i16 %b, i32 0 + %splat = shufflevector <8 x i16> %head, <8 x i16> poison, <8 x i32> zeroinitializer + %x = zext <8 x i16> %a to <8 x i32> + %y = zext <8 x i16> %splat to <8 x i32> + %z = shl <8 x i32> %x, %y + ret <8 x i32> %z +} + +define <8 x i32> @vwsll_vx_i8_v8i32_sext(<8 x i16> %a, i8 %b) { +; CHECK-LABEL: vwsll_vx_i8_v8i32_sext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vsext.vf4 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i8_v8i32_sext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 +; CHECK-ZVBB-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 +; CHECK-ZVBB-NEXT: vsext.vf4 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %head = insertelement <8 x i8> poison, i8 %b, i32 0 + %splat = shufflevector <8 x i8> %head, <8 x i8> poison, <8 x i32> zeroinitializer + %x = zext <8 x i16> %a to <8 x i32> + %y = sext <8 x i8> %splat to <8 x i32> + %z = shl <8 x i32> %x, %y + ret <8 x i32> %z +} + +define <8 x i32> @vwsll_vx_i8_v8i32_zext(<8 x i16> 
%a, i8 %b) { +; CHECK-LABEL: vwsll_vx_i8_v8i32_zext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vzext.vf4 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i8_v8i32_zext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 +; CHECK-ZVBB-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 +; CHECK-ZVBB-NEXT: vzext.vf4 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %head = insertelement <8 x i8> poison, i8 %b, i32 0 + %splat = shufflevector <8 x i8> %head, <8 x i8> poison, <8 x i32> zeroinitializer + %x = zext <8 x i16> %a to <8 x i32> + %y = zext <8 x i8> %splat to <8 x i32> + %z = shl <8 x i32> %x, %y + ret <8 x i32> %z +} + +define <8 x i32> @vwsll_vi_v8i32(<8 x i16> %a) { +; CHECK-LABEL: vwsll_vi_v8i32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vsll.vi v8, v10, 2 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vi_v8i32: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 +; CHECK-ZVBB-NEXT: vsll.vi v8, v10, 2 +; CHECK-ZVBB-NEXT: ret + %x = zext <8 x i16> %a to <8 x i32> + %z = shl <8 x i32> %x, splat (i32 2) + ret <8 x i32> %z +} + +; ============================================================================== +; i8 -> i16 +; ============================================================================== + +define <16 x i16> @vwsll_vv_v16i16_sext(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vwsll_vv_v16i16_sext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vsext.vf2 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; 
CHECK-ZVBB-LABEL: vwsll_vv_v16i16_sext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 +; CHECK-ZVBB-NEXT: vsext.vf2 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %x = zext <16 x i8> %a to <16 x i16> + %y = sext <16 x i8> %b to <16 x i16> + %z = shl <16 x i16> %x, %y + ret <16 x i16> %z +} + +define <16 x i16> @vwsll_vv_v16i16_zext(<16 x i8> %a, <16 x i8> %b) { +; CHECK-LABEL: vwsll_vv_v16i16_zext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vzext.vf2 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vv_v16i16_zext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 +; CHECK-ZVBB-NEXT: vzext.vf2 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %x = zext <16 x i8> %a to <16 x i16> + %y = zext <16 x i8> %b to <16 x i16> + %z = shl <16 x i16> %x, %y + ret <16 x i16> %z +} + +define <16 x i16> @vwsll_vx_i64_v16i16(<16 x i8> %a, i64 %b) { + %head = insertelement <8 x i64> poison, i64 %b, i32 0 + %splat = shufflevector <8 x i64> %head, <8 x i64> poison, <16 x i32> zeroinitializer + %x = zext <16 x i8> %a to <16 x i16> + %y = trunc <16 x i64> %splat to <16 x i16> + %z = shl <16 x i16> %x, %y + ret <16 x i16> %z +} + +define <16 x i16> @vwsll_vx_i32_v16i16(<16 x i8> %a, i32 %b) { +; CHECK-LABEL: vwsll_vx_i32_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-NEXT: vmv.v.x v12, a0 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vnsrl.wi v8, v12, 0 +; CHECK-NEXT: vsll.vv v8, v10, v8 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i32_v16i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; CHECK-ZVBB-NEXT: vmv.v.x v12, a0 +; CHECK-ZVBB-NEXT: vsetvli 
zero, zero, e16, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 +; CHECK-ZVBB-NEXT: vnsrl.wi v8, v12, 0 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v8 +; CHECK-ZVBB-NEXT: ret + %head = insertelement <16 x i32> poison, i32 %b, i32 0 + %splat = shufflevector <16 x i32> %head, <16 x i32> poison, <16 x i32> zeroinitializer + %x = zext <16 x i8> %a to <16 x i16> + %y = trunc <16 x i32> %splat to <16 x i16> + %z = shl <16 x i16> %x, %y + ret <16 x i16> %z +} + +define <16 x i16> @vwsll_vx_i16_v16i16(<16 x i8> %a, i16 %b) { +; CHECK-LABEL: vwsll_vx_i16_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vsll.vx v8, v10, a0 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i16_v16i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 +; CHECK-ZVBB-NEXT: vsll.vx v8, v10, a0 +; CHECK-ZVBB-NEXT: ret + %head = insertelement <16 x i16> poison, i16 %b, i32 0 + %splat = shufflevector <16 x i16> %head, <16 x i16> poison, <16 x i32> zeroinitializer + %x = zext <16 x i8> %a to <16 x i16> + %z = shl <16 x i16> %x, %splat + ret <16 x i16> %z +} + +define <16 x i16> @vwsll_vx_i8_v16i16_sext(<16 x i8> %a, i8 %b) { +; CHECK-LABEL: vwsll_vx_i8_v16i16_sext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vsext.vf2 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i8_v16i16_sext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 +; CHECK-ZVBB-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 +; CHECK-ZVBB-NEXT: vsext.vf2 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %head = insertelement <16 x i8> poison, i8 %b, i32 0 + %splat = shufflevector 
<16 x i8> %head, <16 x i8> poison, <16 x i32> zeroinitializer + %x = zext <16 x i8> %a to <16 x i16> + %y = sext <16 x i8> %splat to <16 x i16> + %z = shl <16 x i16> %x, %y + ret <16 x i16> %z +} + +define <16 x i16> @vwsll_vx_i8_v16i16_zext(<16 x i8> %a, i8 %b) { +; CHECK-LABEL: vwsll_vx_i8_v16i16_zext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vzext.vf2 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i8_v16i16_zext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 +; CHECK-ZVBB-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 +; CHECK-ZVBB-NEXT: vzext.vf2 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %head = insertelement <16 x i8> poison, i8 %b, i32 0 + %splat = shufflevector <16 x i8> %head, <16 x i8> poison, <16 x i32> zeroinitializer + %x = zext <16 x i8> %a to <16 x i16> + %y = zext <16 x i8> %splat to <16 x i16> + %z = shl <16 x i16> %x, %y + ret <16 x i16> %z +} + +define <16 x i16> @vwsll_vi_v16i16(<16 x i8> %a) { +; CHECK-LABEL: vwsll_vi_v16i16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vzext.vf2 v10, v8 +; CHECK-NEXT: vsll.vi v8, v10, 2 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vi_v16i16: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 +; CHECK-ZVBB-NEXT: vsll.vi v8, v10, 2 +; CHECK-ZVBB-NEXT: ret + %x = zext <16 x i8> %a to <16 x i16> + %z = shl <16 x i16> %x, splat (i16 2) + ret <16 x i16> %z +} + +; ============================================================================== +; i8 -> i64 +; ============================================================================== + +define <4 x i64> 
@vwsll_vv_v4i64_v4i8_sext(<4 x i8> %a, <4 x i8> %b) { +; CHECK-LABEL: vwsll_vv_v4i64_v4i8_sext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf8 v10, v8 +; CHECK-NEXT: vsext.vf8 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vv_v4i64_v4i8_sext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 +; CHECK-ZVBB-NEXT: vsext.vf8 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %x = zext <4 x i8> %a to <4 x i64> + %y = sext <4 x i8> %b to <4 x i64> + %z = shl <4 x i64> %x, %y + ret <4 x i64> %z +} + +define <4 x i64> @vwsll_vv_v4i64_v4i8_zext(<4 x i8> %a, <4 x i8> %b) { +; CHECK-LABEL: vwsll_vv_v4i64_v4i8_zext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf8 v10, v8 +; CHECK-NEXT: vzext.vf8 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vv_v4i64_v4i8_zext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 +; CHECK-ZVBB-NEXT: vzext.vf8 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %x = zext <4 x i8> %a to <4 x i64> + %y = zext <4 x i8> %b to <4 x i64> + %z = shl <4 x i64> %x, %y + ret <4 x i64> %z +} + +define <4 x i64> @vwsll_vx_i64_v4i64_v4i8(<4 x i8> %a, i64 %b) { +; CHECK-LABEL: vwsll_vx_i64_v4i64_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf8 v10, v8 +; CHECK-NEXT: vsll.vx v8, v10, a0 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i64_v4i64_v4i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 +; CHECK-ZVBB-NEXT: vsll.vx v8, v10, a0 +; CHECK-ZVBB-NEXT: ret + %head = insertelement <4 x i64> poison, i64 %b, i32 0 + %splat = shufflevector <4 x i64> %head, <4 x i64> poison, <4 x i32> 
zeroinitializer + %x = zext <4 x i8> %a to <4 x i64> + %z = shl <4 x i64> %x, %splat + ret <4 x i64> %z +} + +define <4 x i64> @vwsll_vx_i32_v4i64_v4i8_sext(<4 x i8> %a, i32 %b) { +; CHECK-LABEL: vwsll_vx_i32_v4i64_v4i8_sext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf8 v10, v8 +; CHECK-NEXT: vsext.vf2 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i32_v4i64_v4i8_sext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 +; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 +; CHECK-ZVBB-NEXT: vsext.vf2 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %head = insertelement <4 x i32> poison, i32 %b, i32 0 + %splat = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer + %x = zext <4 x i8> %a to <4 x i64> + %y = sext <4 x i32> %splat to <4 x i64> + %z = shl <4 x i64> %x, %y + ret <4 x i64> %z +} + +define <4 x i64> @vwsll_vx_i32_v4i64_v4i8_zext(<4 x i8> %a, i32 %b) { +; CHECK-LABEL: vwsll_vx_i32_v4i64_v4i8_zext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf8 v10, v8 +; CHECK-NEXT: vzext.vf2 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i32_v4i64_v4i8_zext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 +; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 +; CHECK-ZVBB-NEXT: vzext.vf2 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %head = insertelement <4 x i32> poison, i32 %b, i32 0 + %splat = shufflevector <4 x i32> %head, <4 x i32> poison, 
<4 x i32> zeroinitializer + %x = zext <4 x i8> %a to <4 x i64> + %y = zext <4 x i32> %splat to <4 x i64> + %z = shl <4 x i64> %x, %y + ret <4 x i64> %z +} + +define <4 x i64> @vwsll_vx_i16_v4i64_v4i8_sext(<4 x i8> %a, i16 %b) { +; CHECK-LABEL: vwsll_vx_i16_v4i64_v4i8_sext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf8 v10, v8 +; CHECK-NEXT: vsext.vf4 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i16_v4i64_v4i8_sext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 +; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 +; CHECK-ZVBB-NEXT: vsext.vf4 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %head = insertelement <4 x i16> poison, i16 %b, i32 0 + %splat = shufflevector <4 x i16> %head, <4 x i16> poison, <4 x i32> zeroinitializer + %x = zext <4 x i8> %a to <4 x i64> + %y = sext <4 x i16> %splat to <4 x i64> + %z = shl <4 x i64> %x, %y + ret <4 x i64> %z +} + +define <4 x i64> @vwsll_vx_i16_v4i64_v4i8_zext(<4 x i8> %a, i16 %b) { +; CHECK-LABEL: vwsll_vx_i16_v4i64_v4i8_zext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf8 v10, v8 +; CHECK-NEXT: vzext.vf4 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i16_v4i64_v4i8_zext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 +; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 +; CHECK-ZVBB-NEXT: vzext.vf4 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %head = insertelement <4 x i16> poison, i16 %b, i32 0 + 
%splat = shufflevector <4 x i16> %head, <4 x i16> poison, <4 x i32> zeroinitializer + %x = zext <4 x i8> %a to <4 x i64> + %y = zext <4 x i16> %splat to <4 x i64> + %z = shl <4 x i64> %x, %y + ret <4 x i64> %z +} + +define <4 x i64> @vwsll_vx_i8_v4i64_v4i8_sext(<4 x i8> %a, i8 %b) { +; CHECK-LABEL: vwsll_vx_i8_v4i64_v4i8_sext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf8 v10, v8 +; CHECK-NEXT: vsext.vf8 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i8_v4i64_v4i8_sext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 +; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 +; CHECK-ZVBB-NEXT: vsext.vf8 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %head = insertelement <4 x i8> poison, i8 %b, i32 0 + %splat = shufflevector <4 x i8> %head, <4 x i8> poison, <4 x i32> zeroinitializer + %x = zext <4 x i8> %a to <4 x i64> + %y = sext <4 x i8> %splat to <4 x i64> + %z = shl <4 x i64> %x, %y + ret <4 x i64> %z +} + +define <4 x i64> @vwsll_vx_i8_v4i64_v4i8_zext(<4 x i8> %a, i8 %b) { +; CHECK-LABEL: vwsll_vx_i8_v4i64_v4i8_zext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf8 v10, v8 +; CHECK-NEXT: vzext.vf8 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i8_v4i64_v4i8_zext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 +; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 +; CHECK-ZVBB-NEXT: vzext.vf8 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %head = 
insertelement <4 x i8> poison, i8 %b, i32 0 + %splat = shufflevector <4 x i8> %head, <4 x i8> poison, <4 x i32> zeroinitializer + %x = zext <4 x i8> %a to <4 x i64> + %y = zext <4 x i8> %splat to <4 x i64> + %z = shl <4 x i64> %x, %y + ret <4 x i64> %z +} + +define <4 x i64> @vwsll_vi_v4i64_v4i8(<4 x i8> %a) { +; CHECK-LABEL: vwsll_vi_v4i64_v4i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf8 v10, v8 +; CHECK-NEXT: vsll.vi v8, v10, 2 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vi_v4i64_v4i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 +; CHECK-ZVBB-NEXT: vsll.vi v8, v10, 2 +; CHECK-ZVBB-NEXT: ret + %x = zext <4 x i8> %a to <4 x i64> + %z = shl <4 x i64> %x, splat (i64 2) + ret <4 x i64> %z +} From 24d528cf4685668d3ad17116846769bed843e933 Mon Sep 17 00:00:00 2001 From: Prabhuk Date: Mon, 1 Apr 2024 23:21:45 -0700 Subject: [PATCH 066/201] [MIPS][CallSiteInfo][NFC] Fill CallSiteInfo only when needed (#86847) Argument-register pairs in CallSiteInfo is only needed when EmitCallSiteInfo is on. Currently, the pairs are always pushed to the vector but only used when EmitCallSiteInfo is on. Don't fill the CallSiteInfo vector unless used. Differential Revision: https://reviews.llvm.org/D107108?id=362887 Co-authored-by: Necip Fazil Yildiran --- llvm/lib/Target/Mips/MipsISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/Mips/MipsISelLowering.cpp b/llvm/lib/Target/Mips/MipsISelLowering.cpp index 0a0d40751fcf05..1c9c99c6fa9459 100644 --- a/llvm/lib/Target/Mips/MipsISelLowering.cpp +++ b/llvm/lib/Target/Mips/MipsISelLowering.cpp @@ -3381,7 +3381,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Collect CSInfo about which register passes which parameter. 
const TargetOptions &Options = DAG.getTarget().Options; - if (Options.SupportsDebugEntryValues) + if (Options.EmitCallSiteInfo) CSInfo.emplace_back(VA.getLocReg(), i); continue; From 93c387df908923f17875ab9cf0463d5f181318bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20K=C3=A9ri?= Date: Tue, 2 Apr 2024 08:55:20 +0200 Subject: [PATCH 067/201] [clang][analyzer] Change modeling of `fseek` in StreamChecker. (#86919) Until now function `fseek` returned nonzero on error, this is changed to -1 only. And it does not produce EOF error any more. This complies better with the POSIX standard. --- .../StaticAnalyzer/Checkers/StreamChecker.cpp | 21 +++------ clang/test/Analysis/stream-error.c | 43 ++++++++----------- clang/test/Analysis/stream-note.c | 31 ++++++++++++- 3 files changed, 55 insertions(+), 40 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp index 902c42a2799be4..069e3a633c1214 100644 --- a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp @@ -1264,15 +1264,10 @@ void StreamChecker::evalFseek(const FnDescription *Desc, const CallEvent &Call, if (!E.Init(Desc, Call, C, State)) return; - const llvm::APSInt *PosV = - C.getSValBuilder().getKnownValue(State, Call.getArgSVal(1)); - const llvm::APSInt *WhenceV = - C.getSValBuilder().getKnownValue(State, Call.getArgSVal(2)); - // Bifurcate the state into failed and non-failed. - // Return zero on success, nonzero on error. - ProgramStateRef StateNotFailed, StateFailed; - std::tie(StateFailed, StateNotFailed) = E.makeRetValAndAssumeDual(State, C); + // Return zero on success, -1 on error. + ProgramStateRef StateNotFailed = E.bindReturnValue(State, C, 0); + ProgramStateRef StateFailed = E.bindReturnValue(State, C, -1); // No failure: Reset the state to opened with no error. 
StateNotFailed = @@ -1282,12 +1277,10 @@ void StreamChecker::evalFseek(const FnDescription *Desc, const CallEvent &Call, // At error it is possible that fseek fails but sets none of the error flags. // If fseek failed, assume that the file position becomes indeterminate in any // case. - StreamErrorState NewErrS = ErrorNone | ErrorFError; - // Setting the position to start of file never produces EOF error. - if (!(PosV && *PosV == 0 && WhenceV && *WhenceV == SeekSetVal)) - NewErrS = NewErrS | ErrorFEof; - StateFailed = E.setStreamState(StateFailed, - StreamState::getOpened(Desc, NewErrS, true)); + // It is allowed to set the position beyond the end of the file. EOF error + // should not occur. + StateFailed = E.setStreamState( + StateFailed, StreamState::getOpened(Desc, ErrorNone | ErrorFError, true)); C.addTransition(StateFailed, E.getFailureNoteTag(this, C)); } diff --git a/clang/test/Analysis/stream-error.c b/clang/test/Analysis/stream-error.c index 88f7de4234ffb4..7f9116ff401445 100644 --- a/clang/test/Analysis/stream-error.c +++ b/clang/test/Analysis/stream-error.c @@ -365,27 +365,22 @@ void error_fseek(void) { return; int rc = fseek(F, 1, SEEK_SET); if (rc) { + clang_analyzer_eval(rc == -1); // expected-warning {{TRUE}} int IsFEof = feof(F), IsFError = ferror(F); - // Get feof or ferror or no error. - clang_analyzer_eval(IsFEof || IsFError); - // expected-warning@-1 {{FALSE}} - // expected-warning@-2 {{TRUE}} - clang_analyzer_eval(IsFEof && IsFError); // expected-warning {{FALSE}} + // Get ferror or no error. + clang_analyzer_eval(IsFError); // expected-warning {{FALSE}} \ + // expected-warning {{TRUE}} + clang_analyzer_eval(IsFEof); // expected-warning {{FALSE}} // Error flags should not change. 
- if (IsFEof) - clang_analyzer_eval(feof(F)); // expected-warning {{TRUE}} - else - clang_analyzer_eval(feof(F)); // expected-warning {{FALSE}} + clang_analyzer_eval(feof(F)); // expected-warning {{FALSE}} if (IsFError) - clang_analyzer_eval(ferror(F)); // expected-warning {{TRUE}} - else - clang_analyzer_eval(ferror(F)); // expected-warning {{FALSE}} + clang_analyzer_eval(ferror(F)); // expected-warning {{TRUE}} } else { - clang_analyzer_eval(feof(F)); // expected-warning {{FALSE}} - clang_analyzer_eval(ferror(F)); // expected-warning {{FALSE}} + clang_analyzer_eval(feof(F)); // expected-warning {{FALSE}} + clang_analyzer_eval(ferror(F)); // expected-warning {{FALSE}} // Error flags should not change. - clang_analyzer_eval(feof(F)); // expected-warning {{FALSE}} - clang_analyzer_eval(ferror(F)); // expected-warning {{FALSE}} + clang_analyzer_eval(feof(F)); // expected-warning {{FALSE}} + clang_analyzer_eval(ferror(F)); // expected-warning {{FALSE}} } fclose(F); } @@ -396,15 +391,13 @@ void error_fseeko(void) { return; int rc = fseeko(F, 1, SEEK_SET); if (rc) { - int IsFEof = feof(F), IsFError = ferror(F); - // Get feof or ferror or no error. - clang_analyzer_eval(IsFEof || IsFError); - // expected-warning@-1 {{FALSE}} - // expected-warning@-2 {{TRUE}} - clang_analyzer_eval(IsFEof && IsFError); // expected-warning {{FALSE}} + // Get ferror or no error. 
+ clang_analyzer_eval(ferror(F)); // expected-warning {{FALSE}} \ + // expected-warning {{TRUE}} + clang_analyzer_eval(feof(F)); // expected-warning {{FALSE}} } else { - clang_analyzer_eval(feof(F)); // expected-warning {{FALSE}} - clang_analyzer_eval(ferror(F)); // expected-warning {{FALSE}} + clang_analyzer_eval(feof(F)); // expected-warning {{FALSE}} + clang_analyzer_eval(ferror(F)); // expected-warning {{FALSE}} } fclose(F); } @@ -414,7 +407,7 @@ void error_fseek_0(void) { if (!F) return; int rc = fseek(F, 0, SEEK_SET); - if (rc) { + if (rc == -1) { int IsFEof = feof(F), IsFError = ferror(F); // Get ferror or no error, but not feof. clang_analyzer_eval(IsFError); diff --git a/clang/test/Analysis/stream-note.c b/clang/test/Analysis/stream-note.c index f77cd4aa62841d..54ea699f46674e 100644 --- a/clang/test/Analysis/stream-note.c +++ b/clang/test/Analysis/stream-note.c @@ -226,10 +226,39 @@ void check_indeterminate_fseek(void) { return; int Ret = fseek(F, 1, SEEK_SET); // expected-note {{Assuming this stream operation fails}} if (Ret) { // expected-note {{Taking true branch}} \ - // expected-note {{'Ret' is not equal to 0}} + // expected-note {{'Ret' is -1}} char Buf[2]; fwrite(Buf, 1, 2, F); // expected-warning {{might be 'indeterminate'}} \ // expected-note {{might be 'indeterminate'}} } fclose(F); } + +void error_fseek_ftell(void) { + FILE *F = fopen("file", "r"); + if (!F) // expected-note {{Taking false branch}} \ + // expected-note {{'F' is non-null}} + return; + fseek(F, 0, SEEK_END); // expected-note {{Assuming this stream operation fails}} + long size = ftell(F); // expected-warning {{might be 'indeterminate'}} \ + // expected-note {{might be 'indeterminate'}} + if (size == -1) { + fclose(F); + return; + } + if (size == 1) + fprintf(F, "abcd"); + fclose(F); +} + +void error_fseek_read_eof(void) { + FILE *F = fopen("file", "r"); + if (!F) + return; + if (fseek(F, 22, SEEK_SET) == -1) { + fclose(F); + return; + } + fgetc(F); // no warning + fclose(F); +} 
From 8bb9443333e0117ab61feecce9de339b11b924fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thorsten=20Sch=C3=BCtt?= Date: Tue, 2 Apr 2024 09:01:24 +0200 Subject: [PATCH 068/201] [GlobalIsel] Combine G_EXTRACT_VECTOR_ELT (#85321) preliminary steps --- .../llvm/CodeGen/GlobalISel/CombinerHelper.h | 25 ++ .../CodeGen/GlobalISel/GenericMachineInstrs.h | 41 +++ .../include/llvm/Target/GlobalISel/Combine.td | 230 +++++++++++- llvm/lib/CodeGen/GlobalISel/CMakeLists.txt | 1 + .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 8 + .../GlobalISel/CombinerHelperVectorOps.cpp | 326 ++++++++++++++++++ .../GlobalISel/combine-extract-vec-elt.mir | 299 +++++++++++++++- .../CodeGen/AArch64/extract-vector-elt.ll | 18 +- .../AArch64/extractvector-oob-load.mir | 7 +- 9 files changed, 930 insertions(+), 25 deletions(-) create mode 100644 llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h index 28d9cf6260d620..3af32043391fec 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -594,6 +594,10 @@ class CombinerHelper { /// This variant does not erase \p MI after calling the build function. void applyBuildFnNoErase(MachineInstr &MI, BuildFnTy &MatchInfo); + /// Use a function which takes in a MachineIRBuilder to perform a combine. + /// By default, it erases the instruction \p MI from the function. + void applyBuildFnMO(const MachineOperand &MO, BuildFnTy &MatchInfo); + bool matchOrShiftToFunnelShift(MachineInstr &MI, BuildFnTy &MatchInfo); bool matchFunnelShiftToRotate(MachineInstr &MI); void applyFunnelShiftToRotate(MachineInstr &MI); @@ -823,6 +827,27 @@ class CombinerHelper { /// Combine addos. bool matchAddOverflow(MachineInstr &MI, BuildFnTy &MatchInfo); + /// Combine extract vector element. 
+ bool matchExtractVectorElement(MachineInstr &MI, BuildFnTy &MatchInfo); + + /// Combine extract vector element with freeze on the vector register. + bool matchExtractVectorElementWithFreeze(const MachineOperand &MO, + BuildFnTy &MatchInfo); + + /// Combine extract vector element with a build vector on the vector register. + bool matchExtractVectorElementWithBuildVector(const MachineOperand &MO, + BuildFnTy &MatchInfo); + + /// Combine extract vector element with a build vector trunc on the vector + /// register. + bool matchExtractVectorElementWithBuildVectorTrunc(const MachineOperand &MO, + BuildFnTy &MatchInfo); + + /// Combine extract vector element with an insert vector element on the vector + /// register and different indices. + bool matchExtractVectorElementWithDifferentIndices(const MachineOperand &MO, + BuildFnTy &MatchInfo); + private: /// Checks for legality of an indexed variant of \p LdSt. bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const; diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h index 261cfcf504d5fe..25e47114e4a39a 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h @@ -286,6 +286,14 @@ class GBuildVector : public GMergeLikeInstr { } }; +/// Represents a G_BUILD_VECTOR_TRUNC. +class GBuildVectorTrunc : public GMergeLikeInstr { +public: + static bool classof(const MachineInstr *MI) { + return MI->getOpcode() == TargetOpcode::G_BUILD_VECTOR_TRUNC; + } +}; + /// Represents a G_PTR_ADD. class GPtrAdd : public GenericMachineInstr { public: @@ -739,6 +747,39 @@ class GOr : public GLogicalBinOp { }; }; +/// Represents an extract vector element.
+class GExtractVectorElement : public GenericMachineInstr { +public: + Register getVectorReg() const { return getOperand(1).getReg(); } + Register getIndexReg() const { return getOperand(2).getReg(); } + + static bool classof(const MachineInstr *MI) { + return MI->getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT; + } +}; + +/// Represents an insert vector element. +class GInsertVectorElement : public GenericMachineInstr { +public: + Register getVectorReg() const { return getOperand(1).getReg(); } + Register getElementReg() const { return getOperand(2).getReg(); } + Register getIndexReg() const { return getOperand(3).getReg(); } + + static bool classof(const MachineInstr *MI) { + return MI->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT; + } +}; + +/// Represents a freeze. +class GFreeze : public GenericMachineInstr { +public: + Register getSourceReg() const { return getOperand(1).getReg(); } + + static bool classof(const MachineInstr *MI) { + return MI->getOpcode() == TargetOpcode::G_FREEZE; + } +}; + } // namespace llvm #endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td index 72d3c0ea69bcd2..778ff7e437eb50 100644 --- a/llvm/include/llvm/Target/GlobalISel/Combine.td +++ b/llvm/include/llvm/Target/GlobalISel/Combine.td @@ -1305,6 +1305,200 @@ def match_addos : GICombineRule< [{ return Helper.matchAddOverflow(*${root}, ${matchinfo}); }]), (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>; +def match_extract_of_element_undef_vector: GICombineRule < + (defs root:$root), + (match (G_IMPLICIT_DEF $vector), + (G_EXTRACT_VECTOR_ELT $root, $vector, $idx)), + (apply (G_IMPLICIT_DEF $root)) +>; + +def match_extract_of_element_undef_index: GICombineRule < + (defs root:$root), + (match (G_IMPLICIT_DEF $idx), + (G_EXTRACT_VECTOR_ELT $root, $vector, $idx)), + (apply (G_IMPLICIT_DEF $root)) +>; + +def match_extract_of_element : GICombineRule< + (defs 
root:$root, build_fn_matchinfo:$matchinfo), + (match (wip_match_opcode G_EXTRACT_VECTOR_ELT):$root, + [{ return Helper.matchExtractVectorElement(*${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>; + +def extract_vector_element_not_const : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (G_INSERT_VECTOR_ELT $src, $x, $value, $idx), + (G_EXTRACT_VECTOR_ELT $root, $src, $idx)), + (apply (GIReplaceReg $root, $value))>; + +def extract_vector_element_different_indices : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (G_INSERT_VECTOR_ELT $src, $x, $value, $idx2), + (G_EXTRACT_VECTOR_ELT $root, $src, $idx1), + [{ return Helper.matchExtractVectorElementWithDifferentIndices(${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>; + +def extract_vector_element_build_vector2 : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (G_BUILD_VECTOR $src, $x, $y), + (G_EXTRACT_VECTOR_ELT $root, $src, $idx), + [{ return Helper.matchExtractVectorElementWithBuildVector(${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>; + +def extract_vector_element_build_vector3 : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (G_BUILD_VECTOR $src, $x, $y, $z), + (G_EXTRACT_VECTOR_ELT $root, $src, $idx), + [{ return Helper.matchExtractVectorElementWithBuildVector(${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>; + +def extract_vector_element_build_vector4 : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (G_BUILD_VECTOR $src, $x, $y, $z, $a), + (G_EXTRACT_VECTOR_ELT $root, $src, $idx), + [{ return Helper.matchExtractVectorElementWithBuildVector(${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>; + +def extract_vector_element_build_vector5 : GICombineRule< + (defs 
root:$root, build_fn_matchinfo:$matchinfo), + (match (G_BUILD_VECTOR $src, $x, $y, $z, $a, $b), + (G_EXTRACT_VECTOR_ELT $root, $src, $idx), + [{ return Helper.matchExtractVectorElementWithBuildVector(${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>; + +def extract_vector_element_build_vector6 : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (G_BUILD_VECTOR $src, $x, $y, $z, $a, $b, $c), + (G_EXTRACT_VECTOR_ELT $root, $src, $idx), + [{ return Helper.matchExtractVectorElementWithBuildVector(${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>; + +def extract_vector_element_build_vector7 : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (G_BUILD_VECTOR $src, $x, $y, $z, $a, $b, $c, $d), + (G_EXTRACT_VECTOR_ELT $root, $src, $idx), + [{ return Helper.matchExtractVectorElementWithBuildVector(${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>; + +def extract_vector_element_build_vector8 : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (G_BUILD_VECTOR $src, $x, $y, $z, $a, $b, $c, $d, $e), + (G_EXTRACT_VECTOR_ELT $root, $src, $idx), + [{ return Helper.matchExtractVectorElementWithBuildVector(${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>; + +def extract_vector_element_build_vector9 : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (G_BUILD_VECTOR $src, $x, $y, $z, $a, $b, $c, $d, $e, $f), + (G_EXTRACT_VECTOR_ELT $root, $src, $idx), + [{ return Helper.matchExtractVectorElementWithBuildVector(${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>; + +def extract_vector_element_build_vector10 : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (G_BUILD_VECTOR $src, $x, $y, $z, $a, $b, $c, $d, $e, $f, $g), + (G_EXTRACT_VECTOR_ELT $root, 
$src, $idx), + [{ return Helper.matchExtractVectorElementWithBuildVector(${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>; + +def extract_vector_element_build_vector11 : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (G_BUILD_VECTOR $src, $x, $y, $z, $a, $b, $c, $d, $e, $f, $g, $h), + (G_EXTRACT_VECTOR_ELT $root, $src, $idx), + [{ return Helper.matchExtractVectorElementWithBuildVector(${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>; + +def extract_vector_element_build_vector12 : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (G_BUILD_VECTOR $src, $x, $y, $z, $a, $b, $c, $d, $e, $f, $g, $h, $i), + (G_EXTRACT_VECTOR_ELT $root, $src, $idx), + [{ return Helper.matchExtractVectorElementWithBuildVector(${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>; + +def extract_vector_element_build_vector13 : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (G_BUILD_VECTOR $src, $x, $y, $z, $a, $b, $c, $d, $e, $f, $g, $h, $i, $j), + (G_EXTRACT_VECTOR_ELT $root, $src, $idx), + [{ return Helper.matchExtractVectorElementWithBuildVector(${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>; + +def extract_vector_element_build_vector14 : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (G_BUILD_VECTOR $src, $x, $y, $z, $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k), + (G_EXTRACT_VECTOR_ELT $root, $src, $idx), + [{ return Helper.matchExtractVectorElementWithBuildVector(${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>; + +def extract_vector_element_build_vector15 : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (G_BUILD_VECTOR $src, $x, $y, $z, $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l), + (G_EXTRACT_VECTOR_ELT $root, $src, $idx), + [{ return 
Helper.matchExtractVectorElementWithBuildVector(${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>; + +def extract_vector_element_build_vector16 : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (G_BUILD_VECTOR $src, $x, $y, $z, $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m), + (G_EXTRACT_VECTOR_ELT $root, $src, $idx), + [{ return Helper.matchExtractVectorElementWithBuildVector(${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>; + +def extract_vector_element_build_vector_trunc2 : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (G_BUILD_VECTOR_TRUNC $src, $x, $y), + (G_EXTRACT_VECTOR_ELT $root, $src, $idx), + [{ return Helper.matchExtractVectorElementWithBuildVectorTrunc(${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>; + +def extract_vector_element_build_vector_trunc3 : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (G_BUILD_VECTOR_TRUNC $src, $x, $y, $z), + (G_EXTRACT_VECTOR_ELT $root, $src, $idx), + [{ return Helper.matchExtractVectorElementWithBuildVectorTrunc(${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>; + +def extract_vector_element_build_vector_trunc4 : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (G_BUILD_VECTOR_TRUNC $src, $x, $y, $z, $a), + (G_EXTRACT_VECTOR_ELT $root, $src, $idx), + [{ return Helper.matchExtractVectorElementWithBuildVectorTrunc(${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>; + +def extract_vector_element_build_vector_trunc5 : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (G_BUILD_VECTOR_TRUNC $src, $x, $y, $z, $a, $b), + (G_EXTRACT_VECTOR_ELT $root, $src, $idx), + [{ return Helper.matchExtractVectorElementWithBuildVectorTrunc(${root}, ${matchinfo}); }]), + (apply [{ 
Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>; + +def extract_vector_element_build_vector_trunc6 : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (G_BUILD_VECTOR_TRUNC $src, $x, $y, $z, $a, $b, $c), + (G_EXTRACT_VECTOR_ELT $root, $src, $idx), + [{ return Helper.matchExtractVectorElementWithBuildVectorTrunc(${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>; + +def extract_vector_element_build_vector_trunc7 : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (G_BUILD_VECTOR_TRUNC $src, $x, $y, $z, $a, $b, $c, $d), + (G_EXTRACT_VECTOR_ELT $root, $src, $idx), + [{ return Helper.matchExtractVectorElementWithBuildVectorTrunc(${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>; + +def extract_vector_element_build_vector_trunc8 : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (G_BUILD_VECTOR_TRUNC $src, $x, $y, $z, $a, $b, $c, $d, $e), + (G_EXTRACT_VECTOR_ELT $root, $src, $idx), + [{ return Helper.matchExtractVectorElementWithBuildVectorTrunc(${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>; + +def extract_vector_element_freeze : GICombineRule< + (defs root:$root, build_fn_matchinfo:$matchinfo), + (match (G_FREEZE $src, $input), + (G_EXTRACT_VECTOR_ELT $root, $src, $idx), + [{ return Helper.matchExtractVectorElementWithFreeze(${root}, ${matchinfo}); }]), + (apply [{ Helper.applyBuildFnMO(${root}, ${matchinfo}); }])>; + // Combines concat operations def concat_matchinfo : GIDefMatchData<"SmallVector">; def combine_concat_vector : GICombineRule< @@ -1313,6 +1507,37 @@ def combine_concat_vector : GICombineRule< [{ return Helper.matchCombineConcatVectors(*${root}, ${matchinfo}); }]), (apply [{ Helper.applyCombineConcatVectors(*${root}, ${matchinfo}); }])>; +// match_extract_of_element must be the first! 
+def vector_ops_combines: GICombineGroup<[ +match_extract_of_element_undef_vector, +match_extract_of_element_undef_index, +match_extract_of_element, +extract_vector_element_not_const, +extract_vector_element_different_indices, +extract_vector_element_build_vector2, +extract_vector_element_build_vector3, +extract_vector_element_build_vector4, +extract_vector_element_build_vector5, +extract_vector_element_build_vector7, +extract_vector_element_build_vector8, +extract_vector_element_build_vector9, +extract_vector_element_build_vector10, +extract_vector_element_build_vector11, +extract_vector_element_build_vector12, +extract_vector_element_build_vector13, +extract_vector_element_build_vector14, +extract_vector_element_build_vector15, +extract_vector_element_build_vector16, +extract_vector_element_build_vector_trunc2, +extract_vector_element_build_vector_trunc3, +extract_vector_element_build_vector_trunc4, +extract_vector_element_build_vector_trunc5, +extract_vector_element_build_vector_trunc6, +extract_vector_element_build_vector_trunc7, +extract_vector_element_build_vector_trunc8, +extract_vector_element_freeze +]>; + // FIXME: These should use the custom predicate feature once it lands. 
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero, undef_to_negative_one, @@ -1368,8 +1593,9 @@ def fma_combines : GICombineGroup<[combine_fadd_fmul_to_fmad_or_fma, def constant_fold_binops : GICombineGroup<[constant_fold_binop, constant_fold_fp_binop]>; -def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines, - extract_vec_elt_combines, combines_for_extload, combine_extracted_vector_load, +def all_combines : GICombineGroup<[trivial_combines, vector_ops_combines, + insert_vec_elt_combines, extract_vec_elt_combines, combines_for_extload, + combine_extracted_vector_load, undef_combines, identity_combines, phi_combines, simplify_add_to_sub, hoist_logic_op_with_same_opcode_hands, shifts_too_big, reassocs, ptr_add_immed_chain, diff --git a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt index 46e6c6df5998e5..54ac7f72011a6e 100644 --- a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt +++ b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt @@ -6,6 +6,7 @@ add_llvm_component_library(LLVMGlobalISel GlobalISel.cpp Combiner.cpp CombinerHelper.cpp + CombinerHelperVectorOps.cpp GIMatchTableExecutor.cpp GISelChangeObserver.cpp IRTranslator.cpp diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 98e7c73a801f59..5cf7a33a5f6756 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -4058,6 +4058,14 @@ void CombinerHelper::applyBuildFn( MI.eraseFromParent(); } +void CombinerHelper::applyBuildFnMO(const MachineOperand &MO, + BuildFnTy &MatchInfo) { + MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI); + Builder.setInstrAndDebugLoc(*Root); + MatchInfo(Builder); + Root->eraseFromParent(); +} + void CombinerHelper::applyBuildFnNoErase( MachineInstr &MI, std::function &MatchInfo) { MatchInfo(Builder); diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp 
b/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp new file mode 100644 index 00000000000000..123bf21f657c36 --- /dev/null +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperVectorOps.cpp @@ -0,0 +1,326 @@ +//===- CombinerHelperVectorOps.cpp-----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements CombinerHelper for G_EXTRACT_VECTOR_ELT. +// +//===----------------------------------------------------------------------===// +#include "llvm/CodeGen/GlobalISel/CombinerHelper.h" +#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" +#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" +#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" +#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" +#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/LowLevelTypeUtils.h" +#include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetOpcodes.h" +#include "llvm/Support/Casting.h" +#include + +#define DEBUG_TYPE "gi-combiner" + +using namespace llvm; +using namespace MIPatternMatch; + +bool CombinerHelper::matchExtractVectorElement(MachineInstr &MI, + BuildFnTy &MatchInfo) { + GExtractVectorElement *Extract = cast(&MI); + + Register Dst = Extract->getReg(0); + Register Vector = Extract->getVectorReg(); + Register Index = Extract->getIndexReg(); + LLT DstTy = MRI.getType(Dst); + LLT VectorTy = MRI.getType(Vector); + + // The vector register can be def'd by various ops that have vector as its + // type. 
They can all be used for constant folding, scalarizing, + canonicalization, or combining based on symmetry. + // + // vector like ops + // * build vector + // * build vector trunc + // * shuffle vector + // * splat vector + // * concat vectors + // * insert/extract vector element + // * insert/extract subvector + // * vector loads + // * scalable vector loads + // + // compute like ops + // * binary ops + // * unary ops + // * exts and truncs + // * casts + // * fneg + // * select + // * phis + // * cmps + // * freeze + // * bitcast + // * undef + + // We try to get the value of the Index register. + std::optional MaybeIndex = + getIConstantVRegValWithLookThrough(Index, MRI); + std::optional IndexC = std::nullopt; + + if (MaybeIndex) + IndexC = MaybeIndex->Value; + + // Fold extractVectorElement(Vector, TOOLARGE) -> undef + if (IndexC && VectorTy.isFixedVector() && + IndexC->getZExtValue() >= VectorTy.getNumElements() && + isLegalOrBeforeLegalizer({TargetOpcode::G_IMPLICIT_DEF, {DstTy}})) { + // For fixed-length vectors, it's invalid to extract out-of-range elements. + MatchInfo = [=](MachineIRBuilder &B) { B.buildUndef(Dst); }; + return true; + } + + return false; +} + +bool CombinerHelper::matchExtractVectorElementWithDifferentIndices( + const MachineOperand &MO, BuildFnTy &MatchInfo) { + MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI); + GExtractVectorElement *Extract = cast(Root); + + // + // %idx1:_(s64) = G_CONSTANT i64 1 + // %idx2:_(s64) = G_CONSTANT i64 2 + // %insert:_(<2 x s32>) = G_INSERT_VECTOR_ELT %bv(<2 x s32>), + // %value(s32), %idx2(s64) %extract:_(s32) = G_EXTRACT_VECTOR_ELT %insert(<2 + // x s32>), %idx1(s64) + // + // --> + // + // %insert:_(<2 x s32>) = G_INSERT_VECTOR_ELT %bv(<2 x s32>), + // %value(s32), %idx2(s64) %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x + // s32>), %idx1(s64) + // + // + + Register Index = Extract->getIndexReg(); + + // We try to get the value of the Index register.
+ std::optional MaybeIndex = + getIConstantVRegValWithLookThrough(Index, MRI); + std::optional IndexC = std::nullopt; + + if (!MaybeIndex) + return false; + else + IndexC = MaybeIndex->Value; + + Register Vector = Extract->getVectorReg(); + + GInsertVectorElement *Insert = + getOpcodeDef(Vector, MRI); + if (!Insert) + return false; + + Register Dst = Extract->getReg(0); + + std::optional MaybeInsertIndex = + getIConstantVRegValWithLookThrough(Insert->getIndexReg(), MRI); + + if (MaybeInsertIndex && MaybeInsertIndex->Value != *IndexC) { + // There is no one-use check. We have to keep the insert. When both Index + // registers are constants and not equal, we can look into the Vector + // register of the insert. + MatchInfo = [=](MachineIRBuilder &B) { + B.buildExtractVectorElement(Dst, Insert->getVectorReg(), Index); + }; + return true; + } + + return false; +} + +bool CombinerHelper::matchExtractVectorElementWithFreeze( + const MachineOperand &MO, BuildFnTy &MatchInfo) { + MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI); + GExtractVectorElement *Extract = cast(Root); + + Register Vector = Extract->getVectorReg(); + + // + // %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32) + // %freeze:_(<2 x s32>) = G_FREEZE %bv(<2 x s32>) + // %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %opaque(s64) + // + // --> + // + // %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32) + // %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %opaque(s64) + // %freeze:_(s32) = G_FREEZE %extract(s32) + // + // + + // For G_FREEZE, the input and the output types are identical. Moving the + // freeze from the Vector into the front of the extract preserves the freeze + // semantics. The result is still freeze'd. Furthermore, the Vector register + // becomes easier to analyze. A build vector could have been hidden behind the + // freeze. + + // We expect a freeze on the Vector register. 
+ GFreeze *Freeze = getOpcodeDef(Vector, MRI); + if (!Freeze) + return false; + + Register Dst = Extract->getReg(0); + LLT DstTy = MRI.getType(Dst); + + // We first have to check for one-use and legality of the freeze. + // The type of the extractVectorElement did not change. + if (!MRI.hasOneNonDBGUse(Freeze->getReg(0)) || + !isLegalOrBeforeLegalizer({TargetOpcode::G_FREEZE, {DstTy}})) + return false; + + Register Index = Extract->getIndexReg(); + + // We move the freeze from the Vector register in front of the + // extractVectorElement. + MatchInfo = [=](MachineIRBuilder &B) { + auto Extract = + B.buildExtractVectorElement(DstTy, Freeze->getSourceReg(), Index); + B.buildFreeze(Dst, Extract); + }; + + return true; +} + +bool CombinerHelper::matchExtractVectorElementWithBuildVector( + const MachineOperand &MO, BuildFnTy &MatchInfo) { + MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI); + GExtractVectorElement *Extract = cast(Root); + + // + // %zero:_(s64) = G_CONSTANT i64 0 + // %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32) + // %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %zero(s64) + // + // --> + // + // %extract:_(32) = COPY %arg1(s32) + // + // + // + // %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32) + // %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %opaque(s64) + // + // --> + // + // %bv:_(<2 x s32>) = G_BUILD_VECTOR %arg1(s32), %arg2(s32) + // %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %opaque(s64) + // + + Register Vector = Extract->getVectorReg(); + + // We expect a buildVector on the Vector register. + GBuildVector *Build = getOpcodeDef(Vector, MRI); + if (!Build) + return false; + + LLT VectorTy = MRI.getType(Vector); + + // There is a one-use check. There are more combines on build vectors. 
+ EVT Ty(getMVTForLLT(VectorTy)); + if (!MRI.hasOneNonDBGUse(Build->getReg(0)) || + !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty)) + return false; + + Register Index = Extract->getIndexReg(); + + // If the Index is constant, then we can extract the element from the given + // offset. + std::optional MaybeIndex = + getIConstantVRegValWithLookThrough(Index, MRI); + if (!MaybeIndex) + return false; + + // We now know that there is a buildVector def'd on the Vector register and + // the index is const. The combine will succeed. + + Register Dst = Extract->getReg(0); + + MatchInfo = [=](MachineIRBuilder &B) { + B.buildCopy(Dst, Build->getSourceReg(MaybeIndex->Value.getZExtValue())); + }; + + return true; +} + +bool CombinerHelper::matchExtractVectorElementWithBuildVectorTrunc( + const MachineOperand &MO, BuildFnTy &MatchInfo) { + MachineInstr *Root = getDefIgnoringCopies(MO.getReg(), MRI); + GExtractVectorElement *Extract = cast(Root); + + // + // %zero:_(s64) = G_CONSTANT i64 0 + // %bv:_(<2 x s32>) = G_BUILD_VECTOR_TRUNC %arg1(s64), %arg2(s64) + // %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %zero(s64) + // + // --> + // + // %extract:_(32) = G_TRUNC %arg1(s64) + // + // + // + // %bv:_(<2 x s32>) = G_BUILD_VECTOR_TRUNC %arg1(s64), %arg2(s64) + // %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %opaque(s64) + // + // --> + // + // %bv:_(<2 x s32>) = G_BUILD_VECTOR_TRUNC %arg1(s64), %arg2(s64) + // %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %opaque(s64) + // + + Register Vector = Extract->getVectorReg(); + + // We expect a buildVectorTrunc on the Vector register. + GBuildVectorTrunc *Build = getOpcodeDef(Vector, MRI); + if (!Build) + return false; + + LLT VectorTy = MRI.getType(Vector); + + // There is a one-use check. There are more combines on build vectors. 
+ EVT Ty(getMVTForLLT(VectorTy)); + if (!MRI.hasOneNonDBGUse(Build->getReg(0)) || + !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty)) + return false; + + Register Index = Extract->getIndexReg(); + + // If the Index is constant, then we can extract the element from the given + // offset. + std::optional MaybeIndex = + getIConstantVRegValWithLookThrough(Index, MRI); + if (!MaybeIndex) + return false; + + // We now know that there is a buildVectorTrunc def'd on the Vector register + // and the index is const. The combine will succeed. + + Register Dst = Extract->getReg(0); + LLT DstTy = MRI.getType(Dst); + LLT SrcTy = MRI.getType(Build->getSourceReg(0)); + + // For buildVectorTrunc, the inputs are truncated. + if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {DstTy, SrcTy}})) + return false; + + MatchInfo = [=](MachineIRBuilder &B) { + B.buildTrunc(Dst, Build->getSourceReg(MaybeIndex->Value.getZExtValue())); + }; + + return true; +} diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir index a2116ccc767112..c2a38e26676cf9 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-extract-vec-elt.mir @@ -192,8 +192,8 @@ body: | ... --- +# This test checks that this combine runs after the insertvec->build_vector name: extract_from_insert -alignment: 4 tracksRegLiveness: true liveins: - { reg: '$x0' } @@ -203,8 +203,6 @@ frameInfo: body: | bb.1: liveins: $x0, $x1 - ; This test checks that this combine runs after the insertvec->build_vector - ; combine. ; CHECK-LABEL: name: extract_from_insert ; CHECK: liveins: $x0, $x1 ; CHECK-NEXT: {{ $}} @@ -247,3 +245,298 @@ body: | RET_ReallyLR implicit $x0 ... 
+--- +name: extract_from_vector_undef +alignment: 4 +liveins: + - { reg: '$x0' } + - { reg: '$x1' } +frameInfo: + maxAlignment: 1 +body: | + bb.1: + liveins: $x0, $x1 + ; CHECK-LABEL: name: extract_from_vector_undef + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %extract:_(s64) = G_IMPLICIT_DEF + ; CHECK-NEXT: $x0 = COPY %extract(s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %vec:_(<2 x s64>) = G_IMPLICIT_DEF + %idx:_(s32) = G_CONSTANT i32 -2 + %extract:_(s64) = G_EXTRACT_VECTOR_ELT %vec(<2 x s64>), %idx(s32) + $x0 = COPY %extract(s64) + RET_ReallyLR implicit $x0 + +... +--- +name: extract_from_index_undef +alignment: 4 +liveins: + - { reg: '$x0' } + - { reg: '$x1' } +frameInfo: + maxAlignment: 1 +body: | + bb.1: + ; CHECK-LABEL: name: extract_from_index_undef + ; CHECK: %extract:_(s64) = G_IMPLICIT_DEF + ; CHECK-NEXT: $x0 = COPY %extract(s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %vec:_(<2 x s64>) = COPY $q0 + %idx:_(s32) = G_IMPLICIT_DEF + %extract:_(s64) = G_EXTRACT_VECTOR_ELT %vec(<2 x s64>), %idx(s32) + $x0 = COPY %extract(s64) + RET_ReallyLR implicit $x0 + +... +--- +name: extract_from_index_too_large +alignment: 4 +liveins: + - { reg: '$x0' } + - { reg: '$x1' } +frameInfo: + maxAlignment: 1 +body: | + bb.1: + liveins: $x0, $x1 + ; CHECK-LABEL: name: extract_from_index_too_large + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %extract:_(s64) = G_IMPLICIT_DEF + ; CHECK-NEXT: $x0 = COPY %extract(s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %vec:_(<2 x s64>) = COPY $q0 + %idx:_(s32) = G_CONSTANT i32 3000 + %extract:_(s64) = G_EXTRACT_VECTOR_ELT %vec(<2 x s64>), %idx(s32) + $x0 = COPY %extract(s64) + RET_ReallyLR implicit $x0 + +... 
+--- +name: extract_with_freeze +alignment: 4 +liveins: + - { reg: '$x0' } + - { reg: '$x1' } +frameInfo: + maxAlignment: 1 +body: | + bb.1: + liveins: $x0, $x1 + ; CHECK-LABEL: name: extract_with_freeze + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %vec:_(<2 x s64>) = COPY $q0 + ; CHECK-NEXT: %idx:_(s32) = COPY $w1 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s64) = G_EXTRACT_VECTOR_ELT %vec(<2 x s64>), %idx(s32) + ; CHECK-NEXT: %extract:_(s64) = G_FREEZE [[EVEC]] + ; CHECK-NEXT: $x0 = COPY %extract(s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %vec:_(<2 x s64>) = COPY $q0 + %idx:_(s32) = COPY $w1 + %fvec:_(<2 x s64>) = G_FREEZE %vec + %extract:_(s64) = G_EXTRACT_VECTOR_ELT %fvec(<2 x s64>), %idx(s32) + $x0 = COPY %extract(s64) + RET_ReallyLR implicit $x0 + +... +--- +name: extract_from_insert_symmetry +alignment: 4 +liveins: + - { reg: '$x0' } + - { reg: '$x1' } +frameInfo: + maxAlignment: 1 +body: | + bb.1: + liveins: $x0, $x1 + ; CHECK-LABEL: name: extract_from_insert_symmetry + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %element:_(s64) = COPY $x1 + ; CHECK-NEXT: $x0 = COPY %element(s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %vec:_(<2 x s64>) = COPY $q0 + %idx:_(s32) = COPY $w1 + %element:_(s64) = COPY $x1 + %invec:_(<2 x s64>) = G_INSERT_VECTOR_ELT %vec(<2 x s64>), %element(s64), %idx(s32) + %extract:_(s64) = G_EXTRACT_VECTOR_ELT %invec(<2 x s64>), %idx(s32) + $x0 = COPY %extract(s64) + RET_ReallyLR implicit $x0 + +... 
+--- +name: extract_from_insert_with_different_consts +alignment: 4 +liveins: + - { reg: '$x0' } + - { reg: '$x1' } +frameInfo: + maxAlignment: 1 +body: | + bb.1: + liveins: $x0, $x1 + ; CHECK-LABEL: name: extract_from_insert_with_different_consts + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %vec:_(<2 x s64>) = COPY $q0 + ; CHECK-NEXT: %idx2:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %vec(<2 x s64>), %idx2(s32) + ; CHECK-NEXT: $x0 = COPY %extract(s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %vec:_(<2 x s64>) = COPY $q0 + %idx:_(s32) = G_CONSTANT i32 0 + %idx2:_(s32) = G_CONSTANT i32 1 + %element:_(s64) = COPY $x1 + %invec:_(<2 x s64>) = G_INSERT_VECTOR_ELT %vec(<2 x s64>), %element(s64), %idx(s32) + %extract:_(s64) = G_EXTRACT_VECTOR_ELT %invec(<2 x s64>), %idx2(s32) + $x0 = COPY %extract(s64) + RET_ReallyLR implicit $x0 + +... +--- +name: extract_from_build_vector_non_const +alignment: 4 +liveins: + - { reg: '$x0' } + - { reg: '$x1' } +frameInfo: + maxAlignment: 1 +body: | + bb.1: + liveins: $x0, $x1 + ; CHECK-LABEL: name: extract_from_build_vector_non_const + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %idx:_(s32) = COPY $w0 + ; CHECK-NEXT: %arg1:_(s64) = COPY $x0 + ; CHECK-NEXT: %arg2:_(s64) = COPY $x1 + ; CHECK-NEXT: %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64) + ; CHECK-NEXT: %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %idx(s32) + ; CHECK-NEXT: $x0 = COPY %extract(s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %vec:_(<2 x s64>) = COPY $q0 + %idx:_(s32) = COPY $w0 + %arg1:_(s64) = COPY $x0 + %arg2:_(s64) = COPY $x1 + %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64) + %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %idx(s32) + $x0 = COPY %extract(s64) + RET_ReallyLR implicit $x0 + +... 
+--- +name: extract_from_build_vector_const +alignment: 4 +liveins: + - { reg: '$x0' } + - { reg: '$x1' } +frameInfo: + maxAlignment: 1 +body: | + bb.1: + liveins: $x0, $x1 + ; CHECK-LABEL: name: extract_from_build_vector_const + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %arg1:_(s64) = COPY $x0 + ; CHECK-NEXT: $x0 = COPY %arg1(s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %vec:_(<2 x s64>) = COPY $q0 + %idx:_(s32) = G_CONSTANT i32 0 + %arg1:_(s64) = COPY $x0 + %arg2:_(s64) = COPY $x1 + %bv:_(<2 x s64>) = G_BUILD_VECTOR %arg1(s64), %arg2(s64) + %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %idx(s32) + $x0 = COPY %extract(s64) + RET_ReallyLR implicit $x0 + +... +--- +name: extract_from_build_vector_trunc_const2 +alignment: 4 +liveins: + - { reg: '$x0' } + - { reg: '$x1' } +frameInfo: + maxAlignment: 1 +body: | + bb.1: + liveins: $x0, $x1 + ; CHECK-LABEL: name: extract_from_build_vector_trunc_const2 + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %arg1:_(s64) = COPY $x0 + ; CHECK-NEXT: %extract:_(s32) = G_TRUNC %arg1(s64) + ; CHECK-NEXT: $w0 = COPY %extract(s32) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %vec:_(<2 x s64>) = COPY $q0 + %arg1:_(s64) = COPY $x0 + %arg2:_(s64) = COPY $x1 + %arg3:_(s64) = COPY $x0 + %arg4:_(s64) = COPY $x1 + %idx:_(s32) = G_CONSTANT i32 0 + %bv:_(<4 x s32>) = G_BUILD_VECTOR_TRUNC %arg1(s64), %arg2(s64), %arg3(s64), %arg4(s64) + %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<4 x s32>), %idx(s32) + $w0 = COPY %extract(s32) + RET_ReallyLR implicit $x0 +... 
+--- +name: extract_from_build_vector_trunc2 +alignment: 4 +liveins: + - { reg: '$x0' } + - { reg: '$x1' } +frameInfo: + maxAlignment: 1 +body: | + bb.1: + liveins: $x0, $x1 + ; CHECK-LABEL: name: extract_from_build_vector_trunc2 + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %arg1:_(s64) = COPY $x0 + ; CHECK-NEXT: %arg2:_(s64) = COPY $x1 + ; CHECK-NEXT: %idx:_(s32) = COPY $w0 + ; CHECK-NEXT: %bv:_(<2 x s32>) = G_BUILD_VECTOR_TRUNC %arg1(s64), %arg2(s64) + ; CHECK-NEXT: %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %idx(s32) + ; CHECK-NEXT: $w0 = COPY %extract(s32) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %arg1:_(s64) = COPY $x0 + %arg2:_(s64) = COPY $x1 + %idx:_(s32) = COPY $w0 + %bv:_(<2 x s32>) = G_BUILD_VECTOR_TRUNC %arg1(s64), %arg2(s64) + %extract:_(s32) = G_EXTRACT_VECTOR_ELT %bv(<2 x s32>), %idx(s32) + $w0 = COPY %extract(s32) + RET_ReallyLR implicit $x0 +... +--- +name: extract_from_build_vector_trunc_const3 +alignment: 4 +liveins: + - { reg: '$x0' } + - { reg: '$x1' } +frameInfo: + maxAlignment: 1 +body: | + bb.1: + liveins: $x0, $x1 + ; CHECK-LABEL: name: extract_from_build_vector_trunc_const3 + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %arg1:_(s128) = COPY $q0 + ; CHECK-NEXT: %extract:_(s64) = G_TRUNC %arg1(s128) + ; CHECK-NEXT: $x0 = COPY %extract(s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 + %arg1:_(s128) = COPY $q0 + %arg2:_(s128) = COPY $q1 + %idx:_(s32) = G_CONSTANT i32 0 + %bv:_(<2 x s64>) = G_BUILD_VECTOR_TRUNC %arg1(s128), %arg2(s128) + %extract:_(s64) = G_EXTRACT_VECTOR_ELT %bv(<2 x s64>), %idx(s32) + $x0 = COPY %extract(s64) + RET_ReallyLR implicit $x0 +... 
+--- diff --git a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll index c5c525a15ad9be..504222e0036e22 100644 --- a/llvm/test/CodeGen/AArch64/extract-vector-elt.ll +++ b/llvm/test/CodeGen/AArch64/extract-vector-elt.ll @@ -25,20 +25,9 @@ entry: } define i64 @extract_v2i64_undef_vector(<2 x i64> %a, i32 %c) { -; CHECK-SD-LABEL: extract_v2i64_undef_vector: -; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: extract_v2i64_undef_vector: -; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: sub sp, sp, #16 -; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 -; CHECK-GI-NEXT: mov w9, w0 -; CHECK-GI-NEXT: mov x8, sp -; CHECK-GI-NEXT: and x9, x9, #0x1 -; CHECK-GI-NEXT: ldr x0, [x8, x9, lsl #3] -; CHECK-GI-NEXT: add sp, sp, #16 -; CHECK-GI-NEXT: ret +; CHECK-LABEL: extract_v2i64_undef_vector: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ret entry: %d = extractelement <2 x i64> undef, i32 %c ret i64 %d @@ -130,7 +119,6 @@ define i64 @extract_v2i64_extract_of_insert_different_const(<2 x i64> %a, i64 %e ; ; CHECK-GI-LABEL: extract_v2i64_extract_of_insert_different_const: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: mov v0.d[0], x0 ; CHECK-GI-NEXT: mov d0, v0.d[1] ; CHECK-GI-NEXT: fmov x0, d0 ; CHECK-GI-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/extractvector-oob-load.mir b/llvm/test/CodeGen/AArch64/extractvector-oob-load.mir index e8c5819e75e090..e7e8c939910941 100644 --- a/llvm/test/CodeGen/AArch64/extractvector-oob-load.mir +++ b/llvm/test/CodeGen/AArch64/extractvector-oob-load.mir @@ -22,11 +22,8 @@ body: | ; CHECK-LABEL: name: f ; CHECK: liveins: $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load (s64)) - ; CHECK-NEXT: $x0 = COPY [[LOAD]](s64) + ; CHECK-NEXT: 
[[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK-NEXT: $x0 = COPY [[DEF]](s64) ; CHECK-NEXT: RET_ReallyLR implicit $x0 %0:_(p0) = COPY $x0 %3:_(s64) = G_CONSTANT i64 224567957 From e47a81c1d2830dda45a561e2c092ebb0c868ed27 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Tue, 2 Apr 2024 09:31:38 +0200 Subject: [PATCH 069/201] [OpenCL] Fix BIenqueue_kernel fallthrough (#83238) Handling of the `BIenqueue_kernel` builtin must not fallthrough to the `BIget_kernel_work_group_size` builtin, as these builtins have no common functionality. --- clang/lib/CodeGen/CGBuiltin.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index bb007231c0b783..483f9c26859923 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -5835,7 +5835,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, EmitLifetimeEnd(TmpSize, TmpPtr); return Call; } - [[fallthrough]]; + llvm_unreachable("Unexpected enqueue_kernel signature"); } // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block // parameter. From f6c87be1dd24a121d7eccd6b91ca808ecdf80356 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Tue, 2 Apr 2024 00:33:50 -0700 Subject: [PATCH 070/201] [Github] Fix typo in PR code formatting job The recent change to split the PR code formatting job accidentally misspelled the repository field when specifying the repository to fetch the code formatting utils from. This patch fixes the spelling so that the job does not throw a warning and clones the tools from the specified repository. 
--- .github/workflows/pr-code-format.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr-code-format.yml b/.github/workflows/pr-code-format.yml index 54dfe3aadbb423..10b18f245d8965 100644 --- a/.github/workflows/pr-code-format.yml +++ b/.github/workflows/pr-code-format.yml @@ -33,7 +33,7 @@ jobs: - name: Fetch code formatting utils uses: actions/checkout@v4 with: - reository: ${{ github.repository }} + repository: ${{ github.repository }} ref: ${{ github.base_ref }} sparse-checkout: | llvm/utils/git/requirements_formatting.txt From fa8dc363506893eb9371dd3b7590f41fa9a7174a Mon Sep 17 00:00:00 2001 From: elhewaty Date: Tue, 2 Apr 2024 09:49:31 +0200 Subject: [PATCH 071/201] [IR] Fix crashes caused by #85592 (#87169) This patch fixes the crash caused by the pull request: https://github.com/llvm/llvm-project/pull/85592 --- llvm/lib/IR/Operator.cpp | 5 +++-- llvm/test/Transforms/FunctionAttrs/noundef.ll | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/llvm/lib/IR/Operator.cpp b/llvm/lib/IR/Operator.cpp index 495769279e3363..7b4449cd825f9b 100644 --- a/llvm/lib/IR/Operator.cpp +++ b/llvm/lib/IR/Operator.cpp @@ -28,8 +28,9 @@ bool Operator::hasPoisonGeneratingFlags() const { return OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap(); } case Instruction::Trunc: { - auto *TI = dyn_cast(this); - return TI->hasNoUnsignedWrap() || TI->hasNoSignedWrap(); + if (auto *TI = dyn_cast(this)) + return TI->hasNoUnsignedWrap() || TI->hasNoSignedWrap(); + return false; } case Instruction::UDiv: case Instruction::SDiv: diff --git a/llvm/test/Transforms/FunctionAttrs/noundef.ll b/llvm/test/Transforms/FunctionAttrs/noundef.ll index 946b562f39553e..9ab37082a30329 100644 --- a/llvm/test/Transforms/FunctionAttrs/noundef.ll +++ b/llvm/test/Transforms/FunctionAttrs/noundef.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; RUN: opt < %s 
-passes='function-attrs' -S | FileCheck %s +@g_var = external global [0 x i8] + define i32 @test_ret_constant() { ; CHECK-LABEL: define noundef i32 @test_ret_constant( ; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { @@ -152,3 +154,15 @@ define i32 @test_ret_constant_msan() sanitize_memory { ; ret i32 0 } + +define i64 @test_trunc_with_constexpr() { +; CHECK-LABEL: define noundef i64 @test_trunc_with_constexpr( +; CHECK-SAME: ) #[[ATTR0]] { +; CHECK-NEXT: [[ADD:%.*]] = add i32 trunc (i64 sub (i64 0, i64 ptrtoint (ptr @g_var to i64)) to i32), 1 +; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[ADD]] to i64 +; CHECK-NEXT: ret i64 [[CONV]] +; + %add = add i32 trunc (i64 sub (i64 0, i64 ptrtoint (ptr @g_var to i64)) to i32), 1 + %conv = sext i32 %add to i64 + ret i64 %conv +} From 2d14ea68b8c0acdff7c040d581f7fde15d2683d9 Mon Sep 17 00:00:00 2001 From: jeanPerier Date: Tue, 2 Apr 2024 10:22:03 +0200 Subject: [PATCH 072/201] [flang][NFC] speed-up external name conversion pass (#86814) The ExternalNameConversion pass can be surprisingly slow on big programs. On an example with a 50kloc Fortran file with about 10000 calls to external procedures, the pass alone took 25s on my machine. This patch reduces this to 0.16s. The root cause is that using `replaceAllSymbolUses` on each modified FuncOp is very expensive: it is walking all operations and attribute every time. An alternative would be to use mlir::SymbolUserMap to avoid walking the module again and again, but this is still much more expensive than what is needed because it is essentially caching all symbol uses of the module, and there is no need to such caching here. Instead: - Do a shallow walk of the module (only top level operation) to detect FuncOp/GlobalOp that needs to be updated. Update them and place the name remapping in a DenseMap. - If any remapping were done, do a single deep walk of the module operation, and update any SymbolRefAttr that matches a name that was remapped. 
--- .../Transforms/ExternalNameConversion.cpp | 155 ++++-------------- 1 file changed, 34 insertions(+), 121 deletions(-) diff --git a/flang/lib/Optimizer/Transforms/ExternalNameConversion.cpp b/flang/lib/Optimizer/Transforms/ExternalNameConversion.cpp index 3a9686418c2eae..b265c74c33dd5e 100644 --- a/flang/lib/Optimizer/Transforms/ExternalNameConversion.cpp +++ b/flang/lib/Optimizer/Transforms/ExternalNameConversion.cpp @@ -12,13 +12,9 @@ #include "flang/Optimizer/Dialect/FIROpsSupport.h" #include "flang/Optimizer/Support/InternalNames.h" #include "flang/Optimizer/Transforms/Passes.h" -#include "mlir/Dialect/LLVMIR/LLVMDialect.h" -#include "mlir/Dialect/OpenACC/OpenACC.h" -#include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/IR/Attributes.h" #include "mlir/IR/SymbolTable.h" #include "mlir/Pass/Pass.h" -#include "mlir/Transforms/DialectConversion.h" namespace fir { #define GEN_PASS_DEF_EXTERNALNAMECONVERSION @@ -44,102 +40,8 @@ mangleExternalName(const std::pair { -public: - using OpRewritePattern::OpRewritePattern; - - MangleNameOnFuncOp(mlir::MLIRContext *ctx, bool appendUnderscore) - : mlir::OpRewritePattern(ctx), - appendUnderscore(appendUnderscore) {} - - mlir::LogicalResult - matchAndRewrite(mlir::func::FuncOp op, - mlir::PatternRewriter &rewriter) const override { - mlir::LogicalResult ret = success(); - rewriter.startOpModification(op); - llvm::StringRef oldName = op.getSymName(); - auto result = fir::NameUniquer::deconstruct(oldName); - if (fir::NameUniquer::isExternalFacingUniquedName(result)) { - auto newSymbol = - rewriter.getStringAttr(mangleExternalName(result, appendUnderscore)); - - // Try to update all SymbolRef's in the module that match the current op - if (mlir::ModuleOp mod = op->getParentOfType()) - ret = op.replaceAllSymbolUses(newSymbol, mod); - - op.setSymNameAttr(newSymbol); - mlir::SymbolTable::setSymbolName(op, newSymbol); - - op->setAttr(fir::getInternalFuncNameAttrName(), - mlir::StringAttr::get(op->getContext(), oldName)); - 
} - rewriter.finalizeOpModification(op); - return ret; - } - -private: - bool appendUnderscore; -}; - -struct MangleNameForCommonBlock : public mlir::OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - MangleNameForCommonBlock(mlir::MLIRContext *ctx, bool appendUnderscore) - : mlir::OpRewritePattern(ctx), - appendUnderscore(appendUnderscore) {} - - mlir::LogicalResult - matchAndRewrite(fir::GlobalOp op, - mlir::PatternRewriter &rewriter) const override { - rewriter.startOpModification(op); - auto result = fir::NameUniquer::deconstruct( - op.getSymref().getRootReference().getValue()); - if (fir::NameUniquer::isExternalFacingUniquedName(result)) { - auto newName = mangleExternalName(result, appendUnderscore); - op.setSymrefAttr(mlir::SymbolRefAttr::get(op.getContext(), newName)); - SymbolTable::setSymbolName(op, newName); - } - rewriter.finalizeOpModification(op); - return success(); - } - -private: - bool appendUnderscore; -}; - -struct MangleNameOnAddrOfOp : public mlir::OpRewritePattern { -public: - using OpRewritePattern::OpRewritePattern; - - MangleNameOnAddrOfOp(mlir::MLIRContext *ctx, bool appendUnderscore) - : mlir::OpRewritePattern(ctx), - appendUnderscore(appendUnderscore) {} - - mlir::LogicalResult - matchAndRewrite(fir::AddrOfOp op, - mlir::PatternRewriter &rewriter) const override { - auto result = fir::NameUniquer::deconstruct( - op.getSymbol().getRootReference().getValue()); - if (fir::NameUniquer::isExternalFacingUniquedName(result)) { - auto newName = SymbolRefAttr::get( - op.getContext(), mangleExternalName(result, appendUnderscore)); - rewriter.replaceOpWithNewOp(op, op.getResTy().getType(), - newName); - } - return success(); - } - -private: - bool appendUnderscore; -}; - class ExternalNameConversionPass : public fir::impl::ExternalNameConversionBase { public: @@ -162,31 +64,42 @@ void ExternalNameConversionPass::runOnOperation() { auto *context = &getContext(); appendUnderscores = (usePassOpt) ? 
appendUnderscoreOpt : appendUnderscores; + llvm::DenseMap remappings; + // Update names of external Fortran functions and names of Common Block + // globals. + for (auto &funcOrGlobal : op->getRegion(0).front()) { + if (llvm::isa(funcOrGlobal) || + llvm::isa(funcOrGlobal)) { + auto symName = funcOrGlobal.getAttrOfType( + mlir::SymbolTable::getSymbolAttrName()); + auto deconstructedName = fir::NameUniquer::deconstruct(symName); + if (fir::NameUniquer::isExternalFacingUniquedName(deconstructedName)) { + auto newName = mangleExternalName(deconstructedName, appendUnderscores); + auto newAttr = mlir::StringAttr::get(context, newName); + mlir::SymbolTable::setSymbolName(&funcOrGlobal, newAttr); + auto newSymRef = mlir::FlatSymbolRefAttr::get(newAttr); + remappings.try_emplace(symName, newSymRef); + if (llvm::isa(funcOrGlobal)) + funcOrGlobal.setAttr(fir::getInternalFuncNameAttrName(), symName); + } + } + } - mlir::RewritePatternSet patterns(context); - patterns.insert(context, appendUnderscores); - - ConversionTarget target(*context); - target.addLegalDialect(); - - target.addDynamicallyLegalOp([](mlir::func::FuncOp op) { - return !fir::NameUniquer::needExternalNameMangling(op.getSymName()); - }); - - target.addDynamicallyLegalOp([](fir::GlobalOp op) { - return !fir::NameUniquer::needExternalNameMangling( - op.getSymref().getRootReference().getValue()); - }); - - target.addDynamicallyLegalOp([](fir::AddrOfOp op) { - return !fir::NameUniquer::needExternalNameMangling( - op.getSymbol().getRootReference().getValue()); + if (remappings.empty()) + return; + + // Update all uses of the functions and globals that have been renamed. 
+ op.walk([&remappings](mlir::Operation *nestedOp) { + llvm::SmallVector> updates; + for (const mlir::NamedAttribute &attr : nestedOp->getAttrDictionary()) + if (auto symRef = llvm::dyn_cast(attr.getValue())) + if (auto remap = remappings.find(symRef.getRootReference()); + remap != remappings.end()) + updates.emplace_back(std::pair{ + attr.getName(), mlir::SymbolRefAttr(remap->second)}); + for (auto update : updates) + nestedOp->setAttr(update.first, update.second); }); - - if (failed(applyPartialConversion(op, target, std::move(patterns)))) - signalPassFailure(); } std::unique_ptr fir::createExternalNameConversionPass() { From 77e5c0a95c54e0ca34b8e9c56c702490619b73c9 Mon Sep 17 00:00:00 2001 From: Marc Auberer Date: Tue, 2 Apr 2024 10:49:06 +0200 Subject: [PATCH 073/201] [AArch64][GISEL] Reduce likelihood of hash collisions for mappings in RegisterBankInfo (#87033) Fixes #85209 This patch removes the truncation from `hash_code` aka `size_t` down to `unsigned`, that currently happens on DenseMap accesses in RegisterBankInfo. This reduces the likelihood of hash collisions, as well as the likelihood of hitting EmptyKey or TombstoneKey, the special key values of DenseMap. This is not the ultimate solution to the problem, but we can do it in any case. --- llvm/include/llvm/CodeGen/RegisterBankInfo.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/CodeGen/RegisterBankInfo.h b/llvm/include/llvm/CodeGen/RegisterBankInfo.h index 62c4a57a605d68..9704e3b1fdedd6 100644 --- a/llvm/include/llvm/CodeGen/RegisterBankInfo.h +++ b/llvm/include/llvm/CodeGen/RegisterBankInfo.h @@ -399,22 +399,22 @@ class RegisterBankInfo { /// Keep dynamically allocated PartialMapping in a separate map. /// This shouldn't be needed when everything gets TableGen'ed. - mutable DenseMap> + mutable DenseMap> MapOfPartialMappings; /// Keep dynamically allocated ValueMapping in a separate map. /// This shouldn't be needed when everything gets TableGen'ed. 
- mutable DenseMap> + mutable DenseMap> MapOfValueMappings; /// Keep dynamically allocated array of ValueMapping in a separate map. /// This shouldn't be needed when everything gets TableGen'ed. - mutable DenseMap> + mutable DenseMap> MapOfOperandsMappings; /// Keep dynamically allocated InstructionMapping in a separate map. /// This shouldn't be needed when everything gets TableGen'ed. - mutable DenseMap> + mutable DenseMap> MapOfInstructionMappings; /// Getting the minimal register class of a physreg is expensive. From 6cce67a8f9bbab7ebaafa6f33e0efbb22dee3ea1 Mon Sep 17 00:00:00 2001 From: Vyacheslav Levytskyy Date: Tue, 2 Apr 2024 10:59:18 +0200 Subject: [PATCH 074/201] [SPIR-V] Fix validity of atomic instructions (#87051) This PR fixes validity of atomic instructions and improves type inference. More tests are able now to be accepted by `spirv-val`. --- llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp | 7 ++ llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp | 89 ++++++++++++++++--- llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp | 42 +++++++++ llvm/test/CodeGen/SPIRV/ExecutionMode.ll | 1 + .../test/CodeGen/SPIRV/instructions/atomic.ll | 28 ++++-- .../SPIRV/instructions/atomic_acqrel.ll | 28 ++++-- .../CodeGen/SPIRV/instructions/atomic_seq.ll | 28 ++++-- .../SPIRV/pointers/bitcast-fix-accesschain.ll | 37 ++++++++ .../pointers/type-deduce-by-call-complex.ll | 29 ++++++ 9 files changed, 252 insertions(+), 37 deletions(-) create mode 100644 llvm/test/CodeGen/SPIRV/pointers/bitcast-fix-accesschain.ll create mode 100644 llvm/test/CodeGen/SPIRV/pointers/type-deduce-by-call-complex.ll diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp index ad4e72a3128b1e..1674cef7cb8270 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp @@ -251,6 +251,13 @@ static SPIRVType *getArgSPIRVType(const Function &F, unsigned ArgIdx, cast(II->getOperand(2))->getZExtValue(), ST)); } + // Replace 
PointerType with TypedPointerType to be able to map SPIR-V types to + // LLVM types in a consistent manner + if (isUntypedPointerTy(OriginalArgType)) { + OriginalArgType = + TypedPointerType::get(Type::getInt8Ty(F.getContext()), + getPointerAddressSpace(OriginalArgType)); + } return GR->getOrCreateSPIRVType(OriginalArgType, MIRBuilder, ArgAccessQual); } diff --git a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp index 7c5a38fa48d009..b341fcb41d0312 100644 --- a/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVEmitIntrinsics.cpp @@ -65,6 +65,10 @@ class SPIRVEmitIntrinsics Type *deduceElementType(Value *I); Type *deduceElementTypeHelper(Value *I); Type *deduceElementTypeHelper(Value *I, std::unordered_set &Visited); + Type *deduceElementTypeByValueDeep(Type *ValueTy, Value *Operand, + std::unordered_set &Visited); + Type *deduceElementTypeByUsersDeep(Value *Op, + std::unordered_set &Visited); // deduce nested types of composites Type *deduceNestedTypeHelper(User *U); @@ -176,6 +180,44 @@ static inline void reportFatalOnTokenType(const Instruction *I) { false); } +// Set element pointer type to the given value of ValueTy and tries to +// specify this type further (recursively) by Operand value, if needed. +Type *SPIRVEmitIntrinsics::deduceElementTypeByValueDeep( + Type *ValueTy, Value *Operand, std::unordered_set &Visited) { + Type *Ty = ValueTy; + if (Operand) { + if (auto *PtrTy = dyn_cast(Ty)) { + if (Type *NestedTy = deduceElementTypeHelper(Operand, Visited)) + Ty = TypedPointerType::get(NestedTy, PtrTy->getAddressSpace()); + } else { + Ty = deduceNestedTypeHelper(dyn_cast(Operand), Ty, Visited); + } + } + return Ty; +} + +// Traverse User instructions to deduce an element pointer type of the operand. 
+Type *SPIRVEmitIntrinsics::deduceElementTypeByUsersDeep( + Value *Op, std::unordered_set &Visited) { + if (!Op || !isPointerTy(Op->getType())) + return nullptr; + + if (auto PType = dyn_cast(Op->getType())) + return PType->getElementType(); + + // maybe we already know operand's element type + if (Type *KnownTy = GR->findDeducedElementType(Op)) + return KnownTy; + + for (User *OpU : Op->users()) { + if (Instruction *Inst = dyn_cast(OpU)) { + if (Type *Ty = deduceElementTypeHelper(Inst, Visited)) + return Ty; + } + } + return nullptr; +} + // Deduce and return a successfully deduced Type of the Instruction, // or nullptr otherwise. Type *SPIRVEmitIntrinsics::deduceElementTypeHelper(Value *I) { @@ -206,21 +248,27 @@ Type *SPIRVEmitIntrinsics::deduceElementTypeHelper( } else if (auto *Ref = dyn_cast(I)) { Ty = Ref->getResultElementType(); } else if (auto *Ref = dyn_cast(I)) { - Ty = Ref->getValueType(); - if (Value *Op = Ref->getNumOperands() > 0 ? Ref->getOperand(0) : nullptr) { - if (auto *PtrTy = dyn_cast(Ty)) { - if (Type *NestedTy = deduceElementTypeHelper(Op, Visited)) - Ty = TypedPointerType::get(NestedTy, PtrTy->getAddressSpace()); - } else { - Ty = deduceNestedTypeHelper(dyn_cast(Op), Ty, Visited); - } - } + Ty = deduceElementTypeByValueDeep( + Ref->getValueType(), + Ref->getNumOperands() > 0 ? 
Ref->getOperand(0) : nullptr, Visited); } else if (auto *Ref = dyn_cast(I)) { Ty = deduceElementTypeHelper(Ref->getPointerOperand(), Visited); } else if (auto *Ref = dyn_cast(I)) { if (Type *Src = Ref->getSrcTy(), *Dest = Ref->getDestTy(); isPointerTy(Src) && isPointerTy(Dest)) Ty = deduceElementTypeHelper(Ref->getOperand(0), Visited); + } else if (auto *Ref = dyn_cast(I)) { + Value *Op = Ref->getNewValOperand(); + Ty = deduceElementTypeByValueDeep(Op->getType(), Op, Visited); + } else if (auto *Ref = dyn_cast(I)) { + Value *Op = Ref->getValOperand(); + Ty = deduceElementTypeByValueDeep(Op->getType(), Op, Visited); + } else if (auto *Ref = dyn_cast(I)) { + for (unsigned i = 0; i < Ref->getNumIncomingValues(); i++) { + Ty = deduceElementTypeByUsersDeep(Ref->getIncomingValue(i), Visited); + if (Ty) + break; + } } // remember the found relationship @@ -293,6 +341,22 @@ Type *SPIRVEmitIntrinsics::deduceNestedTypeHelper( return NewTy; } } + } else if (auto *VecTy = dyn_cast(OrigTy)) { + if (Value *Op = U->getNumOperands() > 0 ? 
U->getOperand(0) : nullptr) { + Type *OpTy = VecTy->getElementType(); + Type *Ty = OpTy; + if (auto *PtrTy = dyn_cast(OpTy)) { + if (Type *NestedTy = deduceElementTypeHelper(Op, Visited)) + Ty = TypedPointerType::get(NestedTy, PtrTy->getAddressSpace()); + } else { + Ty = deduceNestedTypeHelper(dyn_cast(Op), OpTy, Visited); + } + if (Ty != OpTy) { + Type *NewTy = VectorType::get(Ty, VecTy->getElementCount()); + GR->addDeducedCompositeType(U, NewTy); + return NewTy; + } + } } return OrigTy; @@ -578,7 +642,8 @@ void SPIRVEmitIntrinsics::insertPtrCastOrAssignTypeInstr(Instruction *I, // Handle calls to builtins (non-intrinsics): CallInst *CI = dyn_cast(I); - if (!CI || CI->isIndirectCall() || CI->getCalledFunction()->isIntrinsic()) + if (!CI || CI->isIndirectCall() || CI->isInlineAsm() || + !CI->getCalledFunction() || CI->getCalledFunction()->isIntrinsic()) return; // collect information about formal parameter types @@ -929,6 +994,10 @@ Type *SPIRVEmitIntrinsics::deduceFunParamElementType( // maybe we already know operand's element type if (Type *KnownTy = GR->findDeducedElementType(OpArg)) return KnownTy; + // try to deduce from the operand itself + Visited.clear(); + if (Type *Ty = deduceElementTypeHelper(OpArg, Visited)) + return Ty; // search in actual parameter's users for (User *OpU : OpArg->users()) { Instruction *Inst = dyn_cast(OpU); diff --git a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp index 4f5c1dc4f90b0d..90a31551f45a23 100644 --- a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp @@ -201,6 +201,17 @@ void validateForwardCalls(const SPIRVSubtarget &STI, } } +// Validation of an access chain. 
+void validateAccessChain(const SPIRVSubtarget &STI, MachineRegisterInfo *MRI, + SPIRVGlobalRegistry &GR, MachineInstr &I) { + SPIRVType *BaseTypeInst = GR.getSPIRVTypeForVReg(I.getOperand(0).getReg()); + if (BaseTypeInst && BaseTypeInst->getOpcode() == SPIRV::OpTypePointer) { + SPIRVType *BaseElemType = + GR.getSPIRVTypeForVReg(BaseTypeInst->getOperand(2).getReg()); + validatePtrTypes(STI, MRI, GR, I, 2, BaseElemType); + } +} + // TODO: the logic of inserting additional bitcast's is to be moved // to pre-IRTranslation passes eventually void SPIRVTargetLowering::finalizeLowering(MachineFunction &MF) const { @@ -213,16 +224,47 @@ void SPIRVTargetLowering::finalizeLowering(MachineFunction &MF) const { MBBI != MBBE;) { MachineInstr &MI = *MBBI++; switch (MI.getOpcode()) { + case SPIRV::OpAtomicLoad: + case SPIRV::OpAtomicExchange: + case SPIRV::OpAtomicCompareExchange: + case SPIRV::OpAtomicCompareExchangeWeak: + case SPIRV::OpAtomicIIncrement: + case SPIRV::OpAtomicIDecrement: + case SPIRV::OpAtomicIAdd: + case SPIRV::OpAtomicISub: + case SPIRV::OpAtomicSMin: + case SPIRV::OpAtomicUMin: + case SPIRV::OpAtomicSMax: + case SPIRV::OpAtomicUMax: + case SPIRV::OpAtomicAnd: + case SPIRV::OpAtomicOr: + case SPIRV::OpAtomicXor: + // for the above listed instructions + // OpAtomicXXX , ptr %Op, ... 
+ // implies that %Op is a pointer to case SPIRV::OpLoad: // OpLoad , ptr %Op implies that %Op is a pointer to validatePtrTypes(STI, MRI, GR, MI, 2, GR.getSPIRVTypeForVReg(MI.getOperand(0).getReg())); break; + case SPIRV::OpAtomicStore: + // OpAtomicStore ptr %Op, , , + // implies that %Op points to the 's type + validatePtrTypes(STI, MRI, GR, MI, 0, + GR.getSPIRVTypeForVReg(MI.getOperand(3).getReg())); + break; case SPIRV::OpStore: // OpStore ptr %Op, implies that %Op points to the 's type validatePtrTypes(STI, MRI, GR, MI, 0, GR.getSPIRVTypeForVReg(MI.getOperand(1).getReg())); break; + case SPIRV::OpPtrCastToGeneric: + validateAccessChain(STI, MRI, GR, MI); + break; + case SPIRV::OpInBoundsPtrAccessChain: + if (MI.getNumOperands() == 4) + validateAccessChain(STI, MRI, GR, MI); + break; case SPIRV::OpFunctionCall: // ensure there is no mismatch between actual and expected arg types: diff --git a/llvm/test/CodeGen/SPIRV/ExecutionMode.ll b/llvm/test/CodeGen/SPIRV/ExecutionMode.ll index 3e321e1c2bd280..180b7246952db5 100644 --- a/llvm/test/CodeGen/SPIRV/ExecutionMode.ll +++ b/llvm/test/CodeGen/SPIRV/ExecutionMode.ll @@ -1,4 +1,5 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK-DAG: %[[#VOID:]] = OpTypeVoid diff --git a/llvm/test/CodeGen/SPIRV/instructions/atomic.ll b/llvm/test/CodeGen/SPIRV/instructions/atomic.ll index 9715504fcc5d38..ce59bb2064027a 100644 --- a/llvm/test/CodeGen/SPIRV/instructions/atomic.ll +++ b/llvm/test/CodeGen/SPIRV/instructions/atomic.ll @@ -1,4 +1,5 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK-DAG: OpName [[ADD:%.*]] "test_add" ; CHECK-DAG: OpName [[SUB:%.*]] "test_sub" @@ -20,7 +21,8 @@ ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter ; CHECK-NEXT: [[B:%.*]] = 
OpFunctionParameter ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicIAdd [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicIAdd [[I32Ty]] [[BC_A]] [[SCOPE]] [[RELAXED]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_add(i32* %ptr, i32 %val) { @@ -32,7 +34,8 @@ define i32 @test_add(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicISub [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicISub [[I32Ty]] [[BC_A]] [[SCOPE]] [[RELAXED]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_sub(i32* %ptr, i32 %val) { @@ -44,7 +47,8 @@ define i32 @test_sub(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicSMin [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicSMin [[I32Ty]] [[BC_A]] [[SCOPE]] [[RELAXED]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_min(i32* %ptr, i32 %val) { @@ -56,7 +60,8 @@ define i32 @test_min(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicSMax [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicSMax [[I32Ty]] [[BC_A]] [[SCOPE]] [[RELAXED]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_max(i32* %ptr, i32 %val) { @@ -68,7 +73,8 @@ define i32 @test_max(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter ; 
CHECK-NEXT: [[B:%.*]] = OpFunctionParameter ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicUMin [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicUMin [[I32Ty]] [[BC_A]] [[SCOPE]] [[RELAXED]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_umin(i32* %ptr, i32 %val) { @@ -80,7 +86,8 @@ define i32 @test_umin(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicUMax [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicUMax [[I32Ty]] [[BC_A]] [[SCOPE]] [[RELAXED]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_umax(i32* %ptr, i32 %val) { @@ -92,7 +99,8 @@ define i32 @test_umax(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicAnd [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicAnd [[I32Ty]] [[BC_A]] [[SCOPE]] [[RELAXED]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_and(i32* %ptr, i32 %val) { @@ -104,7 +112,8 @@ define i32 @test_and(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicOr [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicOr [[I32Ty]] [[BC_A]] [[SCOPE]] [[RELAXED]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_or(i32* %ptr, i32 %val) { @@ -116,7 +125,8 @@ define i32 @test_or(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] 
= OpFunctionParameter ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicXor [[I32Ty]] [[A]] [[SCOPE]] [[RELAXED]] [[B]] +; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicXor [[I32Ty]] [[BC_A]] [[SCOPE]] [[RELAXED]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_xor(i32* %ptr, i32 %val) { diff --git a/llvm/test/CodeGen/SPIRV/instructions/atomic_acqrel.ll b/llvm/test/CodeGen/SPIRV/instructions/atomic_acqrel.ll index 63c0ae75f5ecdd..950dfe417637fe 100644 --- a/llvm/test/CodeGen/SPIRV/instructions/atomic_acqrel.ll +++ b/llvm/test/CodeGen/SPIRV/instructions/atomic_acqrel.ll @@ -1,4 +1,5 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK-DAG: OpName [[ADD:%.*]] "test_add" ; CHECK-DAG: OpName [[SUB:%.*]] "test_sub" @@ -20,7 +21,8 @@ ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicIAdd [[I32Ty]] [[A]] [[SCOPE]] [[ACQREL]] [[B]] +; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicIAdd [[I32Ty]] [[BC_A]] [[SCOPE]] [[ACQREL]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_add(i32* %ptr, i32 %val) { @@ -32,7 +34,8 @@ define i32 @test_add(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicISub [[I32Ty]] [[A]] [[SCOPE]] [[ACQREL]] [[B]] +; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicISub [[I32Ty]] [[BC_A]] [[SCOPE]] [[ACQREL]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_sub(i32* %ptr, i32 %val) { @@ -44,7 +47,8 @@ define i32 @test_sub(i32* %ptr, 
i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicSMin [[I32Ty]] [[A]] [[SCOPE]] [[ACQREL]] [[B]] +; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicSMin [[I32Ty]] [[BC_A]] [[SCOPE]] [[ACQREL]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_min(i32* %ptr, i32 %val) { @@ -56,7 +60,8 @@ define i32 @test_min(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicSMax [[I32Ty]] [[A]] [[SCOPE]] [[ACQREL]] [[B]] +; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicSMax [[I32Ty]] [[BC_A]] [[SCOPE]] [[ACQREL]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_max(i32* %ptr, i32 %val) { @@ -68,7 +73,8 @@ define i32 @test_max(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicUMin [[I32Ty]] [[A]] [[SCOPE]] [[ACQREL]] [[B]] +; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicUMin [[I32Ty]] [[BC_A]] [[SCOPE]] [[ACQREL]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_umin(i32* %ptr, i32 %val) { @@ -80,7 +86,8 @@ define i32 @test_umin(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicUMax [[I32Ty]] [[A]] [[SCOPE]] [[ACQREL]] [[B]] +; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicUMax [[I32Ty]] [[BC_A]] [[SCOPE]] [[ACQREL]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_umax(i32* %ptr, i32 %val) { @@ -92,7 +99,8 @@ define i32 
@test_umax(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicAnd [[I32Ty]] [[A]] [[SCOPE]] [[ACQREL]] [[B]] +; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicAnd [[I32Ty]] [[BC_A]] [[SCOPE]] [[ACQREL]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_and(i32* %ptr, i32 %val) { @@ -104,7 +112,8 @@ define i32 @test_and(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicOr [[I32Ty]] [[A]] [[SCOPE]] [[ACQREL]] [[B]] +; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicOr [[I32Ty]] [[BC_A]] [[SCOPE]] [[ACQREL]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_or(i32* %ptr, i32 %val) { @@ -116,7 +125,8 @@ define i32 @test_or(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicXor [[I32Ty]] [[A]] [[SCOPE]] [[ACQREL]] [[B]] +; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicXor [[I32Ty]] [[BC_A]] [[SCOPE]] [[ACQREL]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_xor(i32* %ptr, i32 %val) { diff --git a/llvm/test/CodeGen/SPIRV/instructions/atomic_seq.ll b/llvm/test/CodeGen/SPIRV/instructions/atomic_seq.ll index f6a8fe1e6db18e..f142e012dcb744 100644 --- a/llvm/test/CodeGen/SPIRV/instructions/atomic_seq.ll +++ b/llvm/test/CodeGen/SPIRV/instructions/atomic_seq.ll @@ -1,4 +1,5 @@ ; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} ; CHECK-DAG: OpName [[ADD:%.*]] "test_add" ; 
CHECK-DAG: OpName [[SUB:%.*]] "test_sub" @@ -20,7 +21,8 @@ ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicIAdd [[I32Ty]] [[A]] [[SCOPE]] [[SEQ]] [[B]] +; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicIAdd [[I32Ty]] [[BC_A]] [[SCOPE]] [[SEQ]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_add(i32* %ptr, i32 %val) { @@ -32,7 +34,8 @@ define i32 @test_add(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicISub [[I32Ty]] [[A]] [[SCOPE]] [[SEQ]] [[B]] +; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicISub [[I32Ty]] [[BC_A]] [[SCOPE]] [[SEQ]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_sub(i32* %ptr, i32 %val) { @@ -44,7 +47,8 @@ define i32 @test_sub(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicSMin [[I32Ty]] [[A]] [[SCOPE]] [[SEQ]] [[B]] +; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicSMin [[I32Ty]] [[BC_A]] [[SCOPE]] [[SEQ]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_min(i32* %ptr, i32 %val) { @@ -56,7 +60,8 @@ define i32 @test_min(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicSMax [[I32Ty]] [[A]] [[SCOPE]] [[SEQ]] [[B]] +; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicSMax [[I32Ty]] [[BC_A]] [[SCOPE]] [[SEQ]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_max(i32* %ptr, i32 %val) { @@ -68,7 
+73,8 @@ define i32 @test_max(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicUMin [[I32Ty]] [[A]] [[SCOPE]] [[SEQ]] [[B]] +; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicUMin [[I32Ty]] [[BC_A]] [[SCOPE]] [[SEQ]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_umin(i32* %ptr, i32 %val) { @@ -80,7 +86,8 @@ define i32 @test_umin(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicUMax [[I32Ty]] [[A]] [[SCOPE]] [[SEQ]] [[B]] +; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicUMax [[I32Ty]] [[BC_A]] [[SCOPE]] [[SEQ]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_umax(i32* %ptr, i32 %val) { @@ -92,7 +99,8 @@ define i32 @test_umax(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicAnd [[I32Ty]] [[A]] [[SCOPE]] [[SEQ]] [[B]] +; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicAnd [[I32Ty]] [[BC_A]] [[SCOPE]] [[SEQ]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_and(i32* %ptr, i32 %val) { @@ -104,7 +112,8 @@ define i32 @test_and(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicOr [[I32Ty]] [[A]] [[SCOPE]] [[SEQ]] [[B]] +; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicOr [[I32Ty]] [[BC_A]] [[SCOPE]] [[SEQ]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_or(i32* %ptr, i32 %val) { @@ -116,7 +125,8 
@@ define i32 @test_or(i32* %ptr, i32 %val) { ; CHECK-NEXT: [[A:%.*]] = OpFunctionParameter ; CHECK-NEXT: [[B:%.*]] = OpFunctionParameter ; CHECK-NEXT: OpLabel -; CHECK-NEXT: [[R:%.*]] = OpAtomicXor [[I32Ty]] [[A]] [[SCOPE]] [[SEQ]] [[B]] +; CHECK-NEXT: [[BC_A:%.*]] = OpBitcast %[[#]] [[A]] +; CHECK-NEXT: [[R:%.*]] = OpAtomicXor [[I32Ty]] [[BC_A]] [[SCOPE]] [[SEQ]] [[B]] ; CHECK-NEXT: OpReturnValue [[R]] ; CHECK-NEXT: OpFunctionEnd define i32 @test_xor(i32* %ptr, i32 %val) { diff --git a/llvm/test/CodeGen/SPIRV/pointers/bitcast-fix-accesschain.ll b/llvm/test/CodeGen/SPIRV/pointers/bitcast-fix-accesschain.ll new file mode 100644 index 00000000000000..7fae6ca2c48cf1 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/pointers/bitcast-fix-accesschain.ll @@ -0,0 +1,37 @@ +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: %[[#TYCHAR:]] = OpTypeInt 8 0 +; CHECK-DAG: %[[#TYCHARPTR:]] = OpTypePointer Function %[[#TYCHAR]] +; CHECK-DAG: %[[#TYINT32:]] = OpTypeInt 32 0 +; CHECK-DAG: %[[#TYSTRUCTINT32:]] = OpTypeStruct %[[#TYINT32]] +; CHECK-DAG: %[[#TYARRAY:]] = OpTypeArray %[[#TYSTRUCTINT32]] %[[#]] +; CHECK-DAG: %[[#TYSTRUCT:]] = OpTypeStruct %[[#TYARRAY]] +; CHECK-DAG: %[[#TYSTRUCTPTR:]] = OpTypePointer Function %[[#TYSTRUCT]] +; CHECK-DAG: %[[#TYINT64:]] = OpTypeInt 64 0 +; CHECK-DAG: %[[#TYINT64PTR:]] = OpTypePointer Function %[[#TYINT64]] +; CHECK: OpFunction +; CHECK: %[[#PTRTOSTRUCT:]] = OpFunctionParameter %[[#TYSTRUCTPTR]] +; CHECK: %[[#PTRTOCHAR:]] = OpBitcast %[[#TYCHARPTR]] %[[#PTRTOSTRUCT]] +; CHECK-NEXT: OpInBoundsPtrAccessChain %[[#TYCHARPTR]] %[[#PTRTOCHAR]] +; CHECK: OpFunction +; CHECK: %[[#PTRTOSTRUCT2:]] = OpFunctionParameter %[[#TYSTRUCTPTR]] +; CHECK: %[[#ELEM:]] = OpInBoundsPtrAccessChain %[[#TYSTRUCTPTR]] %[[#PTRTOSTRUCT2]] +; CHECK-NEXT: %[[#TOLOAD:]] = OpBitcast %[[#TYINT64PTR]] %[[#ELEM]] +; CHECK-NEXT: OpLoad 
%[[#TYINT64]] %[[#TOLOAD]] + +%struct.S = type { i32 } +%struct.__wrapper_class = type { [7 x %struct.S] } + +define spir_kernel void @foo1(ptr noundef byval(%struct.__wrapper_class) align 4 %_arg_Arr) { +entry: + %elem = getelementptr inbounds i8, ptr %_arg_Arr, i64 0 + ret void +} + +define spir_kernel void @foo2(ptr noundef byval(%struct.__wrapper_class) align 4 %_arg_Arr) { +entry: + %elem = getelementptr inbounds %struct.__wrapper_class, ptr %_arg_Arr, i64 0 + %data = load i64, ptr %elem + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/pointers/type-deduce-by-call-complex.ll b/llvm/test/CodeGen/SPIRV/pointers/type-deduce-by-call-complex.ll new file mode 100644 index 00000000000000..ea7a22c31d0e85 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/pointers/type-deduce-by-call-complex.ll @@ -0,0 +1,29 @@ +; RUN: llc -O0 -mtriple=spirv32-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK-SPIRV-DAG: %[[Long:.*]] = OpTypeInt 32 0 +; CHECK-SPIRV-DAG: %[[Void:.*]] = OpTypeVoid +; CHECK-SPIRV-DAG: %[[Struct:.*]] = OpTypeStruct %[[Long]] +; CHECK-SPIRV-DAG: %[[StructPtr:.*]] = OpTypePointer Generic %[[Struct]] +; CHECK-SPIRV-DAG: %[[Function:.*]] = OpTypeFunction %[[Void]] %[[StructPtr]] +; CHECK-SPIRV-DAG: %[[Const:.*]] = OpConstantNull %[[Struct]] +; CHECK-SPIRV-DAG: %[[CrossStructPtr:.*]] = OpTypePointer CrossWorkgroup %[[Struct]] +; CHECK-SPIRV-DAG: %[[Var:.*]] = OpVariable %[[CrossStructPtr]] CrossWorkgroup %[[Const]] +; CHECK-SPIRV: %[[Foo:.*]] = OpFunction %[[Void]] None %[[Function]] +; CHECK-SPIRV-NEXT: OpFunctionParameter %[[StructPtr]] +; CHECK-SPIRV: %[[Casted:.*]] = OpPtrCastToGeneric %[[StructPtr]] %[[Var]] +; CHECK-SPIRV-NEXT: OpFunctionCall %[[Void]] %[[Foo]] %[[Casted]] + +%struct.global_ctor_dtor = type { i32 } +@g1 = addrspace(1) global %struct.global_ctor_dtor zeroinitializer + +define linkonce_odr spir_func void @foo(ptr 
addrspace(4) %this) { +entry: + ret void +} + +define internal spir_func void @bar() { +entry: + call spir_func void @foo(ptr addrspace(4) addrspacecast (ptr addrspace(1) @g1 to ptr addrspace(4))) + ret void +} From 6654235594d86e7ed70abb7358ed25029d1560e5 Mon Sep 17 00:00:00 2001 From: Sizov Nikita Date: Tue, 2 Apr 2024 12:39:49 +0300 Subject: [PATCH 075/201] [SelectionDAG] implement computeKnownBits for add AVG* instructions (#86754) knownBits calculation for **AVGFLOORU** / **AVGFLOORS** / **AVGCEILU** / **AVGCEILS** instructions Prerequisite for #76644 --- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 15 ++++-- .../CodeGen/AArch64SelectionDAGTest.cpp | 48 +++++++++++++++++++ 2 files changed, 58 insertions(+), 5 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index e8d1ac1d3a9167..e3b76b95eb86ad 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3419,13 +3419,18 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts, Known = KnownBits::mulhs(Known, Known2); break; } - case ISD::AVGCEILU: { + case ISD::AVGFLOORU: + case ISD::AVGCEILU: + case ISD::AVGFLOORS: + case ISD::AVGCEILS: { + bool IsCeil = Opcode == ISD::AVGCEILU || Opcode == ISD::AVGCEILS; + bool IsSigned = Opcode == ISD::AVGFLOORS || Opcode == ISD::AVGCEILS; Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); - Known = Known.zext(BitWidth + 1); - Known2 = Known2.zext(BitWidth + 1); - KnownBits One = KnownBits::makeConstant(APInt(1, 1)); - Known = KnownBits::computeForAddCarry(Known, Known2, One); + Known = IsSigned ? Known.sext(BitWidth + 1) : Known.zext(BitWidth + 1); + Known2 = IsSigned ? Known2.sext(BitWidth + 1) : Known2.zext(BitWidth + 1); + KnownBits Carry = KnownBits::makeConstant(APInt(1, IsCeil ? 
1 : 0)); + Known = KnownBits::computeForAddCarry(Known, Known2, Carry); Known = Known.extractBits(BitWidth, 1); break; } diff --git a/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp b/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp index e0772684e3a954..27bcad7c24c4db 100644 --- a/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp +++ b/llvm/unittests/CodeGen/AArch64SelectionDAGTest.cpp @@ -796,4 +796,52 @@ TEST_F(AArch64SelectionDAGTest, computeKnownBits_extload_knownnegative) { EXPECT_EQ(Known.One, APInt(32, 0xfffffff0)); } +TEST_F(AArch64SelectionDAGTest, + computeKnownBits_AVGFLOORU_AVGFLOORS_AVGCEILU_AVGCEILS) { + SDLoc Loc; + auto Int8VT = EVT::getIntegerVT(Context, 8); + auto Int16VT = EVT::getIntegerVT(Context, 16); + auto Int8Vec8VT = EVT::getVectorVT(Context, Int8VT, 8); + auto Int16Vec8VT = EVT::getVectorVT(Context, Int16VT, 8); + + SDValue UnknownOp0 = DAG->getRegister(0, Int8Vec8VT); + SDValue UnknownOp1 = DAG->getRegister(1, Int8Vec8VT); + + SDValue ZextOp0 = + DAG->getNode(ISD::ZERO_EXTEND, Loc, Int16Vec8VT, UnknownOp0); + SDValue ZextOp1 = + DAG->getNode(ISD::ZERO_EXTEND, Loc, Int16Vec8VT, UnknownOp1); + // ZextOp0 = 00000000???????? + // ZextOp1 = 00000000???????? 
+ // => (for all AVG* instructions) + // Known.Zero = 1111111100000000 (0xFF00) + // Known.One = 0000000000000000 (0x0000) + auto Zeroes = APInt(16, 0xFF00); + auto Ones = APInt(16, 0x0000); + + SDValue AVGFLOORU = + DAG->getNode(ISD::AVGFLOORU, Loc, Int16Vec8VT, ZextOp0, ZextOp1); + KnownBits KnownAVGFLOORU = DAG->computeKnownBits(AVGFLOORU); + EXPECT_EQ(KnownAVGFLOORU.Zero, Zeroes); + EXPECT_EQ(KnownAVGFLOORU.One, Ones); + + SDValue AVGFLOORS = + DAG->getNode(ISD::AVGFLOORS, Loc, Int16Vec8VT, ZextOp0, ZextOp1); + KnownBits KnownAVGFLOORS = DAG->computeKnownBits(AVGFLOORS); + EXPECT_EQ(KnownAVGFLOORS.Zero, Zeroes); + EXPECT_EQ(KnownAVGFLOORS.One, Ones); + + SDValue AVGCEILU = + DAG->getNode(ISD::AVGCEILU, Loc, Int16Vec8VT, ZextOp0, ZextOp1); + KnownBits KnownAVGCEILU = DAG->computeKnownBits(AVGCEILU); + EXPECT_EQ(KnownAVGCEILU.Zero, Zeroes); + EXPECT_EQ(KnownAVGCEILU.One, Ones); + + SDValue AVGCEILS = + DAG->getNode(ISD::AVGCEILS, Loc, Int16Vec8VT, ZextOp0, ZextOp1); + KnownBits KnownAVGCEILS = DAG->computeKnownBits(AVGCEILS); + EXPECT_EQ(KnownAVGCEILS.Zero, Zeroes); + EXPECT_EQ(KnownAVGCEILS.One, Ones); +} + } // end namespace llvm From 89cfae41ecc043f8c47be4dea4b7c740d4f950b3 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Tue, 2 Apr 2024 11:37:20 +0200 Subject: [PATCH 076/201] [mlir] Add missing #include header for std::is_pointer --- mlir/include/mlir/IR/OwningOpRef.h | 1 + 1 file changed, 1 insertion(+) diff --git a/mlir/include/mlir/IR/OwningOpRef.h b/mlir/include/mlir/IR/OwningOpRef.h index eb4bf2dc67e3e7..0c35eae8de0964 100644 --- a/mlir/include/mlir/IR/OwningOpRef.h +++ b/mlir/include/mlir/IR/OwningOpRef.h @@ -13,6 +13,7 @@ #ifndef MLIR_IR_OWNINGOPREF_H #define MLIR_IR_OWNINGOPREF_H +#include <type_traits> #include <utility> namespace mlir { From 16da9d53519214475c04109d953022f272ac8022 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 2 Apr 2024 10:43:34 +0100 Subject: [PATCH 077/201] [VPlan] Remove redundant set of debug loc in VPInstruction (NFCI).
Consistently use setDebugLocFrom and remove redundant setDebugLocFrom. --- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 23d025cf33ea2d..124ae3108d8a07 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -455,8 +455,6 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) { RecurKind RK = RdxDesc.getRecurrenceKind(); - State.setDebugLocFrom(getDebugLoc()); - VPValue *LoopExitingDef = getOperand(1); Type *PhiTy = OrigPhi->getType(); VectorParts RdxParts(State.UF); @@ -551,7 +549,7 @@ void VPInstruction::execute(VPTransformState &State) { "Recipe not a FPMathOp but has fast-math flags?"); if (hasFastMathFlags()) State.Builder.setFastMathFlags(getFastMathFlags()); - State.Builder.SetCurrentDebugLocation(getDebugLoc()); + State.setDebugLocFrom(getDebugLoc()); bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() && (vputils::onlyFirstLaneUsed(this) || From 46246683a61a081d9d78cf987fd4f024556ecdc8 Mon Sep 17 00:00:00 2001 From: Rin Dobrescu Date: Tue, 2 Apr 2024 10:47:51 +0100 Subject: [PATCH 078/201] [AArch64] Update Neoverse V2 FSQRT execution units in schedule model. (#86803) This patch updates the SVE FSQRT instruction execution units to be able to run on VX0 and VX2. 
--- llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td | 10 ++++------ .../AArch64/Neoverse/V2-sve-instructions.s | 14 +++++++------- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td index f10b94523d2e03..4d7f44e7b9b9ab 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td +++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td @@ -1076,14 +1076,12 @@ def V2Write_7cyc_1V02_7rc : SchedWriteRes<[V2UnitV02]> { let Latency = 7; let def V2Write_10cyc_1V02_5rc : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ReleaseAtCycles = [ 5]; } def V2Write_10cyc_1V02_9rc : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ReleaseAtCycles = [ 9]; } def V2Write_10cyc_1V02_10rc : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ReleaseAtCycles = [10]; } -def V2Write_10cyc_1V0_9rc : SchedWriteRes<[V2UnitV0]> { let Latency = 10; let ReleaseAtCycles = [ 9]; } def V2Write_10cyc_1V1_9rc : SchedWriteRes<[V2UnitV1]> { let Latency = 10; let ReleaseAtCycles = [ 9]; } -def V2Write_13cyc_1V0_12rc : SchedWriteRes<[V2UnitV0]> { let Latency = 13; let ReleaseAtCycles = [12]; } def V2Write_13cyc_1V02_12rc : SchedWriteRes<[V2UnitV02]> { let Latency = 13; let ReleaseAtCycles = [12]; } def V2Write_13cyc_1V02_13rc : SchedWriteRes<[V2UnitV02]> { let Latency = 13; let ReleaseAtCycles = [13]; } def V2Write_15cyc_1V02_14rc : SchedWriteRes<[V2UnitV02]> { let Latency = 15; let ReleaseAtCycles = [14]; } +def V2Write_16cyc_1V02_14rc : SchedWriteRes<[V2UnitV02]> { let Latency = 16; let ReleaseAtCycles = [14]; } def V2Write_16cyc_1V02_15rc : SchedWriteRes<[V2UnitV02]> { let Latency = 16; let ReleaseAtCycles = [15]; } -def V2Write_16cyc_1V0_14rc : SchedWriteRes<[V2UnitV0]> { let Latency = 16; let ReleaseAtCycles = [14]; } // Miscellaneous // ----------------------------------------------------------------------------- @@ -2567,13 +2565,13 @@ def : InstRW<[V2Write_4cyc_2V02], (instregex 
"^FRINT[AIMNPXZ]_ZPmZ_S")>; def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>; // Floating point square root, F16 -def : InstRW<[V2Write_13cyc_1V0_12rc], (instregex "^FSQRT_ZPmZ_H")>; +def : InstRW<[V2Write_13cyc_1V02_12rc], (instregex "^FSQRT_ZPmZ_H")>; // Floating point square root, F32 -def : InstRW<[V2Write_10cyc_1V0_9rc], (instregex "^FSQRT_ZPmZ_S")>; +def : InstRW<[V2Write_10cyc_1V02_9rc], (instregex "^FSQRT_ZPmZ_S")>; // Floating point square root, F64 -def : InstRW<[V2Write_16cyc_1V0_14rc], (instregex "^FSQRT_ZPmZ_D")>; +def : InstRW<[V2Write_16cyc_1V02_14rc], (instregex "^FSQRT_ZPmZ_D")>; // Floating point trigonometric exponentiation def : InstRW<[V2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]")>; diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-sve-instructions.s index 4d6ce706b05274..acd35568249499 100644 --- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-sve-instructions.s +++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-sve-instructions.s @@ -4278,9 +4278,9 @@ zip2 z31.s, z31.s, z31.s # CHECK-NEXT: 1 3 0.25 fscale z0.d, p7/m, z0.d, z31.d # CHECK-NEXT: 1 3 0.25 fscale z0.h, p7/m, z0.h, z31.h # CHECK-NEXT: 1 3 0.25 fscale z0.s, p7/m, z0.s, z31.s -# CHECK-NEXT: 1 16 14.00 fsqrt z31.d, p7/m, z31.d -# CHECK-NEXT: 1 13 12.00 fsqrt z31.h, p7/m, z31.h -# CHECK-NEXT: 1 10 9.00 fsqrt z31.s, p7/m, z31.s +# CHECK-NEXT: 1 16 7.00 fsqrt z31.d, p7/m, z31.d +# CHECK-NEXT: 1 13 6.00 fsqrt z31.h, p7/m, z31.h +# CHECK-NEXT: 1 10 4.50 fsqrt z31.s, p7/m, z31.s # CHECK-NEXT: 1 2 0.25 fsub z0.d, p0/m, z0.d, #0.5 # CHECK-NEXT: 1 2 0.25 fsub z0.d, p7/m, z0.d, z31.d # CHECK-NEXT: 1 2 0.25 fsub z0.d, z1.d, z31.d @@ -6861,7 +6861,7 @@ zip2 z31.s, z31.s, z31.s # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] -# CHECK-NEXT: - - - - 245.00 651.00 651.00 570.50 272.50 83.75 83.75 81.75 81.75 
1554.25 1281.75 776.75 748.25 +# CHECK-NEXT: - - - - 245.00 651.00 651.00 570.50 272.50 83.75 83.75 81.75 81.75 1536.75 1281.75 794.25 748.25 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: @@ -7718,9 +7718,9 @@ zip2 z31.s, z31.s, z31.s # CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fscale z0.d, p7/m, z0.d, z31.d # CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fscale z0.h, p7/m, z0.h, z31.h # CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fscale z0.s, p7/m, z0.s, z31.s -# CHECK-NEXT: - - - - - - - - - - - - - 14.00 - - - fsqrt z31.d, p7/m, z31.d -# CHECK-NEXT: - - - - - - - - - - - - - 12.00 - - - fsqrt z31.h, p7/m, z31.h -# CHECK-NEXT: - - - - - - - - - - - - - 9.00 - - - fsqrt z31.s, p7/m, z31.s +# CHECK-NEXT: - - - - - - - - - - - - - 7.00 - 7.00 - fsqrt z31.d, p7/m, z31.d +# CHECK-NEXT: - - - - - - - - - - - - - 6.00 - 6.00 - fsqrt z31.h, p7/m, z31.h +# CHECK-NEXT: - - - - - - - - - - - - - 4.50 - 4.50 - fsqrt z31.s, p7/m, z31.s # CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsub z0.d, p0/m, z0.d, #0.5 # CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsub z0.d, p7/m, z0.d, z31.d # CHECK-NEXT: - - - - - - - - - - - - - 0.25 0.25 0.25 0.25 fsub z0.d, z1.d, z31.d From 1d06f41b72e429a5b3ba318ff639b8b997e21ff8 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 2 Apr 2024 10:58:45 +0100 Subject: [PATCH 079/201] [VectorCombine] foldBitcastShuffle - peek through any residual bitcasts before creating a new bitcast on top (#86119) Encountered while working on #67803, wading through the chains of bitcasts that SSE intrinsics introduces - this patch helps prevents cases where the bitcast chains aren't cleared out and we can't perform further combines until after InstCombine/InstSimplify has run. 
--- llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 12 ++++++++++-- .../VectorCombine/X86/shuffle-inseltpoison.ll | 6 ++---- llvm/test/Transforms/VectorCombine/X86/shuffle.ll | 6 ++---- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 7e86137f23f3c8..af5e7c9bc385ca 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -135,6 +135,14 @@ class VectorCombine { }; } // namespace +/// Return the source operand of a potentially bitcasted value. If there is no +/// bitcast, return the input value itself. +static Value *peekThroughBitcasts(Value *V) { + while (auto *BitCast = dyn_cast(V)) + V = BitCast->getOperand(0); + return V; +} + static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI) { // Do not widen load if atomic/volatile or under asan/hwasan/memtag/tsan. // The widened load may load data from dirty regions or create data races @@ -751,8 +759,8 @@ bool VectorCombine::foldBitcastShuffle(Instruction &I) { // bitcast (shuf V0, V1, MaskC) --> shuf (bitcast V0), (bitcast V1), MaskC' ++NumShufOfBitcast; - Value *CastV0 = Builder.CreateBitCast(V0, NewShuffleTy); - Value *CastV1 = Builder.CreateBitCast(V1, NewShuffleTy); + Value *CastV0 = Builder.CreateBitCast(peekThroughBitcasts(V0), NewShuffleTy); + Value *CastV1 = Builder.CreateBitCast(peekThroughBitcasts(V1), NewShuffleTy); Value *Shuf = Builder.CreateShuffleVector(CastV0, CastV1, NewMask); replaceValue(I, *Shuf); return true; diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-inseltpoison.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-inseltpoison.ll index 8c5c6656ca1795..74a58c8d313611 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle-inseltpoison.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-inseltpoison.ll @@ -133,8 +133,7 @@ define <2 x i64> @PR35454_1(<2 x i64> %v) { ; SSE-NEXT: 
ret <2 x i64> [[BC3]] ; ; AVX-LABEL: @PR35454_1( -; AVX-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32> -; AVX-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[BC]] to <16 x i8> +; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <16 x i8> ; AVX-NEXT: [[BC1:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> ; AVX-NEXT: [[ADD:%.*]] = shl <16 x i8> [[BC1]], ; AVX-NEXT: [[BC2:%.*]] = bitcast <16 x i8> [[ADD]] to <4 x i32> @@ -164,8 +163,7 @@ define <2 x i64> @PR35454_2(<2 x i64> %v) { ; SSE-NEXT: ret <2 x i64> [[BC3]] ; ; AVX-LABEL: @PR35454_2( -; AVX-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32> -; AVX-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[BC]] to <8 x i16> +; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <8 x i16> ; AVX-NEXT: [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; AVX-NEXT: [[ADD:%.*]] = shl <8 x i16> [[BC1]], ; AVX-NEXT: [[BC2:%.*]] = bitcast <8 x i16> [[ADD]] to <4 x i32> diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle.ll index 60cfc4d4b07079..d1484fd5ab3399 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle.ll @@ -133,8 +133,7 @@ define <2 x i64> @PR35454_1(<2 x i64> %v) { ; SSE-NEXT: ret <2 x i64> [[BC3]] ; ; AVX-LABEL: @PR35454_1( -; AVX-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32> -; AVX-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[BC]] to <16 x i8> +; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <16 x i8> ; AVX-NEXT: [[BC1:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> ; AVX-NEXT: [[ADD:%.*]] = shl <16 x i8> [[BC1]], ; AVX-NEXT: [[BC2:%.*]] = bitcast <16 x i8> [[ADD]] to <4 x i32> @@ -164,8 +163,7 @@ define <2 x i64> @PR35454_2(<2 x i64> %v) { ; SSE-NEXT: ret <2 x i64> [[BC3]] ; ; AVX-LABEL: @PR35454_2( -; AVX-NEXT: [[BC:%.*]] = bitcast <2 x i64> [[V:%.*]] to <4 x i32> -; AVX-NEXT: 
[[TMP1:%.*]] = bitcast <4 x i32> [[BC]] to <8 x i16> +; AVX-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[V:%.*]] to <8 x i16> ; AVX-NEXT: [[BC1:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> ; AVX-NEXT: [[ADD:%.*]] = shl <8 x i16> [[BC1]], ; AVX-NEXT: [[BC2:%.*]] = bitcast <8 x i16> [[ADD]] to <4 x i32> From 5b66b6a32ad89562732ad6a81c84783486b6187a Mon Sep 17 00:00:00 2001 From: Ivan Butygin Date: Tue, 2 Apr 2024 13:30:45 +0300 Subject: [PATCH 080/201] [mlir][pass] Add composite pass utility (#87166) Composite pass allows to run sequence of passes in the loop until fixed point or maximum number of iterations is reached. The usual candidates are canonicalize+CSE as canonicalize can open more opportunities for CSE and vice-versa. --- mlir/include/mlir/Transforms/Passes.h | 7 ++ mlir/include/mlir/Transforms/Passes.td | 17 +++ mlir/lib/Transforms/CMakeLists.txt | 1 + mlir/lib/Transforms/CompositePass.cpp | 105 ++++++++++++++++++ mlir/test/Transforms/composite-pass.mlir | 26 +++++ mlir/test/lib/Transforms/CMakeLists.txt | 1 + .../test/lib/Transforms/TestCompositePass.cpp | 38 +++++++ mlir/tools/mlir-opt/mlir-opt.cpp | 2 + 8 files changed, 197 insertions(+) create mode 100644 mlir/lib/Transforms/CompositePass.cpp create mode 100644 mlir/test/Transforms/composite-pass.mlir create mode 100644 mlir/test/lib/Transforms/TestCompositePass.cpp diff --git a/mlir/include/mlir/Transforms/Passes.h b/mlir/include/mlir/Transforms/Passes.h index 11f5b23e62c663..58bd61b2ae8b88 100644 --- a/mlir/include/mlir/Transforms/Passes.h +++ b/mlir/include/mlir/Transforms/Passes.h @@ -43,6 +43,7 @@ class GreedyRewriteConfig; #define GEN_PASS_DECL_SYMBOLDCE #define GEN_PASS_DECL_SYMBOLPRIVATIZE #define GEN_PASS_DECL_TOPOLOGICALSORT +#define GEN_PASS_DECL_COMPOSITEFIXEDPOINTPASS #include "mlir/Transforms/Passes.h.inc" /// Creates an instance of the Canonicalizer pass, configured with default @@ -130,6 +131,12 @@ createSymbolPrivatizePass(ArrayRef excludeSymbols = {}); /// their 
producers. std::unique_ptr createTopologicalSortPass(); +/// Create composite pass, which runs provided set of passes until fixed point +/// or maximum number of iterations reached. +std::unique_ptr createCompositeFixedPointPass( + std::string name, llvm::function_ref populateFunc, + int maxIterations = 10); + //===----------------------------------------------------------------------===// // Registration //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Transforms/Passes.td b/mlir/include/mlir/Transforms/Passes.td index 51b2a27da639d6..1b40a87c63f27e 100644 --- a/mlir/include/mlir/Transforms/Passes.td +++ b/mlir/include/mlir/Transforms/Passes.td @@ -552,4 +552,21 @@ def TopologicalSort : Pass<"topological-sort"> { let constructor = "mlir::createTopologicalSortPass()"; } +def CompositeFixedPointPass : Pass<"composite-fixed-point-pass"> { + let summary = "Composite fixed point pass"; + let description = [{ + Composite pass runs provided set of passes until fixed point or maximum + number of iterations reached. 
+ }]; + + let options = [ + Option<"name", "name", "std::string", /*default=*/"\"CompositeFixedPointPass\"", + "Composite pass display name">, + Option<"pipelineStr", "pipeline", "std::string", /*default=*/"", + "Composite pass inner pipeline">, + Option<"maxIter", "max-iterations", "int", /*default=*/"10", + "Maximum number of iterations if inner pipeline">, + ]; +} + #endif // MLIR_TRANSFORMS_PASSES diff --git a/mlir/lib/Transforms/CMakeLists.txt b/mlir/lib/Transforms/CMakeLists.txt index 6c32ecf8a2a2f1..90c0298fb5e46a 100644 --- a/mlir/lib/Transforms/CMakeLists.txt +++ b/mlir/lib/Transforms/CMakeLists.txt @@ -2,6 +2,7 @@ add_subdirectory(Utils) add_mlir_library(MLIRTransforms Canonicalizer.cpp + CompositePass.cpp ControlFlowSink.cpp CSE.cpp GenerateRuntimeVerification.cpp diff --git a/mlir/lib/Transforms/CompositePass.cpp b/mlir/lib/Transforms/CompositePass.cpp new file mode 100644 index 00000000000000..b388a28da6424f --- /dev/null +++ b/mlir/lib/Transforms/CompositePass.cpp @@ -0,0 +1,105 @@ +//===- CompositePass.cpp - Composite pass code ----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// CompositePass allows to run set of passes until fixed point is reached. 
+// +//===----------------------------------------------------------------------===// + +#include "mlir/Transforms/Passes.h" + +#include "mlir/Pass/Pass.h" +#include "mlir/Pass/PassManager.h" + +namespace mlir { +#define GEN_PASS_DEF_COMPOSITEFIXEDPOINTPASS +#include "mlir/Transforms/Passes.h.inc" +} // namespace mlir + +using namespace mlir; + +namespace { +struct CompositeFixedPointPass final + : public impl::CompositeFixedPointPassBase { + using CompositeFixedPointPassBase::CompositeFixedPointPassBase; + + CompositeFixedPointPass( + std::string name_, llvm::function_ref populateFunc, + int maxIterations) { + name = std::move(name_); + maxIter = maxIterations; + populateFunc(dynamicPM); + + llvm::raw_string_ostream os(pipelineStr); + dynamicPM.printAsTextualPipeline(os); + } + + LogicalResult initializeOptions( + StringRef options, + function_ref errorHandler) override { + if (failed(CompositeFixedPointPassBase::initializeOptions(options, + errorHandler))) + return failure(); + + if (failed(parsePassPipeline(pipelineStr, dynamicPM))) + return errorHandler("Failed to parse composite pass pipeline"); + + return success(); + } + + LogicalResult initialize(MLIRContext *context) override { + if (maxIter <= 0) + return emitError(UnknownLoc::get(context)) + << "Invalid maxIterations value: " << maxIter << "\n"; + + return success(); + } + + void getDependentDialects(DialectRegistry ®istry) const override { + dynamicPM.getDependentDialects(registry); + } + + void runOnOperation() override { + auto op = getOperation(); + OperationFingerPrint fp(op); + + int currentIter = 0; + int maxIterVal = maxIter; + while (true) { + if (failed(runPipeline(dynamicPM, op))) + return signalPassFailure(); + + if (currentIter++ >= maxIterVal) { + op->emitWarning("Composite pass \"" + llvm::Twine(name) + + "\"+ didn't converge in " + llvm::Twine(maxIterVal) + + " iterations"); + break; + } + + OperationFingerPrint newFp(op); + if (newFp == fp) + break; + + fp = newFp; + } + } + +protected: 
+ llvm::StringRef getName() const override { return name; } + +private: + OpPassManager dynamicPM; +}; +} // namespace + +std::unique_ptr mlir::createCompositeFixedPointPass( + std::string name, llvm::function_ref populateFunc, + int maxIterations) { + + return std::make_unique(std::move(name), + populateFunc, maxIterations); +} diff --git a/mlir/test/Transforms/composite-pass.mlir b/mlir/test/Transforms/composite-pass.mlir new file mode 100644 index 00000000000000..829470c2c9aa64 --- /dev/null +++ b/mlir/test/Transforms/composite-pass.mlir @@ -0,0 +1,26 @@ +// RUN: mlir-opt %s --log-actions-to=- --test-composite-fixed-point-pass -split-input-file | FileCheck %s +// RUN: mlir-opt %s --log-actions-to=- --composite-fixed-point-pass='name=TestCompositePass pipeline=any(canonicalize,cse)' -split-input-file | FileCheck %s + +// CHECK-LABEL: running `TestCompositePass` +// CHECK: running `Canonicalizer` +// CHECK: running `CSE` +// CHECK-NOT: running `Canonicalizer` +// CHECK-NOT: running `CSE` +func.func @test() { + return +} + +// ----- + +// CHECK-LABEL: running `TestCompositePass` +// CHECK: running `Canonicalizer` +// CHECK: running `CSE` +// CHECK: running `Canonicalizer` +// CHECK: running `CSE` +// CHECK-NOT: running `Canonicalizer` +// CHECK-NOT: running `CSE` +func.func @test() { +// this constant will be canonicalized away, causing another pass iteration + %0 = arith.constant 1.5 : f32 + return +} diff --git a/mlir/test/lib/Transforms/CMakeLists.txt b/mlir/test/lib/Transforms/CMakeLists.txt index 2a3a8608db5442..a849b7ebd29e23 100644 --- a/mlir/test/lib/Transforms/CMakeLists.txt +++ b/mlir/test/lib/Transforms/CMakeLists.txt @@ -20,6 +20,7 @@ endif() # Exclude tests from libMLIR.so add_mlir_library(MLIRTestTransforms TestCommutativityUtils.cpp + TestCompositePass.cpp TestConstantFold.cpp TestControlFlowSink.cpp TestInlining.cpp diff --git a/mlir/test/lib/Transforms/TestCompositePass.cpp b/mlir/test/lib/Transforms/TestCompositePass.cpp new file mode 100644 index 
00000000000000..5c0d93cc0d64ec --- /dev/null +++ b/mlir/test/lib/Transforms/TestCompositePass.cpp @@ -0,0 +1,38 @@ +//===------ TestCompositePass.cpp --- composite test pass -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a pass to test the composite pass utility. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Pass/Pass.h" +#include "mlir/Pass/PassManager.h" +#include "mlir/Pass/PassRegistry.h" +#include "mlir/Transforms/Passes.h" + +namespace mlir { +namespace test { +void registerTestCompositePass() { + registerPassPipeline( + "test-composite-fixed-point-pass", "Test composite pass", + [](OpPassManager &pm, StringRef optionsStr, + function_ref errorHandler) { + if (!optionsStr.empty()) + return failure(); + + pm.addPass(createCompositeFixedPointPass( + "TestCompositePass", [](OpPassManager &p) { + p.addPass(createCanonicalizerPass()); + p.addPass(createCSEPass()); + })); + return success(); + }, + [](function_ref) {}); +} +} // namespace test +} // namespace mlir diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp index 82b3881792bf3f..6ce9f3041d6f48 100644 --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -68,6 +68,7 @@ void registerTosaTestQuantUtilAPIPass(); void registerVectorizerTestPass(); namespace test { +void registerTestCompositePass(); void registerCommutativityUtils(); void registerConvertCallOpPass(); void registerInliner(); @@ -195,6 +196,7 @@ void registerTestPasses() { registerVectorizerTestPass(); registerTosaTestQuantUtilAPIPass(); + mlir::test::registerTestCompositePass(); mlir::test::registerCommutativityUtils(); 
mlir::test::registerConvertCallOpPass(); mlir::test::registerInliner(); From 2f48a1ff574573e7be170d39ab8de79d9db8bcea Mon Sep 17 00:00:00 2001 From: David Spickett Date: Tue, 2 Apr 2024 12:13:59 +0100 Subject: [PATCH 081/201] [lldb][FreeBSD] Add FreeBSD specific AT_HWCAP value (#84147) While adding register fields I realised that the AUXV values for Linux and FreeBSD disagree here. So I've added a FreeBSD specific HWCAP value that I can use from FreeBSD specific code. The alternative is translating GetAuxValue calls depending on platform, which requires that we know what we are at all times. Another way would be to convert the entries' values when we construct the AuxVector but the platform specific call that reads the data just returns a raw array. So adding another layer here is more disruption. --- lldb/source/Plugins/Process/Utility/AuxVector.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/lldb/source/Plugins/Process/Utility/AuxVector.h b/lldb/source/Plugins/Process/Utility/AuxVector.h index 3b0f55d35e5d11..4175cb73b23432 100644 --- a/lldb/source/Plugins/Process/Utility/AuxVector.h +++ b/lldb/source/Plugins/Process/Utility/AuxVector.h @@ -20,9 +20,9 @@ class AuxVector { AuxVector(const lldb_private::DataExtractor &data); /// Constants describing the type of entry. - /// On Linux, running "LD_SHOW_AUXV=1 ./executable" will spew AUX + /// On Linux and FreeBSD, running "LD_SHOW_AUXV=1 ./executable" will spew AUX /// information. Added AUXV prefix to avoid potential conflicts with system- - /// defined macros + /// defined macros. For FreeBSD, the numbers can be found in sys/elf_common.h. enum EntryType { AUXV_AT_NULL = 0, ///< End of auxv. AUXV_AT_IGNORE = 1, ///< Ignore entry. @@ -39,6 +39,11 @@ class AuxVector { AUXV_AT_EUID = 12, ///< Effective UID. AUXV_AT_GID = 13, ///< GID. AUXV_AT_EGID = 14, ///< Effective GID. + + // At this point Linux and FreeBSD diverge and many of the following values + // are Linux specific. 
If you use them make sure you are in Linux specific + // code or they have the same value on other platforms. + AUXV_AT_CLKTCK = 17, ///< Clock frequency (e.g. times(2)). AUXV_AT_PLATFORM = 15, ///< String identifying platform. AUXV_AT_HWCAP = @@ -60,6 +65,10 @@ class AuxVector { AUXV_AT_L1D_CACHESHAPE = 35, AUXV_AT_L2_CACHESHAPE = 36, AUXV_AT_L3_CACHESHAPE = 37, + + // Platform specific values which may overlap the Linux values. + + AUXV_FREEBSD_AT_HWCAP = 25, ///< FreeBSD specific AT_HWCAP value. }; std::optional GetAuxValue(enum EntryType entry_type) const; From 198c3eecee50d90cdff4b7840cfa39eef5613870 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Tue, 2 Apr 2024 13:19:51 +0200 Subject: [PATCH 082/201] [bazel] Fix the format of libc_build_rules.bzl --- utils/bazel/llvm-project-overlay/libc/libc_build_rules.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/bazel/llvm-project-overlay/libc/libc_build_rules.bzl b/utils/bazel/llvm-project-overlay/libc/libc_build_rules.bzl index 7dc12bade2605a..80cf59d7ef12c1 100644 --- a/utils/bazel/llvm-project-overlay/libc/libc_build_rules.bzl +++ b/utils/bazel/llvm-project-overlay/libc/libc_build_rules.bzl @@ -92,7 +92,7 @@ def libc_function( # x86 targets have -mno-omit-leaf-frame-pointer. platform_copts = selects.with_or({ PLATFORM_CPU_X86_64: ["-mno-omit-leaf-frame-pointer"], - "//conditions:default": [] + "//conditions:default": [], }) copts = copts + platform_copts From a88a4da61a8eb3378bc333602d5b7e56a24cfb66 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Tue, 2 Apr 2024 12:21:57 +0100 Subject: [PATCH 083/201] [lldb] clang-format AuxVector.h (#85057) Doing this in its own commit so the intent of 2f48a1ff574573e7be170d39ab8de79d9db8bcea is clearer. 
--- .../Plugins/Process/Utility/AuxVector.h | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/lldb/source/Plugins/Process/Utility/AuxVector.h b/lldb/source/Plugins/Process/Utility/AuxVector.h index 4175cb73b23432..2670b34f6b0af9 100644 --- a/lldb/source/Plugins/Process/Utility/AuxVector.h +++ b/lldb/source/Plugins/Process/Utility/AuxVector.h @@ -24,21 +24,21 @@ class AuxVector { /// information. Added AUXV prefix to avoid potential conflicts with system- /// defined macros. For FreeBSD, the numbers can be found in sys/elf_common.h. enum EntryType { - AUXV_AT_NULL = 0, ///< End of auxv. - AUXV_AT_IGNORE = 1, ///< Ignore entry. - AUXV_AT_EXECFD = 2, ///< File descriptor of program. - AUXV_AT_PHDR = 3, ///< Program headers. - AUXV_AT_PHENT = 4, ///< Size of program header. - AUXV_AT_PHNUM = 5, ///< Number of program headers. - AUXV_AT_PAGESZ = 6, ///< Page size. - AUXV_AT_BASE = 7, ///< Interpreter base address. - AUXV_AT_FLAGS = 8, ///< Flags. - AUXV_AT_ENTRY = 9, ///< Program entry point. - AUXV_AT_NOTELF = 10, ///< Set if program is not an ELF. - AUXV_AT_UID = 11, ///< UID. - AUXV_AT_EUID = 12, ///< Effective UID. - AUXV_AT_GID = 13, ///< GID. - AUXV_AT_EGID = 14, ///< Effective GID. + AUXV_AT_NULL = 0, ///< End of auxv. + AUXV_AT_IGNORE = 1, ///< Ignore entry. + AUXV_AT_EXECFD = 2, ///< File descriptor of program. + AUXV_AT_PHDR = 3, ///< Program headers. + AUXV_AT_PHENT = 4, ///< Size of program header. + AUXV_AT_PHNUM = 5, ///< Number of program headers. + AUXV_AT_PAGESZ = 6, ///< Page size. + AUXV_AT_BASE = 7, ///< Interpreter base address. + AUXV_AT_FLAGS = 8, ///< Flags. + AUXV_AT_ENTRY = 9, ///< Program entry point. + AUXV_AT_NOTELF = 10, ///< Set if program is not an ELF. + AUXV_AT_UID = 11, ///< UID. + AUXV_AT_EUID = 12, ///< Effective UID. + AUXV_AT_GID = 13, ///< GID. + AUXV_AT_EGID = 14, ///< Effective GID. // At this point Linux and FreeBSD diverge and many of the following values // are Linux specific. 
If you use them make sure you are in Linux specific From 9a05a89d1ef73de7ab787071931f449935d841a7 Mon Sep 17 00:00:00 2001 From: Carlos Alberto Enciso Date: Tue, 2 Apr 2024 12:34:31 +0100 Subject: [PATCH 084/201] [speculative-execution] Hoists debug values unnecessarily. (#85782) After https://reviews.llvm.org/D81730: `SpeculativeExecutionPass::considerHoistingFromTo` hoists instructions, including debug intrinsics, as long as none of their used values are instructions that appear prior in the block that are not being hoisted. This behaviour has been duplicated for DPValues to get rid of a binary difference. The correct solution is not hoist these debug values at all, whichever format they're in. --- .../Scalar/SpeculativeExecution.cpp | 51 +++++++++---------- .../SpeculativeExecution/PR46267.ll | 2 +- 2 files changed, 25 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp index 5efc340da60b39..f921ee72a0a1ce 100644 --- a/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp +++ b/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp @@ -260,11 +260,31 @@ static InstructionCost ComputeSpeculationCost(const Instruction *I, } } +// Do not hoist any debug info intrinsics. +// ... +// if (cond) { +// x = y * z; +// foo(); +// } +// ... +// -------- Which then becomes: +// ... +// if.then: +// %x = mul i32 %y, %z +// call void @llvm.dbg.value(%x, !"x", !DIExpression()) +// call void foo() +// +// SpeculativeExecution might decide to hoist the 'y * z' calculation +// out of the 'if' block, because it is more efficient that way, so the +// '%x = mul i32 %y, %z' moves to the block above. But it might also +// decide to hoist the 'llvm.dbg.value' call. +// This is incorrect, because even if we've moved the calculation of +// 'y * z', we should not see the value of 'x' change unless we +// actually go inside the 'if' block. 
+ bool SpeculativeExecutionPass::considerHoistingFromTo( BasicBlock &FromBlock, BasicBlock &ToBlock) { SmallPtrSet NotHoisted; - SmallDenseMap> - DbgVariableRecordsToHoist; auto HasNoUnhoistedInstr = [&NotHoisted](auto Values) { for (const Value *V : Values) { if (const auto *I = dyn_cast_or_null(V)) @@ -275,15 +295,8 @@ bool SpeculativeExecutionPass::considerHoistingFromTo( }; auto AllPrecedingUsesFromBlockHoisted = [&HasNoUnhoistedInstr](const User *U) { - // Debug variable has special operand to check it's not hoisted. - if (const auto *DVI = dyn_cast(U)) - return HasNoUnhoistedInstr(DVI->location_ops()); - - // Usially debug label intrinsic corresponds to label in LLVM IR. In - // these cases we should not move it here. - // TODO: Possible special processing needed to detect it is related to a - // hoisted instruction. - if (isa(U)) + // Do not hoist any debug info intrinsics. + if (isa(U)) return false; return HasNoUnhoistedInstr(U->operand_values()); @@ -292,12 +305,6 @@ bool SpeculativeExecutionPass::considerHoistingFromTo( InstructionCost TotalSpeculationCost = 0; unsigned NotHoistedInstCount = 0; for (const auto &I : FromBlock) { - // Make note of any DbgVariableRecords that need hoisting. DbgLabelRecords - // get left behind just like llvm.dbg.labels. - for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) { - if (HasNoUnhoistedInstr(DVR.location_ops())) - DbgVariableRecordsToHoist[DVR.getInstruction()].push_back(&DVR); - } const InstructionCost Cost = ComputeSpeculationCost(&I, *TTI); if (Cost.isValid() && isSafeToSpeculativelyExecute(&I) && AllPrecedingUsesFromBlockHoisted(&I)) { @@ -315,16 +322,6 @@ bool SpeculativeExecutionPass::considerHoistingFromTo( } for (auto I = FromBlock.begin(); I != FromBlock.end();) { - // If any DbgVariableRecords attached to this instruction should be hoisted, - // hoist them now - they will end up attached to either the next hoisted - // instruction or the ToBlock terminator. 
- if (DbgVariableRecordsToHoist.contains(&*I)) { - for (auto *DVR : DbgVariableRecordsToHoist[&*I]) { - DVR->removeFromParent(); - ToBlock.insertDbgRecordBefore(DVR, - ToBlock.getTerminator()->getIterator()); - } - } // We have to increment I before moving Current as moving Current // changes the list that I is iterating through. auto Current = I; diff --git a/llvm/test/Transforms/SpeculativeExecution/PR46267.ll b/llvm/test/Transforms/SpeculativeExecution/PR46267.ll index d940ee6a7863d7..69dac2220d9a64 100644 --- a/llvm/test/Transforms/SpeculativeExecution/PR46267.ll +++ b/llvm/test/Transforms/SpeculativeExecution/PR46267.ll @@ -31,7 +31,6 @@ define void @f(i32 %i) { entry: ; CHECK-LABEL: @f( ; CHECK: %a2 = add i32 %i, 0 -; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %a2 br i1 undef, label %land.rhs, label %land.end land.rhs: ; preds = %entry @@ -42,6 +41,7 @@ land.rhs: ; preds = %entry ; CHECK-NEXT: %a0 = load i32, ptr undef, align 1 ; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %a0 ; CHECK-NEXT: call void @llvm.dbg.label +; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %a2 call void @llvm.dbg.label(metadata !11), !dbg !10 %y = alloca i32, align 4 call void @llvm.dbg.declare(metadata ptr %y, metadata !14, metadata !DIExpression()), !dbg !10 From 0b13e2c82315eac8926f1c4497c4d56a507c3999 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Tue, 2 Apr 2024 13:44:38 +0200 Subject: [PATCH 085/201] [bazel] Another format fix for libc_build_rules.bzl, NFC --- utils/bazel/llvm-project-overlay/libc/libc_build_rules.bzl | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/libc/libc_build_rules.bzl b/utils/bazel/llvm-project-overlay/libc/libc_build_rules.bzl index 80cf59d7ef12c1..be59e18ffd89a9 100644 --- a/utils/bazel/llvm-project-overlay/libc/libc_build_rules.bzl +++ b/utils/bazel/llvm-project-overlay/libc/libc_build_rules.bzl @@ -78,6 +78,7 @@ def libc_function( its deps. **kwargs: Other attributes relevant for a cc_library. 
For example, deps. """ + # We use the explicit equals pattern here because append and += mutate the # original list, where this creates a new list and stores it in deps. copts = copts or [] @@ -89,6 +90,7 @@ def libc_function( "-fno-omit-frame-pointer", "-fstack-protector-strong", ] + # x86 targets have -mno-omit-leaf-frame-pointer. platform_copts = selects.with_or({ PLATFORM_CPU_X86_64: ["-mno-omit-leaf-frame-pointer"], From 7ef602b58c1ccacab20d9d01e24b281458c3facc Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Tue, 2 Apr 2024 13:48:45 +0200 Subject: [PATCH 086/201] Reapply "[clang][nullability] allow _Nonnull etc on nullable class types (#82705)" (#87325) This reverts commit 28760b63bbf9e267713957105a8d17091fb0d20e. The last commit was missing the new testcase, now fixed. --- clang/docs/ReleaseNotes.rst | 15 +++++ clang/include/clang/Basic/Attr.td | 3 +- clang/include/clang/Basic/AttrDocs.td | 25 ++++++++ clang/include/clang/Basic/Features.def | 1 + clang/include/clang/Parse/Parser.h | 1 + clang/include/clang/Sema/Sema.h | 3 + clang/lib/AST/Type.cpp | 29 ++++++--- clang/lib/CodeGen/CGCall.cpp | 3 +- clang/lib/CodeGen/CodeGenFunction.cpp | 3 +- clang/lib/Parse/ParseDeclCXX.cpp | 33 +++++++--- clang/lib/Sema/SemaAttr.cpp | 12 ++++ clang/lib/Sema/SemaChecking.cpp | 9 +++ clang/lib/Sema/SemaDecl.cpp | 4 +- clang/lib/Sema/SemaDeclAttr.cpp | 18 ++++++ clang/lib/Sema/SemaInit.cpp | 5 ++ clang/lib/Sema/SemaOverload.cpp | 7 +++ clang/lib/Sema/SemaTemplate.cpp | 1 + clang/lib/Sema/SemaType.cpp | 18 ++++-- clang/test/Sema/nullability.c | 2 + clang/test/SemaCXX/nullability.cpp | 62 ++++++++++++++++++- .../Inputs/nullability-consistency-smart.h | 7 +++ .../SemaObjCXX/nullability-consistency.mm | 1 + 22 files changed, 233 insertions(+), 29 deletions(-) create mode 100644 clang/test/SemaObjCXX/Inputs/nullability-consistency-smart.h diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 76eaf0bf11c303..b2faab1f1525b2 100644 --- 
a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -253,6 +253,21 @@ Attribute Changes in Clang added a new extension query ``__has_extension(swiftcc)`` corresponding to the ``__attribute__((swiftcc))`` attribute. +- The ``_Nullable`` and ``_Nonnull`` family of type attributes can now apply + to certain C++ class types, such as smart pointers: + ``void useObject(std::unique_ptr _Nonnull obj);``. + + This works for standard library types including ``unique_ptr``, ``shared_ptr``, + and ``function``. See + `the attribute reference documentation `_ + for the full list. + +- The ``_Nullable`` attribute can be applied to C++ class declarations: + ``template class _Nullable MySmartPointer {};``. + + This allows the ``_Nullable`` and ``_Nonnull`` family of type attributes to + apply to this class. + Improvements to Clang's diagnostics ----------------------------------- - Clang now applies syntax highlighting to the code snippets it diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 80e607525a0a37..6584460cf5685e 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -2178,9 +2178,10 @@ def TypeNonNull : TypeAttr { let Documentation = [TypeNonNullDocs]; } -def TypeNullable : TypeAttr { +def TypeNullable : DeclOrTypeAttr { let Spellings = [CustomKeyword<"_Nullable">]; let Documentation = [TypeNullableDocs]; +// let Subjects = SubjectList<[CXXRecord], ErrorDiag>; } def TypeNullableResult : TypeAttr { diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 3ea4d676b4f89d..0ca4ea377fc36a 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -4151,6 +4151,20 @@ non-underscored keywords. 
For example: @property (assign, nullable) NSView *superview; @property (readonly, nonnull) NSArray *subviews; @end + +As well as built-in pointer types, the nullability attributes can be attached +to C++ classes marked with the ``_Nullable`` attribute. + +The following C++ standard library types are considered nullable: +``unique_ptr``, ``shared_ptr``, ``auto_ptr``, ``exception_ptr``, ``function``, +``move_only_function`` and ``coroutine_handle``. + +Types should be marked nullable only where the type itself leaves nullability +ambiguous. For example, ``std::optional`` is not marked ``_Nullable``, because +``optional _Nullable`` is redundant and ``optional _Nonnull`` is +not a useful type. ``std::weak_ptr`` is not nullable, because its nullability +can change with no visible modification, so static annotation is unlikely to be +unhelpful. }]; } @@ -4185,6 +4199,17 @@ The ``_Nullable`` nullability qualifier indicates that a value of the int fetch_or_zero(int * _Nullable ptr); a caller of ``fetch_or_zero`` can provide null. + +The ``_Nullable`` attribute on classes indicates that the given class can +represent null values, and so the ``_Nullable``, ``_Nonnull`` etc qualifiers +make sense for this type. For example: + + .. code-block:: c + + class _Nullable ArenaPointer { ... 
}; + + ArenaPointer _Nonnull x = ...; + ArenaPointer _Nullable y = nullptr; }]; } diff --git a/clang/include/clang/Basic/Features.def b/clang/include/clang/Basic/Features.def index b41aadc73f205d..fe4d1c4afcca65 100644 --- a/clang/include/clang/Basic/Features.def +++ b/clang/include/clang/Basic/Features.def @@ -94,6 +94,7 @@ EXTENSION(define_target_os_macros, FEATURE(enumerator_attributes, true) FEATURE(nullability, true) FEATURE(nullability_on_arrays, true) +FEATURE(nullability_on_classes, true) FEATURE(nullability_nullable_result, true) FEATURE(memory_sanitizer, LangOpts.Sanitize.hasOneOf(SanitizerKind::Memory | diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index bba8ef4ff01739..580bf2a5d79df5 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -3014,6 +3014,7 @@ class Parser : public CodeCompletionHandler { void DiagnoseAndSkipExtendedMicrosoftTypeAttributes(); SourceLocation SkipExtendedMicrosoftTypeAttributes(); void ParseMicrosoftInheritanceClassAttributes(ParsedAttributes &attrs); + void ParseNullabilityClassAttributes(ParsedAttributes &attrs); void ParseBorlandTypeAttributes(ParsedAttributes &attrs); void ParseOpenCLKernelAttributes(ParsedAttributes &attrs); void ParseOpenCLQualifiers(ParsedAttributes &Attrs); diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index a02b684f2c77e2..8c98d8c7fef7a7 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -1660,6 +1660,9 @@ class Sema final { /// Add [[gsl::Pointer]] attributes for std:: types. void inferGslPointerAttribute(TypedefNameDecl *TD); + /// Add _Nullable attributes for std:: types. 
+ void inferNullableClassAttribute(CXXRecordDecl *CRD); + enum PragmaOptionsAlignKind { POAK_Native, // #pragma options align=native POAK_Natural, // #pragma options align=natural diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index 779d8a810820d2..cb22c91a12aa89 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -4652,16 +4652,15 @@ bool Type::canHaveNullability(bool ResultIfUnknown) const { case Type::Auto: return ResultIfUnknown; - // Dependent template specializations can instantiate to pointer - // types unless they're known to be specializations of a class - // template. + // Dependent template specializations could instantiate to pointer types. case Type::TemplateSpecialization: - if (TemplateDecl *templateDecl - = cast(type.getTypePtr()) - ->getTemplateName().getAsTemplateDecl()) { - if (isa(templateDecl)) - return false; - } + // If it's a known class template, we can already check if it's nullable. + if (TemplateDecl *templateDecl = + cast(type.getTypePtr()) + ->getTemplateName() + .getAsTemplateDecl()) + if (auto *CTD = dyn_cast(templateDecl)) + return CTD->getTemplatedDecl()->hasAttr(); return ResultIfUnknown; case Type::Builtin: @@ -4718,6 +4717,17 @@ bool Type::canHaveNullability(bool ResultIfUnknown) const { } llvm_unreachable("unknown builtin type"); + case Type::Record: { + const RecordDecl *RD = cast(type)->getDecl(); + // For template specializations, look only at primary template attributes. + // This is a consistent regardless of whether the instantiation is known. + if (const auto *CTSD = dyn_cast(RD)) + return CTSD->getSpecializedTemplate() + ->getTemplatedDecl() + ->hasAttr(); + return RD->hasAttr(); + } + // Non-pointer types. 
case Type::Complex: case Type::LValueReference: @@ -4735,7 +4745,6 @@ bool Type::canHaveNullability(bool ResultIfUnknown) const { case Type::DependentAddressSpace: case Type::FunctionProto: case Type::FunctionNoProto: - case Type::Record: case Type::DeducedTemplateSpecialization: case Type::Enum: case Type::InjectedClassName: diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 9308528ac93823..f12765b826935b 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -4379,7 +4379,8 @@ void CodeGenFunction::EmitNonNullArgCheck(RValue RV, QualType ArgType, NNAttr = getNonNullAttr(AC.getDecl(), PVD, ArgType, ArgNo); bool CanCheckNullability = false; - if (SanOpts.has(SanitizerKind::NullabilityArg) && !NNAttr && PVD) { + if (SanOpts.has(SanitizerKind::NullabilityArg) && !NNAttr && PVD && + !PVD->getType()->isRecordType()) { auto Nullability = PVD->getType()->getNullability(); CanCheckNullability = Nullability && *Nullability == NullabilityKind::NonNull && diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 90324de7268ebe..6474d6c8c1d1e4 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -990,7 +990,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, // return value. Initialize the flag to 'true' and refine it in EmitParmDecl. 
if (SanOpts.has(SanitizerKind::NullabilityReturn)) { auto Nullability = FnRetTy->getNullability(); - if (Nullability && *Nullability == NullabilityKind::NonNull) { + if (Nullability && *Nullability == NullabilityKind::NonNull && + !FnRetTy->isRecordType()) { if (!(SanOpts.has(SanitizerKind::ReturnsNonnullAttribute) && CurCodeDecl && CurCodeDecl->getAttr())) RetValNullabilityPrecondition = diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp index 63fe678cbb29e2..861a25dc5103c1 100644 --- a/clang/lib/Parse/ParseDeclCXX.cpp +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -1502,6 +1502,15 @@ void Parser::ParseMicrosoftInheritanceClassAttributes(ParsedAttributes &attrs) { } } +void Parser::ParseNullabilityClassAttributes(ParsedAttributes &attrs) { + while (Tok.is(tok::kw__Nullable)) { + IdentifierInfo *AttrName = Tok.getIdentifierInfo(); + auto Kind = Tok.getKind(); + SourceLocation AttrNameLoc = ConsumeToken(); + attrs.addNew(AttrName, AttrNameLoc, nullptr, AttrNameLoc, nullptr, 0, Kind); + } +} + /// Determine whether the following tokens are valid after a type-specifier /// which could be a standalone declaration. This will conservatively return /// true if there's any doubt, and is appropriate for insert-';' fixits. @@ -1683,15 +1692,21 @@ void Parser::ParseClassSpecifier(tok::TokenKind TagTokKind, ParsedAttributes attrs(AttrFactory); // If attributes exist after tag, parse them. - MaybeParseAttributes(PAKM_CXX11 | PAKM_Declspec | PAKM_GNU, attrs); - - // Parse inheritance specifiers. - if (Tok.isOneOf(tok::kw___single_inheritance, tok::kw___multiple_inheritance, - tok::kw___virtual_inheritance)) - ParseMicrosoftInheritanceClassAttributes(attrs); - - // Allow attributes to precede or succeed the inheritance specifiers. - MaybeParseAttributes(PAKM_CXX11 | PAKM_Declspec | PAKM_GNU, attrs); + for (;;) { + MaybeParseAttributes(PAKM_CXX11 | PAKM_Declspec | PAKM_GNU, attrs); + // Parse inheritance specifiers. 
+ if (Tok.isOneOf(tok::kw___single_inheritance, + tok::kw___multiple_inheritance, + tok::kw___virtual_inheritance)) { + ParseMicrosoftInheritanceClassAttributes(attrs); + continue; + } + if (Tok.is(tok::kw__Nullable)) { + ParseNullabilityClassAttributes(attrs); + continue; + } + break; + } // Source location used by FIXIT to insert misplaced // C++11 attributes diff --git a/clang/lib/Sema/SemaAttr.cpp b/clang/lib/Sema/SemaAttr.cpp index 0dcf42e4899713..a5dd158808f26b 100644 --- a/clang/lib/Sema/SemaAttr.cpp +++ b/clang/lib/Sema/SemaAttr.cpp @@ -215,6 +215,18 @@ void Sema::inferGslOwnerPointerAttribute(CXXRecordDecl *Record) { inferGslPointerAttribute(Record, Record); } +void Sema::inferNullableClassAttribute(CXXRecordDecl *CRD) { + static llvm::StringSet<> Nullable{ + "auto_ptr", "shared_ptr", "unique_ptr", "exception_ptr", + "coroutine_handle", "function", "move_only_function", + }; + + if (CRD->isInStdNamespace() && Nullable.count(CRD->getName()) && + !CRD->hasAttr()) + for (Decl *Redecl : CRD->redecls()) + Redecl->addAttr(TypeNullableAttr::CreateImplicit(Context)); +} + void Sema::ActOnPragmaOptionsAlign(PragmaOptionsAlignKind Kind, SourceLocation PragmaLoc) { PragmaMsStackAction Action = Sema::PSK_Reset; diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 11401b6f56c0ea..3dcd18b3afc8b4 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -27,6 +27,7 @@ #include "clang/AST/ExprObjC.h" #include "clang/AST/ExprOpenMP.h" #include "clang/AST/FormatString.h" +#include "clang/AST/IgnoreExpr.h" #include "clang/AST/NSAPI.h" #include "clang/AST/NonTrivialTypeVisitor.h" #include "clang/AST/OperationKinds.h" @@ -7610,6 +7611,14 @@ bool Sema::getFormatStringInfo(const FormatAttr *Format, bool IsCXXMember, /// /// Returns true if the value evaluates to null. 
static bool CheckNonNullExpr(Sema &S, const Expr *Expr) { + // Treat (smart) pointers constructed from nullptr as null, whether we can + // const-evaluate them or not. + // This must happen first: the smart pointer expr might have _Nonnull type! + if (isa( + IgnoreExprNodes(Expr, IgnoreImplicitAsWrittenSingleStep, + IgnoreElidableImplicitConstructorSingleStep))) + return true; + // If the expression has non-null type, it doesn't evaluate to null. if (auto nullability = Expr->IgnoreImplicit()->getType()->getNullability()) { if (*nullability == NullabilityKind::NonNull) diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 5027deda0d7e09..6ff85c0c5c29da 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -18319,8 +18319,10 @@ Sema::ActOnTag(Scope *S, unsigned TagSpec, TagUseKind TUK, SourceLocation KWLoc, if (PrevDecl) mergeDeclAttributes(New, PrevDecl); - if (auto *CXXRD = dyn_cast(New)) + if (auto *CXXRD = dyn_cast(New)) { inferGslOwnerPointerAttribute(CXXRD); + inferNullableClassAttribute(CXXRD); + } // If there's a #pragma GCC visibility in scope, set the visibility of this // record. 
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index f25f3afd0f4af2..8bce04640e748e 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -5982,6 +5982,20 @@ static void handleBuiltinAliasAttr(Sema &S, Decl *D, D->addAttr(::new (S.Context) BuiltinAliasAttr(S.Context, AL, Ident)); } +static void handleNullableTypeAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + if (AL.isUsedAsTypeAttr()) + return; + + if (auto *CRD = dyn_cast(D); + !CRD || !(CRD->isClass() || CRD->isStruct())) { + S.Diag(AL.getRange().getBegin(), diag::err_attribute_wrong_decl_type_str) + << AL << AL.isRegularKeywordAttribute() << "classes"; + return; + } + + handleSimpleAttribute(S, D, AL); +} + static void handlePreferredTypeAttr(Sema &S, Decl *D, const ParsedAttr &AL) { if (!AL.hasParsedType()) { S.Diag(AL.getLoc(), diag::err_attribute_wrong_number_arguments) << AL << 1; @@ -9933,6 +9947,10 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_UsingIfExists: handleSimpleAttribute(S, D, AL); break; + + case ParsedAttr::AT_TypeNullable: + handleNullableTypeAttr(S, D, AL); + break; } } diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index 777f89c70f87c2..e2a1951f1062cb 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -7082,6 +7082,11 @@ PerformConstructorInitialization(Sema &S, hasCopyOrMoveCtorParam(S.Context, getConstructorInfo(Step.Function.FoundDecl)); + // A smart pointer constructed from a nullable pointer is nullable. + if (NumArgs == 1 && !Kind.isExplicitCast()) + S.diagnoseNullableToNonnullConversion( + Entity.getType(), Args.front()->getType(), Kind.getLocation()); + // Determine the arguments required to actually perform the constructor // call. 
if (S.CompleteConstructorCall(Constructor, Step.Type, Args, Loc, diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 16d54c1ffe5fd9..0c913bc700f4a1 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -14826,6 +14826,13 @@ ExprResult Sema::CreateOverloadedBinOp(SourceLocation OpLoc, } } + // Check for nonnull = nullable. + // This won't be caught in the arg's initialization: the parameter to + // the assignment operator is not marked nonnull. + if (Op == OO_Equal) + diagnoseNullableToNonnullConversion(Args[0]->getType(), + Args[1]->getType(), OpLoc); + // Convert the arguments. if (CXXMethodDecl *Method = dyn_cast(FnDecl)) { // Best->Access is only meaningful for class members. diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index 1a2d5e9310dbe1..befec401c8eec3 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -2191,6 +2191,7 @@ DeclResult Sema::CheckClassTemplate( AddPushedVisibilityAttribute(NewClass); inferGslOwnerPointerAttribute(NewClass); + inferNullableClassAttribute(NewClass); if (TUK != TUK_Friend) { // Per C++ [basic.scope.temp]p2, skip the template parameter scopes. diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index d88895d3529458..8762744396f4dd 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -4717,6 +4717,18 @@ static bool DiagnoseMultipleAddrSpaceAttributes(Sema &S, LangAS ASOld, return false; } +// Whether this is a type broadly expected to have nullability attached. +// These types are affected by `#pragma assume_nonnull`, and missing nullability +// will be diagnosed with -Wnullability-completeness. +static bool shouldHaveNullability(QualType T) { + return T->canHaveNullability(/*ResultIfUnknown=*/false) && + // For now, do not infer/require nullability on C++ smart pointers. + // It's unclear whether the pragma's behavior is useful for C++. + // e.g. 
treating type-aliases and template-type-parameters differently + // from types of declarations can be surprising. + !isa(T->getCanonicalTypeInternal()); +} + static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state, QualType declSpecType, TypeSourceInfo *TInfo) { @@ -4835,8 +4847,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state, // inner pointers. complainAboutMissingNullability = CAMN_InnerPointers; - if (T->canHaveNullability(/*ResultIfUnknown*/ false) && - !T->getNullability()) { + if (shouldHaveNullability(T) && !T->getNullability()) { // Note that we allow but don't require nullability on dependent types. ++NumPointersRemaining; } @@ -5059,8 +5070,7 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state, // If the type itself could have nullability but does not, infer pointer // nullability and perform consistency checking. if (S.CodeSynthesisContexts.empty()) { - if (T->canHaveNullability(/*ResultIfUnknown*/ false) && - !T->getNullability()) { + if (shouldHaveNullability(T) && !T->getNullability()) { if (isVaList(T)) { // Record that we've seen a pointer, but do nothing else. 
if (NumPointersRemaining > 0) diff --git a/clang/test/Sema/nullability.c b/clang/test/Sema/nullability.c index 7d193bea46771f..0401516233b6db 100644 --- a/clang/test/Sema/nullability.c +++ b/clang/test/Sema/nullability.c @@ -248,3 +248,5 @@ void arraysInBlocks(void) { void (^withTypedefBad)(INTS _Nonnull [2]) = // expected-error {{nullability specifier '_Nonnull' cannot be applied to non-pointer type 'INTS' (aka 'int[4]')}} ^(INTS _Nonnull x[2]) {}; // expected-error {{nullability specifier '_Nonnull' cannot be applied to non-pointer type 'INTS' (aka 'int[4]')}} } + +struct _Nullable NotCplusplusClass {}; // expected-error {{'_Nullable' attribute only applies to classes}} diff --git a/clang/test/SemaCXX/nullability.cpp b/clang/test/SemaCXX/nullability.cpp index 8d0c4dc195a6bd..d52ba4efaccdbd 100644 --- a/clang/test/SemaCXX/nullability.cpp +++ b/clang/test/SemaCXX/nullability.cpp @@ -4,6 +4,10 @@ #else # error nullability feature should be defined #endif +#if __has_feature(nullability_on_classes) +#else +# error smart-pointer feature should be defined +#endif #include "nullability-completeness.h" @@ -27,6 +31,7 @@ template struct AddNonNull { typedef _Nonnull T type; // expected-error{{nullability specifier '_Nonnull' cannot be applied to non-pointer type 'int'}} // expected-error@-1{{nullability specifier '_Nonnull' cannot be applied to non-pointer type 'std::nullptr_t'}} + // expected-error@-2{{nullability specifier '_Nonnull' cannot be applied to non-pointer type 'NotPtr'}} }; typedef AddNonNull::type nonnull_int_ptr_1; @@ -35,6 +40,33 @@ typedef AddNonNull::type nonnull_int_ptr_3; // expected-note{{in inst typedef AddNonNull::type nonnull_non_pointer_1; // expected-note{{in instantiation of template class 'AddNonNull' requested here}} +// Nullability on C++ class types (smart pointers). 
+struct NotPtr{}; +typedef AddNonNull::type nonnull_non_pointer_2; // expected-note{{in instantiation}} +struct _Nullable SmartPtr{ + SmartPtr(); + SmartPtr(nullptr_t); + SmartPtr(const SmartPtr&); + SmartPtr(SmartPtr&&); + SmartPtr &operator=(const SmartPtr&); + SmartPtr &operator=(SmartPtr&&); +}; +typedef AddNonNull::type nonnull_smart_pointer_1; +template struct _Nullable SmartPtrTemplate{}; +typedef AddNonNull>::type nonnull_smart_pointer_2; +namespace std { inline namespace __1 { + template class unique_ptr {}; + template class function; + template class function {}; +} } +typedef AddNonNull>::type nonnull_smart_pointer_3; +typedef AddNonNull>::type nonnull_smart_pointer_4; + +class Derived : public SmartPtr {}; +Derived _Nullable x; // expected-error {{'_Nullable' cannot be applied}} +class DerivedPrivate : private SmartPtr {}; +DerivedPrivate _Nullable y; // expected-error {{'_Nullable' cannot be applied}} + // Non-null checking within a template. template struct AddNonNull2 { @@ -54,6 +86,7 @@ void (*& accepts_nonnull_2)(_Nonnull int *ptr) = accepts_nonnull_1; void (X::* accepts_nonnull_3)(_Nonnull int *ptr); void accepts_nonnull_4(_Nonnull int *ptr); void (&accepts_nonnull_5)(_Nonnull int *ptr) = accepts_nonnull_4; +void accepts_nonnull_6(SmartPtr _Nonnull); void test_accepts_nonnull_null_pointer_literal(X *x) { accepts_nonnull_1(0); // expected-warning{{null passed to a callee that requires a non-null argument}} @@ -61,6 +94,8 @@ void test_accepts_nonnull_null_pointer_literal(X *x) { (x->*accepts_nonnull_3)(0); // expected-warning{{null passed to a callee that requires a non-null argument}} accepts_nonnull_4(0); // expected-warning{{null passed to a callee that requires a non-null argument}} accepts_nonnull_5(0); // expected-warning{{null passed to a callee that requires a non-null argument}} + + accepts_nonnull_6(nullptr); // expected-warning{{null passed to a callee that requires a non-null argument}} } template @@ -71,6 +106,7 @@ void 
test_accepts_nonnull_null_pointer_literal_template() { template void test_accepts_nonnull_null_pointer_literal_template<&accepts_nonnull_4>(); // expected-note{{instantiation of function template specialization}} void TakeNonnull(void *_Nonnull); +void TakeSmartNonnull(SmartPtr _Nonnull); // Check different forms of assignment to a nonull type from a nullable one. void AssignAndInitNonNull() { void *_Nullable nullable; @@ -81,12 +117,26 @@ void AssignAndInitNonNull() { void *_Nonnull nonnull; nonnull = nullable; // expected-warning{{implicit conversion from nullable pointer 'void * _Nullable' to non-nullable pointer type 'void * _Nonnull'}} nonnull = {nullable}; // expected-warning{{implicit conversion from nullable pointer 'void * _Nullable' to non-nullable pointer type 'void * _Nonnull'}} - TakeNonnull(nullable); //expected-warning{{implicit conversion from nullable pointer 'void * _Nullable' to non-nullable pointer type 'void * _Nonnull}} TakeNonnull(nonnull); // OK + nonnull = (void *_Nonnull)nullable; // explicit cast OK + + SmartPtr _Nullable s_nullable; + SmartPtr _Nonnull s(s_nullable); // expected-warning{{implicit conversion from nullable pointer 'SmartPtr _Nullable' to non-nullable pointer type 'SmartPtr _Nonnull'}} + SmartPtr _Nonnull s2{s_nullable}; // expected-warning{{implicit conversion from nullable pointer 'SmartPtr _Nullable' to non-nullable pointer type 'SmartPtr _Nonnull'}} + SmartPtr _Nonnull s3 = {s_nullable}; // expected-warning{{implicit conversion from nullable pointer 'SmartPtr _Nullable' to non-nullable pointer type 'SmartPtr _Nonnull'}} + SmartPtr _Nonnull s4 = s_nullable; // expected-warning{{implicit conversion from nullable pointer 'SmartPtr _Nullable' to non-nullable pointer type 'SmartPtr _Nonnull'}} + SmartPtr _Nonnull s_nonnull; + s_nonnull = s_nullable; // expected-warning{{implicit conversion from nullable pointer 'SmartPtr _Nullable' to non-nullable pointer type 'SmartPtr _Nonnull'}} + s_nonnull = {s_nullable}; // no warning 
here - might be nice? + TakeSmartNonnull(s_nullable); //expected-warning{{implicit conversion from nullable pointer 'SmartPtr _Nullable' to non-nullable pointer type 'SmartPtr _Nonnull}} + TakeSmartNonnull(s_nonnull); // OK + s_nonnull = (SmartPtr _Nonnull)s_nullable; // explicit cast OK + s_nonnull = static_cast(s_nullable); // explicit cast OK } void *_Nullable ReturnNullable(); +SmartPtr _Nullable ReturnSmartNullable(); void AssignAndInitNonNullFromFn() { void *_Nonnull p(ReturnNullable()); // expected-warning{{implicit conversion from nullable pointer 'void * _Nullable' to non-nullable pointer type 'void * _Nonnull'}} @@ -96,8 +146,16 @@ void AssignAndInitNonNullFromFn() { void *_Nonnull nonnull; nonnull = ReturnNullable(); // expected-warning{{implicit conversion from nullable pointer 'void * _Nullable' to non-nullable pointer type 'void * _Nonnull'}} nonnull = {ReturnNullable()}; // expected-warning{{implicit conversion from nullable pointer 'void * _Nullable' to non-nullable pointer type 'void * _Nonnull'}} - TakeNonnull(ReturnNullable()); //expected-warning{{implicit conversion from nullable pointer 'void * _Nullable' to non-nullable pointer type 'void * _Nonnull}} + + SmartPtr _Nonnull s(ReturnSmartNullable()); // expected-warning{{implicit conversion from nullable pointer 'SmartPtr _Nullable' to non-nullable pointer type 'SmartPtr _Nonnull'}} + SmartPtr _Nonnull s2{ReturnSmartNullable()}; // expected-warning{{implicit conversion from nullable pointer 'SmartPtr _Nullable' to non-nullable pointer type 'SmartPtr _Nonnull'}} + SmartPtr _Nonnull s3 = {ReturnSmartNullable()}; // expected-warning{{implicit conversion from nullable pointer 'SmartPtr _Nullable' to non-nullable pointer type 'SmartPtr _Nonnull'}} + SmartPtr _Nonnull s4 = ReturnSmartNullable(); // expected-warning{{implicit conversion from nullable pointer 'SmartPtr _Nullable' to non-nullable pointer type 'SmartPtr _Nonnull'}} + SmartPtr _Nonnull s_nonnull; + s_nonnull = ReturnSmartNullable(); // 
expected-warning{{implicit conversion from nullable pointer 'SmartPtr _Nullable' to non-nullable pointer type 'SmartPtr _Nonnull'}} + s_nonnull = {ReturnSmartNullable()}; + TakeSmartNonnull(ReturnSmartNullable()); // expected-warning{{implicit conversion from nullable pointer 'SmartPtr _Nullable' to non-nullable pointer type 'SmartPtr _Nonnull'}} } void ConditionalExpr(bool c) { diff --git a/clang/test/SemaObjCXX/Inputs/nullability-consistency-smart.h b/clang/test/SemaObjCXX/Inputs/nullability-consistency-smart.h new file mode 100644 index 00000000000000..a28532e5d71668 --- /dev/null +++ b/clang/test/SemaObjCXX/Inputs/nullability-consistency-smart.h @@ -0,0 +1,7 @@ +class _Nullable Smart; + +void f1(int * _Nonnull); + +void f2(Smart); // OK, not required on smart-pointer types +using Alias = Smart; +void f3(Alias); diff --git a/clang/test/SemaObjCXX/nullability-consistency.mm b/clang/test/SemaObjCXX/nullability-consistency.mm index 6921d8b9d3dd5b..09c9a84475a939 100644 --- a/clang/test/SemaObjCXX/nullability-consistency.mm +++ b/clang/test/SemaObjCXX/nullability-consistency.mm @@ -9,6 +9,7 @@ #include "nullability-consistency-6.h" #include "nullability-consistency-7.h" #include "nullability-consistency-8.h" +#include "nullability-consistency-smart.h" #include "nullability-consistency-system.h" void h1(int *ptr) { } // don't warn From beeb15b71650b46f39cb6b1917e8d05568978656 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Tue, 2 Apr 2024 13:52:07 +0200 Subject: [PATCH 087/201] [libc++][NFC] Remove a few unused <__availablity> includes (#86126) --- libcxx/include/__exception/operations.h | 1 - libcxx/include/__filesystem/copy_options.h | 1 - libcxx/include/__filesystem/directory_options.h | 1 - libcxx/include/__filesystem/file_status.h | 1 - libcxx/include/__filesystem/file_time_type.h | 1 - libcxx/include/__filesystem/file_type.h | 1 - libcxx/include/__filesystem/perm_options.h | 1 - libcxx/include/__filesystem/perms.h | 1 - 
libcxx/include/__filesystem/space_info.h | 1 - libcxx/include/__format/format_args.h | 1 - libcxx/include/__format/format_context.h | 1 - libcxx/include/__format/formatter.h | 1 - libcxx/include/__format/formatter_bool.h | 1 - libcxx/include/__format/formatter_char.h | 1 - libcxx/include/__format/formatter_integer.h | 1 - libcxx/include/__format/formatter_pointer.h | 1 - libcxx/include/__format/formatter_string.h | 1 - libcxx/include/__fwd/format.h | 1 - libcxx/include/__locale | 1 - libcxx/include/__memory/shared_ptr.h | 1 - libcxx/include/__thread/support/pthread.h | 1 - libcxx/include/any | 1 - libcxx/include/future | 1 - libcxx/include/new | 1 - libcxx/include/shared_mutex | 1 - libcxx/include/thread | 1 - libcxx/include/typeinfo | 1 - 27 files changed, 27 deletions(-) diff --git a/libcxx/include/__exception/operations.h b/libcxx/include/__exception/operations.h index 8f374c0ccee50b..0a9c7a7c7f0d88 100644 --- a/libcxx/include/__exception/operations.h +++ b/libcxx/include/__exception/operations.h @@ -9,7 +9,6 @@ #ifndef _LIBCPP___EXCEPTION_OPERATIONS_H #define _LIBCPP___EXCEPTION_OPERATIONS_H -#include <__availability> #include <__config> #include diff --git a/libcxx/include/__filesystem/copy_options.h b/libcxx/include/__filesystem/copy_options.h index 1bf71292c8a66b..097eebe61137d7 100644 --- a/libcxx/include/__filesystem/copy_options.h +++ b/libcxx/include/__filesystem/copy_options.h @@ -10,7 +10,6 @@ #ifndef _LIBCPP___FILESYSTEM_COPY_OPTIONS_H #define _LIBCPP___FILESYSTEM_COPY_OPTIONS_H -#include <__availability> #include <__config> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__filesystem/directory_options.h b/libcxx/include/__filesystem/directory_options.h index 683c4678e083bc..d0cd3ebfdaa7ee 100644 --- a/libcxx/include/__filesystem/directory_options.h +++ b/libcxx/include/__filesystem/directory_options.h @@ -10,7 +10,6 @@ #ifndef _LIBCPP___FILESYSTEM_DIRECTORY_OPTIONS_H #define _LIBCPP___FILESYSTEM_DIRECTORY_OPTIONS_H 
-#include <__availability> #include <__config> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__filesystem/file_status.h b/libcxx/include/__filesystem/file_status.h index 3e2b32eef82e02..da316c8b027464 100644 --- a/libcxx/include/__filesystem/file_status.h +++ b/libcxx/include/__filesystem/file_status.h @@ -10,7 +10,6 @@ #ifndef _LIBCPP___FILESYSTEM_FILE_STATUS_H #define _LIBCPP___FILESYSTEM_FILE_STATUS_H -#include <__availability> #include <__config> #include <__filesystem/file_type.h> #include <__filesystem/perms.h> diff --git a/libcxx/include/__filesystem/file_time_type.h b/libcxx/include/__filesystem/file_time_type.h index e086dbcc3f51df..63e4ae1578cfd9 100644 --- a/libcxx/include/__filesystem/file_time_type.h +++ b/libcxx/include/__filesystem/file_time_type.h @@ -10,7 +10,6 @@ #ifndef _LIBCPP___FILESYSTEM_FILE_TIME_TYPE_H #define _LIBCPP___FILESYSTEM_FILE_TIME_TYPE_H -#include <__availability> #include <__chrono/file_clock.h> #include <__chrono/time_point.h> #include <__config> diff --git a/libcxx/include/__filesystem/file_type.h b/libcxx/include/__filesystem/file_type.h index c509085d90de0d..e4ac1dfee9ed9b 100644 --- a/libcxx/include/__filesystem/file_type.h +++ b/libcxx/include/__filesystem/file_type.h @@ -10,7 +10,6 @@ #ifndef _LIBCPP___FILESYSTEM_FILE_TYPE_H #define _LIBCPP___FILESYSTEM_FILE_TYPE_H -#include <__availability> #include <__config> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__filesystem/perm_options.h b/libcxx/include/__filesystem/perm_options.h index 529ef13558e972..64c16ee60a17d0 100644 --- a/libcxx/include/__filesystem/perm_options.h +++ b/libcxx/include/__filesystem/perm_options.h @@ -10,7 +10,6 @@ #ifndef _LIBCPP___FILESYSTEM_PERM_OPTIONS_H #define _LIBCPP___FILESYSTEM_PERM_OPTIONS_H -#include <__availability> #include <__config> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__filesystem/perms.h b/libcxx/include/__filesystem/perms.h index 
8f5f9a7e8248ef..458f1e6e534833 100644 --- a/libcxx/include/__filesystem/perms.h +++ b/libcxx/include/__filesystem/perms.h @@ -10,7 +10,6 @@ #ifndef _LIBCPP___FILESYSTEM_PERMS_H #define _LIBCPP___FILESYSTEM_PERMS_H -#include <__availability> #include <__config> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) diff --git a/libcxx/include/__filesystem/space_info.h b/libcxx/include/__filesystem/space_info.h index 2e80ae3b2c1120..3fa57d33096fc8 100644 --- a/libcxx/include/__filesystem/space_info.h +++ b/libcxx/include/__filesystem/space_info.h @@ -10,7 +10,6 @@ #ifndef _LIBCPP___FILESYSTEM_SPACE_INFO_H #define _LIBCPP___FILESYSTEM_SPACE_INFO_H -#include <__availability> #include <__config> #include diff --git a/libcxx/include/__format/format_args.h b/libcxx/include/__format/format_args.h index 79fe51f96c6a50..a5fde36a298174 100644 --- a/libcxx/include/__format/format_args.h +++ b/libcxx/include/__format/format_args.h @@ -10,7 +10,6 @@ #ifndef _LIBCPP___FORMAT_FORMAT_ARGS_H #define _LIBCPP___FORMAT_FORMAT_ARGS_H -#include <__availability> #include <__config> #include <__format/format_arg.h> #include <__format/format_arg_store.h> diff --git a/libcxx/include/__format/format_context.h b/libcxx/include/__format/format_context.h index bf603c5c62d9db..087d4bf289b878 100644 --- a/libcxx/include/__format/format_context.h +++ b/libcxx/include/__format/format_context.h @@ -10,7 +10,6 @@ #ifndef _LIBCPP___FORMAT_FORMAT_CONTEXT_H #define _LIBCPP___FORMAT_FORMAT_CONTEXT_H -#include <__availability> #include <__concepts/same_as.h> #include <__config> #include <__format/buffer.h> diff --git a/libcxx/include/__format/formatter.h b/libcxx/include/__format/formatter.h index 47e35789b8175b..e2f418f936ee10 100644 --- a/libcxx/include/__format/formatter.h +++ b/libcxx/include/__format/formatter.h @@ -10,7 +10,6 @@ #ifndef _LIBCPP___FORMAT_FORMATTER_H #define _LIBCPP___FORMAT_FORMATTER_H -#include <__availability> #include <__config> #include <__fwd/format.h> diff --git 
a/libcxx/include/__format/formatter_bool.h b/libcxx/include/__format/formatter_bool.h index 5e3daff7b3dba6..17dc69541e8fe1 100644 --- a/libcxx/include/__format/formatter_bool.h +++ b/libcxx/include/__format/formatter_bool.h @@ -12,7 +12,6 @@ #include <__algorithm/copy.h> #include <__assert> -#include <__availability> #include <__config> #include <__format/concepts.h> #include <__format/format_parse_context.h> diff --git a/libcxx/include/__format/formatter_char.h b/libcxx/include/__format/formatter_char.h index 3358d422252f43..d33e84368a7650 100644 --- a/libcxx/include/__format/formatter_char.h +++ b/libcxx/include/__format/formatter_char.h @@ -10,7 +10,6 @@ #ifndef _LIBCPP___FORMAT_FORMATTER_CHAR_H #define _LIBCPP___FORMAT_FORMATTER_CHAR_H -#include <__availability> #include <__concepts/same_as.h> #include <__config> #include <__format/concepts.h> diff --git a/libcxx/include/__format/formatter_integer.h b/libcxx/include/__format/formatter_integer.h index d57082b3881baa..41400f00478eb4 100644 --- a/libcxx/include/__format/formatter_integer.h +++ b/libcxx/include/__format/formatter_integer.h @@ -10,7 +10,6 @@ #ifndef _LIBCPP___FORMAT_FORMATTER_INTEGER_H #define _LIBCPP___FORMAT_FORMATTER_INTEGER_H -#include <__availability> #include <__concepts/arithmetic.h> #include <__config> #include <__format/concepts.h> diff --git a/libcxx/include/__format/formatter_pointer.h b/libcxx/include/__format/formatter_pointer.h index 3373996ec3d5fa..6941343efd91f9 100644 --- a/libcxx/include/__format/formatter_pointer.h +++ b/libcxx/include/__format/formatter_pointer.h @@ -10,7 +10,6 @@ #ifndef _LIBCPP___FORMAT_FORMATTER_POINTER_H #define _LIBCPP___FORMAT_FORMATTER_POINTER_H -#include <__availability> #include <__config> #include <__format/concepts.h> #include <__format/format_parse_context.h> diff --git a/libcxx/include/__format/formatter_string.h b/libcxx/include/__format/formatter_string.h index d1ccfb9b5f7dc9..347439fc8dff13 100644 --- a/libcxx/include/__format/formatter_string.h 
+++ b/libcxx/include/__format/formatter_string.h @@ -10,7 +10,6 @@ #ifndef _LIBCPP___FORMAT_FORMATTER_STRING_H #define _LIBCPP___FORMAT_FORMATTER_STRING_H -#include <__availability> #include <__config> #include <__format/concepts.h> #include <__format/format_parse_context.h> diff --git a/libcxx/include/__fwd/format.h b/libcxx/include/__fwd/format.h index 6f5c71243711fe..b30c220f8a0435 100644 --- a/libcxx/include/__fwd/format.h +++ b/libcxx/include/__fwd/format.h @@ -10,7 +10,6 @@ #ifndef _LIBCPP___FWD_FORMAT_H #define _LIBCPP___FWD_FORMAT_H -#include <__availability> #include <__config> #include <__iterator/concepts.h> diff --git a/libcxx/include/__locale b/libcxx/include/__locale index 2186db84933103..fab87f0d6a2795 100644 --- a/libcxx/include/__locale +++ b/libcxx/include/__locale @@ -10,7 +10,6 @@ #ifndef _LIBCPP___LOCALE #define _LIBCPP___LOCALE -#include <__availability> #include <__config> #include <__locale_dir/locale_base_api.h> #include <__memory/shared_ptr.h> // __shared_count diff --git a/libcxx/include/__memory/shared_ptr.h b/libcxx/include/__memory/shared_ptr.h index 794a794d8fd85a..a8ff189df2aa52 100644 --- a/libcxx/include/__memory/shared_ptr.h +++ b/libcxx/include/__memory/shared_ptr.h @@ -10,7 +10,6 @@ #ifndef _LIBCPP___MEMORY_SHARED_PTR_H #define _LIBCPP___MEMORY_SHARED_PTR_H -#include <__availability> #include <__compare/compare_three_way.h> #include <__compare/ordering.h> #include <__config> diff --git a/libcxx/include/__thread/support/pthread.h b/libcxx/include/__thread/support/pthread.h index e194e5c68ad339..531f3e71de8397 100644 --- a/libcxx/include/__thread/support/pthread.h +++ b/libcxx/include/__thread/support/pthread.h @@ -10,7 +10,6 @@ #ifndef _LIBCPP___THREAD_SUPPORT_PTHREAD_H #define _LIBCPP___THREAD_SUPPORT_PTHREAD_H -#include <__availability> #include <__chrono/convert_to_timespec.h> #include <__chrono/duration.h> #include <__config> diff --git a/libcxx/include/any b/libcxx/include/any index a6212fedfa2cd0..0e66890593c389 100644 --- 
a/libcxx/include/any +++ b/libcxx/include/any @@ -80,7 +80,6 @@ namespace std { */ -#include <__availability> #include <__config> #include <__memory/allocator.h> #include <__memory/allocator_destructor.h> diff --git a/libcxx/include/future b/libcxx/include/future index fda1591818a667..3c228686063e4a 100644 --- a/libcxx/include/future +++ b/libcxx/include/future @@ -369,7 +369,6 @@ template struct uses_allocator, Alloc>; #endif #include <__assert> -#include <__availability> #include <__chrono/duration.h> #include <__chrono/time_point.h> #include <__exception/exception_ptr.h> diff --git a/libcxx/include/new b/libcxx/include/new index 988f7a84422c84..5a245dc5ef4596 100644 --- a/libcxx/include/new +++ b/libcxx/include/new @@ -86,7 +86,6 @@ void operator delete[](void* ptr, void*) noexcept; */ -#include <__availability> #include <__config> #include <__exception/exception.h> #include <__type_traits/is_function.h> diff --git a/libcxx/include/shared_mutex b/libcxx/include/shared_mutex index 38b559e8930fc5..9cc391db6fc5d9 100644 --- a/libcxx/include/shared_mutex +++ b/libcxx/include/shared_mutex @@ -128,7 +128,6 @@ template # error " is not supported since libc++ has been configured without support for threads." #endif -#include <__availability> #include <__chrono/duration.h> #include <__chrono/steady_clock.h> #include <__chrono/time_point.h> diff --git a/libcxx/include/thread b/libcxx/include/thread index ed70bde76094ae..68ce63bd0143df 100644 --- a/libcxx/include/thread +++ b/libcxx/include/thread @@ -92,7 +92,6 @@ void sleep_for(const chrono::duration& rel_time); # error " is not supported since libc++ has been configured without support for threads." 
#endif -#include <__availability> #include <__thread/formatter.h> #include <__thread/jthread.h> #include <__thread/support.h> diff --git a/libcxx/include/typeinfo b/libcxx/include/typeinfo index dafc7b89248eca..1ae075edd4b3c3 100644 --- a/libcxx/include/typeinfo +++ b/libcxx/include/typeinfo @@ -56,7 +56,6 @@ public: */ -#include <__availability> #include <__config> #include <__exception/exception.h> #include <__type_traits/is_constant_evaluated.h> From 2950283dddab03c183c1be2d7de9d4999cc86131 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Tue, 2 Apr 2024 08:14:04 -0400 Subject: [PATCH 088/201] [libc++] Simplify the implementation of (#86843) Libc++'s own is complicated by the need to handle various platform-specific macros and to support duplicate inclusion. In reality, we only need to add a declaration of nullptr_t to it, so we can simply include the underlying outside of our guards to let it handle re-inclusion itself. --- libcxx/include/stddef.h | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/libcxx/include/stddef.h b/libcxx/include/stddef.h index 887776b150e49d..470b5408336c6d 100644 --- a/libcxx/include/stddef.h +++ b/libcxx/include/stddef.h @@ -7,18 +7,6 @@ // //===----------------------------------------------------------------------===// -#if defined(__need_ptrdiff_t) || defined(__need_size_t) || defined(__need_wchar_t) || defined(__need_NULL) || \ - defined(__need_wint_t) - -# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -# endif - -# include_next - -#elif !defined(_LIBCPP_STDDEF_H) -# define _LIBCPP_STDDEF_H - /* stddef.h synopsis @@ -36,16 +24,19 @@ */ -# include <__config> +#include <__config> + +// Note: This include is outside of header guards because we sometimes get included multiple times +// with different defines and the underlying will know how to deal with that. 
+#include_next + +#ifndef _LIBCPP_STDDEF_H +# define _LIBCPP_STDDEF_H # if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header # endif -# if __has_include_next() -# include_next -# endif - # ifdef __cplusplus typedef decltype(nullptr) nullptr_t; # endif From a4798bb0b67533b37d6b34fd5292714aac3b17d9 Mon Sep 17 00:00:00 2001 From: jeanPerier Date: Tue, 2 Apr 2024 14:29:29 +0200 Subject: [PATCH 089/201] [flang][NFC] use mlir::SymbolTable in lowering (#86673) Whenever lowering is checking if a function or global already exists in the mlir::Module, it was doing module->lookup. On big programs (~5000 globals and functions), this causes important slowdowns because these lookups are linear. Use mlir::SymbolTable to speed-up these lookups. The SymbolTable has to be created from the ModuleOp and maintained in sync. It is therefore placed in the converter, and FirOPBuilders can take a pointer to it to speed-up the lookups. This patch does not bring mlir::SymbolTable to FIR/HLFIR passes, but some passes creating a lot of runtime calls could benefit from it too. More analysis will be needed. As an example of the speed-ups, this patch speeds-up compilation of Whizard compare_amplitude_UFO.F90 from 5 mins to 2 mins on my machine (there is still room for speed-ups). 
--- flang/include/flang/Lower/AbstractConverter.h | 13 ++++ .../flang/Optimizer/Builder/FIRBuilder.h | 66 +++++++++---------- .../flang/Optimizer/Dialect/FIROpsSupport.h | 19 +++--- flang/lib/Lower/Bridge.cpp | 23 +++++-- flang/lib/Lower/CallInterface.cpp | 9 ++- flang/lib/Lower/OpenACC.cpp | 6 +- flang/lib/Optimizer/Builder/FIRBuilder.cpp | 60 +++++++++++++---- flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 6 +- .../Optimizer/Builder/LowLevelIntrinsics.cpp | 62 +++++++++-------- .../Optimizer/Builder/PPCIntrinsicCall.cpp | 34 +++++----- flang/lib/Optimizer/Dialect/FIROps.cpp | 28 ++++++-- .../Transforms/SimplifyIntrinsics.cpp | 7 +- 12 files changed, 206 insertions(+), 127 deletions(-) diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h index 32e7a5e2b04061..d5dab9040d22bd 100644 --- a/flang/include/flang/Lower/AbstractConverter.h +++ b/flang/include/flang/Lower/AbstractConverter.h @@ -23,6 +23,10 @@ #include "mlir/IR/Operation.h" #include "llvm/ADT/ArrayRef.h" +namespace mlir { +class SymbolTable; +} + namespace fir { class KindMapping; class FirOpBuilder; @@ -305,6 +309,15 @@ class AbstractConverter { virtual Fortran::lower::SymbolBox lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) = 0; + /// Return the mlir::SymbolTable associated to the ModuleOp. + /// Look-ups are faster using it than using module.lookup<>, + /// but the module op should be queried in case of failure + /// because this symbol table is not guaranteed to contain + /// all the symbols from the ModuleOp (the symbol table should + /// always be provided to the builder helper creating globals and + /// functions in order to be in sync). + virtual mlir::SymbolTable *getMLIRSymbolTable() = 0; + private: /// Options controlling lowering behavior. 
const Fortran::lower::LoweringOptions &loweringOptions; diff --git a/flang/include/flang/Optimizer/Builder/FIRBuilder.h b/flang/include/flang/Optimizer/Builder/FIRBuilder.h index d61bf681be6194..940866b25d2fe8 100644 --- a/flang/include/flang/Optimizer/Builder/FIRBuilder.h +++ b/flang/include/flang/Optimizer/Builder/FIRBuilder.h @@ -28,6 +28,10 @@ #include #include +namespace mlir { +class SymbolTable; +} + namespace fir { class AbstractArrayBox; class ExtendedValue; @@ -42,8 +46,10 @@ class BoxValue; /// patterns. class FirOpBuilder : public mlir::OpBuilder, public mlir::OpBuilder::Listener { public: - explicit FirOpBuilder(mlir::Operation *op, fir::KindMapping kindMap) - : OpBuilder{op, /*listener=*/this}, kindMap{std::move(kindMap)} {} + explicit FirOpBuilder(mlir::Operation *op, fir::KindMapping kindMap, + mlir::SymbolTable *symbolTable = nullptr) + : OpBuilder{op, /*listener=*/this}, kindMap{std::move(kindMap)}, + symbolTable{symbolTable} {} explicit FirOpBuilder(mlir::OpBuilder &builder, fir::KindMapping kindMap) : OpBuilder(builder), OpBuilder::Listener(), kindMap{std::move(kindMap)} { setListener(this); @@ -69,13 +75,14 @@ class FirOpBuilder : public mlir::OpBuilder, public mlir::OpBuilder::Listener { // The listener self-reference has to be updated in case of copy-construction. FirOpBuilder(const FirOpBuilder &other) : OpBuilder(other), OpBuilder::Listener(), kindMap{other.kindMap}, - fastMathFlags{other.fastMathFlags} { + fastMathFlags{other.fastMathFlags}, symbolTable{other.symbolTable} { setListener(this); } FirOpBuilder(FirOpBuilder &&other) : OpBuilder(other), OpBuilder::Listener(), - kindMap{std::move(other.kindMap)}, fastMathFlags{other.fastMathFlags} { + kindMap{std::move(other.kindMap)}, fastMathFlags{other.fastMathFlags}, + symbolTable{other.symbolTable} { setListener(this); } @@ -95,6 +102,9 @@ class FirOpBuilder : public mlir::OpBuilder, public mlir::OpBuilder::Listener { /// Get a reference to the kind map. 
const fir::KindMapping &getKindMap() { return kindMap; } + /// Get func.func/fir.global symbol table attached to this builder if any. + mlir::SymbolTable *getMLIRSymbolTable() { return symbolTable; } + /// Get the default integer type [[maybe_unused]] mlir::IntegerType getDefaultIntegerType() { return getIntegerType( @@ -280,24 +290,27 @@ class FirOpBuilder : public mlir::OpBuilder, public mlir::OpBuilder::Listener { /// Get a function by name. If the function exists in the current module, it /// is returned. Otherwise, a null FuncOp is returned. mlir::func::FuncOp getNamedFunction(llvm::StringRef name) { - return getNamedFunction(getModule(), name); + return getNamedFunction(getModule(), getMLIRSymbolTable(), name); } - static mlir::func::FuncOp getNamedFunction(mlir::ModuleOp module, - llvm::StringRef name); + static mlir::func::FuncOp + getNamedFunction(mlir::ModuleOp module, const mlir::SymbolTable *symbolTable, + llvm::StringRef name); /// Get a function by symbol name. The result will be null if there is no /// function with the given symbol in the module. mlir::func::FuncOp getNamedFunction(mlir::SymbolRefAttr symbol) { - return getNamedFunction(getModule(), symbol); + return getNamedFunction(getModule(), getMLIRSymbolTable(), symbol); } - static mlir::func::FuncOp getNamedFunction(mlir::ModuleOp module, - mlir::SymbolRefAttr symbol); + static mlir::func::FuncOp + getNamedFunction(mlir::ModuleOp module, const mlir::SymbolTable *symbolTable, + mlir::SymbolRefAttr symbol); fir::GlobalOp getNamedGlobal(llvm::StringRef name) { - return getNamedGlobal(getModule(), name); + return getNamedGlobal(getModule(), getMLIRSymbolTable(), name); } static fir::GlobalOp getNamedGlobal(mlir::ModuleOp module, + const mlir::SymbolTable *symbolTable, llvm::StringRef name); /// Lazy creation of fir.convert op. 
@@ -313,35 +326,18 @@ class FirOpBuilder : public mlir::OpBuilder, public mlir::OpBuilder::Listener { /// result of the load if it was created, otherwise return \p val mlir::Value loadIfRef(mlir::Location loc, mlir::Value val); - /// Create a new FuncOp. If the function may have already been created, use - /// `addNamedFunction` instead. + /// Determine if the named function is already in the module. Return the + /// instance if found, otherwise add a new named function to the module. mlir::func::FuncOp createFunction(mlir::Location loc, llvm::StringRef name, mlir::FunctionType ty) { - return createFunction(loc, getModule(), name, ty); + return createFunction(loc, getModule(), name, ty, getMLIRSymbolTable()); } static mlir::func::FuncOp createFunction(mlir::Location loc, mlir::ModuleOp module, llvm::StringRef name, - mlir::FunctionType ty); - - /// Determine if the named function is already in the module. Return the - /// instance if found, otherwise add a new named function to the module. - mlir::func::FuncOp addNamedFunction(mlir::Location loc, llvm::StringRef name, - mlir::FunctionType ty) { - if (auto func = getNamedFunction(name)) - return func; - return createFunction(loc, name, ty); - } - - static mlir::func::FuncOp addNamedFunction(mlir::Location loc, - mlir::ModuleOp module, - llvm::StringRef name, - mlir::FunctionType ty) { - if (auto func = getNamedFunction(module, name)) - return func; - return createFunction(loc, module, name, ty); - } + mlir::FunctionType ty, + mlir::SymbolTable *); /// Cast the input value to IndexType. mlir::Value convertToIndexType(mlir::Location loc, mlir::Value val) { @@ -515,6 +511,10 @@ class FirOpBuilder : public mlir::OpBuilder, public mlir::OpBuilder::Listener { /// FastMathFlags that need to be set for operations that support /// mlir::arith::FastMathAttr. mlir::arith::FastMathFlags fastMathFlags{}; + + /// fir::GlobalOp and func::FuncOp symbol table to speed-up + /// lookups. 
+ mlir::SymbolTable *symbolTable = nullptr; }; } // namespace fir diff --git a/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h b/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h index e8226b6df58ca2..f29e44504acb63 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h +++ b/flang/include/flang/Optimizer/Dialect/FIROpsSupport.h @@ -52,16 +52,19 @@ inline bool pureCall(mlir::Operation *op) { /// Get or create a FuncOp in a module. /// /// If `module` already contains FuncOp `name`, it is returned. Otherwise, a new -/// FuncOp is created, and that new FuncOp is returned. -mlir::func::FuncOp -createFuncOp(mlir::Location loc, mlir::ModuleOp module, llvm::StringRef name, - mlir::FunctionType type, - llvm::ArrayRef attrs = {}); - -/// Get or create a GlobalOp in a module. +/// FuncOp is created, and that new FuncOp is returned. A symbol table can +/// be provided to speed-up the lookups. +mlir::func::FuncOp createFuncOp(mlir::Location loc, mlir::ModuleOp module, + llvm::StringRef name, mlir::FunctionType type, + llvm::ArrayRef attrs = {}, + const mlir::SymbolTable *symbolTable = nullptr); + +/// Get or create a GlobalOp in a module. A symbol table can be provided to +/// speed-up the lookups. fir::GlobalOp createGlobalOp(mlir::Location loc, mlir::ModuleOp module, llvm::StringRef name, mlir::Type type, - llvm::ArrayRef attrs = {}); + llvm::ArrayRef attrs = {}, + const mlir::SymbolTable *symbolTable = nullptr); /// Attribute to mark Fortran entities with the CONTIGUOUS attribute. 
constexpr llvm::StringRef getContiguousAttrName() { return "fir.contiguous"; } diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 91b898eb513e05..5bba0978617c79 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -273,7 +273,8 @@ class FirConverter : public Fortran::lower::AbstractConverter { public: explicit FirConverter(Fortran::lower::LoweringBridge &bridge) : Fortran::lower::AbstractConverter(bridge.getLoweringOptions()), - bridge{bridge}, foldingContext{bridge.createFoldingContext()} {} + bridge{bridge}, foldingContext{bridge.createFoldingContext()}, + mlirSymbolTable{bridge.getModule()} {} virtual ~FirConverter() = default; /// Convert the PFT to FIR. @@ -329,8 +330,8 @@ class FirConverter : public Fortran::lower::AbstractConverter { [&](Fortran::lower::pft::BlockDataUnit &b) {}, [&](Fortran::lower::pft::CompilerDirectiveUnit &d) {}, [&](Fortran::lower::pft::OpenACCDirectiveUnit &d) { - builder = new fir::FirOpBuilder(bridge.getModule(), - bridge.getKindMap()); + builder = new fir::FirOpBuilder( + bridge.getModule(), bridge.getKindMap(), &mlirSymbolTable); Fortran::lower::genOpenACCRoutineConstruct( *this, bridge.getSemanticsContext(), bridge.getModule(), d.routine, accRoutineInfos); @@ -1036,6 +1037,8 @@ class FirConverter : public Fortran::lower::AbstractConverter { return {}; } + mlir::SymbolTable *getMLIRSymbolTable() override { return &mlirSymbolTable; } + /// Add the symbol to the local map and return `true`. If the symbol is /// already in the map and \p forced is `false`, the map is not updated. /// Instead the value `false` is returned. 
@@ -4571,7 +4574,8 @@ class FirConverter : public Fortran::lower::AbstractConverter { llvm::dbgs() << "\n"); Fortran::lower::CalleeInterface callee(funit, *this); mlir::func::FuncOp func = callee.addEntryBlockAndMapArguments(); - builder = new fir::FirOpBuilder(func, bridge.getKindMap()); + builder = + new fir::FirOpBuilder(func, bridge.getKindMap(), &mlirSymbolTable); assert(builder && "FirOpBuilder did not instantiate"); builder->setFastMathFlags(bridge.getLoweringOptions().getMathOptions()); builder->setInsertionPointToStart(&func.front()); @@ -4839,12 +4843,14 @@ class FirConverter : public Fortran::lower::AbstractConverter { // FIXME: get rid of the bogus function context and instantiate the // globals directly into the module. mlir::MLIRContext *context = &getMLIRContext(); + mlir::SymbolTable *symbolTable = getMLIRSymbolTable(); mlir::func::FuncOp func = fir::FirOpBuilder::createFunction( mlir::UnknownLoc::get(context), getModuleOp(), fir::NameUniquer::doGenerated("Sham"), - mlir::FunctionType::get(context, std::nullopt, std::nullopt)); + mlir::FunctionType::get(context, std::nullopt, std::nullopt), + symbolTable); func.addEntryBlock(); - builder = new fir::FirOpBuilder(func, bridge.getKindMap()); + builder = new fir::FirOpBuilder(func, bridge.getKindMap(), symbolTable); assert(builder && "FirOpBuilder did not instantiate"); builder->setFastMathFlags(bridge.getLoweringOptions().getMathOptions()); createGlobals(); @@ -5336,6 +5342,11 @@ class FirConverter : public Fortran::lower::AbstractConverter { /// utilities to deal with procedure pointer components whose arguments have /// the type of the containing derived type. Fortran::lower::TypeConstructionStack typeConstructionStack; + /// MLIR symbol table of the fir.global/func.func operations. Note that it is + /// not guaranteed to contain all operations of the ModuleOp with Symbol + /// attribute since mlirSymbolTable must pro-actively be maintained when + /// new Symbol operations are created. 
+ mlir::SymbolTable mlirSymbolTable; }; } // namespace diff --git a/flang/lib/Lower/CallInterface.cpp b/flang/lib/Lower/CallInterface.cpp index c65becc497459c..29cdb3cff589ba 100644 --- a/flang/lib/Lower/CallInterface.cpp +++ b/flang/lib/Lower/CallInterface.cpp @@ -667,11 +667,13 @@ void Fortran::lower::CallInterface::declare() { if (!side().isIndirectCall()) { std::string name = side().getMangledName(); mlir::ModuleOp module = converter.getModuleOp(); - func = fir::FirOpBuilder::getNamedFunction(module, name); + mlir::SymbolTable *symbolTable = converter.getMLIRSymbolTable(); + func = fir::FirOpBuilder::getNamedFunction(module, symbolTable, name); if (!func) { mlir::Location loc = side().getCalleeLocation(); mlir::FunctionType ty = genFunctionType(); - func = fir::FirOpBuilder::createFunction(loc, module, name, ty); + func = + fir::FirOpBuilder::createFunction(loc, module, name, ty, symbolTable); if (const Fortran::semantics::Symbol *sym = side().getProcedureSymbol()) { if (side().isMainProgram()) { func->setAttr(fir::getSymbolAttrName(), @@ -1644,7 +1646,8 @@ mlir::func::FuncOp Fortran::lower::getOrDeclareFunction( Fortran::lower::AbstractConverter &converter) { mlir::ModuleOp module = converter.getModuleOp(); std::string name = getProcMangledName(proc, converter); - mlir::func::FuncOp func = fir::FirOpBuilder::getNamedFunction(module, name); + mlir::func::FuncOp func = fir::FirOpBuilder::getNamedFunction( + module, converter.getMLIRSymbolTable(), name); if (func) return func; diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index 6e6714454f0591..d933c07aba0e0c 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -3821,7 +3821,8 @@ void Fortran::lower::genOpenACCRoutineConstruct( std::string funcName; if (name) { funcName = converter.mangleName(*name->symbol); - funcOp = builder.getNamedFunction(mod, funcName); + funcOp = + builder.getNamedFunction(mod, builder.getMLIRSymbolTable(), funcName); } else { 
Fortran::semantics::Scope &scope = semanticsContext.FindScope(routineConstruct.source); @@ -3833,7 +3834,8 @@ void Fortran::lower::genOpenACCRoutineConstruct( : nullptr}; if (subpDetails && subpDetails->isInterface()) { funcName = converter.mangleName(*progUnit.symbol()); - funcOp = builder.getNamedFunction(mod, funcName); + funcOp = + builder.getNamedFunction(mod, builder.getMLIRSymbolTable(), funcName); } else { funcOp = builder.getFunction(); funcName = funcOp.getName(); diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp index 2bcd5e5914027d..e4362b2f9e6945 100644 --- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp +++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp @@ -36,26 +36,56 @@ static llvm::cl::opt "name"), llvm::cl::init(32)); -mlir::func::FuncOp fir::FirOpBuilder::createFunction(mlir::Location loc, - mlir::ModuleOp module, - llvm::StringRef name, - mlir::FunctionType ty) { - return fir::createFuncOp(loc, module, name, ty); +mlir::func::FuncOp +fir::FirOpBuilder::createFunction(mlir::Location loc, mlir::ModuleOp module, + llvm::StringRef name, mlir::FunctionType ty, + mlir::SymbolTable *symbolTable) { + return fir::createFuncOp(loc, module, name, ty, /*attrs*/ {}, symbolTable); } -mlir::func::FuncOp fir::FirOpBuilder::getNamedFunction(mlir::ModuleOp modOp, - llvm::StringRef name) { +mlir::func::FuncOp +fir::FirOpBuilder::getNamedFunction(mlir::ModuleOp modOp, + const mlir::SymbolTable *symbolTable, + llvm::StringRef name) { + if (symbolTable) + if (auto func = symbolTable->lookup(name)) { +#ifdef EXPENSIVE_CHECKS + assert(func == modOp.lookupSymbol(name) && + "symbolTable and module out of sync"); +#endif + return func; + } return modOp.lookupSymbol(name); } mlir::func::FuncOp fir::FirOpBuilder::getNamedFunction(mlir::ModuleOp modOp, + const mlir::SymbolTable *symbolTable, mlir::SymbolRefAttr symbol) { + if (symbolTable) + if (auto func = symbolTable->lookup( + symbol.getLeafReference())) { +#ifdef 
EXPENSIVE_CHECKS + assert(func == modOp.lookupSymbol(symbol) && + "symbolTable and module out of sync"); +#endif + return func; + } return modOp.lookupSymbol(symbol); } -fir::GlobalOp fir::FirOpBuilder::getNamedGlobal(mlir::ModuleOp modOp, - llvm::StringRef name) { +fir::GlobalOp +fir::FirOpBuilder::getNamedGlobal(mlir::ModuleOp modOp, + const mlir::SymbolTable *symbolTable, + llvm::StringRef name) { + if (symbolTable) + if (auto global = symbolTable->lookup(name)) { +#ifdef EXPENSIVE_CHECKS + assert(global == modOp.lookupSymbol(name) && + "symbolTable and module out of sync"); +#endif + return global; + } return modOp.lookupSymbol(name); } @@ -279,10 +309,10 @@ fir::GlobalOp fir::FirOpBuilder::createGlobal( mlir::Location loc, mlir::Type type, llvm::StringRef name, mlir::StringAttr linkage, mlir::Attribute value, bool isConst, bool isTarget, fir::CUDADataAttributeAttr cudaAttr) { + if (auto global = getNamedGlobal(name)) + return global; auto module = getModule(); auto insertPt = saveInsertionPoint(); - if (auto glob = module.lookupSymbol(name)) - return glob; setInsertionPoint(module.getBody(), module.getBody()->end()); llvm::SmallVector attrs; if (cudaAttr) { @@ -294,6 +324,8 @@ fir::GlobalOp fir::FirOpBuilder::createGlobal( auto glob = create(loc, name, isConst, isTarget, type, value, linkage, attrs); restoreInsertionPoint(insertPt); + if (symbolTable) + symbolTable->insert(glob); return glob; } @@ -301,10 +333,10 @@ fir::GlobalOp fir::FirOpBuilder::createGlobal( mlir::Location loc, mlir::Type type, llvm::StringRef name, bool isConst, bool isTarget, std::function bodyBuilder, mlir::StringAttr linkage, fir::CUDADataAttributeAttr cudaAttr) { + if (auto global = getNamedGlobal(name)) + return global; auto module = getModule(); auto insertPt = saveInsertionPoint(); - if (auto glob = module.lookupSymbol(name)) - return glob; setInsertionPoint(module.getBody(), module.getBody()->end()); auto glob = create(loc, name, isConst, isTarget, type, mlir::Attribute{}, 
linkage); @@ -314,6 +346,8 @@ fir::GlobalOp fir::FirOpBuilder::createGlobal( setInsertionPointToStart(&block); bodyBuilder(*this); restoreInsertionPoint(insertPt); + if (symbolTable) + symbolTable->insert(glob); return glob; } diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index ea1ef1f08aba20..069ba81cfe96ab 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -724,7 +724,7 @@ mlir::Value genLibCall(fir::FirOpBuilder &builder, mlir::Location loc, mlir::func::FuncOp funcOp = builder.getNamedFunction(libFuncName); if (!funcOp) { - funcOp = builder.addNamedFunction(loc, libFuncName, libFuncType); + funcOp = builder.createFunction(loc, libFuncName, libFuncType); // C-interoperability rules apply to these library functions. funcOp->setAttr(fir::getSymbolAttrName(), mlir::StringAttr::get(builder.getContext(), libFuncName)); @@ -1894,8 +1894,8 @@ mlir::func::FuncOp IntrinsicLibrary::getWrapper(GeneratorType generator, // Create local context to emit code into the newly created function // This new function is not linked to a source file location, only // its calls will be. 
- auto localBuilder = - std::make_unique(function, builder.getKindMap()); + auto localBuilder = std::make_unique( + function, builder.getKindMap(), builder.getMLIRSymbolTable()); localBuilder->setFastMathFlags(builder.getFastMathFlags()); localBuilder->setInsertionPointToStart(&function.front()); // Location of code inside wrapper of the wrapper is independent from diff --git a/flang/lib/Optimizer/Builder/LowLevelIntrinsics.cpp b/flang/lib/Optimizer/Builder/LowLevelIntrinsics.cpp index 1d07b1e724d745..bb5f77d5d4d1de 100644 --- a/flang/lib/Optimizer/Builder/LowLevelIntrinsics.cpp +++ b/flang/lib/Optimizer/Builder/LowLevelIntrinsics.cpp @@ -27,8 +27,8 @@ mlir::func::FuncOp fir::factory::getLlvmMemcpy(fir::FirOpBuilder &builder) { builder.getI1Type()}; auto memcpyTy = mlir::FunctionType::get(builder.getContext(), args, std::nullopt); - return builder.addNamedFunction(builder.getUnknownLoc(), - "llvm.memcpy.p0.p0.i64", memcpyTy); + return builder.createFunction(builder.getUnknownLoc(), + "llvm.memcpy.p0.p0.i64", memcpyTy); } mlir::func::FuncOp fir::factory::getLlvmMemmove(fir::FirOpBuilder &builder) { @@ -37,8 +37,8 @@ mlir::func::FuncOp fir::factory::getLlvmMemmove(fir::FirOpBuilder &builder) { builder.getI1Type()}; auto memmoveTy = mlir::FunctionType::get(builder.getContext(), args, std::nullopt); - return builder.addNamedFunction(builder.getUnknownLoc(), - "llvm.memmove.p0.p0.i64", memmoveTy); + return builder.createFunction(builder.getUnknownLoc(), + "llvm.memmove.p0.p0.i64", memmoveTy); } mlir::func::FuncOp fir::factory::getLlvmMemset(fir::FirOpBuilder &builder) { @@ -47,16 +47,15 @@ mlir::func::FuncOp fir::factory::getLlvmMemset(fir::FirOpBuilder &builder) { builder.getI1Type()}; auto memsetTy = mlir::FunctionType::get(builder.getContext(), args, std::nullopt); - return builder.addNamedFunction(builder.getUnknownLoc(), - "llvm.memset.p0.p0.i64", memsetTy); + return builder.createFunction(builder.getUnknownLoc(), + "llvm.memset.p0.p0.i64", memsetTy); } 
mlir::func::FuncOp fir::factory::getRealloc(fir::FirOpBuilder &builder) { auto ptrTy = builder.getRefType(builder.getIntegerType(8)); llvm::SmallVector args = {ptrTy, builder.getI64Type()}; auto reallocTy = mlir::FunctionType::get(builder.getContext(), args, {ptrTy}); - return builder.addNamedFunction(builder.getUnknownLoc(), "realloc", - reallocTy); + return builder.createFunction(builder.getUnknownLoc(), "realloc", reallocTy); } mlir::func::FuncOp @@ -64,8 +63,8 @@ fir::factory::getLlvmGetRounding(fir::FirOpBuilder &builder) { auto int32Ty = builder.getIntegerType(32); auto funcTy = mlir::FunctionType::get(builder.getContext(), std::nullopt, {int32Ty}); - return builder.addNamedFunction(builder.getUnknownLoc(), "llvm.get.rounding", - funcTy); + return builder.createFunction(builder.getUnknownLoc(), "llvm.get.rounding", + funcTy); } mlir::func::FuncOp @@ -73,8 +72,8 @@ fir::factory::getLlvmSetRounding(fir::FirOpBuilder &builder) { auto int32Ty = builder.getIntegerType(32); auto funcTy = mlir::FunctionType::get(builder.getContext(), {int32Ty}, std::nullopt); - return builder.addNamedFunction(builder.getUnknownLoc(), "llvm.set.rounding", - funcTy); + return builder.createFunction(builder.getUnknownLoc(), "llvm.set.rounding", + funcTy); } mlir::func::FuncOp fir::factory::getLlvmStackSave(fir::FirOpBuilder &builder) { @@ -82,8 +81,8 @@ mlir::func::FuncOp fir::factory::getLlvmStackSave(fir::FirOpBuilder &builder) { auto ptrTy = builder.getRefType(builder.getIntegerType(8)); auto funcTy = mlir::FunctionType::get(builder.getContext(), std::nullopt, {ptrTy}); - return builder.addNamedFunction(builder.getUnknownLoc(), "llvm.stacksave.p0", - funcTy); + return builder.createFunction(builder.getUnknownLoc(), "llvm.stacksave.p0", + funcTy); } mlir::func::FuncOp @@ -92,8 +91,8 @@ fir::factory::getLlvmStackRestore(fir::FirOpBuilder &builder) { auto ptrTy = builder.getRefType(builder.getIntegerType(8)); auto funcTy = mlir::FunctionType::get(builder.getContext(), {ptrTy}, 
std::nullopt); - return builder.addNamedFunction(builder.getUnknownLoc(), - "llvm.stackrestore.p0", funcTy); + return builder.createFunction(builder.getUnknownLoc(), "llvm.stackrestore.p0", + funcTy); } mlir::func::FuncOp @@ -101,24 +100,24 @@ fir::factory::getLlvmInitTrampoline(fir::FirOpBuilder &builder) { auto ptrTy = builder.getRefType(builder.getIntegerType(8)); auto funcTy = mlir::FunctionType::get(builder.getContext(), {ptrTy, ptrTy, ptrTy}, std::nullopt); - return builder.addNamedFunction(builder.getUnknownLoc(), - "llvm.init.trampoline", funcTy); + return builder.createFunction(builder.getUnknownLoc(), "llvm.init.trampoline", + funcTy); } mlir::func::FuncOp fir::factory::getLlvmAdjustTrampoline(fir::FirOpBuilder &builder) { auto ptrTy = builder.getRefType(builder.getIntegerType(8)); auto funcTy = mlir::FunctionType::get(builder.getContext(), {ptrTy}, {ptrTy}); - return builder.addNamedFunction(builder.getUnknownLoc(), - "llvm.adjust.trampoline", funcTy); + return builder.createFunction(builder.getUnknownLoc(), + "llvm.adjust.trampoline", funcTy); } mlir::func::FuncOp fir::factory::getFeclearexcept(fir::FirOpBuilder &builder) { auto int32Ty = builder.getIntegerType(32); auto funcTy = mlir::FunctionType::get(builder.getContext(), {int32Ty}, {int32Ty}); - return builder.addNamedFunction(builder.getUnknownLoc(), "feclearexcept", - funcTy); + return builder.createFunction(builder.getUnknownLoc(), "feclearexcept", + funcTy); } mlir::func::FuncOp @@ -126,38 +125,37 @@ fir::factory::getFedisableexcept(fir::FirOpBuilder &builder) { auto int32Ty = builder.getIntegerType(32); auto funcTy = mlir::FunctionType::get(builder.getContext(), {int32Ty}, {int32Ty}); - return builder.addNamedFunction(builder.getUnknownLoc(), "fedisableexcept", - funcTy); + return builder.createFunction(builder.getUnknownLoc(), "fedisableexcept", + funcTy); } mlir::func::FuncOp fir::factory::getFeenableexcept(fir::FirOpBuilder &builder) { auto int32Ty = builder.getIntegerType(32); auto funcTy = 
mlir::FunctionType::get(builder.getContext(), {int32Ty}, {int32Ty}); - return builder.addNamedFunction(builder.getUnknownLoc(), "feenableexcept", - funcTy); + return builder.createFunction(builder.getUnknownLoc(), "feenableexcept", + funcTy); } mlir::func::FuncOp fir::factory::getFegetexcept(fir::FirOpBuilder &builder) { auto int32Ty = builder.getIntegerType(32); auto funcTy = mlir::FunctionType::get(builder.getContext(), std::nullopt, {int32Ty}); - return builder.addNamedFunction(builder.getUnknownLoc(), "fegetexcept", - funcTy); + return builder.createFunction(builder.getUnknownLoc(), "fegetexcept", funcTy); } mlir::func::FuncOp fir::factory::getFeraiseexcept(fir::FirOpBuilder &builder) { auto int32Ty = builder.getIntegerType(32); auto funcTy = mlir::FunctionType::get(builder.getContext(), {int32Ty}, {int32Ty}); - return builder.addNamedFunction(builder.getUnknownLoc(), "feraiseexcept", - funcTy); + return builder.createFunction(builder.getUnknownLoc(), "feraiseexcept", + funcTy); } mlir::func::FuncOp fir::factory::getFetestexcept(fir::FirOpBuilder &builder) { auto int32Ty = builder.getIntegerType(32); auto funcTy = mlir::FunctionType::get(builder.getContext(), {int32Ty}, {int32Ty}); - return builder.addNamedFunction(builder.getUnknownLoc(), "fetestexcept", - funcTy); + return builder.createFunction(builder.getUnknownLoc(), "fetestexcept", + funcTy); } diff --git a/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp b/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp index ab0d5079d8afe0..e588b19dded4f1 100644 --- a/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/PPCIntrinsicCall.cpp @@ -1084,11 +1084,11 @@ void PPCIntrinsicLibrary::genMtfsf(llvm::ArrayRef args) { if (isImm) { libFuncType = genFuncType, Ty::Integer<4>>( builder.getContext(), builder); - funcOp = builder.addNamedFunction(loc, "llvm.ppc.mtfsfi", libFuncType); + funcOp = builder.createFunction(loc, "llvm.ppc.mtfsfi", libFuncType); } else { libFuncType = genFuncType, 
Ty::Real<8>>( builder.getContext(), builder); - funcOp = builder.addNamedFunction(loc, "llvm.ppc.mtfsf", libFuncType); + funcOp = builder.createFunction(loc, "llvm.ppc.mtfsf", libFuncType); } builder.create(loc, funcOp, scalarArgs); } @@ -1116,7 +1116,7 @@ PPCIntrinsicLibrary::genVecAbs(mlir::Type resultType, genFuncType, Ty::RealVector<8>>(context, builder); } - funcOp = builder.addNamedFunction(loc, fname, ftype); + funcOp = builder.createFunction(loc, fname, ftype); auto callOp{builder.create(loc, funcOp, argBases[0])}; return callOp.getResult(0); } else if (auto eleTy = vTypeInfo.eleTy.dyn_cast()) { @@ -1155,7 +1155,7 @@ PPCIntrinsicLibrary::genVecAbs(mlir::Type resultType, default: llvm_unreachable("invalid integer size"); } - funcOp = builder.addNamedFunction(loc, fname, ftype); + funcOp = builder.createFunction(loc, fname, ftype); mlir::Value args[] = {zeroSubVarg1, varg1}; auto callOp{builder.create(loc, funcOp, args)}; @@ -1339,7 +1339,7 @@ PPCIntrinsicLibrary::genVecAnyCompare(mlir::Type resultType, } assert((!fname.empty() && ftype) && "invalid type"); - mlir::func::FuncOp funcOp{builder.addNamedFunction(loc, fname, ftype)}; + mlir::func::FuncOp funcOp{builder.createFunction(loc, fname, ftype)}; auto callOp{builder.create(loc, funcOp, cmpArgs)}; return callOp.getResult(0); } @@ -1445,7 +1445,7 @@ PPCIntrinsicLibrary::genVecCmp(mlir::Type resultType, std::pair funcTyNam{ getVecCmpFuncTypeAndName(vecTyInfo, vop, builder)}; - mlir::func::FuncOp funcOp = builder.addNamedFunction( + mlir::func::FuncOp funcOp = builder.createFunction( loc, std::get<0>(funcTyNam), std::get<1>(funcTyNam)); mlir::Value res{nullptr}; @@ -1572,7 +1572,7 @@ PPCIntrinsicLibrary::genVecConvert(mlir::Type resultType, Ty::Integer<4>>(context, builder)}; const llvm::StringRef fname{(isUnsigned) ? 
"llvm.ppc.altivec.vcfux" : "llvm.ppc.altivec.vcfsx"}; - auto funcOp{builder.addNamedFunction(loc, fname, ftype)}; + auto funcOp{builder.createFunction(loc, fname, ftype)}; mlir::Value newArgs[] = {argBases[0], convArg}; auto callOp{builder.create(loc, funcOp, newArgs)}; @@ -1627,7 +1627,7 @@ PPCIntrinsicLibrary::genVecConvert(mlir::Type resultType, const llvm::StringRef fname{"llvm.ppc.vsx.xvcvspdp"}; auto ftype{ genFuncType, Ty::RealVector<4>>(context, builder)}; - auto funcOp{builder.addNamedFunction(loc, fname, ftype)}; + auto funcOp{builder.createFunction(loc, fname, ftype)}; auto callOp{builder.create(loc, funcOp, newArgs)}; return callOp.getResult(0); @@ -1635,7 +1635,7 @@ PPCIntrinsicLibrary::genVecConvert(mlir::Type resultType, const llvm::StringRef fname{"llvm.ppc.vsx.xvcvdpsp"}; auto ftype{ genFuncType, Ty::RealVector<8>>(context, builder)}; - auto funcOp{builder.addNamedFunction(loc, fname, ftype)}; + auto funcOp{builder.createFunction(loc, fname, ftype)}; newArgs[0] = builder.create(loc, funcOp, newArgs).getResult(0); auto fvf32Ty{newArgs[0].getType()}; @@ -1963,7 +1963,7 @@ PPCIntrinsicLibrary::genVecLdCallGrp(mlir::Type resultType, auto funcType{ mlir::FunctionType::get(context, {addr.getType()}, {intrinResTy})}; - auto funcOp{builder.addNamedFunction(loc, fname, funcType)}; + auto funcOp{builder.createFunction(loc, fname, funcType)}; auto result{ builder.create(loc, funcOp, parsedArgs).getResult(0)}; @@ -2022,7 +2022,7 @@ PPCIntrinsicLibrary::genVecLvsGrp(mlir::Type resultType, llvm_unreachable("invalid vector operation for generator"); } auto funcType{mlir::FunctionType::get(context, {addr.getType()}, {mlirTy})}; - auto funcOp{builder.addNamedFunction(loc, fname, funcType)}; + auto funcOp{builder.createFunction(loc, fname, funcType)}; auto result{ builder.create(loc, funcOp, parsedArgs).getResult(0)}; @@ -2057,8 +2057,8 @@ PPCIntrinsicLibrary::genVecNmaddMsub(mlir::Type resultType, genFuncType, Ty::RealVector<8>, Ty::RealVector<8>>( context, 
builder))}}; - auto funcOp{builder.addNamedFunction(loc, std::get<0>(fmaMap[width]), - std::get<1>(fmaMap[width]))}; + auto funcOp{builder.createFunction(loc, std::get<0>(fmaMap[width]), + std::get<1>(fmaMap[width]))}; if (vop == VecOp::Nmadd) { // vec_nmadd(arg1, arg2, arg3) = -fma(arg1, arg2, arg3) auto callOp{builder.create(loc, funcOp, newArgs)}; @@ -2110,7 +2110,7 @@ PPCIntrinsicLibrary::genVecPerm(mlir::Type resultType, builder.create(loc, vi32Ty, mArg1).getResult(); } - auto funcOp{builder.addNamedFunction( + auto funcOp{builder.createFunction( loc, "llvm.ppc.altivec.vperm", genFuncType, Ty::IntegerVector<4>, Ty::IntegerVector<4>, Ty::IntegerVector<1>>(context, @@ -2307,7 +2307,7 @@ PPCIntrinsicLibrary::genVecShift(mlir::Type resultType, } auto funcTy{genFuncType, Ty::IntegerVector<4>, Ty::IntegerVector<4>>(context, builder)}; - mlir::func::FuncOp funcOp{builder.addNamedFunction(loc, funcName, funcTy)}; + mlir::func::FuncOp funcOp{builder.createFunction(loc, funcName, funcTy)}; auto callOp{builder.create(loc, funcOp, mlirVecArgs)}; // If the result vector type is different from the original type, need @@ -2755,7 +2755,7 @@ void PPCIntrinsicLibrary::genMmaIntr(llvm::ArrayRef args) { auto context{builder.getContext()}; mlir::FunctionType intrFuncType{getMmaIrFuncType(context, IntrId)}; mlir::func::FuncOp funcOp{ - builder.addNamedFunction(loc, getMmaIrIntrName(IntrId), intrFuncType)}; + builder.createFunction(loc, getMmaIrIntrName(IntrId), intrFuncType)}; llvm::SmallVector intrArgs; // Depending on SubToFunc, change the subroutine call to a function call. 
@@ -2892,7 +2892,7 @@ void PPCIntrinsicLibrary::genVecStore(llvm::ArrayRef args) { auto funcType{ mlir::FunctionType::get(context, {stTy, addr.getType()}, std::nullopt)}; - mlir::func::FuncOp funcOp = builder.addNamedFunction(loc, fname, funcType); + mlir::func::FuncOp funcOp = builder.createFunction(loc, fname, funcType); llvm::SmallVector biArgs; diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp index 9bb10a42a3997c..dba2c30d1851bf 100644 --- a/flang/lib/Optimizer/Dialect/FIROps.cpp +++ b/flang/lib/Optimizer/Dialect/FIROps.cpp @@ -3677,10 +3677,19 @@ fir::parseSelector(mlir::OpAsmParser &parser, mlir::OperationState &result, return mlir::success(); } -mlir::func::FuncOp -fir::createFuncOp(mlir::Location loc, mlir::ModuleOp module, - llvm::StringRef name, mlir::FunctionType type, - llvm::ArrayRef attrs) { +mlir::func::FuncOp fir::createFuncOp(mlir::Location loc, mlir::ModuleOp module, + llvm::StringRef name, + mlir::FunctionType type, + llvm::ArrayRef attrs, + const mlir::SymbolTable *symbolTable) { + if (symbolTable) + if (auto f = symbolTable->lookup(name)) { +#ifdef EXPENSIVE_CHECKS + assert(f == module.lookupSymbol(name) && + "symbolTable and module out of sync"); +#endif + return f; + } if (auto f = module.lookupSymbol(name)) return f; mlir::OpBuilder modBuilder(module.getBodyRegion()); @@ -3692,7 +3701,16 @@ fir::createFuncOp(mlir::Location loc, mlir::ModuleOp module, fir::GlobalOp fir::createGlobalOp(mlir::Location loc, mlir::ModuleOp module, llvm::StringRef name, mlir::Type type, - llvm::ArrayRef attrs) { + llvm::ArrayRef attrs, + const mlir::SymbolTable *symbolTable) { + if (symbolTable) + if (auto g = symbolTable->lookup(name)) { +#ifdef EXPENSIVE_CHECKS + assert(g == module.lookupSymbol(name) && + "symbolTable and module out of sync"); +#endif + return g; + } if (auto g = module.lookupSymbol(name)) return g; mlir::OpBuilder modBuilder(module.getBodyRegion()); diff --git 
a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp index a11aa38c771bd1..f7820b6b8170ba 100644 --- a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp +++ b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp @@ -1004,10 +1004,8 @@ mlir::func::FuncOp SimplifyIntrinsicsPass::getOrCreateFunction( // We can also avoid this by using internal linkage, but // this may increase the size of final executable/shared library. std::string replacementName = mlir::Twine{baseName, "_simplified"}.str(); - mlir::ModuleOp module = builder.getModule(); // If we already have a function, just return it. - mlir::func::FuncOp newFunc = - fir::FirOpBuilder::getNamedFunction(module, replacementName); + mlir::func::FuncOp newFunc = builder.getNamedFunction(replacementName); mlir::FunctionType fType = typeGenerator(builder); if (newFunc) { assert(newFunc.getFunctionType() == fType && @@ -1017,8 +1015,7 @@ mlir::func::FuncOp SimplifyIntrinsicsPass::getOrCreateFunction( // Need to build the function! 
auto loc = mlir::UnknownLoc::get(builder.getContext()); - newFunc = - fir::FirOpBuilder::createFunction(loc, module, replacementName, fType); + newFunc = builder.createFunction(loc, replacementName, fType); auto inlineLinkage = mlir::LLVM::linkage::Linkage::LinkonceODR; auto linkage = mlir::LLVM::LinkageAttr::get(builder.getContext(), inlineLinkage); From eb08c0f1659d12524f58a01bf174177b8acedf2e Mon Sep 17 00:00:00 2001 From: Krystian Stasiowski Date: Tue, 2 Apr 2024 08:35:42 -0400 Subject: [PATCH 090/201] [Clang][Sema] Fix explicit specializations of member function templates with a deduced return type (#86817) Clang erroneously rejects the following: ``` template struct A { template auto f(); }; template<> template auto A::f(); // error: conflicting types for 'f' ``` This happens because the explicit specialization of `f` has its return type replaced with a dependent `AutoType` in `ActOnFunctionDeclarator`, but no such replacement occurs for the implicitly instantiated function template `A::f`. Since the return types don't match, the explicit specialization is diagnosed as an invalid redeclaration. This patch moves the replacement of the return type to `CheckFunctionDeclaration` so it also happens during instantiation. `setObjectOfFriendDecl` will have been called by then, so the `isFriend && CurContext->isDependentContext()` condition is made redundant & removed (as it already happens in `DeclContext::isDependentContext`). `Sema::IsOverload` only checks the _declared_ return type (which isn't changed by the adjustment), so adjusting the return type afterwards should be safe. 
--- .../clang-tidy/infrastructure/diagnostic.cpp | 4 +- clang/docs/ReleaseNotes.rst | 2 + clang/lib/Sema/SemaDecl.cpp | 46 ++++++++++++------- .../SemaCXX/deduced-return-type-cxx14.cpp | 18 ++++++++ 4 files changed, 50 insertions(+), 20 deletions(-) diff --git a/clang-tools-extra/test/clang-tidy/infrastructure/diagnostic.cpp b/clang-tools-extra/test/clang-tidy/infrastructure/diagnostic.cpp index d0efc5ca763753..57d930b26e64c0 100644 --- a/clang-tools-extra/test/clang-tidy/infrastructure/diagnostic.cpp +++ b/clang-tools-extra/test/clang-tidy/infrastructure/diagnostic.cpp @@ -25,7 +25,7 @@ // RUN: not clang-tidy -checks='-*,modernize-use-override' %T/diagnostics/input.cpp -- -DCOMPILATION_ERROR 2>&1 | FileCheck -check-prefix=CHECK6 -implicit-check-not='{{warning:|error:}}' %s // RUN: clang-tidy -checks='-*,modernize-use-override,clang-diagnostic-macro-redefined' %s -- -DMACRO_FROM_COMMAND_LINE -std=c++20 | FileCheck -check-prefix=CHECK4 -implicit-check-not='{{warning:|error:}}' %s // RUN: clang-tidy -checks='-*,modernize-use-override,clang-diagnostic-macro-redefined,clang-diagnostic-literal-conversion' %s -- -DMACRO_FROM_COMMAND_LINE -std=c++20 -Wno-macro-redefined | FileCheck --check-prefix=CHECK7 -implicit-check-not='{{warning:|error:}}' %s -// RUN: not clang-tidy -checks='-*,modernize-use-override' %s -- -std=c++20 -DPR64602 | FileCheck -check-prefix=CHECK8 -implicit-check-not='{{warning:|error:}}' %s +// RUN: clang-tidy -checks='-*,modernize-use-override' %s -- -std=c++20 -DPR64602 // CHECK1: error: no input files [clang-diagnostic-error] // CHECK1: error: no such file or directory: '{{.*}}nonexistent.cpp' [clang-diagnostic-error] @@ -68,6 +68,4 @@ auto S<>::foo(auto) { return 1; } -// CHECK8: error: conflicting types for 'foo' [clang-diagnostic-error] -// CHECK8: note: previous declaration is here #endif diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index b2faab1f1525b2..3a84ff16a1e4d4 100644 --- a/clang/docs/ReleaseNotes.rst +++ 
b/clang/docs/ReleaseNotes.rst @@ -483,6 +483,8 @@ Bug Fixes to C++ Support following the first `::` were ignored). - Fix an out-of-bounds crash when checking the validity of template partial specializations. (part of #GH86757). - Fix an issue caused by not handling invalid cases when substituting into the parameter mapping of a constraint. Fixes (#GH86757). +- Fixed a bug that prevented member function templates of class templates declared with a deduced return type + from being explicitly specialized for a given implicit instantiation of the class template. Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 6ff85c0c5c29da..5c1152896559b5 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -10124,23 +10124,6 @@ Sema::ActOnFunctionDeclarator(Scope *S, Declarator &D, DeclContext *DC, Diag(D.getDeclSpec().getVirtualSpecLoc(), diag::err_auto_fn_virtual); } - if (getLangOpts().CPlusPlus14 && - (NewFD->isDependentContext() || - (isFriend && CurContext->isDependentContext())) && - NewFD->getReturnType()->isUndeducedType()) { - // If the function template is referenced directly (for instance, as a - // member of the current instantiation), pretend it has a dependent type. - // This is not really justified by the standard, but is the only sane - // thing to do. - // FIXME: For a friend function, we have not marked the function as being - // a friend yet, so 'isDependentContext' on the FD doesn't work. - const FunctionProtoType *FPT = - NewFD->getType()->castAs(); - QualType Result = SubstAutoTypeDependent(FPT->getReturnType()); - NewFD->setType(Context.getFunctionType(Result, FPT->getParamTypes(), - FPT->getExtProtoInfo())); - } - // C++ [dcl.fct.spec]p3: // The inline specifier shall not appear on a block scope function // declaration. 
@@ -12112,6 +12095,35 @@ bool Sema::CheckFunctionDeclaration(Scope *S, FunctionDecl *NewFD, CheckConstPureAttributesUsage(*this, NewFD); + // C++ [dcl.spec.auto.general]p12: + // Return type deduction for a templated function with a placeholder in its + // declared type occurs when the definition is instantiated even if the + // function body contains a return statement with a non-type-dependent + // operand. + // + // C++ [temp.dep.expr]p3: + // An id-expression is type-dependent if it is a template-id that is not a + // concept-id and is dependent; or if its terminal name is: + // - [...] + // - associated by name lookup with one or more declarations of member + // functions of a class that is the current instantiation declared with a + // return type that contains a placeholder type, + // - [...] + // + // If this is a templated function with a placeholder in its return type, + // make the placeholder type dependent since it won't be deduced until the + // definition is instantiated. We do this here because it needs to happen + // for implicitly instantiated member functions/member function templates. + if (getLangOpts().CPlusPlus14 && + (NewFD->isDependentContext() && + NewFD->getReturnType()->isUndeducedType())) { + const FunctionProtoType *FPT = + NewFD->getType()->castAs(); + QualType NewReturnType = SubstAutoTypeDependent(FPT->getReturnType()); + NewFD->setType(Context.getFunctionType(NewReturnType, FPT->getParamTypes(), + FPT->getExtProtoInfo())); + } + // C++11 [dcl.constexpr]p8: // A constexpr specifier for a non-static member function that is not // a constructor declares that member function to be const. 
diff --git a/clang/test/SemaCXX/deduced-return-type-cxx14.cpp b/clang/test/SemaCXX/deduced-return-type-cxx14.cpp index 431d77ca785b8e..c33e07088ba32f 100644 --- a/clang/test/SemaCXX/deduced-return-type-cxx14.cpp +++ b/clang/test/SemaCXX/deduced-return-type-cxx14.cpp @@ -237,6 +237,24 @@ namespace Templates { int (S::*(*p)())(double) = f; int (S::*(*q)())(double) = f; } + + template + struct MemberSpecialization { + auto f(); + template auto f(U); + template auto *f(U); + }; + + template<> + auto MemberSpecialization::f(); + + template<> + template + auto MemberSpecialization::f(U); + + template<> + template + auto *MemberSpecialization::f(U); } auto fwd_decl_using(); From 63d22f7a5b6afc515799f67c388bf5a8864274e4 Mon Sep 17 00:00:00 2001 From: Christian Ulmann Date: Tue, 2 Apr 2024 14:35:55 +0200 Subject: [PATCH 091/201] [MLIR][LLVM][SROA] Make GEP handling type agnostic (#86950) This commit removes SROA's type consistency constraints from LLVM dialect's GEPOp. The checks for valid indexing are now purely done by computing the GEP's offset with the aid of the data layout. To simplify handling of "nested subslots", we are tricking the SROA by handing in memory slots that hold byte array types. This ensures that subsequent accesses only need to check if their access will be in-bounds. This lifts the requirement of determining the sub-types for all but the first level of subslots. 
--- mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp | 241 +++++++++++++----- mlir/test/Dialect/LLVMIR/sroa.mlir | 130 ++++++++++ 2 files changed, 312 insertions(+), 59 deletions(-) diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp index f171bf7cc4bec3..06c1fdd2eb2d95 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp @@ -20,6 +20,8 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/TypeSwitch.h" +#define DEBUG_TYPE "sroa" + using namespace mlir; //===----------------------------------------------------------------------===// @@ -431,10 +433,147 @@ DeletionKind LLVM::GEPOp::removeBlockingUses( return DeletionKind::Delete; } -static bool isFirstIndexZero(LLVM::GEPOp gep) { - IntegerAttr index = - llvm::dyn_cast_if_present(gep.getIndices()[0]); - return index && index.getInt() == 0; +/// Returns the amount of bytes the provided GEP elements will offset the +/// pointer by. Returns nullopt if no constant offset could be computed. +static std::optional gepToByteOffset(const DataLayout &dataLayout, + LLVM::GEPOp gep) { + // Collects all indices. + SmallVector indices; + for (auto index : gep.getIndices()) { + auto constIndex = dyn_cast(index); + if (!constIndex) + return {}; + int64_t gepIndex = constIndex.getInt(); + // Negative indices are not supported. 
+ if (gepIndex < 0) + return {}; + indices.push_back(gepIndex); + } + + Type currentType = gep.getElemType(); + uint64_t offset = indices[0] * dataLayout.getTypeSize(currentType); + + for (uint64_t index : llvm::drop_begin(indices)) { + bool shouldCancel = + TypeSwitch(currentType) + .Case([&](LLVM::LLVMArrayType arrayType) { + offset += + index * dataLayout.getTypeSize(arrayType.getElementType()); + currentType = arrayType.getElementType(); + return false; + }) + .Case([&](LLVM::LLVMStructType structType) { + ArrayRef body = structType.getBody(); + assert(index < body.size() && "expected valid struct indexing"); + for (uint32_t i : llvm::seq(index)) { + if (!structType.isPacked()) + offset = llvm::alignTo( + offset, dataLayout.getTypeABIAlignment(body[i])); + offset += dataLayout.getTypeSize(body[i]); + } + + // Align for the current type as well. + if (!structType.isPacked()) + offset = llvm::alignTo( + offset, dataLayout.getTypeABIAlignment(body[index])); + currentType = body[index]; + return false; + }) + .Default([&](Type type) { + LLVM_DEBUG(llvm::dbgs() + << "[sroa] Unsupported type for offset computations" + << type << "\n"); + return true; + }); + + if (shouldCancel) + return std::nullopt; + } + + return offset; +} + +namespace { +/// A struct that stores both the index into the aggregate type of the slot as +/// well as the corresponding byte offset in memory. +struct SubslotAccessInfo { + /// The parent slot's index that the access falls into. + uint32_t index; + /// The offset into the subslot of the access. + uint64_t subslotOffset; +}; +} // namespace + +/// Computes subslot access information for an access into `slot` with the given +/// offset. +/// Returns nullopt when the offset is out-of-bounds or when the access is into +/// the padding of `slot`. 
+static std::optional +getSubslotAccessInfo(const DestructurableMemorySlot &slot, + const DataLayout &dataLayout, LLVM::GEPOp gep) { + std::optional offset = gepToByteOffset(dataLayout, gep); + if (!offset) + return {}; + + // Helper to check that a constant index is in the bounds of the GEP index + // representation. LLVM dialects's GEP arguments have a limited bitwidth, thus + // this additional check is necessary. + auto isOutOfBoundsGEPIndex = [](uint64_t index) { + return index >= (1 << LLVM::kGEPConstantBitWidth); + }; + + Type type = slot.elemType; + if (*offset >= dataLayout.getTypeSize(type)) + return {}; + return TypeSwitch>(type) + .Case([&](LLVM::LLVMArrayType arrayType) + -> std::optional { + // Find which element of the array contains the offset. + uint64_t elemSize = dataLayout.getTypeSize(arrayType.getElementType()); + uint64_t index = *offset / elemSize; + if (isOutOfBoundsGEPIndex(index)) + return {}; + return SubslotAccessInfo{static_cast(index), + *offset - (index * elemSize)}; + }) + .Case([&](LLVM::LLVMStructType structType) + -> std::optional { + uint64_t distanceToStart = 0; + // Walk over the elements of the struct to find in which of + // them the offset is. + for (auto [index, elem] : llvm::enumerate(structType.getBody())) { + uint64_t elemSize = dataLayout.getTypeSize(elem); + if (!structType.isPacked()) { + distanceToStart = llvm::alignTo( + distanceToStart, dataLayout.getTypeABIAlignment(elem)); + // If the offset is in padding, cancel the rewrite. + if (offset < distanceToStart) + return {}; + } + + if (offset < distanceToStart + elemSize) { + if (isOutOfBoundsGEPIndex(index)) + return {}; + // The offset is within this element, stop iterating the + // struct and return the index. + return SubslotAccessInfo{static_cast(index), + *offset - distanceToStart}; + } + + // The offset is not within this element, continue walking + // over the struct. 
+ distanceToStart += elemSize; + } + + return {}; + }); +} + +/// Constructs a byte array type of the given size. +static LLVM::LLVMArrayType getByteArrayType(MLIRContext *context, + unsigned size) { + auto byteType = IntegerType::get(context, 8); + return LLVM::LLVMArrayType::get(context, byteType, size); } LogicalResult LLVM::GEPOp::ensureOnlySafeAccesses( @@ -442,18 +581,17 @@ LogicalResult LLVM::GEPOp::ensureOnlySafeAccesses( const DataLayout &dataLayout) { if (getBase() != slot.ptr) return success(); - if (slot.elemType != getElemType()) - return failure(); - if (!isFirstIndexZero(*this)) + std::optional gepOffset = gepToByteOffset(dataLayout, *this); + if (!gepOffset) return failure(); - // Dynamic indices can be out-of-bounds (even negative), so an access with - // dynamic indices can never be considered safe. - if (!getDynamicIndices().empty()) + uint64_t slotSize = dataLayout.getTypeSize(slot.elemType); + // Check that the access is strictly inside the slot. + if (*gepOffset >= slotSize) return failure(); - Type reachedType = getResultPtrElementType(); - if (!reachedType) - return failure(); - mustBeSafelyUsed.emplace_back({getResult(), reachedType}); + // Every access that remains in bounds of the remaining slot is considered + // legal. + mustBeSafelyUsed.emplace_back( + {getRes(), getByteArrayType(getContext(), slotSize - *gepOffset)}); return success(); } @@ -464,23 +602,25 @@ bool LLVM::GEPOp::canRewire(const DestructurableMemorySlot &slot, if (!isa(getBase().getType())) return false; - if (getBase() != slot.ptr || slot.elemType != getElemType()) - return false; - if (!isFirstIndexZero(*this)) - return false; - // Dynamic indices can be out-of-bounds (even negative), so an access with - // dynamic indices can never be properly rewired. 
- if (!getDynamicIndices().empty()) - return false; - Type reachedType = getResultPtrElementType(); - if (!reachedType || getIndices().size() < 2) + if (getBase() != slot.ptr) return false; - auto firstLevelIndex = dyn_cast(getIndices()[1]); - if (!firstLevelIndex) + std::optional accessInfo = + getSubslotAccessInfo(slot, dataLayout, *this); + if (!accessInfo) return false; - mustBeSafelyUsed.emplace_back({getResult(), reachedType}); - assert(slot.elementPtrs.contains(firstLevelIndex)); - usedIndices.insert(firstLevelIndex); + auto indexAttr = + IntegerAttr::get(IntegerType::get(getContext(), 32), accessInfo->index); + assert(slot.elementPtrs.contains(indexAttr)); + usedIndices.insert(indexAttr); + + // The remainder of the subslot should be accesses in-bounds. Thus, we create + // a dummy slot with the size of the remainder. + Type subslotType = slot.elementPtrs.lookup(indexAttr); + uint64_t slotSize = dataLayout.getTypeSize(subslotType); + LLVM::LLVMArrayType remainingSlotType = + getByteArrayType(getContext(), slotSize - accessInfo->subslotOffset); + mustBeSafelyUsed.emplace_back({getRes(), remainingSlotType}); + return true; } @@ -488,36 +628,19 @@ DeletionKind LLVM::GEPOp::rewire(const DestructurableMemorySlot &slot, DenseMap &subslots, RewriterBase &rewriter, const DataLayout &dataLayout) { - IntegerAttr firstLevelIndex = - llvm::dyn_cast_if_present(getIndices()[1]); - const MemorySlot &newSlot = subslots.at(firstLevelIndex); - - ArrayRef remainingIndices = getRawConstantIndices().slice(2); - - // If the GEP would become trivial after this transformation, eliminate it. - // A GEP should only be eliminated if it has no indices (except the first - // pointer index), as simplifying GEPs with all-zero indices would eliminate - // structure information useful for further destruction. 
- if (remainingIndices.empty()) { - rewriter.replaceAllUsesWith(getResult(), newSlot.ptr); - return DeletionKind::Delete; - } - - rewriter.modifyOpInPlace(*this, [&]() { - // Rewire the indices by popping off the second index. - // Start with a single zero, then add the indices beyond the second. - SmallVector newIndices(1); - newIndices.append(remainingIndices.begin(), remainingIndices.end()); - setRawConstantIndices(newIndices); - - // Rewire the pointed type. - setElemType(newSlot.elemType); - - // Rewire the pointer. - getBaseMutable().assign(newSlot.ptr); - }); - - return DeletionKind::Keep; + std::optional accessInfo = + getSubslotAccessInfo(slot, dataLayout, *this); + assert(accessInfo && "expected access info to be checked before"); + auto indexAttr = + IntegerAttr::get(IntegerType::get(getContext(), 32), accessInfo->index); + const MemorySlot &newSlot = subslots.at(indexAttr); + + auto byteType = IntegerType::get(rewriter.getContext(), 8); + auto newPtr = rewriter.createOrFold( + getLoc(), getResult().getType(), byteType, newSlot.ptr, + ArrayRef(accessInfo->subslotOffset), getInbounds()); + rewriter.replaceAllUsesWith(getResult(), newPtr); + return DeletionKind::Delete; } //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/LLVMIR/sroa.mlir b/mlir/test/Dialect/LLVMIR/sroa.mlir index 3f4d17c6a43f97..fe1531d988a4f5 100644 --- a/mlir/test/Dialect/LLVMIR/sroa.mlir +++ b/mlir/test/Dialect/LLVMIR/sroa.mlir @@ -82,6 +82,27 @@ llvm.func @multi_level_indirect() -> i32 { // ----- +// This verifies that a nested GEP's users are checked properly. In this case +// the load goes over the bounds of the memory slot and thus should block the +// splitting of the alloca. 
+ +// CHECK-LABEL: llvm.func @nested_access_over_slot_bound +llvm.func @nested_access_over_slot_bound() -> i64 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %[[ALLOCA:.*]] = llvm.alloca %{{.*}} x !llvm.struct<(i32, struct<( + %1 = llvm.alloca %0 x !llvm.struct<(i32, struct<(array<10 x i32>)>, i32)> {alignment = 8 : i64} : (i32) -> !llvm.ptr + // CHECK: %[[GEP0:.*]] = llvm.getelementptr inbounds %[[ALLOCA]] + %2 = llvm.getelementptr inbounds %1[0, 1, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(i32, struct<(array<10 x i32>)>, i32)> + // CHECK: %[[GEP1:.*]] = llvm.getelementptr inbounds %[[GEP0]] + %3 = llvm.getelementptr inbounds %2[0, 9] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<10 x i32> + // CHECK: %[[RES:.*]] = llvm.load %[[GEP1]] + %4 = llvm.load %3 : !llvm.ptr -> i64 + // CHECK: llvm.return %[[RES]] : i64 + llvm.return %4 : i64 +} + +// ----- + // CHECK-LABEL: llvm.func @resolve_alias // CHECK-SAME: (%[[ARG:.*]]: i32) llvm.func @resolve_alias(%arg: i32) -> i32 { @@ -318,3 +339,112 @@ llvm.func @store_to_memory(%arg: !llvm.ptr) { llvm.store %1, %arg : !llvm.ptr, !llvm.ptr llvm.return } + +// ----- + +// CHECK-LABEL: llvm.func @type_mismatch_array_access +// CHECK-SAME: %[[ARG:.*]]: i32 +llvm.func @type_mismatch_array_access(%arg: i32) { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %[[ALLOCA:.*]] = llvm.alloca %{{.*}} x i32 + %1 = llvm.alloca %0 x !llvm.struct<(i32, i32, i32)> : (i32) -> !llvm.ptr + %2 = llvm.getelementptr %1[8] : (!llvm.ptr) -> !llvm.ptr, i8 + // CHECK-NEXT: llvm.store %[[ARG]], %[[ALLOCA]] + llvm.store %arg, %2 : i32, !llvm.ptr + llvm.return +} + +// ----- + +// CHECK-LABEL: llvm.func @type_mismatch_struct_access +// CHECK-SAME: %[[ARG:.*]]: i32 +llvm.func @type_mismatch_struct_access(%arg: i32) { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %[[ALLOCA:.*]] = llvm.alloca %{{.*}} x i32 + %1 = llvm.alloca %0 x !llvm.struct<(i32, i32, i32)> : (i32) -> !llvm.ptr + %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, 
!llvm.struct<(i32, i32)> + // CHECK-NEXT: llvm.store %[[ARG]], %[[ALLOCA]] + llvm.store %arg, %2 : i32, !llvm.ptr + llvm.return +} + +// ----- + +// CHECK-LABEL: llvm.func @index_in_final_padding +llvm.func @index_in_final_padding(%arg: i32) { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %[[ALLOCA:.*]] = llvm.alloca %{{.*}} x !llvm.struct<"foo", (i32, i8)> + %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, i8)> : (i32) -> !llvm.ptr + // CHECK: = llvm.getelementptr %[[ALLOCA]][7] : (!llvm.ptr) -> !llvm.ptr, i8 + %2 = llvm.getelementptr %1[7] : (!llvm.ptr) -> !llvm.ptr, i8 + llvm.store %arg, %2 : i32, !llvm.ptr + llvm.return +} + +// ----- + +// CHECK-LABEL: llvm.func @index_out_of_bounds +llvm.func @index_out_of_bounds(%arg: i32) { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %[[ALLOCA:.*]] = llvm.alloca %{{.*}} x !llvm.struct<"foo", (i32, i32)> + %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, i32)> : (i32) -> !llvm.ptr + // CHECK: = llvm.getelementptr %[[ALLOCA]][9] : (!llvm.ptr) -> !llvm.ptr, i8 + %2 = llvm.getelementptr %1[9] : (!llvm.ptr) -> !llvm.ptr, i8 + llvm.store %arg, %2 : i32, !llvm.ptr + llvm.return +} + +// ----- + +// CHECK-LABEL: llvm.func @index_in_padding +llvm.func @index_in_padding(%arg: i16) { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %[[ALLOCA:.*]] = llvm.alloca %{{.*}} x !llvm.struct<"foo", (i16, i32)> + %1 = llvm.alloca %0 x !llvm.struct<"foo", (i16, i32)> : (i32) -> !llvm.ptr + // CHECK: = llvm.getelementptr %[[ALLOCA]][2] : (!llvm.ptr) -> !llvm.ptr, i8 + %2 = llvm.getelementptr %1[2] : (!llvm.ptr) -> !llvm.ptr, i8 + llvm.store %arg, %2 : i16, !llvm.ptr + llvm.return +} + +// ----- + +// CHECK-LABEL: llvm.func @index_not_in_padding_because_packed +// CHECK-SAME: %[[ARG:.*]]: i16 +llvm.func @index_not_in_padding_because_packed(%arg: i16) { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %[[ALLOCA:.*]] = llvm.alloca %{{.*}} x i32 + %1 = llvm.alloca %0 x !llvm.struct<"foo", packed (i16, i32)> : (i32) -> !llvm.ptr 
+ %2 = llvm.getelementptr %1[2] : (!llvm.ptr) -> !llvm.ptr, i8 + // CHECK-NEXT: llvm.store %[[ARG]], %[[ALLOCA]] + llvm.store %arg, %2 : i16, !llvm.ptr + llvm.return +} + +// ----- + +// CHECK-LABEL: llvm.func @no_crash_on_negative_gep_index +// CHECK-SAME: %[[ARG:.*]]: f16 +llvm.func @no_crash_on_negative_gep_index(%arg: f16) { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %[[ALLOCA:.*]] = llvm.alloca %{{.*}} x !llvm.struct<"foo", (i32, i32, i32)> + %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, i32, i32)> : (i32) -> !llvm.ptr + // CHECK: llvm.getelementptr %[[ALLOCA]][-1] : (!llvm.ptr) -> !llvm.ptr, f32 + %2 = llvm.getelementptr %1[-1] : (!llvm.ptr) -> !llvm.ptr, f32 + llvm.store %arg, %2 : f16, !llvm.ptr + llvm.return +} + +// ----- + +// CHECK-LABEL: llvm.func @out_of_bound_gep_array_access +// CHECK-SAME: %[[ARG:.*]]: i32 +llvm.func @out_of_bound_gep_array_access(%arg: i32) { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: %[[ALLOCA:.*]] = llvm.alloca %{{.*}} x i32 + %1 = llvm.alloca %0 x !llvm.struct<"foo", (i32, i32)> : (i32) -> !llvm.ptr + %2 = llvm.getelementptr %1[0, 4] : (!llvm.ptr) -> !llvm.ptr, !llvm.array<4 x i8> + // CHECK-NEXT: llvm.store %[[ARG]], %[[ALLOCA]] + llvm.store %arg, %2 : i32, !llvm.ptr + llvm.return +} From 56aeac47ab0858db9f447b5ec43b660d9035167f Mon Sep 17 00:00:00 2001 From: Mitch Phillips Date: Tue, 2 Apr 2024 14:44:11 +0200 Subject: [PATCH 092/201] Revert "[mlir] Reland the dialect conversion hanging use fix (#87297)" This reverts commit 49a4ec20a8be5888cbf225bab340dbaf204902c7. Reason: Broke the ASan build bot with a memory leak. See the comments at https://github.com/llvm/llvm-project/pull/87297 for more information. 
--- mlir/lib/Transforms/Utils/DialectConversion.cpp | 2 -- .../TosaToLinalg/tosa-to-linalg-invalid.mlir | 13 ------------- 2 files changed, 15 deletions(-) diff --git a/mlir/lib/Transforms/Utils/DialectConversion.cpp b/mlir/lib/Transforms/Utils/DialectConversion.cpp index 270ac0a0868960..8671c1008902a0 100644 --- a/mlir/lib/Transforms/Utils/DialectConversion.cpp +++ b/mlir/lib/Transforms/Utils/DialectConversion.cpp @@ -279,8 +279,6 @@ class CreateBlockRewrite : public BlockRewrite { auto &blockOps = block->getOperations(); while (!blockOps.empty()) blockOps.remove(blockOps.begin()); - for (auto arg : block->getArguments()) - arg.dropAllUses(); block->dropAllUses(); if (block->getParent()) block->erase(); diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-invalid.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-invalid.mlir index 6494e1b2719487..17eec593691860 100644 --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-invalid.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-invalid.mlir @@ -15,16 +15,3 @@ func.func @tensor_with_unknown_rank(%arg0: tensor<*xi8>) -> tensor<*xi8> { %0 = "tosa.abs"(%arg0) : (tensor<*xi8>) -> tensor<*xi8> return %0 : tensor<*xi8> } - -// ----- - -// CHECK-LABEL: @unranked_add -func.func @unranked_add(%arg0 : tensor<10x10xf32> , %arg1 : tensor<10x10xf32>, %arg2 : tensor<*xf32>) -> (tensor<10x10xf32>) { - // expected-error@+3 {{failed to legalize operation 'tosa.add'}} - %reduce = tosa.reduce_max %arg0 {axis = 1 : i32} : (tensor<10x10xf32>) -> tensor<10x1xf32> - %1 = tosa.add %reduce, %arg1 : (tensor<10x1xf32>, tensor<10x10xf32>) -> tensor<10x10xf32> - %0 = tosa.add %1, %arg2 : (tensor<10x10xf32>, tensor<*xf32>) -> tensor<*xf32> - %2 = tosa.reshape %0 {new_shape = array} : (tensor<*xf32>) -> tensor<10x10xf32> - return %2 : tensor<10x10xf32> -} - From cb9cf331fa69bb8319886cdca043a742b4d19ce5 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Tue, 2 Apr 2024 05:52:27 -0700 Subject: [PATCH 093/201] [SLP][NFC]Do 
not lookup in MinBWs, reuse previously used iterator. --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 1ffc39a9067431..b1940e3feed958 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -12297,7 +12297,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { Value *V = Builder.CreateBinOp( static_cast(E->getOpcode()), LHS, RHS); - propagateIRFlags(V, E->Scalars, VL0, !MinBWs.contains(E)); + propagateIRFlags(V, E->Scalars, VL0, It == MinBWs.end()); if (auto *I = dyn_cast(V)) V = propagateMetadata(I, E->Scalars); @@ -12598,8 +12598,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { }, Mask, &OpScalars, &AltScalars); - propagateIRFlags(V0, OpScalars, E->getMainOp(), !MinBWs.contains(E)); - propagateIRFlags(V1, AltScalars, E->getAltOp(), !MinBWs.contains(E)); + propagateIRFlags(V0, OpScalars, E->getMainOp(), It == MinBWs.end()); + propagateIRFlags(V1, AltScalars, E->getAltOp(), It == MinBWs.end()); Value *V = Builder.CreateShuffleVector(V0, V1, Mask); if (auto *I = dyn_cast(V)) { From 0b9528d6bd0bfde5702b1ee5ed8a249d354434f1 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Tue, 2 Apr 2024 15:18:38 +0200 Subject: [PATCH 094/201] [clang] CTAD: Track template template type parameters that referenced in the template arguments of the RHS. (#85405) Fixes https://github.com/llvm/llvm-project/issues/85385. The Finder was missing for this case, for the crash test, the template parameter TTP was incorrectly considered as not referenced/appeared in the template arguments of the right hand side of the alias template decl, thus the synthesized deduction decl doesn't contain this TTP in the template parameter list, but we have references in the declaration, thus it caused crashes. 
--- clang/lib/Sema/SemaTemplate.cpp | 6 ++++++ clang/test/SemaCXX/cxx20-ctad-type-alias.cpp | 20 ++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index befec401c8eec3..a2b8cc14ca764f 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -2720,6 +2720,12 @@ SmallVector TemplateParamsReferencedInTemplateArgumentList( return true; } + bool TraverseTemplateName(TemplateName Template) { + if (auto *TD = Template.getAsTemplateDecl()) + MarkAppeared(TD); + return RecursiveASTVisitor::TraverseTemplateName(Template); + } + void MarkAppeared(NamedDecl *ND) { if (TemplateParams.contains(ND)) ReferencedTemplateParams.insert(ND); diff --git a/clang/test/SemaCXX/cxx20-ctad-type-alias.cpp b/clang/test/SemaCXX/cxx20-ctad-type-alias.cpp index ce403285b0f531..b71cd46f884d63 100644 --- a/clang/test/SemaCXX/cxx20-ctad-type-alias.cpp +++ b/clang/test/SemaCXX/cxx20-ctad-type-alias.cpp @@ -259,3 +259,23 @@ using Bar2 = Foo; // expected-error {{extraneous template parameter list in a Bar2 b = 1; // expected-error {{no viable constructor or deduction guide for deduction of template arguments}} } // namespace test19 + +// GH85385 +namespace test20 { +template