From 640c85d9211534f04237063adaaa26f66ce0995c Mon Sep 17 00:00:00 2001 From: Tarun Prabhu Date: Wed, 15 May 2024 17:36:26 -0600 Subject: [PATCH] WIP: Merge with LLVM 18.x --- clang/include/clang-c/Index.h | 12 +- clang/include/clang/Basic/Attr.td | 2 +- .../clang/Basic/DiagnosticDriverKinds.td | 4 - clang/include/clang/Driver/Options.td | 25 +- clang/include/clang/Driver/ToolChain.h | 13 +- clang/lib/AST/ExprConstant.cpp | 3 - clang/lib/CodeGen/BackendUtil.cpp | 59 +- clang/lib/CodeGen/CGBuiltin.cpp | 1 + clang/lib/CodeGen/CGCleanup.cpp | 6 +- clang/lib/CodeGen/CGException.cpp | 31 +- clang/lib/CodeGen/CGStmt.cpp | 6 +- clang/lib/CodeGen/CodeGenFunction.cpp | 209 +++--- clang/lib/CodeGen/CodeGenFunction.h | 4 +- clang/lib/CodeGen/CodeGenModule.cpp | 17 +- clang/lib/Driver/ToolChain.cpp | 460 ++++++------- clang/lib/Driver/ToolChains/Clang.cpp | 20 - clang/lib/Driver/ToolChains/CloudABI.cpp | 2 +- clang/lib/Driver/ToolChains/CrossWindows.cpp | 2 +- clang/lib/Driver/ToolChains/Darwin.cpp | 75 +-- clang/lib/Driver/ToolChains/Darwin.h | 4 +- clang/lib/Driver/ToolChains/DragonFly.cpp | 2 +- clang/lib/Driver/ToolChains/FreeBSD.cpp | 2 +- clang/lib/Driver/ToolChains/Gnu.cpp | 2 +- clang/lib/Driver/ToolChains/MinGW.cpp | 2 +- clang/lib/Driver/ToolChains/Minix.cpp | 2 +- clang/lib/Driver/ToolChains/Myriad.cpp | 2 +- clang/lib/Driver/ToolChains/NaCl.cpp | 2 +- clang/lib/Driver/ToolChains/PS4CPU.cpp | 2 +- clang/lib/Driver/ToolChains/Solaris.cpp | 2 +- clang/lib/Format/UnwrappedLineParser.h | 3 - clang/lib/Frontend/CompilerInvocation.cpp | 10 +- kitsune/CMakeLists.txt | 251 +++++-- kitsune/include/kitsune/CMakeLists.txt | 12 +- kitsune/include/kitsune/Config/config.h.cmake | 120 ++-- kitsune/runtime/CMakeLists.txt | 86 ++- kitsune/runtime/cuda/kitcuda.cpp | 2 +- kitsune/test/cuda/config.cpp | 2 +- kitsune/test/cuda/default-args.cpp | 5 +- kitsune/test/cuda/ftapir.cpp | 2 +- kitsune/test/hip/config.cpp | 2 +- kitsune/test/hip/default-args.cpp | 5 +- 
kitsune/test/kokkos/config.cpp | 19 + kitsune/test/kokkos/default-args.cpp | 8 +- kitsune/test/kokkos/functor.cpp | 6 +- kitsune/test/kokkos/kokkos.cfg | 3 + kitsune/test/kokkos/lambda.cpp | 1 + kitsune/test/kokkos/target-attr.cpp | 2 +- kitsune/test/opencilk/config.cpp | 2 +- kitsune/test/opencilk/default-args.cpp | 5 +- kitsune/test/opencilk/ftapir.cpp | 2 +- kitsune/test/openmp/config.cpp | 2 +- kitsune/test/openmp/default-args.cpp | 5 +- kitsune/test/qthreads/config.cpp | 2 +- kitsune/test/qthreads/default-args.cpp | 5 +- kitsune/test/realm/config.cpp | 2 +- kitsune/test/realm/default-args.cpp | 5 +- llvm/include/llvm/IR/Intrinsics.td | 4 +- llvm/include/llvm/LinkAllPasses.h | 1 - llvm/include/llvm/Transforms/Scalar/SROA.h | 122 ---- llvm/lib/Analysis/BasicAliasAnalysis.cpp | 29 +- llvm/lib/Analysis/CMakeLists.txt | 1 - llvm/lib/Analysis/LoopAccessAnalysis.cpp | 8 +- llvm/lib/Analysis/TapirRaceDetect.cpp | 6 +- .../SelectionDAG/SelectionDAGBuilder.cpp | 27 +- llvm/lib/IR/Verifier.cpp | 2 + llvm/lib/Passes/PassBuilder.cpp | 4 +- llvm/lib/Passes/PassRegistry.def | 44 +- .../Instrumentation/AddressSanitizer.cpp | 2 +- .../Instrumentation/CilkSanitizer.cpp | 637 +++++++----------- .../ComprehensiveStaticInstrumentation.cpp | 52 +- llvm/lib/Transforms/Scalar/GVN.cpp | 4 +- llvm/lib/Transforms/Scalar/SROA.cpp | 5 +- llvm/lib/Transforms/Tapir/CudaABI.cpp | 114 ++-- llvm/lib/Transforms/Tapir/HipABI.cpp | 63 +- llvm/lib/Transforms/Tapir/LambdaABI.cpp | 2 +- llvm/lib/Transforms/Tapir/LoopSpawningTI.cpp | 14 +- llvm/lib/Transforms/Tapir/LoopStripMine.cpp | 6 +- llvm/lib/Transforms/Tapir/LoweringUtils.cpp | 299 ++++---- llvm/lib/Transforms/Tapir/OMPTaskABI.cpp | 2 +- llvm/lib/Transforms/Tapir/OpenCilkABI.cpp | 6 +- llvm/lib/Transforms/Tapir/QthreadsABI.cpp | 164 ++--- llvm/lib/Transforms/Tapir/RealmABI.cpp | 138 ++-- llvm/lib/Transforms/Tapir/TapirGPUUtils.cpp | 12 +- llvm/lib/Transforms/Utils/InlineFunction.cpp | 3 - llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 41 +- 
llvm/lib/Transforms/Utils/TapirUtils.cpp | 18 +- llvm/lib/Transforms/Utils/TaskSimplify.cpp | 6 +- llvm/runtimes/CMakeLists.txt | 12 - llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll | 14 +- llvm/test/CodeGen/AMDGPU/llc-pipeline.ll | 10 +- llvm/test/CodeGen/X86/sse1.ll | 10 +- llvm/test/Other/new-pm-defaults.ll | 2 +- llvm/test/Other/new-pm-lto-defaults.ll | 2 +- .../Other/new-pm-thinlto-postlink-defaults.ll | 1 + .../new-pm-thinlto-postlink-pgo-defaults.ll | 2 +- ...-pm-thinlto-postlink-samplepgo-defaults.ll | 2 +- .../Other/new-pm-thinlto-prelink-defaults.ll | 2 +- ...w-pm-thinlto-prelink-samplepgo-defaults.ll | 2 +- .../MergeFunc/call-and-invoke-with-ranges.ll | 6 +- ...ting-sinking-required-for-vectorization.ll | 3 +- .../CilkSanitizer/cilksan-aarch64-neon.ll | 8 +- .../Tapir/CilkSanitizer/finddbgvalues.ll | 7 + .../Transforms/Tapir/dead-tapir-intrinsics.ll | 6 +- llvm/test/Transforms/Tapir/loop-stripmine.ll | 8 +- .../Transforms/Tapir/slp-vectorize-long-bb.ll | 192 +++--- llvm/tools/gold/gold-plugin.cpp | 4 +- runtimes/CMakeLists.txt | 23 - 107 files changed, 1630 insertions(+), 2064 deletions(-) create mode 100644 kitsune/test/kokkos/config.cpp create mode 100644 kitsune/test/kokkos/kokkos.cfg diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h index 1d5f68076f863a7..984244fb6e95e19 100644 --- a/clang/include/clang-c/Index.h +++ b/clang/include/clang-c/Index.h @@ -2136,21 +2136,25 @@ enum CXCursorKind { */ CXCursor_OMPErrorDirective = 305, + /** OpenMP scope directive. + */ + CXCursor_OMPScopeDirective = 306, + /** Kitsune forall statement. */ - CXCursor_ForallStmt = 306, + CXCursor_ForallStmt = 307, /** Kitsune range-based forall statement. */ - CXCursor_CXXForallRangeStmt = 307, + CXCursor_CXXForallRangeStmt = 308, /** Kitsune spawn statement. */ - CXCursor_SpawnStmt = 308, + CXCursor_SpawnStmt = 309, /** Kitsune sync statement. 
*/ - CXCursor_SyncStmt = 309, + CXCursor_SyncStmt = 310, CXCursor_LastStmt = CXCursor_SyncStmt, diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 017eb3d5022d9c2..1b63794c4a32d5e 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1,4 +1,4 @@ -.td - attribute definitions -----------------------------------===// +//==--- Attr.td - attribute definitions -----------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index b2d35ed61a164a5..8451780a2d87fa2 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -808,10 +808,6 @@ def err_drv_kitsune_unsupported: Error< // give opencilk any special treatment here. def err_drv_opencilk_missing_abi_bitcode: Error< "Cannot find OpenCilk runtime ABI bitcode file: %0">; -def err_drv_opencilk_resource_dir_missing_include: Error< - "No include directory in OpenCilk resource directory: %0">; -def err_drv_opencilk_resource_dir_missing_lib: Error< - "No lib directory in OpenCilk resource directory: %0">; def err_drv_mix_tapir_cuda_hip : Error< "Mixed Tapir and Cuda/HIP compilation is currently not supported.">; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index bbed9781b5d00c8..9b2bbb95fef40f9 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3163,34 +3163,31 @@ def fno_knr_functions : Flag<["-"], "fno-knr-functions">, Group, Visibility<[ClangOption, CC1Option, CLOption]>; def config_kitsune_dir_EQ : Joined<["--"], "config-kitsune-dir=">, - Flags<[NoXarchOption, CoreOption, HelpHidden]>, + Flags<[NoXarchOption, HelpHidden]>, + Visibility<[ClangOption, 
CC1Option, FlangOption, FC1Option]>, HelpText<"Kitsune directory for configuration files">; def fkokkos : Flag<["-"], "fkokkos">, Group, - Flags<[CC1Option, NoArgumentUnused]>, + Flags<[NoArgumentUnused]>, + Visibility<[ClangOption, CC1Option]>, HelpText<"Enable Kokkos extensions: recognition and custom code generation">; def fkokkos_no_init : Flag<["-"], "fkokkos-no-init">, Group, - Flags<[CC1Option, NoArgumentUnused]>, + Flags<[NoArgumentUnused]>, + Visibility<[ClangOption, CC1Option]>, HelpText<"Skip code gen of Kokkos initialization (and finalize) calls.">; def ftapir_EQ : Joined<["-"], "ftapir=">, Group, - Flags<[CC1Option]>, + Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>, MetaVarName<"">, HelpText<"Choose the backend parallel runtime for Tapir instructions">, Values<"none,serial,cuda,hip,opencilk,openmp,qthreads,realm">; def ftapir_nvarch_EQ : Joined<["-"], "ftapir-nvarch=">, Group, - Flags<[CC1Option]>, + Visibility<[ClangOption, CC1Option, FlangOption, FC1Option]>, HelpText<"Choose the target nvida gpu architecture (e.g., sm_80) for gpu and cuda backend runtimes">, Values<"sm_50,sm_52,sm_53,sm_60,sm_61,sm_62,sm_70,sm_72,sm_75,sm_80,sm_86,sm_90">; def fstripmine : Flag<["-"], "fstripmine">, Group, HelpText<"Enable the Tapir loop stripmining passes">; def fno_stripmine : Flag<["-"], "fno-stripmine">, Group; - -// KITSUNE FIXME: Should remove this because we don't want to support -// customizing the OpenCilk paths. 
-def opencilk_resource_dir_EQ : Joined<["--"], "opencilk-resource-dir=">, - Flags<[NoXarchOption]>, - HelpText<"The directory that holds OpenCilk resource files">; def opencilk_abi_bitcode_EQ : Joined<["--"], "opencilk-abi-bitcode=">, - Flags<[CC1Option]>, HelpText<"Path to OpenCilk ABI bitcode file">, + Visibility<[CC1Option]>, HelpText<"Path to OpenCilk ABI bitcode file">, MarshallingInfoString>; def fmudflapth : Flag<["-"], "fmudflapth">, Group; @@ -6200,7 +6197,6 @@ defm caller_saves : BooleanFFlag<"caller-saves">, Group, Group; defm branch_count_reg : BooleanFFlag<"branch-count-reg">, Group; defm default_inline : BooleanFFlag<"default-inline">, Group; -defm fat_lto_objects : BooleanFFlag<"fat-lto-objects">, Group; defm float_store : BooleanFFlag<"float-store">, Group; defm friend_injection : BooleanFFlag<"friend-injection">, Group; defm function_attribute_list : BooleanFFlag<"function-attribute-list">, Group; @@ -6963,9 +6959,6 @@ def vectorize_slp : Flag<["-"], "vectorize-slp">, def stripmine_loops : Flag<["-"], "stripmine-loops">, HelpText<"Run the Tapir Loop stripmining passes">, MarshallingInfoFlag>; -def dependent_lib : Joined<["--"], "dependent-lib=">, - HelpText<"Add dependent library">, - MarshallingInfoStringVector>; def linker_option : Joined<["--"], "linker-option=">, HelpText<"Add linker option">, MarshallingInfoStringVector>; diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index cd97d8da4f62f2c..f0613fa59af5e87 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -824,13 +824,9 @@ class ToolChain { virtual void AddKitsuneLinkerArgs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const; - /// Check the specified OpenCilk resource directory is valid. - virtual void AddOpenCilkIncludeDir(const llvm::opt::ArgList &Args, - llvm::opt::ArgStringList &CmdArgs) const; - /// Get the OpenCilk library path if it exists. 
- virtual path_list - getOpenCilkRuntimePaths(const llvm::opt::ArgList &Args) const; + virtual std::optional + getOpenCilkRuntimePath(const llvm::opt::ArgList &Args) const; virtual std::string getOpenCilkBCBasename(const llvm::opt::ArgList &Args, StringRef Component, @@ -851,11 +847,6 @@ class ToolChain { virtual void AddOpenCilkABIBitcode(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs, bool IsLTO = false) const; - - /// AddTapirRuntimeLibArgs - Add the specific linker arguments to use for the - /// given Tapir runtime library type. - virtual void AddTapirRuntimeLibArgs(const llvm::opt::ArgList &Args, - llvm::opt::ArgStringList &CmdArgs) const; }; /// Set a ToolChain's effective triple. Reset it when the registration object diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index ef3d34d37674c9b..edf9b5e2d52bb3c 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11654,7 +11654,6 @@ GCCTypeClass EvaluateBuiltinClassifyType(QualType T, return EvaluateBuiltinClassifyType( CanTy->castAs()->getValueType(), LangOpts); - case Type::BlockPointer: case Type::Vector: case Type::ExtVector: return GCCTypeClass::Vector; @@ -15800,8 +15799,6 @@ bool Expr::EvaluateAsInitializer(APValue &Value, const ASTContext &Ctx, llvm_unreachable("Unhandled cleanup; missing full expression marker?"); } - SourceLocation DeclLoc = VD->getLocation(); - QualType DeclTy = VD->getType(); return CheckConstantExpression(Info, DeclLoc, DeclTy, Value, ConstantExprKind::Normal) && CheckMemoryLeaks(Info); diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 22448bda5a19df0..0843226090faa1f 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -280,43 +280,11 @@ static bool asanUseGlobalsGC(const Triple &T, const CodeGenOptions &CGOpts) { return false; } -static TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple, - const CodeGenOptions 
&CodeGenOpts, - const LangOptions &LangOpts) { - TargetLibraryInfoImpl *TLII = new TargetLibraryInfoImpl(TargetTriple); - - switch (CodeGenOpts.getVecLib()) { - case CodeGenOptions::Accelerate: - TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::Accelerate, - TargetTriple); - break; - case CodeGenOptions::LIBMVEC: - TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::LIBMVEC_X86, - TargetTriple); - break; - case CodeGenOptions::MASSV: - TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::MASSV, - TargetTriple); - break; - case CodeGenOptions::SVML: - TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::SVML, - TargetTriple); - break; - case CodeGenOptions::SLEEF: - TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::SLEEFGNUABI, - TargetTriple); - break; - case CodeGenOptions::Darwin_libsystem_m: - TLII->addVectorizableFunctionsFromVecLib( - TargetLibraryInfoImpl::DarwinLibSystemM, TargetTriple); - break; - case CodeGenOptions::ArmPL: - TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::ArmPL, - TargetTriple); - break; - default: - break; - } +static std::unique_ptr +createTLII(llvm::Triple &TargetTriple, const CodeGenOptions &CodeGenOpts, + const LangOptions &LangOpts) { + std::unique_ptr TLII( + llvm::driver::createTLII(TargetTriple, CodeGenOpts.getVecLib())); TLII->setTapirTarget(LangOpts.KitsuneOpts.getTapirTargetOrInvalid()); TLII->setTapirTargetOptions( @@ -614,7 +582,7 @@ bool EmitAssemblyHelper::AddEmitPasses(legacy::PassManager &CodeGenPasses, raw_pwrite_stream *DwoOS) { // Add LibraryInfo. std::unique_ptr TLII( - llvm::driver::createTLII(TargetTriple, CodeGenOpts.getVecLib())); + createTLII(TargetTriple, CodeGenOpts, LangOpts)); CodeGenPasses.add(new TargetLibraryInfoWrapperPass(*TLII)); // Normal mode, emit a .s or .o file by running the code generator. 
Note, @@ -948,7 +916,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline( // Register the target library analysis directly and give it a customized // preset TLI. std::unique_ptr TLII( - llvm::driver::createTLII(TargetTriple, CodeGenOpts.getVecLib())); + createTLII(TargetTriple, CodeGenOpts, LangOpts)); FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); }); // Register all the basic analyses with the managers. @@ -1070,20 +1038,21 @@ void EmitAssemblyHelper::RunOptimizationPipeline( }); #endif // 0 - if (CodeGenOpts.OptimizationLevel == 0) { - MPM.addPass(PB.buildO0DefaultPipeline(Level, IsLTO || IsThinLTO, - TLII->hasTapirTarget())); - } else if (CodeGenOpts.FatLTO) { + if (CodeGenOpts.FatLTO) { MPM.addPass(PB.buildFatLTODefaultPipeline( Level, PrepareForThinLTO, PrepareForThinLTO || shouldEmitRegularLTOSummary())); + } else if (CodeGenOpts.OptimizationLevel == 0) { + MPM.addPass(PB.buildO0DefaultPipeline( + Level, PrepareForLTO || PrepareForThinLTO, TLII->hasTapirTarget())); } else if (PrepareForThinLTO) { MPM.addPass(PB.buildThinLTOPreLinkDefaultPipeline(Level)); } else if (PrepareForLTO) { MPM.addPass(PB.buildLTOPreLinkDefaultPipeline(Level)); } else { - MPM.addPass(PB.buildPerModuleDefaultPipeline(Level), - /* LTOPreLink */ false, TLII->hasTapirTarget()); + MPM.addPass(PB.buildPerModuleDefaultPipeline(Level, + /* LTOPreLink */ false, + TLII->hasTapirTarget())); } } diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index ee3a0a542a5866a..7e5300f42d49ec2 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -5190,6 +5190,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } MaybeDetach(this, SpawnedScp); + llvm::Value *Carry; llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry); Builder.CreateStore(Sum, SumOutPtr); diff --git a/clang/lib/CodeGen/CGCleanup.cpp b/clang/lib/CodeGen/CGCleanup.cpp index 24a8d3958622e24..d3113a1c6033909 100644 
--- a/clang/lib/CodeGen/CGCleanup.cpp +++ b/clang/lib/CodeGen/CGCleanup.cpp @@ -1107,17 +1107,13 @@ bool CodeGenFunction::isObviouslyBranchWithoutCleanups(JumpDest Dest) const { /// be known, in which case this will require a fixup. /// /// As a side-effect, this method clears the insertion point. -void CodeGenFunction::EmitBranchThroughCleanup(JumpDest Dest, bool AfterSync) { +void CodeGenFunction::EmitBranchThroughCleanup(JumpDest Dest) { assert(Dest.getScopeDepth().encloses(EHStack.stable_begin()) && "stale jump destination"); if (!HaveInsertPoint()) return; - // If needed, insert an implicit _Cilk_sync before the cleanups. - if (AfterSync) - EmitImplicitSyncCleanup(); - // Create the branch. llvm::BranchInst *BI = Builder.CreateBr(Dest.getBlock()); diff --git a/clang/lib/CodeGen/CGException.cpp b/clang/lib/CodeGen/CGException.cpp index 189f4dcd6ce8f29..abc5974379228c5 100644 --- a/clang/lib/CodeGen/CGException.cpp +++ b/clang/lib/CodeGen/CGException.cpp @@ -620,27 +620,16 @@ void CodeGenFunction::EmitEndEHSpec(const Decl *D) { } void CodeGenFunction::EmitCXXTryStmt(const CXXTryStmt &S) { - TaskFrameScope TFScope(*this); - EnterCXXTryStmt(S); - { - // If compiling Cilk code, create a nested sync region, with an implicit - // sync, for the try-catch. - // FIXME KITSUNE: Since we know that we will not be compiling Cilk, can we - // clean this up. - bool CompilingCilk = false; - SyncedScopeRAII SyncedScp(*this); - if (CompilingCilk) { - PushSyncRegion(); - if (isa(S.getTryBlock())) - ScopeIsSynced = true; - } - EmitStmt(S.getTryBlock()); - - // Pop the nested sync region after the try block. - if (CompilingCilk) - PopSyncRegion(); - } - ExitCXXTryStmt(S); + const llvm::Triple &T = Target.getTriple(); + // If we encounter a try statement on in an OpenMP target region offloaded to + // a GPU, we treat it as a basic block. 
+ const bool IsTargetDevice = + (CGM.getLangOpts().OpenMPIsTargetDevice && (T.isNVPTX() || T.isAMDGCN())); + if (!IsTargetDevice) + EnterCXXTryStmt(S); + EmitStmt(S.getTryBlock()); + if (!IsTargetDevice) + ExitCXXTryStmt(S); } void CodeGenFunction::EnterCXXTryStmt(const CXXTryStmt &S, bool IsFnTryBlock) { diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp index abe99c20ad72136..61642e80595b21f 100644 --- a/clang/lib/CodeGen/CGStmt.cpp +++ b/clang/lib/CodeGen/CGStmt.cpp @@ -532,7 +532,6 @@ CodeGenFunction::EmitCompoundStmtWithoutScope(const CompoundStmt &S, } else if (const auto *AS = dyn_cast(ExprResult)) { // FIXME: Update this if we ever have attributes that affect the // semantics of an expression. - llvm::errs() << "we are here with an attributed expression...\n"; ExprResult = AS->getSubStmt(); } else { llvm_unreachable("unknown value statement"); @@ -1432,10 +1431,7 @@ void CodeGenFunction::EmitReturnStmt(const ReturnStmt &S) { PopDetachScope(); } - // FIXME KITSUNE: Can we clean up this API since we know that we will never be - // compiling Cilk? 
- bool CompilingCilk = false; - EmitBranchThroughCleanup(ReturnBlock, CompilingCilk); + EmitBranchThroughCleanup(ReturnBlock); } void CodeGenFunction::EmitDeclStmt(const DeclStmt &S) { diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index ab002cfb847d309..f2912024e82b75d 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -105,12 +105,9 @@ llvm::fp::ExceptionBehavior clang::ToConstrainedExceptMD(LangOptions::FPExceptionModeKind Kind) { switch (Kind) { - case LangOptions::FPE_Ignore: - return llvm::fp::ebIgnore; - case LangOptions::FPE_MayTrap: - return llvm::fp::ebMayTrap; - case LangOptions::FPE_Strict: - return llvm::fp::ebStrict; + case LangOptions::FPE_Ignore: return llvm::fp::ebIgnore; + case LangOptions::FPE_MayTrap: return llvm::fp::ebMayTrap; + case LangOptions::FPE_Strict: return llvm::fp::ebStrict; default: llvm_unreachable("Unsupported FP Exception Behavior"); } @@ -169,7 +166,8 @@ void CodeGenFunction::CGFPOptionsRAII::ConstructorHelper(FPOptions FPFeatures) { "FPConstrained should be enabled on entire function"); auto mergeFnAttrValue = [&](StringRef Name, bool Value) { - auto OldValue = CGF.CurFn->getFnAttribute(Name).getValueAsBool(); + auto OldValue = + CGF.CurFn->getFnAttribute(Name).getValueAsBool(); auto NewValue = OldValue & Value; if (OldValue != NewValue) CGF.CurFn->addFnAttr(Name, llvm::toStringRef(NewValue)); @@ -200,8 +198,8 @@ LValue CodeGenFunction::MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T) { /// Given a value of type T* that may not be to a complete object, /// construct an l-value with the natural pointee alignment of T. 
-LValue CodeGenFunction::MakeNaturalAlignPointeeAddrLValue(llvm::Value *V, - QualType T) { +LValue +CodeGenFunction::MakeNaturalAlignPointeeAddrLValue(llvm::Value *V, QualType T) { LValueBaseInfo BaseInfo; TBAAAccessInfo TBAAInfo; CharUnits Align = CGM.getNaturalTypeAlignment(T, &BaseInfo, &TBAAInfo, @@ -210,6 +208,7 @@ LValue CodeGenFunction::MakeNaturalAlignPointeeAddrLValue(llvm::Value *V, return MakeAddrLValue(Addr, T, BaseInfo, TBAAInfo); } + llvm::Type *CodeGenFunction::ConvertTypeForMem(QualType T) { return CGM.getTypes().ConvertTypeForMem(T); } @@ -298,7 +297,7 @@ llvm::DebugLoc CodeGenFunction::EmitReturnBlock() { // cleans up functions which started with a unified return block. if (ReturnBlock.getBlock()->hasOneUse()) { llvm::BranchInst *BI = - dyn_cast(*ReturnBlock.getBlock()->user_begin()); + dyn_cast(*ReturnBlock.getBlock()->user_begin()); if (BI && BI->isUnconditional() && BI->getSuccessor(0) == ReturnBlock.getBlock()) { // Record/return the DebugLoc of the simple 'return' expression to be used @@ -321,8 +320,7 @@ llvm::DebugLoc CodeGenFunction::EmitReturnBlock() { } static void EmitIfUsed(CodeGenFunction &CGF, llvm::BasicBlock *BB) { - if (!BB) - return; + if (!BB) return; if (!BB->use_empty()) { CGF.CurFn->insert(CGF.CurFn->end(), BB); return; @@ -335,9 +333,9 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { "mismatched push/pop in break/continue stack!"); assert(!CurDetachScope && "mismatched push/pop in detach-scope stack!"); - bool OnlySimpleReturnStmts = NumSimpleReturnExprs > 0 && - NumSimpleReturnExprs == NumReturnExprs && - ReturnBlock.getBlock()->use_empty(); + bool OnlySimpleReturnStmts = NumSimpleReturnExprs > 0 + && NumSimpleReturnExprs == NumReturnExprs + && ReturnBlock.getBlock()->use_empty(); // Usually the return expression is evaluated before the cleanup // code. 
If the function contains only a simple return statement, // such as a constant, the location before the cleanup code becomes @@ -365,11 +363,6 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { bool HasOnlyLifetimeMarkers = HasCleanups && EHStack.containsOnlyLifetimeMarkers(PrologueCleanupDepth); bool EmitRetDbgLoc = !HasCleanups || HasOnlyLifetimeMarkers; - bool SyncEmitted = false; - - // FIXME KITSUNE: Since we know that we will never be compiling Cilk, can we - // simplify this? - bool CompilingCilk = false; std::optional OAL; if (HasCleanups) { @@ -384,29 +377,12 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { OAL = ApplyDebugLocation::CreateDefaultArtificial(*this, EndLoc); } - // If we're compiling Cilk, PopCleanupBlocks should emit a _Cilk_sync before - // any cleanups. - PopCleanupBlocks(PrologueCleanupDepth, {}, CompilingCilk); - SyncEmitted = true; - } else if (CompilingCilk && Builder.GetInsertBlock() && - ReturnBlock.getBlock()->use_empty()) { - // If we're compiling Cilk, emit an implicit sync for the function. In this - // case, EmitReturnBlock will recycle Builder.GetInsertBlock() for the - // function's return block, so we insert the implicit _Cilk_sync before - // calling EmitReturnBlock. - EmitImplicitSyncCleanup(); - SyncEmitted = true; + PopCleanupBlocks(PrologueCleanupDepth); } // Emit function epilog (to return). llvm::DebugLoc Loc = EmitReturnBlock(); - if (CompilingCilk && !SyncEmitted) { - // If we're compiling Cilk, emit an implicit sync for the function. 
- EmitImplicitSyncCleanup(); - SyncEmitted = true; - } - if (ShouldInstrumentFunction()) { if (CGM.getCodeGenOpts().InstrumentFunctions) CurFn->addFnAttr("instrument-function-exit", "__cyg_profile_func_exit"); @@ -425,7 +401,8 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { EmitFunctionEpilog(*CurFnInfo, EmitRetDbgLoc, EndLoc); EmitEndEHSpec(CurCodeDecl); - assert(EHStack.empty() && "did not remove all scopes from cleanup stack!"); + assert(EHStack.empty() && + "did not remove all scopes from cleanup stack!"); // If someone did an indirect goto, emit the indirect goto block at the end of // the function. @@ -555,10 +532,10 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) { PopSyncRegion(); // FIXME KITSUNE: This currently causes an assertion failure because we // apparently do end up with nested sync regions at the end of the function. - // This was check was added in OpenCilk during the 17.x rebase. The fact - // that we fail indicate that we are probably doing something wrong, but - // this is disabled for now since there are more pressing issues that need - // to be addressed in during the de-cilkifying of the Kitsune. + // This check was added in OpenCilk during the 17.x rebase. We are probably + // doing something wrong, but this is disabled for now since there are more + // pressing issues that need to be addressed during the de-cilkifying of + // Kitsune.
assert(!CurSyncRegion && "Nested sync regions at end of function."); } } @@ -648,8 +625,7 @@ void CodeGenFunction::EmitKernelMetadata(const FunctionDecl *FD, llvm::ConstantAsMetadata::get(Builder.getInt32(A->getXDim())), llvm::ConstantAsMetadata::get(Builder.getInt32(A->getYDim())), llvm::ConstantAsMetadata::get(Builder.getInt32(A->getZDim()))}; - Fn->setMetadata("work_group_size_hint", - llvm::MDNode::get(Context, AttrMDArgs)); + Fn->setMetadata("work_group_size_hint", llvm::MDNode::get(Context, AttrMDArgs)); } if (const ReqdWorkGroupSizeAttr *A = FD->getAttr()) { @@ -657,8 +633,7 @@ void CodeGenFunction::EmitKernelMetadata(const FunctionDecl *FD, llvm::ConstantAsMetadata::get(Builder.getInt32(A->getXDim())), llvm::ConstantAsMetadata::get(Builder.getInt32(A->getYDim())), llvm::ConstantAsMetadata::get(Builder.getInt32(A->getZDim()))}; - Fn->setMetadata("reqd_work_group_size", - llvm::MDNode::get(Context, AttrMDArgs)); + Fn->setMetadata("reqd_work_group_size", llvm::MDNode::get(Context, AttrMDArgs)); } if (const OpenCLIntelReqdSubGroupSizeAttr *A = @@ -685,7 +660,7 @@ void CodeGenFunction::EmitKitsuneMetadata(const FunctionDecl *FD, } /// Determine whether the function F ends with a return stmt. 
-static bool endsWithReturn(const Decl *F) { +static bool endsWithReturn(const Decl* F) { const Stmt *Body = nullptr; if (auto *FD = dyn_cast_or_null(F)) Body = FD->getBody(); @@ -860,8 +835,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, IdentifierInfo *II = OMD->getSelector().getIdentifierInfoForSlot(0); if (OMD->getMethodFamily() == OMF_dealloc || OMD->getMethodFamily() == OMF_initialize || - (OMD->getSelector().isUnarySelector() && - II->isStr(".cxx_destruct"))) { + (OMD->getSelector().isUnarySelector() && II->isStr(".cxx_destruct"))) { markAsIgnoreThreadCheckingAtRuntime(Fn); } } @@ -1134,16 +1108,14 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, if (CGM.getCodeGenOpts().MNopMCount) { if (!CGM.getCodeGenOpts().CallFEntry) CGM.getDiags().Report(diag::err_opt_not_valid_without_opt) - << "-mnop-mcount" - << "-mfentry"; + << "-mnop-mcount" << "-mfentry"; Fn->addFnAttr("mnop-mcount"); } if (CGM.getCodeGenOpts().RecordMCount) { if (!CGM.getCodeGenOpts().CallFEntry) CGM.getDiags().Report(diag::err_opt_not_valid_without_opt) - << "-mrecord-mcount" - << "-mfentry"; + << "-mrecord-mcount" << "-mfentry"; Fn->addFnAttr("mrecord-mcount"); } } @@ -1153,7 +1125,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, if (getContext().getTargetInfo().getTriple().getArch() != llvm::Triple::systemz) CGM.getDiags().Report(diag::err_opt_not_valid_on_target) - << "-mpacked-stack"; + << "-mpacked-stack"; Fn->addFnAttr("packed-stack"); } @@ -1189,8 +1161,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, unsigned Idx = CurFnInfo->getReturnInfo().getInAllocaFieldIndex(); llvm::Function::arg_iterator EI = CurFn->arg_end(); --EI; - llvm::Value *Addr = - Builder.CreateStructGEP(CurFnInfo->getArgStruct(), &*EI, Idx); + llvm::Value *Addr = Builder.CreateStructGEP( + CurFnInfo->getArgStruct(), &*EI, Idx); llvm::Type *Ty = cast(Addr)->getResultElementType(); ReturnValuePointer = Address(Addr, Ty, 
getPointerAlign()); @@ -1203,7 +1175,8 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, // Tell the epilog emitter to autorelease the result. We do this // now so that various specialized functions can suppress it // during their IR-generation. - if (getLangOpts().ObjCAutoRefCount && !CurFnInfo->isReturnsRetained() && + if (getLangOpts().ObjCAutoRefCount && + !CurFnInfo->isReturnsRetained() && RetTy->isObjCRetainableType()) AutoreleaseResult = true; } @@ -1239,24 +1212,21 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, // Get the lvalue for the field (which is a copy of the enclosing object // or contains the address of the enclosing object). - LValue ThisFieldLValue = - EmitLValueForLambdaField(LambdaThisCaptureField); + LValue ThisFieldLValue = EmitLValueForLambdaField(LambdaThisCaptureField); if (!LambdaThisCaptureField->getType()->isPointerType()) { - // If the enclosing object was captured by value, just use its - // address. + // If the enclosing object was captured by value, just use its address. CXXThisValue = ThisFieldLValue.getAddress(*this).getPointer(); } else { // Load the lvalue pointed to by the field, since '*this' was captured // by reference. 
- CXXThisValue = EmitLoadOfLValue(ThisFieldLValue, SourceLocation()) - .getScalarVal(); + CXXThisValue = + EmitLoadOfLValue(ThisFieldLValue, SourceLocation()).getScalarVal(); } } for (auto *FD : MD->getParent()->fields()) { if (FD->hasCapturedVLAType()) { - auto *ExprArg = - EmitLoadOfLValue(EmitLValueForLambdaField(FD), SourceLocation()) - .getScalarVal(); + auto *ExprArg = EmitLoadOfLValue(EmitLValueForLambdaField(FD), + SourceLocation()).getScalarVal(); auto VAT = FD->getCapturedVLAType(); VLASizeMap[VAT->getSizeExpr()] = ExprArg; } @@ -1351,8 +1321,7 @@ void CodeGenFunction::EmitBlockWithFallThrough(llvm::BasicBlock *BB, static void TryMarkNoThrow(llvm::Function *F) { // LLVM treats 'nounwind' on a function as part of the type, so we // can't do this on functions that can be overwritten. - if (F->isInterposable()) - return; + if (F->isInterposable()) return; for (llvm::BasicBlock &BB : *F) for (llvm::Instruction &I : BB) @@ -1511,7 +1480,8 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, EmitDestructorBody(Args); else if (isa(FD)) EmitConstructorBody(Args); - else if (getLangOpts().CUDA && !getLangOpts().CUDAIsDevice && + else if (getLangOpts().CUDA && + !getLangOpts().CUDAIsDevice && FD->hasAttr()) CGM.getCUDARuntime().emitDeviceStub(*this, Args); else if (isa(FD) && @@ -1582,8 +1552,7 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, /// that we can just remove the code. bool CodeGenFunction::ContainsLabel(const Stmt *S, bool IgnoreCaseStmts) { // Null statement, not a label! - if (!S) - return false; + if (!S) return false; // If this is a label, we have to emit the code, consider something like: // if (0) { ... foo: bar(); } goto foo; @@ -1615,8 +1584,7 @@ bool CodeGenFunction::ContainsLabel(const Stmt *S, bool IgnoreCaseStmts) { /// inside of it, this is fine. bool CodeGenFunction::containsBreak(const Stmt *S) { // Null statement, not a label! 
- if (!S) - return false; + if (!S) return false; // If this is a switch or loop that defines its own break scope, then we can // include it and anything inside of it. @@ -1636,8 +1604,7 @@ bool CodeGenFunction::containsBreak(const Stmt *S) { } bool CodeGenFunction::mightAddDeclToScope(const Stmt *S) { - if (!S) - return false; + if (!S) return false; // Some statement kinds add a scope and thus never add a decl to the current // scope. Note, this list is longer than the list of statements that might @@ -1690,11 +1657,11 @@ bool CodeGenFunction::ConstantFoldsToSimpleInteger(const Expr *Cond, // to bool. Expr::EvalResult Result; if (!Cond->EvaluateAsInt(Result, getContext())) - return false; // Not foldable, not integer or not fully evaluatable. + return false; // Not foldable, not integer or not fully evaluatable. llvm::APSInt Int = Result.Val.getInt(); if (!AllowLabels && CodeGenFunction::ContainsLabel(Cond)) - return false; // Contains a label. + return false; // Contains a label. ResultInt = Int; return true; @@ -1987,7 +1954,7 @@ void CodeGenFunction::EmitBranchOnBoolExpr( // br(c ? 
throw x : y, t, f) -> br(c, br(throw x, t, f), br(y, t, f) // Fold this to: // br(c, throw x, br(y, t, f)) - EmitCXXThrowExpr(Throw, /*KeepInsertionPoint*/ false); + EmitCXXThrowExpr(Throw, /*KeepInsertionPoint*/false); return; } @@ -2061,8 +2028,8 @@ static void emitNonZeroVLAInit(CodeGenFunction &CGF, QualType baseType, CGBuilderTy &Builder = CGF.Builder; CharUnits baseSize = CGF.getContext().getTypeSizeInChars(baseType); - llvm::Value *baseSizeInChars = - llvm::ConstantInt::get(CGF.IntPtrTy, baseSize.getQuantity()); + llvm::Value *baseSizeInChars + = llvm::ConstantInt::get(CGF.IntPtrTy, baseSize.getQuantity()); Address begin = dest.withElementType(CGF.Int8Ty); llvm::Value *end = Builder.CreateInBoundsGEP( @@ -2079,7 +2046,8 @@ static void emitNonZeroVLAInit(CodeGenFunction &CGF, QualType baseType, llvm::PHINode *cur = Builder.CreatePHI(begin.getType(), 2, "vla.cur"); cur->addIncoming(begin.getPointer(), originBB); - CharUnits curAlign = dest.getAlignment().alignmentOfArrayElement(baseSize); + CharUnits curAlign = + dest.getAlignment().alignmentOfArrayElement(baseSize); // memcpy the individual element bit-pattern. Builder.CreateMemCpy(Address(cur, CGF.Int8Ty, curAlign), src, baseSizeInChars, @@ -2087,7 +2055,7 @@ static void emitNonZeroVLAInit(CodeGenFunction &CGF, QualType baseType, // Go to the next element. llvm::Value *next = - Builder.CreateInBoundsGEP(CGF.Int8Ty, cur, baseSizeInChars, "vla.next"); + Builder.CreateInBoundsGEP(CGF.Int8Ty, cur, baseSizeInChars, "vla.next"); // Leave if that's the end of the VLA. llvm::Value *done = Builder.CreateICmpEQ(next, end, "vla-init.isdone"); @@ -2097,7 +2065,8 @@ static void emitNonZeroVLAInit(CodeGenFunction &CGF, QualType baseType, CGF.EmitBlock(contBB); } -void CodeGenFunction::EmitNullInitialization(Address DestPtr, QualType Ty) { +void +CodeGenFunction::EmitNullInitialization(Address DestPtr, QualType Ty) { // Ignore empty classes in C++. 
if (getLangOpts().CPlusPlus) { if (const RecordType *RT = Ty->getAs()) { @@ -2118,8 +2087,9 @@ void CodeGenFunction::EmitNullInitialization(Address DestPtr, QualType Ty) { // Don't bother emitting a zero-byte memset. if (size.isZero()) { // But note that getTypeInfo returns 0 for a VLA. - if (const VariableArrayType *vlaType = dyn_cast_or_null( - getContext().getAsArrayType(Ty))) { + if (const VariableArrayType *vlaType = + dyn_cast_or_null( + getContext().getAsArrayType(Ty))) { auto VlaSize = getVLASize(vlaType); SizeVal = VlaSize.NumElts; CharUnits eltSize = getContext().getTypeSizeInChars(VlaSize.Type); @@ -2140,21 +2110,20 @@ void CodeGenFunction::EmitNullInitialization(Address DestPtr, QualType Ty) { // like -1, which happens to be the pattern used by member-pointers. if (!CGM.getTypes().isZeroInitializable(Ty)) { // For a VLA, emit a single element, then splat that over the VLA. - if (vla) - Ty = getContext().getBaseElementType(vla); + if (vla) Ty = getContext().getBaseElementType(vla); llvm::Constant *NullConstant = CGM.EmitNullConstant(Ty); - llvm::GlobalVariable *NullVariable = new llvm::GlobalVariable( - CGM.getModule(), NullConstant->getType(), - /*isConstant=*/true, llvm::GlobalVariable::PrivateLinkage, NullConstant, - Twine()); + llvm::GlobalVariable *NullVariable = + new llvm::GlobalVariable(CGM.getModule(), NullConstant->getType(), + /*isConstant=*/true, + llvm::GlobalVariable::PrivateLinkage, + NullConstant, Twine()); CharUnits NullAlign = DestPtr.getAlignment(); NullVariable->setAlignment(NullAlign.getAsAlign()); Address SrcPtr(NullVariable, Builder.getInt8Ty(), NullAlign); - if (vla) - return emitNonZeroVLAInit(*this, Ty, DestPtr, SrcPtr, SizeVal); + if (vla) return emitNonZeroVLAInit(*this, Ty, DestPtr, SrcPtr, SizeVal); // Get and call the appropriate llvm.memcpy overload. 
Builder.CreateMemCpy(DestPtr, SrcPtr, SizeVal, false); @@ -2181,14 +2150,13 @@ llvm::BlockAddress *CodeGenFunction::GetAddrOfLabel(const LabelDecl *L) { llvm::BasicBlock *CodeGenFunction::GetIndirectGotoBlock() { // If we already made the indirect branch for indirect goto, return its block. - if (IndirectBranch) - return IndirectBranch->getParent(); + if (IndirectBranch) return IndirectBranch->getParent(); CGBuilderTy TmpBuilder(*this, createBasicBlock("indirectgoto")); // Create the PHI node that indirect gotos will add entries to. - llvm::Value *DestVal = - TmpBuilder.CreatePHI(Int8PtrTy, 0, "indirect.goto.dest"); + llvm::Value *DestVal = TmpBuilder.CreatePHI(Int8PtrTy, 0, + "indirect.goto.dest"); // Create the indirect branch instruction. IndirectBranch = TmpBuilder.CreateIndirectBr(DestVal); @@ -2228,7 +2196,7 @@ llvm::Value *CodeGenFunction::emitArrayLength(const ArrayType *origArrayType, // We have some number of constant-length arrays, so addr should // have LLVM type [M x [N x [...]]]*. Build a GEP that walks // down to the first element of addr. - SmallVector gepIndices; + SmallVector gepIndices; // GEP down to the array type. 
llvm::ConstantInt *zero = Builder.getInt32(0); @@ -2238,17 +2206,18 @@ llvm::Value *CodeGenFunction::emitArrayLength(const ArrayType *origArrayType, QualType eltType; llvm::ArrayType *llvmArrayType = - dyn_cast(addr.getElementType()); + dyn_cast(addr.getElementType()); while (llvmArrayType) { assert(isa(arrayType)); - assert(cast(arrayType)->getSize().getZExtValue() == - llvmArrayType->getNumElements()); + assert(cast(arrayType)->getSize().getZExtValue() + == llvmArrayType->getNumElements()); gepIndices.push_back(zero); countFromCLAs *= llvmArrayType->getNumElements(); eltType = arrayType->getElementType(); - llvmArrayType = dyn_cast(llvmArrayType->getElementType()); + llvmArrayType = + dyn_cast(llvmArrayType->getElementType()); arrayType = getContext().getAsArrayType(arrayType->getElementType()); assert((!llvmArrayType || arrayType) && "LLVM and Clang types are out-of-synch"); @@ -2269,15 +2238,16 @@ llvm::Value *CodeGenFunction::emitArrayLength(const ArrayType *origArrayType, addr = addr.withElementType(baseType); } else { // Create the actual GEP. - addr = Address(Builder.CreateInBoundsGEP(addr.getElementType(), - addr.getPointer(), gepIndices, - "array.begin"), - ConvertTypeForMem(eltType), addr.getAlignment()); + addr = Address(Builder.CreateInBoundsGEP( + addr.getElementType(), addr.getPointer(), gepIndices, "array.begin"), + ConvertTypeForMem(eltType), + addr.getAlignment()); } baseType = eltType; - llvm::Value *numElements = llvm::ConstantInt::get(SizeTy, countFromCLAs); + llvm::Value *numElements + = llvm::ConstantInt::get(SizeTy, countFromCLAs); // If we had any VLA dimensions, factor them in. 
if (numVLAElements) @@ -2313,10 +2283,11 @@ CodeGenFunction::getVLASize(const VariableArrayType *type) { } } while ((type = getContext().getAsVariableArrayType(elementType))); - return {numElements, elementType}; + return { numElements, elementType }; } -CodeGenFunction::VlaSizePair CodeGenFunction::getVLAElements1D(QualType type) { +CodeGenFunction::VlaSizePair +CodeGenFunction::getVLAElements1D(QualType type) { const VariableArrayType *vla = getContext().getAsVariableArrayType(type); assert(vla && "type was not a variable array type!"); return getVLAElements1D(vla); @@ -2327,7 +2298,7 @@ CodeGenFunction::getVLAElements1D(const VariableArrayType *Vla) { llvm::Value *VlaSize = VLASizeMap[Vla->getSizeExpr()]; assert(VlaSize && "no size for VLA!"); assert(VlaSize->getType() == SizeTy); - return {VlaSize, Vla->getElementType()}; + return { VlaSize, Vla->getElementType() }; } void CodeGenFunction::EmitVariablyModifiedType(QualType type) { @@ -2485,7 +2456,7 @@ void CodeGenFunction::EmitVariablyModifiedType(QualType type) { } while (type->isVariablyModifiedType()); } -Address CodeGenFunction::EmitVAListRef(const Expr *E) { +Address CodeGenFunction::EmitVAListRef(const Expr* E) { if (getContext().getBuiltinVaListType()->isArrayType()) return EmitPointerWithAlignment(E); return EmitLValue(E).getAddress(*this); @@ -2509,11 +2480,9 @@ CodeGenFunction::protectFromPeepholes(RValue rvalue) { // is trunc(zext) folding, but if we add more, we can easily // extend this protection. - if (!rvalue.isScalar()) - return PeepholeProtection(); + if (!rvalue.isScalar()) return PeepholeProtection(); llvm::Value *value = rvalue.getScalarVal(); - if (!isa(value)) - return PeepholeProtection(); + if (!isa(value)) return PeepholeProtection(); // Just make an extra bitcast. 
assert(HaveInsertPoint()); @@ -2526,8 +2495,7 @@ CodeGenFunction::protectFromPeepholes(RValue rvalue) { } void CodeGenFunction::unprotectFromPeepholes(PeepholeProtection protection) { - if (!protection.Inst) - return; + if (!protection.Inst) return; // In theory, we could try to duplicate the peepholes now, but whatever. protection.Inst->eraseFromParent(); @@ -2634,7 +2602,7 @@ Address CodeGenFunction::EmitFieldAnnotations(const FieldDecl *D, return Address(V, Addr.getElementType(), Addr.getAlignment()); } -CodeGenFunction::CGCapturedStmtInfo::~CGCapturedStmtInfo() {} +CodeGenFunction::CGCapturedStmtInfo::~CGCapturedStmtInfo() { } CodeGenFunction::SanitizerScope::SanitizerScope(CodeGenFunction *CGF) : CGF(CGF) { @@ -2767,7 +2735,8 @@ void CodeGenFunction::checkTargetFeatures(SourceLocation Loc, if (!Builtin::evaluateRequiredTargetFeatures( FeatureList, CallerFeatureMap) && !IsHipStdPar) { CGM.getDiags().Report(Loc, diag::err_builtin_needs_feature) - << TargetDecl->getDeclName() << FeatureList; + << TargetDecl->getDeclName() + << FeatureList; } } else if (!TargetDecl->isMultiVersion() && TargetDecl->hasAttr()) { diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 4436d9f987fcbea..f44cdaaa9a0d0d8 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -1752,7 +1752,7 @@ class CodeGenFunction : public CodeGenTypeCache { /// EmitBranchThroughCleanup - Emit a branch from the current insert /// block through the normal cleanup handling code (if any) and then /// on to \arg Dest. - void EmitBranchThroughCleanup(JumpDest Dest, bool AfterSync = false); + void EmitBranchThroughCleanup(JumpDest Dest); /// isObviouslyBranchWithoutCleanups - Return true if a branch to the /// specified destination obviously has no cleanups to run. 
'false' is always @@ -5139,7 +5139,7 @@ class CodeGenFunction : public CodeGenTypeCache { /// EmitScalarExpr - Emit the computation of the specified expression of LLVM /// scalar type, returning the result. - llvm::Value *EmitScalarExpr(const Expr *E, bool IgnoreResultAssign = false); + llvm::Value *EmitScalarExpr(const Expr *E , bool IgnoreResultAssign = false); void EmitScalarExprIntoLValue(const Expr *E, LValue dest, bool isInit); /// Emit a conversion from the specified type to the specified destination diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 6f6d7dd0d86256e..64fee5e7255138f 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2328,13 +2328,6 @@ void CodeGenModule::GenKitsuneArgMetadata(llvm::Function *Fn, for (unsigned i = 0, e = FD->getNumParams(); i != e; ++i) { const ParmVarDecl *parm = FD->getParamDecl(i); QualType ty = parm->getType(); - if (ty.getTypePtr()->isStructureOrClassType()) { - ErrorUnsupported( - parm, - "cannot handle kitsune memaccess attribute on a struct or class"); - break; - } - const Decl *PDecl = parm; if (auto *TD = dyn_cast(ty)) PDecl = TD->getDecl(); @@ -2343,9 +2336,13 @@ void CodeGenModule::GenKitsuneArgMetadata(llvm::Function *Fn, for (llvm::Argument *fnArg = Fn->arg_begin(); fnArg != Fn->arg_end(); ++fnArg) { if (fnArg->getArgNo() == i) { - // Note: this will break for structs passed by value - if (const KitsuneMemAccessAttr *A = - PDecl->getAttr()) { + if (const auto *A = PDecl->getAttr()) { + if (ty.getTypePtr()->isStructureOrClassType()) { + ErrorUnsupported(parm, "cannot handle kitsune memaccess " + "attribute on a struct or class"); + break; + } + if (A->isWriteOnly()) fnArg->addAttr( llvm::Attribute::get(Context, "kitsune.writeonly")); diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index b08ac1a58c28ab4..020fd9c91562606 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp 
@@ -13,6 +13,7 @@ #include "ToolChains/CommonArgs.h" #include "ToolChains/Flang.h" #include "ToolChains/InterfaceStubs.h" +#include "kitsune/Config/config.h" #include "clang/Basic/ObjCRuntime.h" #include "clang/Basic/Sanitizers.h" #include "clang/Config/config.h" @@ -25,7 +26,6 @@ #include "clang/Driver/SanitizerArgs.h" #include "clang/Driver/Tapir.h" #include "clang/Driver/XRayArgs.h" -#include "kitsune/Config/config.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" @@ -1578,173 +1578,221 @@ void ToolChain::ExtractArgsFromString(const char *s, ArgStringList &CmdArgs, void ToolChain::AddKitsunePreprocessorArgs(const ArgList &Args, ArgStringList &CmdArgs) const { std::optional TapirTarget = parseTapirTarget(Args); + bool IsKokkos = D.CCCIsCXX() && Args.hasArg(options::OPT_fkokkos); + + if (TapirTarget) { + switch (*TapirTarget) { + case TapirTargetID::Serial: + case TapirTargetID::None: + break; + case llvm::TapirTargetID::Cuda: + CmdArgs.push_back("-D_tapir_cuda_target"); + ExtractArgsFromString(KITSUNE_CUDA_EXTRA_PREPROCESSOR_FLAGS, CmdArgs, + Args); + break; + case llvm::TapirTargetID::Hip: + CmdArgs.push_back("-D_tapir_hip_target"); + ExtractArgsFromString(KITSUNE_HIP_EXTRA_PREPROCESSOR_FLAGS, CmdArgs, + Args); + break; + case llvm::TapirTargetID::OpenCilk: + ExtractArgsFromString(KITSUNE_OPENCILK_EXTRA_PREPROCESSOR_FLAGS, CmdArgs, + Args); + break; + case llvm::TapirTargetID::OpenMP: + ExtractArgsFromString(KITSUNE_OPENMP_EXTRA_PREPROCESSOR_FLAGS, CmdArgs, Args); + break; + case llvm::TapirTargetID::Qthreads: + ExtractArgsFromString(KITSUNE_QTHREADS_EXTRA_PREPROCESSOR_FLAGS, CmdArgs, Args); + break; + case llvm::TapirTargetID::Realm: + ExtractArgsFromString(KITSUNE_REALM_EXTRA_PREPROCESSOR_FLAGS, CmdArgs, Args); + break; + default: + llvm::report_fatal_error("internal error -- unhandled tapir target ID!"); + break; + } + } - // No need to report an error here. That will have been done already. 
- if (not TapirTarget) - return; - - switch (*TapirTarget) { - case TapirTargetID::Serial: - case TapirTargetID::None: - break; - case llvm::TapirTargetID::Cuda: - ExtractArgsFromString(KITSUNE_CUDA_PREPROCESSOR_FLAGS, CmdArgs, Args); - break; - case llvm::TapirTargetID::Hip: - ExtractArgsFromString(KITSUNE_HIP_PREPROCESSOR_FLAGS, CmdArgs, Args); - break; - case llvm::TapirTargetID::OpenCilk: - ExtractArgsFromString(KITSUNE_OPENCILK_PREPROCESSOR_FLAGS, CmdArgs, Args); - break; - case llvm::TapirTargetID::OpenMP: - ExtractArgsFromString(KITSUNE_OPENMP_PREPROCESSOR_FLAGS, CmdArgs, Args); - break; - case llvm::TapirTargetID::Qthreads: - ExtractArgsFromString(KITSUNE_QTHREADS_PREPROCESSOR_FLAGS, CmdArgs, Args); - break; - case llvm::TapirTargetID::Realm: - ExtractArgsFromString(KITSUNE_REALM_PREPROCESSOR_FLAGS, CmdArgs, Args); - break; - default: - llvm::report_fatal_error("internal error -- unhandled tapir target ID!"); - break; + if (IsKokkos) { + std::string InclDir = concat(D.ResourceDir, "include", "kokkos"); + CmdArgs.push_back(Args.MakeArgString(StringRef("-I") + InclDir)); + ExtractArgsFromString(KITSUNE_KOKKOS_EXTRA_PREPROCESSOR_FLAGS, CmdArgs, + Args); } - if (D.CCCIsCXX() && Args.hasArg(options::OPT_fkokkos)) { - ExtractArgsFromString(KITSUNE_KOKKOS_LINKER_FLAGS, CmdArgs, Args); + if (TapirTarget or IsKokkos) { + std::string InclDir = concat(D.ResourceDir, "include"); + CmdArgs.push_back(Args.MakeArgString(StringRef("-I") + InclDir)); } } void ToolChain::AddKitsuneCompilerArgs(const ArgList& Args, ArgStringList& CmdArgs) const { std::optional TapirTarget = parseTapirTarget(Args); + bool IsKokkos = D.CCCIsCXX() && Args.hasArg(options::OPT_fkokkos); + + if (TapirTarget) { + switch (*TapirTarget) { + case TapirTargetID::Serial: + case TapirTargetID::None: + break; + case llvm::TapirTargetID::Cuda: + ExtractArgsFromString(KITSUNE_CUDA_EXTRA_COMPILER_FLAGS, CmdArgs, Args); + break; + case llvm::TapirTargetID::Hip: + 
ExtractArgsFromString(KITSUNE_HIP_EXTRA_COMPILER_FLAGS, CmdArgs, Args); + break; + case llvm::TapirTargetID::OpenCilk: + ExtractArgsFromString(KITSUNE_OPENCILK_EXTRA_COMPILER_FLAGS, CmdArgs, + Args); + break; + case llvm::TapirTargetID::OpenMP: + ExtractArgsFromString(KITSUNE_OPENMP_EXTRA_COMPILER_FLAGS, CmdArgs, Args); + break; + case llvm::TapirTargetID::Qthreads: + ExtractArgsFromString(KITSUNE_QTHREADS_EXTRA_COMPILER_FLAGS, CmdArgs, Args); + break; + case llvm::TapirTargetID::Realm: + ExtractArgsFromString(KITSUNE_REALM_EXTRA_COMPILER_FLAGS, CmdArgs, Args); + break; + default: + llvm::report_fatal_error("internal error -- unhandled tapir target ID!"); + break; + } + } - // No need to report an error here. That will have been done already. - if (not TapirTarget) + if (IsKokkos) { + ExtractArgsFromString(KITSUNE_KOKKOS_EXTRA_COMPILER_FLAGS, CmdArgs, Args); + } +} + +static void addOpenCilkRuntimeRunPath(const ToolChain &TC, const ArgList &Args, + ArgStringList &CmdArgs, + const llvm::Triple &Triple) { + // Allow the -fno-rtlib-add-rpath flag to prevent adding this default + // directory to the runpath. 
+ if (!Args.hasFlag(options::OPT_frtlib_add_rpath, + options::OPT_fno_rtlib_add_rpath, true)) return; - switch (*TapirTarget) { - case TapirTargetID::Serial: - case TapirTargetID::None: - break; - case llvm::TapirTargetID::Cuda: - ExtractArgsFromString(KITSUNE_CUDA_COMPILER_FLAGS, CmdArgs, Args); - break; - case llvm::TapirTargetID::Hip: - ExtractArgsFromString(KITSUNE_HIP_COMPILER_FLAGS, CmdArgs, Args); - break; - case llvm::TapirTargetID::OpenCilk: - ExtractArgsFromString(KITSUNE_OPENCILK_COMPILER_FLAGS, CmdArgs, Args); - break; - case llvm::TapirTargetID::OpenMP: - ExtractArgsFromString(KITSUNE_OPENMP_COMPILER_FLAGS, CmdArgs, Args); - break; - case llvm::TapirTargetID::Qthreads: - ExtractArgsFromString(KITSUNE_QTHREADS_COMPILER_FLAGS, CmdArgs, Args); - break; - case llvm::TapirTargetID::Realm: - ExtractArgsFromString(KITSUNE_REALM_COMPILER_FLAGS, CmdArgs, Args); - break; - default: - llvm::report_fatal_error("internal error -- unhandled tapir target ID!"); - break; + if (std::optional RPath = TC.getOpenCilkRuntimePath(Args)) { + if (TC.getVFS().exists(*RPath)) { + CmdArgs.push_back("-L"); + CmdArgs.push_back(Args.MakeArgString(RPath->c_str())); + CmdArgs.push_back("-rpath"); + CmdArgs.push_back(Args.MakeArgString(RPath->c_str())); + if (Triple.isOSBinFormatELF()) + CmdArgs.push_back("--enable-new-dtags"); + } } +} - if (D.CCCIsCXX() && Args.hasArg(options::OPT_fkokkos)) { - ExtractArgsFromString(KITSUNE_KOKKOS_COMPILER_FLAGS, CmdArgs, Args); - } +static StringRef getArchNameForOpenCilkRTLib(const ToolChain &TC, + const ArgList &Args) { + return getArchNameForCompilerRTLib(TC, Args); } void ToolChain::AddKitsuneLinkerArgs(const ArgList &Args, ArgStringList &CmdArgs) const { - // For now, this is just added for completeness. All of the functionality is - // in addTapirRuntimeLibArgs. At some point, the calls to that function will - // be replaced with calls to this. 
- llvm::errs() << "AddKitsuneLinkerArgs\n"; -} - -void ToolChain::AddOpenCilkIncludeDir(const ArgList &Args, - ArgStringList &CmdArgs) const { - if (!Args.hasArg(options::OPT_opencilk_resource_dir_EQ)) - return; + std::optional TapirTarget = parseTapirTarget(Args); + bool IsKokkos = D.CCCIsCXX() && Args.hasArg(options::OPT_fkokkos); - const Arg *A = Args.getLastArg(options::OPT_opencilk_resource_dir_EQ); - SmallString<128> P; + if (TapirTarget) { + switch (*TapirTarget) { + case TapirTargetID::Serial: + case TapirTargetID::None: + break; - // Check for an include directory. - P.assign(A->getValue()); - llvm::sys::path::append(P, "include"); - if (getVFS().exists(P)) { - addSystemInclude(Args, CmdArgs, P); - } else { - D.Diag(diag::err_drv_opencilk_resource_dir_missing_include) - << A->getAsString(Args); - } -} + case llvm::TapirTargetID::Cuda: + CmdArgs.push_back( + Args.MakeArgString(StringRef("-L") + KITSUNE_CUDA_LIBRARY_DIR)); + CmdArgs.push_back( + Args.MakeArgString(StringRef("-L") + KITSUNE_CUDA_STUBS_DIR)); + ExtractArgsFromString("-lcudart -lcuda", CmdArgs, Args); + ExtractArgsFromString(KITSUNE_CUDA_EXTRA_LINKER_FLAGS, CmdArgs, Args); + break; -ToolChain::path_list -ToolChain::getOpenCilkRuntimePaths(const ArgList &Args) const { - if (!Args.hasArg(options::OPT_opencilk_resource_dir_EQ)) - return getRuntimePaths(); + case llvm::TapirTargetID::Hip: + llvm_unreachable("Add default libraries for the Hip backend"); + ExtractArgsFromString(KITSUNE_HIP_EXTRA_LINKER_FLAGS, CmdArgs, Args); + break; - path_list Paths; + case llvm::TapirTargetID::OpenCilk: { + bool StaticOpenCilk = Args.hasArg(options::OPT_static); + bool UseAsan = getSanitizerArgs(Args).needsAsanRt(); - if (!Args.hasArg(options::OPT_opencilk_resource_dir_EQ)) { - Paths = getRuntimePaths(); - Paths.push_back(getCompilerRTPath()); - return Paths; - } + // Link the correct Cilk personality fn + if (getDriver().CCCIsCXX()) + CmdArgs.push_back(Args.MakeArgString(getOpenCilkRT( + Args, + UseAsan ? 
"opencilk-asan-personality-cpp" + : "opencilk-personality-cpp", + StaticOpenCilk ? ToolChain::FT_Static : ToolChain::FT_Shared))); + else + CmdArgs.push_back(Args.MakeArgString(getOpenCilkRT( + Args, + UseAsan ? "opencilk-asan-personality-c" : "opencilk-personality-c", + StaticOpenCilk ? ToolChain::FT_Static : ToolChain::FT_Shared))); + + // Link the opencilk runtime. We do this after linking the personality + // function, to ensure that symbols are resolved correctly when using + // static linking. + CmdArgs.push_back(Args.MakeArgString(getOpenCilkRT( + Args, UseAsan ? "opencilk-asan" : "opencilk", + StaticOpenCilk ? ToolChain::FT_Static : ToolChain::FT_Shared))); - // If -opencilk-resource-dir= is specified, try to use that directory, and - // raise an error if that fails. - const Arg *A = Args.getLastArg(options::OPT_opencilk_resource_dir_EQ); + // Add to the executable's runpath the default directory containing + // OpenCilk runtime. + addOpenCilkRuntimeRunPath(*this, Args, CmdArgs, Triple); - // Try the triple passed to driver as --target=. - { - SmallString<128> P(A->getValue()); - llvm::sys::path::append(P, "lib", getTriple().str()); - Paths.push_back(std::string(P.str())); - } - // Try excluding the triple. - { - SmallString<128> P(A->getValue()); - if (Triple.isOSUnknown()) { - llvm::sys::path::append(P, "lib"); - } else { - llvm::sys::path::append(P, "lib", getOSLibName()); + ExtractArgsFromString(KITSUNE_OPENCILK_EXTRA_LINKER_FLAGS, CmdArgs, Args); + break; } - Paths.push_back(std::string(P.str())); - } - return Paths; -} + case llvm::TapirTargetID::OpenMP: + ExtractArgsFromString(KITSUNE_OPENMP_EXTRA_LINKER_FLAGS, CmdArgs, Args); + break; -static void addOpenCilkRuntimeRunPath(const ToolChain &TC, const ArgList &Args, - ArgStringList &CmdArgs, - const llvm::Triple &Triple) { - // Allow the -fno-rtlib-add-rpath flag to prevent adding this default - // directory to the runpath. 
- if (!Args.hasFlag(options::OPT_frtlib_add_rpath, - options::OPT_fno_rtlib_add_rpath, true)) - return; + case llvm::TapirTargetID::Qthreads: + ExtractArgsFromString(KITSUNE_QTHREADS_EXTRA_LINKER_FLAGS, CmdArgs, Args); + break; - bool FoundCandidate = false; - for (auto CandidateRPath : TC.getOpenCilkRuntimePaths(Args)) { - if (TC.getVFS().exists(CandidateRPath)) { - FoundCandidate = true; - CmdArgs.push_back("-L"); - CmdArgs.push_back(Args.MakeArgString(CandidateRPath.c_str())); - CmdArgs.push_back("-rpath"); - CmdArgs.push_back(Args.MakeArgString(CandidateRPath.c_str())); + case llvm::TapirTargetID::Realm: + ExtractArgsFromString(KITSUNE_REALM_EXTRA_LINKER_FLAGS, CmdArgs, Args); + break; + + default: + llvm::report_fatal_error("internal error -- unhandled tapir target ID!"); + break; } } - if (FoundCandidate && Triple.isOSBinFormatELF()) - CmdArgs.push_back("--enable-new-dtags"); + + if (IsKokkos) { + std::string LibDir = concat(D.ResourceDir, "lib64"); + CmdArgs.push_back(Args.MakeArgString(StringRef("-L") + LibDir)); + CmdArgs.push_back(Args.MakeArgString("-rpath")); + CmdArgs.push_back(Args.MakeArgString(LibDir)); + CmdArgs.push_back(Args.MakeArgString("-lkokkoscore")); + ExtractArgsFromString(KITSUNE_KOKKOS_EXTRA_LINKER_FLAGS, CmdArgs, Args); + } + + // The pthread functions are now part of libc and was removed from glibc 2.34. + // There is no need to explicitly link this in unless we have older versions + // of libc around. We should consider removing this from here at some point + // when we are certain we don't need this any longer. 
+ if (TapirTarget or IsKokkos) { + std::string LibDir = concat(D.ResourceDir, "lib"); + CmdArgs.push_back(Args.MakeArgString(StringRef("-L") + LibDir)); + CmdArgs.push_back(Args.MakeArgString("-rpath")); + CmdArgs.push_back(Args.MakeArgString(LibDir)); + ExtractArgsFromString("-lkitrt -lpthread -ldl", CmdArgs, Args); + } } -static StringRef getArchNameForOpenCilkRTLib(const ToolChain &TC, - const ArgList &Args) { - return getArchNameForCompilerRTLib(TC, Args); +std::optional +ToolChain::getOpenCilkRuntimePath(const ArgList &Args) const { + return getRuntimePath(); } std::string ToolChain::getOpenCilkBCBasename(const ArgList &Args, @@ -1767,8 +1815,8 @@ std::optional ToolChain::getOpenCilkBC(const ArgList &Args, // Check for runtime files without the architecture first. std::string BCBasename = getOpenCilkBCBasename(Args, Component, /*AddArch=*/false); - for (auto RuntimePath : getOpenCilkRuntimePaths(Args)) { - SmallString<128> P(RuntimePath); + if (std::optional RuntimePath = getOpenCilkRuntimePath(Args)) { + SmallString<128> P(*RuntimePath); llvm::sys::path::append(P, BCBasename); if (getVFS().exists(P)) return std::optional(std::string(P.str())); @@ -1777,8 +1825,8 @@ std::optional ToolChain::getOpenCilkBC(const ArgList &Args, // Fall back to the OpenCilk name with the arch if the no-arch version does // not exist. BCBasename = getOpenCilkBCBasename(Args, Component, /*AddArch=*/true); - for (auto RuntimePath : getOpenCilkRuntimePaths(Args)) { - SmallString<128> P(RuntimePath); + if (std::optional RuntimePath = getOpenCilkRuntimePath(Args)) { + SmallString<128> P(*RuntimePath); llvm::sys::path::append(P, BCBasename); if (getVFS().exists(P)) return std::optional(std::string(P.str())); @@ -1790,20 +1838,6 @@ std::optional ToolChain::getOpenCilkBC(const ArgList &Args, void ToolChain::AddOpenCilkABIBitcode(const ArgList &Args, ArgStringList &CmdArgs, bool IsLTO) const { - // If --opencilk-abi-bitcode= is specified, use that specified path. 
- if (Args.hasArg(options::OPT_opencilk_abi_bitcode_EQ)) { - const Arg *A = Args.getLastArg(options::OPT_opencilk_abi_bitcode_EQ); - SmallString<128> P(A->getValue()); - if (!getVFS().exists(P)) { - getDriver().Diag(diag::err_drv_opencilk_missing_abi_bitcode) - << A->getAsString(Args); - } - if (IsLTO) - CmdArgs.push_back( - Args.MakeArgString("--plugin-opt=opencilk-abi-bitcode=" + P)); - return; - } - bool UseAsan = getSanitizerArgs(Args).needsAsanRt(); StringRef OpenCilkBCName = UseAsan ? "opencilk-asan-abi" : "opencilk-abi"; if (auto OpenCilkABIBCFilename = getOpenCilkBC(Args, OpenCilkBCName)) { @@ -1816,12 +1850,6 @@ void ToolChain::AddOpenCilkABIBitcode(const ArgList &Args, return; } - // Check if libopencilk is in LD_LIBRARY_PATH, and if it is, we're OK - if (llvm::sys::Process::FindInEnvPath("LD_LIBRARY_PATH", - "libopencilk-abi.bc")) { - return; - } - // Error if we could not find a bitcode file. getDriver().Diag(diag::err_drv_opencilk_missing_abi_bitcode) << getOpenCilkBCBasename(Args, OpenCilkBCName, /*AddArch=*/false); @@ -1858,30 +1886,19 @@ std::string ToolChain::getOpenCilkRT(const ArgList &Args, StringRef Component, // Check for runtime files without the architecture first. std::string RTBasename = getOpenCilkRTBasename(Args, Component, Type, /*AddArch=*/false); - if (Args.hasArg(options::OPT_opencilk_resource_dir_EQ)) { - // If opencilk-resource-dir is specified, look for the library in that - // directory. - for (auto RuntimePath : getOpenCilkRuntimePaths(Args)) { - SmallString<128> P(RuntimePath); - llvm::sys::path::append(P, RTBasename); - if (getVFS().exists(P)) - return std::string(P.str()); - } - } else { - for (const auto &LibPath : getLibraryPaths()) { - SmallString<128> P(LibPath); - llvm::sys::path::append(P, RTBasename); - if (getVFS().exists(P)) - // If we found the library in LibraryPaths, let the linker resolve it. 
- return std::string(("-l" + Component).str()); - } + for (const auto &LibPath : getLibraryPaths()) { + SmallString<128> P(LibPath); + llvm::sys::path::append(P, RTBasename); + if (getVFS().exists(P)) + // If we found the library in LibraryPaths, let the linker resolve it. + return std::string(("-l" + Component).str()); } // Fall back to the OpenCilk name with the arch if the no-arch version does // not exist. RTBasename = getOpenCilkRTBasename(Args, Component, Type, /*AddArch=*/true); - for (auto RuntimePath : getOpenCilkRuntimePaths(Args)) { - SmallString<128> P(RuntimePath); + if (std::optional RuntimePath = getOpenCilkRuntimePath(Args)) { + SmallString<128> P(*RuntimePath); llvm::sys::path::append(P, RTBasename); if (getVFS().exists(P)) return std::string(P.str()); @@ -1890,90 +1907,3 @@ std::string ToolChain::getOpenCilkRT(const ArgList &Args, StringRef Component, // Otherwise, trust the linker to find the library on the system. return std::string(("-l" + Component).str()); } - -void ToolChain::AddTapirRuntimeLibArgs(const ArgList &Args, - ArgStringList &CmdArgs) const { - std::optional TapirTarget = parseTapirTarget(Args); - - // No need to report an error here. That will have been done already. - if (not TapirTarget) - return; - - switch (*TapirTarget) { - case TapirTargetID::Serial: - case TapirTargetID::None: - break; - - case llvm::TapirTargetID::Cuda: - ExtractArgsFromString(KITSUNE_CUDA_LINKER_FLAGS, CmdArgs, Args); - break; - - case llvm::TapirTargetID::Hip: - ExtractArgsFromString(KITSUNE_HIP_LINKER_FLAGS, CmdArgs, Args); - break; - - case llvm::TapirTargetID::OpenCilk: { - bool StaticOpenCilk = Args.hasArg(options::OPT_static); - bool UseAsan = getSanitizerArgs(Args).needsAsanRt(); - - // Link the correct Cilk personality fn - if (getDriver().CCCIsCXX()) - CmdArgs.push_back(Args.MakeArgString(getOpenCilkRT( - Args, - UseAsan ? "opencilk-asan-personality-cpp" - : "opencilk-personality-cpp", - StaticOpenCilk ? 
ToolChain::FT_Static : ToolChain::FT_Shared))); - else - CmdArgs.push_back(Args.MakeArgString(getOpenCilkRT( - Args, - UseAsan ? "opencilk-asan-personality-c" : "opencilk-personality-c", - StaticOpenCilk ? ToolChain::FT_Static : ToolChain::FT_Shared))); - - // FIXME KITSUNE: Do we need these? What about exceptions when using the - // OpenCilk backend -#if 0 - // Link the correct Cilk personality fn if running in opencilk mode. - if (Args.hasArg(options::OPT_fopencilk)) { - if (getDriver().CCCIsCXX()) - CmdArgs.push_back("-lopencilk-personality-cpp"); - else - CmdArgs.push_back("-lopencilk-personality-c"); - } -#endif // 0 - - // Link the opencilk runtime. We do this after linking the personality - // function, to ensure that symbols are resolved correctly when using static - // linking. - CmdArgs.push_back(Args.MakeArgString(getOpenCilkRT( - Args, UseAsan ? "opencilk-asan" : "opencilk", - StaticOpenCilk ? ToolChain::FT_Static : ToolChain::FT_Shared))); - - // Add to the executable's runpath the default directory containing OpenCilk - // runtime. 
- addOpenCilkRuntimeRunPath(*this, Args, CmdArgs, Triple); - - ExtractArgsFromString(KITSUNE_OPENCILK_LINKER_FLAGS, CmdArgs, Args); - break; - } - - case llvm::TapirTargetID::OpenMP: - ExtractArgsFromString(KITSUNE_OPENMP_LINKER_FLAGS, CmdArgs, Args); - break; - - case llvm::TapirTargetID::Qthreads: - ExtractArgsFromString(KITSUNE_QTHREADS_LINKER_FLAGS, CmdArgs, Args); - break; - - case llvm::TapirTargetID::Realm: - ExtractArgsFromString(KITSUNE_REALM_LINKER_FLAGS, CmdArgs, Args); - break; - - default: - llvm::report_fatal_error("internal error -- unhandled tapir target ID!"); - break; - } - - if (D.CCCIsCXX() && Args.hasArg(options::OPT_fkokkos)) { - ExtractArgsFromString(KITSUNE_KOKKOS_LINKER_FLAGS, CmdArgs, Args); - } -} diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index db990d597f2c998..fc72e8a2d6bb7e8 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -1320,11 +1320,6 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA, Args.addOptInFlag(CmdArgs, options::OPT_fdefine_target_os_macros, options::OPT_fno_define_target_os_macros); - - // KITSUNE FIXME: We probably don't support custom resource directories and - // include paths for OpenCilk, so this should be removed. - // If a custom OpenCilk resource directory is specified, add its include path. - getToolChain().AddOpenCilkIncludeDir(Args, CmdArgs); } // FIXME: Move to target hook. @@ -6456,21 +6451,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, break; } - // If an OpenCilk resource directory is specified, check that it is valid. 
- if (Args.hasArgNoClaim(options::OPT_opencilk_resource_dir_EQ)) { - bool ValidPathFound = false; - for (auto Path : getToolChain().getOpenCilkRuntimePaths(Args)) { - if (D.getVFS().exists(Path)) { - ValidPathFound = true; - break; - } - } - if (!ValidPathFound) - D.Diag(diag::err_drv_opencilk_resource_dir_missing_lib) - << Args.getLastArgNoClaim(options::OPT_opencilk_resource_dir_EQ) - ->getAsString(Args); - } - if (!CustomTarget) // Add the OpenCilk ABI bitcode file. getToolChain().AddOpenCilkABIBitcode(Args, CmdArgs); diff --git a/clang/lib/Driver/ToolChains/CloudABI.cpp b/clang/lib/Driver/ToolChains/CloudABI.cpp index f5b8480fb0ec9c8..81c26f084ff89e1 100644 --- a/clang/lib/Driver/ToolChains/CloudABI.cpp +++ b/clang/lib/Driver/ToolChains/CloudABI.cpp @@ -84,7 +84,7 @@ void cloudabi::Linker::ConstructJob(Compilation &C, const JobAction &JA, if (ToolChain.ShouldLinkCXXStdlib(Args)) ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs); - ToolChain.AddTapirRuntimeLibArgs(Args, CmdArgs); + ToolChain.AddKitsuneLinkerArgs(Args, CmdArgs); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { CmdArgs.push_back("-lc"); diff --git a/clang/lib/Driver/ToolChains/CrossWindows.cpp b/clang/lib/Driver/ToolChains/CrossWindows.cpp index eba34c6890ba51a..5fdf47e31d7950c 100644 --- a/clang/lib/Driver/ToolChains/CrossWindows.cpp +++ b/clang/lib/Driver/ToolChains/CrossWindows.cpp @@ -178,7 +178,7 @@ void tools::CrossWindows::Linker::ConstructJob( CmdArgs.push_back("-Bdynamic"); } - TC.AddTapirRuntimeLibArgs(Args, CmdArgs); + TC.AddKitsuneLinkerArgs(Args, CmdArgs); if (!Args.hasArg(options::OPT_nostdlib)) { if (!Args.hasArg(options::OPT_nodefaultlibs)) { diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index 78e6e17820c1db1..017959c1a961d28 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -3458,50 +3458,16 @@ void Darwin::printVerboseInfo(raw_ostream &OS) const { 
RocmInstallation->print(OS); } -ToolChain::path_list -DarwinClang::getOpenCilkRuntimePaths(const ArgList &Args) const { - path_list Paths; - if (!Args.hasArg(options::OPT_opencilk_resource_dir_EQ)) { - SmallString<128> P(getDriver().ResourceDir); - llvm::sys::path::append(P, "lib", "darwin"); - Paths.push_back(std::string(P.str())); - return Paths; - } - - // If -opencilk-resource-dir= is specified, try to use that directory, and - // raise an error if that fails. - const Arg *A = Args.getLastArg(options::OPT_opencilk_resource_dir_EQ); - - // Try the lib/darwin subdirectory - { - SmallString<128> P(A->getValue()); - llvm::sys::path::append(P, "lib", "darwin"); - Paths.push_back(std::string(P.str())); - } - // Try the lib subdirectory - { - SmallString<128> P(A->getValue()); - llvm::sys::path::append(P, "lib"); - Paths.push_back(std::string(P.str())); - } - return Paths; +std::optional +DarwinClang::getOpenCilkRuntimePath(const ArgList &Args) const { + SmallString<128> P(getDriver().ResourceDir); + llvm::sys::path::append(P, "lib", "darwin"); + return std::string(P); } void DarwinClang::AddOpenCilkABIBitcode(const ArgList &Args, ArgStringList &CmdArgs, bool IsLTO) const { - // If --opencilk-abi-bitcode= is specified, use that specified path. - if (Args.hasArg(options::OPT_opencilk_abi_bitcode_EQ)) { - const Arg *A = Args.getLastArg(options::OPT_opencilk_abi_bitcode_EQ); - SmallString<128> P(A->getValue()); - if (!getVFS().exists(P)) - getDriver().Diag(diag::err_drv_opencilk_missing_abi_bitcode) - << A->getAsString(Args); - if (IsLTO) - CmdArgs.push_back( - Args.MakeArgString("--opencilk-abi-bitcode=" + P)); - } - bool UseAsan = getSanitizerArgs(Args).needsAsanRt(); SmallString<128> BitcodeFilename(UseAsan ? 
"libopencilk-asan-abi" : "libopencilk-abi"); @@ -3509,8 +3475,8 @@ void DarwinClang::AddOpenCilkABIBitcode(const ArgList &Args, BitcodeFilename += getOSLibraryNameSuffix(); BitcodeFilename += ".bc"; - for (auto RuntimePath : getOpenCilkRuntimePaths(Args)) { - SmallString<128> P(RuntimePath); + if (std::optional RuntimePath = getOpenCilkRuntimePath(Args)) { + SmallString<128> P(*RuntimePath); llvm::sys::path::append(P, BitcodeFilename); if (getVFS().exists(P)) { // The same argument works regardless of IsLTO. @@ -3533,17 +3499,8 @@ void DarwinClang::AddLinkTapirRuntimeLib(const ArgList &Args, DarwinLibName += getOSLibraryNameSuffix(); DarwinLibName += IsShared ? "_dynamic.dylib" : ".a"; SmallString<128> Dir(getDriver().ResourceDir); - if (Args.hasArg(options::OPT_opencilk_resource_dir_EQ)) { - for (auto OpenCilkRuntimeDir : getOpenCilkRuntimePaths(Args)) { - if (getVFS().exists(OpenCilkRuntimeDir)) { - Dir.assign(OpenCilkRuntimeDir); - break; - } - } - } else { - llvm::sys::path::append( - Dir, "lib", (Opts & RLO_IsEmbedded) ? "macho_embedded" : "darwin"); - } + llvm::sys::path::append( + Dir, "lib", (Opts & RLO_IsEmbedded) ? "macho_embedded" : "darwin"); SmallString<128> P(Dir); llvm::sys::path::append(P, DarwinLibName); @@ -3561,7 +3518,7 @@ void DarwinClang::AddLinkTapirRuntimeLib(const ArgList &Args, // rpaths. This is currently true from this place, but we need to be // careful if this function is ever called before user's rpaths are emitted. if (Opts & RLO_AddRPath) { - assert(DarwinLibName.endswith(".dylib") && "must be a dynamic library"); + assert(DarwinLibName.ends_with(".dylib") && "must be a dynamic library"); // Add @executable_path to rpath to support having the dylib copied with // the executable. 
@@ -3578,16 +3535,12 @@ void DarwinClang::AddLinkTapirRuntimeLib(const ArgList &Args, void DarwinClang::AddLinkTapirRuntime(const ArgList &Args, ArgStringList &CmdArgs) const { std::optional TapirTarget = parseTapirTarget(Args); - if (not TapirTarget) { - return; - } else if (*TapirTarget == llvm::TapirTargetID::Last_TapirTargetID) { - const Arg *A = Args.getLastArg(options::OPT_ftapir_EQ); - getDriver().Diag(diag::err_drv_invalid_value) - << A->getAsString(Args) << A->getValue(); + if (not TapirTarget) return; - } - llvm_unreachable("AddLinkTapirRuntime has to be fixed for Darwin"); + llvm_unreachable( + "NOT IMPLEMENTED: AddLinkTapirRuntime has to be fixed for Darwin"); + // FIXME KITSUNE: Shouldn't this be like the code in ToolChain.cpp? switch (*TapirTarget) { case llvm::TapirTargetID::OpenCilk: { diff --git a/clang/lib/Driver/ToolChains/Darwin.h b/clang/lib/Driver/ToolChains/Darwin.h index 484a802f69f6223..d51d1e9e8c1798a 100644 --- a/clang/lib/Driver/ToolChains/Darwin.h +++ b/clang/lib/Driver/ToolChains/Darwin.h @@ -618,8 +618,8 @@ class LLVM_LIBRARY_VISIBILITY DarwinClang : public Darwin { void AddLinkARCArgs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const override; - path_list - getOpenCilkRuntimePaths(const llvm::opt::ArgList &Args) const override; + std::optional + getOpenCilkRuntimePath(const llvm::opt::ArgList &Args) const override; void AddOpenCilkABIBitcode(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs, diff --git a/clang/lib/Driver/ToolChains/DragonFly.cpp b/clang/lib/Driver/ToolChains/DragonFly.cpp index eae7902d47fbc4c..4fd7442e346da65 100644 --- a/clang/lib/Driver/ToolChains/DragonFly.cpp +++ b/clang/lib/Driver/ToolChains/DragonFly.cpp @@ -127,7 +127,7 @@ void dragonfly::Linker::ConstructJob(Compilation &C, const JobAction &JA, AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); - getToolChain().AddTapirRuntimeLibArgs(Args, CmdArgs); + getToolChain().AddKitsuneLinkerArgs(Args, CmdArgs); if 
(!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs, options::OPT_r)) { diff --git a/clang/lib/Driver/ToolChains/FreeBSD.cpp b/clang/lib/Driver/ToolChains/FreeBSD.cpp index 93f5026d00fe422..11a45b567f03e65 100644 --- a/clang/lib/Driver/ToolChains/FreeBSD.cpp +++ b/clang/lib/Driver/ToolChains/FreeBSD.cpp @@ -286,7 +286,7 @@ void freebsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, unsigned Major = ToolChain.getTriple().getOSMajorVersion(); bool Profiling = Args.hasArg(options::OPT_pg) && Major != 0 && Major < 14; - ToolChain.AddTapirRuntimeLibArgs(Args, CmdArgs); + ToolChain.AddKitsuneLinkerArgs(Args, CmdArgs); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs, options::OPT_r)) { diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index 559cfb0da9b03cd..f86923f8da524a4 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -546,7 +546,7 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, // The profile runtime also needs access to system libraries. 
getToolChain().addProfileRTLibs(Args, CmdArgs); - ToolChain.AddTapirRuntimeLibArgs(Args, CmdArgs); + ToolChain.AddKitsuneLinkerArgs(Args, CmdArgs); if (D.CCCIsCXX() && !Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs, diff --git a/clang/lib/Driver/ToolChains/MinGW.cpp b/clang/lib/Driver/ToolChains/MinGW.cpp index ba617aac94dfc39..6591d2b04ab087b 100644 --- a/clang/lib/Driver/ToolChains/MinGW.cpp +++ b/clang/lib/Driver/ToolChains/MinGW.cpp @@ -252,7 +252,7 @@ void tools::MinGW::Linker::ConstructJob(Compilation &C, const JobAction &JA, addFortranRuntimeLibs(TC, Args, CmdArgs); } - TC.AddTapirRuntimeLibArgs(Args, CmdArgs); + TC.AddKitsuneLinkerArgs(Args, CmdArgs); // TODO: Add profile stuff here diff --git a/clang/lib/Driver/ToolChains/Minix.cpp b/clang/lib/Driver/ToolChains/Minix.cpp index e9665b712e59a12..9ab075c5a578c7f 100644 --- a/clang/lib/Driver/ToolChains/Minix.cpp +++ b/clang/lib/Driver/ToolChains/Minix.cpp @@ -72,7 +72,7 @@ void tools::minix::Linker::ConstructJob(Compilation &C, const JobAction &JA, getToolChain().addProfileRTLibs(Args, CmdArgs); - getToolChain().AddTapirRuntimeLibArgs(Args, CmdArgs); + getToolChain().AddKitsuneLinkerArgs(Args, CmdArgs); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs, options::OPT_r)) { diff --git a/clang/lib/Driver/ToolChains/Myriad.cpp b/clang/lib/Driver/ToolChains/Myriad.cpp index 530ecfaf2f287ae..c89363d7f400bda 100644 --- a/clang/lib/Driver/ToolChains/Myriad.cpp +++ b/clang/lib/Driver/ToolChains/Myriad.cpp @@ -170,7 +170,7 @@ void tools::Myriad::Linker::ConstructJob(Compilation &C, const JobAction &JA, bool NeedsSanitizerDeps = addSanitizerRuntimes(TC, Args, CmdArgs); AddLinkerInputs(getToolChain(), Inputs, Args, CmdArgs, JA); - TC.AddTapirRuntimeLibArgs(Args, CmdArgs); + TC.AddKitsuneLinkerArgs(Args, CmdArgs); if (UseDefaultLibs) { if (NeedsSanitizerDeps) diff --git a/clang/lib/Driver/ToolChains/NaCl.cpp b/clang/lib/Driver/ToolChains/NaCl.cpp index 4c4f785f071fe39..8adadfb96b859b2 
100644 --- a/clang/lib/Driver/ToolChains/NaCl.cpp +++ b/clang/lib/Driver/ToolChains/NaCl.cpp @@ -127,7 +127,7 @@ void nacltools::Linker::ConstructJob(Compilation &C, const JobAction &JA, AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); - ToolChain.AddTapirRuntimeLibArgs(Args, CmdArgs); + ToolChain.AddKitsuneLinkerArgs(Args, CmdArgs); if (D.CCCIsCXX() && !Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { diff --git a/clang/lib/Driver/ToolChains/PS4CPU.cpp b/clang/lib/Driver/ToolChains/PS4CPU.cpp index 24242bdd6636176..2e96f6a257cf81e 100644 --- a/clang/lib/Driver/ToolChains/PS4CPU.cpp +++ b/clang/lib/Driver/ToolChains/PS4CPU.cpp @@ -217,7 +217,7 @@ void tools::PScpu::Linker::ConstructJob(Compilation &C, const JobAction &JA, AddLinkerInputs(TC, Inputs, Args, CmdArgs, JA); - TC.AddTapirRuntimeLibArgs(Args, CmdArgs); + TC.AddKitsuneLinkerArgs(Args, CmdArgs); if (Args.hasArg(options::OPT_pthread)) { CmdArgs.push_back("-lpthread"); diff --git a/clang/lib/Driver/ToolChains/Solaris.cpp b/clang/lib/Driver/ToolChains/Solaris.cpp index e968a2fe349f43e..448fa60f7de7cb2 100644 --- a/clang/lib/Driver/ToolChains/Solaris.cpp +++ b/clang/lib/Driver/ToolChains/Solaris.cpp @@ -207,7 +207,7 @@ void solaris::Linker::ConstructJob(Compilation &C, const JobAction &JA, bool NeedsSanitizerDeps = addSanitizerRuntimes(ToolChain, Args, CmdArgs); AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); - getToolChain().AddTapirRuntimeLibArgs(Args, CmdArgs); + getToolChain().AddKitsuneLinkerArgs(Args, CmdArgs); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs, options::OPT_r)) { diff --git a/clang/lib/Format/UnwrappedLineParser.h b/clang/lib/Format/UnwrappedLineParser.h index b12105bdb337f03..739298690bbd765 100644 --- a/clang/lib/Format/UnwrappedLineParser.h +++ b/clang/lib/Format/UnwrappedLineParser.h @@ -63,9 +63,6 @@ struct UnwrappedLine { /// Whether the parser has seen \c decltype(auto) in this line. 
bool SeenDecltypeAuto = false; - /// Whether the parser has seen \c decltype(auto) in this line. - bool SeenDecltypeAuto = false; - /// \c True if this line should be indented by ContinuationIndent in /// addition to the normal indention level. bool IsContinuation = false; diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 975ae07acf2c038..1f0fd53e2d0e42c 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1460,14 +1460,14 @@ void CompilerInvocationBase::GenerateCodeGenArgs(const CodeGenOptions &Opts, std::string buf; llvm::raw_string_ostream os(buf); os << *KitsuneOpts.getTapirTarget(); - GenerateArg(Args, OPT_ftapir_EQ, os.str(), SA); + GenerateArg(Consumer, OPT_ftapir_EQ, os.str()); } if (KitsuneOpts.getKokkos()) - GenerateArg(Args, OPT_fkokkos, SA); + GenerateArg(Consumer, OPT_fkokkos); if (KitsuneOpts.getKokkosNoInit()) - GenerateArg(Args, OPT_fkokkos_no_init, SA); + GenerateArg(Consumer, OPT_fkokkos_no_init); const CodeGenOptions &CodeGenOpts = Opts; @@ -3391,9 +3391,9 @@ void CompilerInvocationBase::GenerateLangArgs(const LangOptions &Opts, const llvm::Triple &T, InputKind IK) { if (Opts.KitsuneOpts.getKokkos()) - GenerateArg(Args, OPT_fkokkos, SA); + GenerateArg(Consumer, OPT_fkokkos); if (Opts.KitsuneOpts.getKokkosNoInit()) - GenerateArg(Args, OPT_fkokkos_no_init, SA); + GenerateArg(Consumer, OPT_fkokkos_no_init); if (IK.getFormat() == InputKind::Precompiled || IK.getLanguage() == Language::LLVM_IR) { diff --git a/kitsune/CMakeLists.txt b/kitsune/CMakeLists.txt index 4f8c8faf5070cc2..99efd58b5d3e755 100644 --- a/kitsune/CMakeLists.txt +++ b/kitsune/CMakeLists.txt @@ -10,6 +10,7 @@ project(kitsune C CXX) include(ExternalProject) include(FetchContent) +include(GetClangResourceDir) if (WIN32) message(FATAL_ERROR "Kitsune is not supported on Windows") @@ -80,46 +81,119 @@ foreach(target IN LISTS KITSUNE_ENABLE_TAPIR_TARGETS) endif() endforeach() 
+get_clang_resource_dir(CLANG_RESOURCE_INTDIR + PREFIX ${CMAKE_BINARY_DIR}) + +get_clang_resource_dir(CLANG_RESOURCE_INSTALL_DIR + PREFIX ${CMAKE_INSTALL_PREFIX}) + if (KITSUNE_KOKKOS_ENABLE) # We currently don't support using a pre-built Kokkos - even if it has been # patched the way we need it to be. This could possibly be moved to # kitsune/runtime, just so everything that needs to be fetched and built is # there, even if this is not technically a runtime. + set(KITSUNE_KOKKOS_BUILD_CONFIGURE_FLAGS "" + CACHE STRING + "Additional options to be passed to be CMake when configuring Kokkos. These are only used when building Kitsune") + + set(KITSUNE_KOKKOS_BUILD_COMPILE_FLAGS "" + CACHE STRING + "Additional C++ compiler flags needed to build Kokkos. These are only used when building Kitsune") + set(KITSUNE_KOKKOS_SRC_DIR ${KITSUNE_TARGETS_BINARY_DIR}/kokkos/kokkos) set(KITSUNE_KOKKOS_BUILD_DIR ${KITSUNE_TARGETS_BINARY_DIR}/kokkos/build) - set(KITSUNE_KOKKOS_BUILD_ARGS - -DKokkos_ENABLE_SERIAL=ON -DBUILD_TESTING=OFF - -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release) + set(KITSUNE_KOKKOS_STAMP_DIR ${KITSUNE_TARGETS_BINARY_DIR}/kokkos/stamp) + set(KITSUNE_KOKKOS_BUILD_CMAKE_FLAGS + -DCMAKE_CXX_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/clang++ + -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + -DCMAKE_INSTALL_PREFIX=${CLANG_RESOURCE_INSTALL_DIR} + -DCMAKE_CXX_FLAGS=${KITSUNE_KOKKOS_BUILD_COMPILE_FLAGS} + -DKokkos_ENABLE_SERIAL=ON + -DBUILD_TESTING=OFF + -DBUILD_SHARED_LIBS=ON + ${KITSUNE_KOKKOS_BUILD_CONFIGURE_FLAGS}) # We have to patch kokkos to disable some sections of code that are using # exceptions. set(KITSUNE_KOKKOS_PATCH_EXCS ${KITSUNE_SOURCE_DIR}/patches/Kokkos_Exceptions.patch) + # + # WARNING This absolutely sucks! + # + # As I understand it, ExternalProject_Add will always check for updates and + # download any updates. Obviously, since we have a fixed git tag, there will + # never be any. 
However, the PATCH_COMMAND is always executed, which means + # that it will attempt to patch code that has already been patched. Naturally, + # this fails. The workaround here is to disable checking for updates by + # setting UPDATE_COMMAND to "". + # + # HORRORS! It is possible that this means any update to GIT_TAG below will + # also not have any effect since the updates will never be retrieved. In this + # case, the entire build directory may need to be deleted for everything to + # work correctly. I cannot think of a way to automatically warn when this + # happens (never mind making sure that this doesn't happen). # # TODO: Look for newer Kokkos updates. # - message(STATUS "Checkout Kokkos into ${KITSUNE_KOKKOS_SRC_DIR}") - FetchContent_Declare( - kokkos + ExternalProject_Add(kokkos + DEPENDS clang lld llvm-config llvm-link GIT_REPOSITORY https://github.com/kokkos/kokkos.git GIT_TAG 606866d # 4.0.00 SOURCE_DIR ${KITSUNE_KOKKOS_SRC_DIR} BINARY_DIR ${KITSUNE_KOKKOS_BUILD_DIR} - CMAKE_ARGS ${KITSUNE_KOKKOS_BUILD_ARGS} - PATCH_COMMAND patch -p0 --verbose --input=${KITSUNE_KOKKOS_PATCH_EXCS} - UPDATE_DISCONNECTED FALSE) - - FetchContent_MakeAvailable(kokkos) - - set(KITSUNE_KOKKOS_DEFAULT_PREPROCESSOR_FLAGS - "-I${CMAKE_INSTALL_PREFIX}/include -I${CMAKE_INSTALL_PREFIX}/include/kokkos") - - set(KITSUNE_KOKKOS_DEFAULT_COMPILER_FLAGS - "") - - set(KITSUNE_KOKKOS_DEFAULT_LINKER_FLAGS - "-L${CMAKE_INSTALL_PREFIX}/lib -lkokkoscore -ldl") + STAMP_DIR ${KITSUNE_KOKKOS_STAMP_DIR} + CMAKE_ARGS ${KITSUNE_KOKKOS_BUILD_CMAKE_FLAGS} + PATCH_COMMAND patch -p0 --input=${KITSUNE_KOKKOS_PATCH_EXCS} + UPDATE_COMMAND "" + UPDATE_DISCONNECTED FALSE + STEP_TARGETS build) + + # Copy the Kokkos files into a "staging" area. This is only needed to run + # the Kitsune + Kokkos tests. + add_custom_target(kokkos-copy-headers-to-build-dir ALL + # Copy all the headers directly from the source. These are installed as is + # anyway. 
+ COMMAND ${CMAKE_COMMAND} -E copy_directory + ${KITSUNE_KOKKOS_SRC_DIR}/core/src + ${KITSUNE_KOKKOS_SRC_DIR}/tpls/desul/include + ${CLANG_RESOURCE_INTDIR}/include + + # We don't copy the entire build directory because we only want specific + # headers. + COMMAND ${CMAKE_COMMAND} -E copy + ${KITSUNE_KOKKOS_BUILD_DIR}/KokkosCore_config.h + ${KITSUNE_KOKKOS_BUILD_DIR}/KokkosCore_Config_DeclareBackend.hpp + ${KITSUNE_KOKKOS_BUILD_DIR}/KokkosCore_Config_FwdBackend.hpp + ${KITSUNE_KOKKOS_BUILD_DIR}/KokkosCore_Config_PostInclude.hpp + ${KITSUNE_KOKKOS_BUILD_DIR}/KokkosCore_Config_SetupBackend.hpp + ${CLANG_RESOURCE_INTDIR}/include + + COMMAND ${CMAKE_COMMAND} -E copy_directory + ${KITSUNE_KOKKOS_BUILD_DIR}/core/src/desul + ${CLANG_RESOURCE_INTDIR}/include/desul + + COMMAND ${CMAKE_COMMAND} -E make_directory + ${CLANG_RESOURCE_INTDIR}/lib64 + + # Copy the core libraries that are built. There are others that are also + # built, but these are the only ones that we care about. + # + # NOTE: These should be updated when the GIT_TAG is changed because the + # trailing version suffix may also change. There might be an automatic way + # to do this that does not involve copying the entire directory, and we + # probably should implement that at some point. 
+ COMMAND ${CMAKE_COMMAND} -E copy + ${KITSUNE_KOKKOS_BUILD_DIR}/core/src/libkokkoscore.so + ${KITSUNE_KOKKOS_BUILD_DIR}/core/src/libkokkoscore.so.4.0 + ${KITSUNE_KOKKOS_BUILD_DIR}/core/src/libkokkoscore.so.4.0.0 + ${CLANG_RESOURCE_INTDIR}/lib64 + + COMMENT "Copying Kokkos files to the 'top-level' build directory" + VERBATIM USES_TERMINAL + ) + add_dependencies(kokkos-copy-headers-to-build-dir kokkos) set(KITSUNE_KOKKOS_EXTRA_PREPROCESSOR_FLAGS "" @@ -140,22 +214,39 @@ endif() if (KITSUNE_CUDA_ENABLE) find_package(CUDAToolkit ${KITSUNE_CUDA_VERSION_MIN} REQUIRED) - find_library(LIB_CUDART cudart REQUIRED - PATHS ${CUDAToolkit_LIBRARY_DIR} + set(KITSUNE_CUDA_PREFIX ${CUDAToolkit_LIBRARY_ROOT}) + set(KITSUNE_CUDA_INCLUDE_DIR ${CUDAToolkit_INCLUDE_DIRS}) + set(KITSUNE_CUDA_BINARY_DIR ${CUDAToolkit_BIN_DIR}) + set(KITSUNE_CUDA_LIBRARY_DIR ${CUDAToolkit_LIBRARY_DIR}) + set(KITSUNE_CUDA_STUBS_DIR "${KITSUNE_CUDA_LIBRARY_DIR}/stubs") + set(KITSUNE_CUDA_LIBDEVICE_DIR "${KITSUNE_CUDA_PREFIX}/nvvm/libdevice") + + find_library(KITSUNE_CUDA_LIB_CUDART cudart REQUIRED + PATHS ${KITSUNE_CUDA_LIBRARY_DIR} NO_DEFAULT_PATH) - find_library(LIB_CUDA cuda REQUIRED - PATHS ${CUDAToolkit_LIBRARY_DIR}/stubs + # This should almost always be the libcuda that is installed on the system, + # so look for it in the "usual" places. In some cases, both /usr/lib and + # /usr/lib64 are present with libcuda in the latter. But cmake does not seem + # to look there, so force it to. 
+ find_library(KITSUNE_CUDA_LIB_CUDA cuda REQUIRED + HINTS /usr/lib64 /usr/local/lib64) + + find_library(KITSUNE_CUDA_LIB_NVPTX_STATIC nvptxcompiler_static REQUIRED + PATHS ${KITSUNE_CUDA_LIBRARY_DIR} NO_DEFAULT_PATH) - set(KITSUNE_CUDA_DEFAULT_PREPROCESSOR_FLAGS - "-D_tapir_cuda_target") + find_file(KITSUNE_CUDA_LIBDEVICE_BC libdevice.10.bc REQUIRED + PATHS ${KITSUNE_CUDA_LIBDEVICE_DIR} + NO_DEFAULT_PATH) - set(KITSUNE_CUDA_DEFAULT_COMPILER_FLAGS - "") + find_program(KITSUNE_CUDA_PTXAS ptxas REQUIRED + PATHS ${KITSUNE_CUDA_BINARY_DIR} + NO_DEFAULT_PATH) - set(KITSUNE_CUDA_DEFAULT_LINKER_FLAGS - "-L${CUDAToolkit_LIBRARY_DIR} -L${CUDAToolkit_LIBRARY_DIR}/stubs -lcudart -lcuda -lkitrt") + find_program(KITSUNE_CUDA_FATBINARY fatbinary REQUIRED + PATHS ${KITSUNE_CUDA_BINARY_DIR} + NO_DEFAULT_PATH) set(KITSUNE_CUDA_EXTRA_PREPROCESSOR_FLAGS "" @@ -171,10 +262,6 @@ if (KITSUNE_CUDA_ENABLE) "" CACHE STRING "Additional linker flags needed for the Cuda target") - - # KITSUNE FIXME: Find libdevice.bc - set(KITSUNE_CUDA_LIBDEVICE_BC - "") endif() if (KITSUNE_HIP_ENABLE) @@ -184,12 +271,15 @@ if (KITSUNE_HIP_ENABLE) "Path to hip install directory") # Probably should not be hard-coding /opt/rocm/* here, but it seems to be - # where it goes in several distributions and it seems we cannot otherwise - # reliably find rocm. 
+ # where it goes in several distributions and we cannot reliably find rocm + # otherwise find_package(hip REQUIRED PATHS $ENV{ROCM_PATH}/hip $ENV{ROCM_PATH} /opt/rocm/hip /opt/rocm) + message(FATAL_ERROR "hip: ${HIP_LIBRARY_DIR} ${hip_LIBRARY_DIR} ${hip_LIBRARY_DIRS} ${HIP_LIBRARY_DIRS}") + set(KITSUNE_HIP_PREFIX) + find_library(LIB_AMDHIP64 amdhip64 REQUIRED PATHS ${hip_LIBRARY_DIR} NO_DEFAULT_PATH) @@ -201,7 +291,7 @@ if (KITSUNE_HIP_ENABLE) "") set(KITSUNE_HIP_DEFAULT_LINKER_FLAGS - "-L${ROCM_PATH}/lib -L${ROCM_PATH}/hip/lib -lamdhip64 -lpthread -lm -lrt -lkitrt") + "-L${ROCM_PATH}/lib -L${ROCM_PATH}/hip/lib -lamdhip64 -lm") set(KITSUNE_HIP_EXTRA_PREPROCESSOR_FLAGS "" @@ -225,28 +315,45 @@ if (KITSUNE_OPENCILK_ENABLE) # vanilla clang build of the same version, but it's probably not worth all the # error checking hassles that would be needed to detect if the versions were # mismatched. + + set(KITSUNE_OPENCILK_BUILD_CONFIGURE_FLAGS "" + CACHE STRING + "Additional options to be passed to be CMake when configuring Kokkos. These are only used when building Kitsune") + + set(KITSUNE_OPENCILK_BUILD_COMPILE_FLAGS "" + CACHE STRING + "Additional C++ compiler flags needed to build Kokkos. These are only used when building Kitsune") + + # We pass some LLVM_* variables to make Cheetah behave as if it were an + # in-tree build. 
set(KITSUNE_CHEETAH_SOURCE_DIR ${KITSUNE_TARGETS_BINARY_DIR}/cheetah/cheetah) set(KITSUNE_CHEETAH_BINARY_DIR ${KITSUNE_TARGETS_BINARY_DIR}/cheetah/build) - set(KITSUNE_CHEETAH_BUILD_ARGS + set(KITSUNE_CHEETAH_BUILD_CMAKE_FLAGS -DCMAKE_C_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/clang -DCMAKE_CXX_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/clang++ - -DCMAKE_ASM_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/clang - -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - -DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM} -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} - -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}/lib/clang/${LLVM_VERSION_MAJOR} + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + -DCMAKE_MAKE_PROGRAM=${CMAKE_MAKE_PROGRAM} + -DCMAKE_INSTALL_PREFIX=${CLANG_RESOURCE_INSTALL_DIR} + -DCMAKE_CXX_FLAGS=${KITSUNE_OPENCILK_BUILD_COMPILE_FLAGS} -DCHEETAH_DEFAULT_TARGET_TRIPLE=${LLVM_DEFAULT_TARGET_TRIPLE} -DCHEETAH_DIRECTORY_PER_TARGET=ON + -DLLVM_CMAKE_DIR=${CMAKE_BINARY_DIR} + -DLLVM_COMMON_CMAKE_UTILS=${CMAKE_SOURCE_DIR}/llvm/cmake + -DLLVM_LIBRARY_OUTPUT_INTDIR=${LLVM_LIBRARY_OUTPUT_INTDIR} + -DLLVM_RUNTIME_OUTPUT_INTDIR=${LLVM_RUNTIME_OUTPUT_INTDIR} + -DPACKAGE_VERSION=${PACKAGE_VERSION} + ${KITSUNE_OPENCILK_BUILD_CONFIGURE_FLAGS} ) - ExternalProject_Add(cheetah ${KITSUNE_CHEETAH_SRC_DIR} + ExternalProject_Add(cheetah DEPENDS clang lld llvm-config llvm-link GIT_REPOSITORY https://github.com/OpenCilk/cheetah.git GIT_TAG opencilk/v2.1 SOURCE_DIR ${KITSUNE_CHEETAH_SOURCE_DIR} BINARY_DIR ${KITSUNE_CHEETAH_BINARY_DIR} - CMAKE_ARGS ${KITSUNE_CHEETAH_BUILD_ARGS}) + CMAKE_ARGS ${KITSUNE_CHEETAH_BUILD_CMAKE_FLAGS}) # The values here are populated given that we know where Cheetah will be # installed. The code to actual fetch, configure, build etc. is in @@ -257,9 +364,10 @@ if (KITSUNE_OPENCILK_ENABLE) set(KITSUNE_OPENCILK_DEFAULT_COMPILER_FLAGS "") - # FIXME: We need to specify where libopencilk is installed with a -L flag? 
+ # All the defaults are handled in the source code because finding the + # OpenCilk runtime library is not straightforward on all platforms. set(KITSUNE_OPENCILK_DEFAULT_LINKER_FLAGS - "-lopencilk -lkitrt") + "") set(KITSUNE_OPENCILK_EXTRA_PREPROCESSOR_FLAGS "" @@ -276,10 +384,6 @@ if (KITSUNE_OPENCILK_ENABLE) CACHE STRING "Additional linker flags needed for the OpenCilk target") - # Path to libopencilk-abi.bc. This will be generated when cheetah is built. - set(KITSUNE_OPENCILK_LIBOPENCILKABI_BC - "") - endif() if (KITSUNE_OPENMP_ENABLE) @@ -297,7 +401,7 @@ if (KITSUNE_OPENMP_ENABLE) set(KITSUNE_OPENMP_EXTRA_PREPROCESSOR_FLAGS "" CACHE STRING - "Additional preprocessor flags needed for the OpenMP target_") + "Additional preprocessor flags needed for the OpenMP target") set(KITSUNE_OPENMP_EXTRA_COMPILER_FLAGS "" @@ -324,7 +428,7 @@ if (KITSUNE_QTHREADS_ENABLE) "") set(KITSUNE_QTHREADS_DEFAULT_LINKER_FLAGS - "-L${QTHREADS_LIBRARY_DIR} -lqthread -lkitrt -lpthread -lhwloc") + "-L${QTHREADS_LIBRARY_DIR} -lqthread -lhwloc") set(KITSUNE_QTHREADS_EXTRA_PREPROCESSOR_FLAGS "" @@ -354,7 +458,7 @@ if (KITSUNE_REALM_ENABLE) "") set(KITSUNE_REALM_DEFAULT_LINKER_FLAGS - "-L${Realm_LIBRARY_DIR} -L${Realm_WRAPPER_LIBRARY_DIR} -lrealm-abi -lrealm -lpthread -ldl -lrt -lkitrt") + "-L${Realm_LIBRARY_DIR} -L${Realm_WRAPPER_LIBRARY_DIR} -lrealm-abi -lrealm") set(KITSUNE_REALM_EXTRA_PREPROCESSOR_FLAGS "" @@ -412,11 +516,19 @@ endif() # # FIXME: Instead of passing all the Kitsune variables here by hand, look for # for every variable starting with KITSUNE_, KITRT_ and LLVM_ and pass those -# in. +# in. Perhaps something like what is below. 
+# +# get_cmake_property(vars VARIABLES) +# list(FILTER vars INCLUDE REGEX "^KIT") +# list(SORT vars) +# foreach (var ${vars}) +# message(STATUS "${var}=${${var}}") +# endforeach() +# ExternalProject_Add(kitsune-runtimes DEPENDS llvm-config clang LLVM SOURCE_DIR ${KITSUNE_SOURCE_DIR}/runtime - STAMP_DIR ${KITSUNE_BINARY_DIR}/runtime-stamp + STAMP_DIR ${KITSUNE_BINARY_DIR}/stamp BINARY_DIR ${KITSUNE_BINARY_DIR}/runtime CMAKE_ARGS -DCMAKE_C_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/clang -DCMAKE_CXX_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/clang++ @@ -425,32 +537,45 @@ ExternalProject_Add(kitsune-runtimes -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX} + -DCMAKE_COLOR_DIAGNOSTICS=${CMAKE_COLOR_DIAGNOSTICS} -DKITSUNE_CUDA_ENABLE=${KITSUNE_CUDA_ENABLE} + -DKITSUNE_CUDA_INCLUDE_DIR=${KITSUNE_CUDA_INCLUDE_DIR} + -DKITSUNE_CUDA_BINARY_DIR=${KITSUNE_CUDA_BINARY_DIR} + -DKITSUNE_CUDA_LIBRARY_DIR=${KITSUNE_CUDA_LIBRARY_DIR} + -DKITSUNE_CUDA_STUBS_DIR=${KITSUNE_CUDA_STUBS_DIR} + -DKITSUNE_CUDA_LIB_CUDA=${KITSUNE_CUDA_LIB_CUDA} + -DKITSUNE_CUDA_LIB_CUDART=${KITSUNE_CUDA_LIB_CUDART} + -DKITSUNE_CUDA_LIB_NVPTX_STATIC=${KITSUNE_CUDA_LIB_NVPTX_STATIC} -DKITSUNE_HIP_ENABLE=${KITSUNE_HIP_ENABLE} -DKITSUNE_OPENCILK_ENABLE=${KITSUNE_OPENCILK_ENABLE} -DKITSUNE_OPENMP_ENABLE=${KITSUNE_OPENMP_ENABLE} -DKITSUNE_QTHREADS_ENABLE=${KITSUNE_QTHREADS_ENABLE} -DKITSUNE_REALM_ENABLE=${KITSUNE_REALM_ENABLE} -DKITSUNE_CUDA_VERSION_MIN=${KITSUNE_CUDA_VERSION_MIN} - -DKITSUNE_ROMC_VERSION_MIN=${KITSUNE_ROCM_VERSION_MIN} + -DKITSUNE_ROCM_VERSION_MIN=${KITSUNE_ROCM_VERSION_MIN} -DKITRT_ENABLE_DEBUG=${KITRT_ENABLE_DEBUG} -DKITRT_ENABLE_VERBOSE=${KITRT_ENABLE_VERBOSE} -DKITCUDA_ENABLE_NVTX=${KITCUDA_ENABLE_NVTX} + -DCLANG_RESOURCE_INTDIR=${CLANG_RESOURCE_INTDIR} + -DCLANG_RESOURCE_DIR=${CLANG_RESOURCE_INSTALL_DIR} -DLLVM_CONFIG_PATH=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-config 
-DLLVM_VERSION_MAJOR=${LLVM_VERSION_MAJOR} -DLLVM_VERSION_MINOR=${LLVM_VERSION_MINOR} -DLLVM_VERSION_PATCH=${LLVM_VERSION_PATCH} + -DLLVM_BINARY_DIR=${LLVM_BINARY_DIR} -DLLVM_INCLUDE_DIR=${LLVM_INCLUDE_DIR} - -DLLVM_LIBRARY_DIR=${LLVM_LIBRARY_OUTPUT_INTDIR} - -DLLVM_BINARY_DIR=${LLVM_RUNTIME_OUTPUT_INTDIR} + -DLLVM_LIBRARY_DIR=${LLVM_LIBRARY_DIR} + -DLLVM_LIBRARY_OUTPUT_INTDIR=${LLVM_LIBRARY_OUTPUT_INTDIR} + -DLLVM_RUNTIME_OUTPUT_INTDIR=${LLVM_RUNTIME_OUTPUT_INTDIR} -DLLVM_LIT_ARGS=${LLVM_LIT_ARGS} -DLLVM_LIBDIR_SUFFIX=${LLVM_LIBDIR_SUFFIX} -DLLVM_MAIN_INCLUDE_DIR=${LLVM_MAIN_INCLUDE_DIR} -DLLVM_RUNTIME_OUTPUT_INTDIR=${LLVM_RUNTIME_OUTPUT_INTDIR} -DCUDAToolkit_INCLUDE_DIRS=${CUDAToolkit_INCLUDE_DIRS} -DCUDAToolkit_LIBRARY_DIR=${CUDAToolkit_LIBRARY_DIR} - INSTALL_COMMAND "" - # STEP_TARGETS configure build - USES_TERMINAL_CONFIGURE 1 - USES_TERMINAL_BUILD 1 - USES_TERMINAL_INSTALL 1) + USES_TERMINAL_CONFIGURE TRUE + USES_TERMINAL_BUILD TRUE + USES_TERMINAL_INSTALL TRUE + BUILD_ALWAYS TRUE + CONFIGURE_HANDLED_BY_BUILD TRUE) + diff --git a/kitsune/include/kitsune/CMakeLists.txt b/kitsune/include/kitsune/CMakeLists.txt index e8f34e76f7925a7..d3a506e1323ad58 100644 --- a/kitsune/include/kitsune/CMakeLists.txt +++ b/kitsune/include/kitsune/CMakeLists.txt @@ -10,7 +10,8 @@ include(GetClangResourceDir) get_clang_resource_dir(CLANG_RESOURCE_INCDIR PREFIX ${LLVM_LIBRARY_OUTPUT_INTDIR}/.. - SUBDIR include) + SUBDIR include +) set(out_files) function(copy_header_to_resource_dir file) @@ -33,8 +34,13 @@ add_custom_target("kitsune-resource-headers" ALL DEPENDS ${out_files}) # will pick it up. 
configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/Config/config.h.cmake - ${KITSUNE_BINARY_DIR}/../clang/include/kitsune/Config/config.h) + ${CMAKE_BINARY_DIR}/include/kitsune/Config/config.h) + +install(DIRECTORY + ${CMAKE_BINARY_DIR}/include/ + DESTINATION ${CMAKE_INSTALL_PREFIX}/include +) install(FILES kitsune.h - DESTINATION ${CLANG_RESOURCE_INCDIR} + DESTINATION ${CMAKE_INSTALL_PREFIX}/lib/clang/${LLVM_VERSION_MAJOR}/include COMPONENT "kitsune-resource-headers") diff --git a/kitsune/include/kitsune/Config/config.h.cmake b/kitsune/include/kitsune/Config/config.h.cmake index 09f31c33fb6299e..b3ac61f7dec44ac 100644 --- a/kitsune/include/kitsune/Config/config.h.cmake +++ b/kitsune/include/kitsune/Config/config.h.cmake @@ -13,108 +13,66 @@ #else #define KITSUNE_CONFIG_H +// Kokkos configuration #cmakedefine01 KITSUNE_KOKKOS_ENABLE -#define KITSUNE_KOKKOS_PREPROCESSOR_FLAGS \ - "${KITSUNE_KOKKOS_DEFAULT_PREPROCESSOR_FLAGS} "\ - "${KITSUNE_KOKKOS_EXTRA_PREPROCESSOR_FLAGS}" - -#define KITSUNE_KOKKOS_COMPILER_FLAGS \ - "${KITSUNE_KOKKOS_DEFAULT_COMPILER_FLAGS} "\ - "${KITSUNE_KOKKOS_EXTRA_COMPILER_FLAGS}" - -#define KITSUNE_KOKKOS_LINKER_FLAGS \ - "${KITSUNE_KOKKOS_DEFAULT_LINKER_FLAGS} " \ - "${KITSUNE_KOKKOS_EXTRA_LINKER_FLAGS}" +#define KITSUNE_KOKKOS_EXTRA_PREPROCESSOR_FLAGS "${KITSUNE_KOKKOS_EXTRA_PREPROCESSOR_FLAGS}" +#define KITSUNE_KOKKOS_EXTRA_COMPILER_FLAGS "${KITSUNE_KOKKOS_EXTRA_COMPILER_FLAGS}" +#define KITSUNE_KOKKOS_EXTRA_LINKER_FLAGS "${KITSUNE_KOKKOS_EXTRA_LINKER_FLAGS}" +// Cuda configuration #cmakedefine01 KITSUNE_CUDA_ENABLE -#define KITSUNE_CUDA_PREPROCESSOR_FLAGS \ - "${KITSUNE_CUDA_DEFAULT_PREPROCESSOR_FLAGS} " \ - "${KITSUNE_CUDA_EXTRA_PREPROCESSOR_FLAGS}" - -#define KITSUNE_CUDA_COMPILER_FLAGS \ - "${KITSUNE_CUDA_DEFAULT_COMPILER_FLAGS} " \ - "${KITSUNE_CUDA_EXTRA_COMPILER_FLAGS}" - -#define KITSUNE_CUDA_LINKER_FLAGS \ - "${KITSUNE_CUDA_DEFAULT_LINKER_FLAGS} " \ - "${KITSUNE_CUDA_EXTRA_LINKER_FLAGS}" +#define KITSUNE_CUDA_EXTRA_PREPROCESSOR_FLAGS 
"${KITSUNE_CUDA_EXTRA_PREPROCESSOR_FLAGS}" +#define KITSUNE_CUDA_EXTRA_COMPILER_FLAGS "${KITSUNE_CUDA_EXTRA_COMPILER_FLAGS}" +#define KITSUNE_CUDA_EXTRA_LINKER_FLAGS "${KITSUNE_CUDA_EXTRA_LINKER_FLAGS}" -#define KITSUNE_CUDA_LIBDEVICE_BC \ - "${KITSUNE_CUDA_LIBDEVICE_BC}" +#define KITSUNE_CUDA_PREFIX "${KITSUNE_CUDA_PREFIX}" +#define KITSUNE_CUDA_BINARY_DIR "${KITSUNE_CUDA_BINARY_DIR}" +#define KITSUNE_CUDA_LIBRARY_DIR "${KITSUNE_CUDA_LIBRARY_DIR}" +#define KITSUNE_CUDA_STUBS_DIR "${KITSUNE_CUDA_STUBS_DIR}" +#define KITSUNE_CUDA_LIBDEVICE_DIR "${KITSUNE_CUDA_LIBDEVICE_DIR}" +#define KITSUNE_CUDA_LIBDEVICE_BC "${KITSUNE_CUDA_LIBDEVICE_BC}" +#define KITSUNE_CUDA_PTXAS "${KITSUNE_CUDA_PTXAS}" +#define KITSUNE_CUDA_FATBINARY "${KITSUNE_CUDA_FATBINARY}" +// Hip configuration #cmakedefine01 KITSUNE_HIP_ENABLE -#define KITSUNE_HIP_PREPROCESSOR_FLAGS \ - "${KITSUNE_HIP_DEFAULT_PREPROCESSOR_FLAGS} " \ - "${KITSUNE_HIP_EXTRA_PREPROCESSOR_FLAGS}" +#define KITSUNE_HIP_EXTRA_PREPROCESSOR_FLAGS "${KITSUNE_HIP_EXTRA_PREPROCESSOR_FLAGS}" +#define KITSUNE_HIP_EXTRA_COMPILER_FLAGS "${KITSUNE_HIP_EXTRA_COMPILER_FLAGS}" +#define KITSUNE_HIP_EXTRA_LINKER_FLAGS "${KITSUNE_HIP_EXTRA_LINKER_FLAGS}" -#define KITSUNE_HIP_COMPILER_FLAGS \ - "${KITSUNE_HIP_DEFAULT_COMPILER_FLAGS} " \ - "${KITSUNE_HIP_EXTRA_COMPILER_FLAGS}" - -#define KITSUNE_HIP_LINKER_FLAGS \ - "${KITSUNE_HIP_DEFAULT_LINKER_FLAGS} " \ - "${KITSUNE_HIP_EXTRA_LINKER_FLAGS}" +#define KITSUNE_HIP_PREFIX "${KITSUNE_HIP_PREFIX}" +#define KITSUNE_HIP_LIBRARY_DIR "${KITSUNE_HIP_LIBRARY_DIR}" +#define KITSUNE_HIP_ALT_LIBRARY_DIR "${KITSUNE_HIP_ALT_LIBRARY_DIR}" +// OpenCilk configuration #cmakedefine01 KITSUNE_OPENCILK_ENABLE -#define KITSUNE_OPENCILK_PREPROCESSOR_FLAGS \ - "${KITSUNE_OPENCILK_DEFAULT_PREPROCESSOR_FLAGS} " \ - "${KITSUNE_OPENCILK_EXTRA_PREPROCESSOR_FLAGS}" - -#define KITSUNE_OPENCILK_COMPILER_FLAGS \ - "${KITSUNE_OPENCILK_DEFAULT_COMPILER_FLAGS} " \ - "${KITSUNE_OPENCILK_EXTRA_COMPILER_FLAGS}" - -#define 
KITSUNE_OPENCILK_LINKER_FLAGS \ - "${KITSUNE_OPENCILK_DEFAULT_LINKER_FLAGS} " \ - "${KITSUNE_OPENCILK_EXTRA_LINKER_FLAGS}" - -#define KITSUNE_OPENCILK_LIBOPENCILKABI_BC \ - "${KITSUNE_OPENCILK_LIBOPENCILKABI_BC}" +#define KITSUNE_OPENCILK_EXTRA_PREPROCESSOR_FLAGS "${KITSUNE_OPENCILK_EXTRA_PREPROCESSOR_FLAGS}" +#define KITSUNE_OPENCILK_EXTRA_COMPILER_FLAGS "${KITSUNE_OPENCILK_EXTRA_COMPILER_FLAGS}" +#define KITSUNE_OPENCILK_EXTRA_LINKER_FLAGS "${KITSUNE_OPENCILK_EXTRA_LINKER_FLAGS}" +// OpenMP configuration #cmakedefine01 KITSUNE_OPENMP_ENABLE -#define KITSUNE_OPENMP_PREPROCESSOR_FLAGS \ - "${KITSUNE_OPENMP_DEFAULT_PREPROCESSOR_FLAGS} " \ - "${KITSUNE_OPENMP_EXTRA_PREPROCESSOR_FLAGS}" - -#define KITSUNE_OPENMP_COMPILER_FLAGS \ - "${KITSUNE_OPENMP_DEFAULT_COMPILER_FLAGS} " \ - "${KITSUNE_OPENMP_EXTRA_COMPILER_FLAGS}" - -#define KITSUNE_OPENMP_LINKER_FLAGS \ - "${KITSUNE_OPENMP_DEFAULT_LINKER_FLAGS} " \ - "${KITSUNE_OPENMP_EXTRA_LINKER_FLAGS}" +#define KITSUNE_OPENMP_EXTRA_PREPROCESSOR_FLAGS "${KITSUNE_OPENMP_EXTRA_PREPROCESSOR_FLAGS}" +#define KITSUNE_OPENMP_EXTRA_COMPILER_FLAGS "${KITSUNE_OPENMP_EXTRA_COMPILER_FLAGS}" +#define KITSUNE_OPENMP_EXTRA_LINKER_FLAGS "${KITSUNE_OPENMP_EXTRA_LINKER_FLAGS}" +// Qthreads configuration #cmakedefine01 KITSUNE_QTHREADS_ENABLE -#define KITSUNE_QTHREADS_PREPROCESSOR_FLAGS \ - "${KITSUNE_QTHREADS_DEFAULT_PREPROCESSOR_FLAGS} " \ - "${KITSUNE_QTHREADS_EXTRA_PREPROCESSOR_FLAGS}" - -#define KITSUNE_QTHREADS_COMPILER_FLAGS \ - "${KITSUNE_QTHREADS_DEFAULT_COMPILER_FLAGS} " \ - "${KITSUNE_QTHREADS_EXTRA_COMPILER_FLAGS}" - -#define KITSUNE_QTHREADS_LINKER_FLAGS \ - "${KITSUNE_QTHREADS_DEFAULT_LINKER_FLAGS} " \ - "${KITSUNE_QTHREADS_EXTRA_LINKER_FLAGS}" +#define KITSUNE_QTHREADS_EXTRA_PREPROCESSOR_FLAGS "${KITSUNE_QTHREADS_EXTRA_PREPROCESSOR_FLAGS}" +#define KITSUNE_QTHREADS_EXTRA_COMPILER_FLAGS "${KITSUNE_QTHREADS_EXTRA_COMPILER_FLAGS}" +#define KITSUNE_QTHREADS_EXTRA_LINKER_FLAGS "${KITSUNE_QTHREADS_EXTRA_LINKER_FLAGS}" +// Realm 
configuration #cmakedefine01 KITSUNE_REALM_ENABLE -#define KITSUNE_REALM_PREPROCESSOR_FLAGS \ - "${KITSUNE_REALM_DEFAULT_PREPROCESSOR_FLAGS} " \ - "${KITSUNE_REALM_EXTRA_PREPROCESSOR_FLAGS}" - -#define KITSUNE_REALM_COMPILER_FLAGS \ - "${KITSUNE_REALM_DEFAULT_COMPILER_FLAGS}" \ - "${KITSUNE_REALM_EXTRA_COMPILER_FLAGS}" - -#define KITSUNE_REALM_LINKER_FLAGS \ - "${KITSUNE_REALM_DEFAULT_LINKER_FLAGS}" \ - "${KITSUNE_REALM_EXTRA_LINKER_FLAGS}" +#define KITSUNE_REALM_EXTRA_PREPROCESSOR_FLAGS "${KITSUNE_REALM_EXTRA_PREPROCESSOR_FLAGS}" +#define KITSUNE_REALM_EXTRA_COMPILER_FLAGS "${KITSUNE_REALM_EXTRA_COMPILER_FLAGS}" +#define KITSUNE_REALM_EXTRA_LINKER_FLAGS "${KITSUNE_REALM_EXTRA_LINKER_FLAGS}" #endif diff --git a/kitsune/runtime/CMakeLists.txt b/kitsune/runtime/CMakeLists.txt index dbf6335fef08665..f16a015f1069f25 100644 --- a/kitsune/runtime/CMakeLists.txt +++ b/kitsune/runtime/CMakeLists.txt @@ -53,6 +53,8 @@ if (KITRT_ENABLE_VERBOSE) endif() target_include_directories(${KITRT} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) +set_target_properties(${KITRT} PROPERTIES + LIBRARY_OUTPUT_DIRECTORY ${CLANG_RESOURCE_INTDIR}/lib) target_include_directories(${KITRT} BEFORE PRIVATE @@ -61,19 +63,26 @@ target_include_directories(${KITRT} ${KITSUNE_SOURCE_DIR}/include ${KITSUNE_RUNTIME_INCLUDE_DIR}) -set_property(TARGET ${KITRT} APPEND PROPERTY - INSTALL_RPATH "$ORIGIN/../lib${LLVM_LIBDIR_SUFFIX}") - -set_property(TARGET ${KITRT} APPEND PROPERTY - BUILD_RPATH "$ORIGIN/../lib${LLVM_LIBDIR_SUFFIX}:${PROJECT_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}") - -find_library(LIB_LLVM LLVM-${LLVM_VERSION_MAJOR} REQUIRED - PATHS ${LLVM_LIBRARY_DIR} +# +# KITSUNE FIXME: Do we need the RPATH's that are set below? 
+# +# set_property(TARGET ${KITRT} APPEND PROPERTY +# INSTALL_RPATH "$ORIGIN/../lib${LLVM_LIBDIR_SUFFIX}") + +# set_property(TARGET ${KITRT} APPEND PROPERTY +# BUILD_RPATH "$ORIGIN/../lib${LLVM_LIBDIR_SUFFIX}:${PROJECT_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX}") + +find_library(LIB_LLVM + NAMES LLVM + LLVM-${LLVM_VERSION_MAJOR} + LLVM-${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR} + REQUIRED + PATHS ${LLVM_LIBRARY_OUTPUT_INTDIR} NO_DEFAULT_PATH) -# FIXME: We probably don't need to explicitly link pthread, rt, and dl. -# libLLVM is guaranteed to be built because we have explicitly set -# LLVM_BUILD_LLVM_DYLIB to ON. +# FIXME: We probably don't need to explicitly link pthread and rt. These are +# only necessary for older versions of glibc, but we might get some antique +# software stacks on HPC machines. target_link_libraries(${KITRT} PUBLIC ${LIB_LLVM} @@ -81,25 +90,10 @@ target_link_libraries(${KITRT} rt dl) +# The KITSUNE_CUDA_* variables are defined in ../CMakeLists.txt (i.e. in +# kitsune/CMakeLists.txt). They are passed in via CMake because the Kitsune +# runtime is treated as an "ExternalProject". if (KITSUNE_CUDA_ENABLE) - # FIXME: Instead of looking for CudaToolkit again here, the necessary - # variables should be passed when this is configured. 
This ensures that there - # is only a single call to find_package(CudaToolkit) - # message(STATUS "Kitsune runtime adding CUDA components to build...") - # find_package(CUDAToolkit ${KITSUNE_CUDA_VERSION_MIN} REQUIRED) - - find_library(LIB_NVPTX_STATIC nvptxcompiler_static REQUIRED - PATHS ${CUDAToolkit_LIBRARY_DIR} - NO_DEFAULT_PATH) - - find_library(LIB_CUDART cudart REQUIRED - PATHS ${CUDAToolkit_LIBRARY_DIR} - NO_DEFAULT_PATH) - - find_library(LIB_CUDA cuda REQUIRED - PATHS ${CUDAToolkit_LIBRARY_DIR}/stubs - NO_DEFAULT_PATH) - list(APPEND KITRT_HDRS cuda/kitcuda.h cuda/kitcuda_dylib.h) @@ -112,41 +106,44 @@ if (KITSUNE_CUDA_ENABLE) cuda/streams.cpp) target_compile_definitions(${KITRT} PUBLIC KITRT_CUDA_ENABLED) - target_include_directories(${KITRT} SYSTEM PUBLIC ${CUDAToolkit_INCLUDE_DIRS}) + target_include_directories(${KITRT} SYSTEM PUBLIC ${KITSUNE_CUDA_INCLUDE_DIR}) target_include_directories(${KITRT} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/cuda) target_link_directories(${KITRT} PUBLIC - ${CUDAToolkit_LIBRARY_DIR} - ${CUDAToolkit_LIBRARY_DIR}/stubs) + ${KITSUNE_CUDA_LIBRARY_DIR} + ${KITSUNE_CUDA_STUBS_DIR}) target_link_libraries(${KITRT} PUBLIC - ${LIB_NVPTX_STATIC} - ${LIB_CUDART} - ${LIB_CUDA}) + ${KITSUNE_CUDA_LIB_NVPTX_STATIC} + ${KITSUNE_CUDA_LIB_CUDART} + ${KITSUNE_CUDA_LIB_CUDA}) set_property(TARGET ${KITRT} APPEND PROPERTY - INSTALL_RPATH ${CUDAToolkit_LIBRARY_DIR}) + INSTALL_RPATH ${KITSUNE_CUDA_LIBRARY_DIR}) set_property(TARGET ${KITRT} APPEND PROPERTY - BUILD_RPATH ${CUDAToolkit_LIBRARY_DIR}) + BUILD_RPATH ${KITSUNE_CUDA_LIBRARY_DIR}) if (KITCUDA_ENABLE_NVTX) target_compile_definitions(${KITRT} PUBLIC KITCUDA_ENABLE_NVTX) find_library(LIB_NVTOOLS_EXT nvToolsExt REQUIRED - PATHS ${CUDAToolkit_LIBRARY_DIR} + PATHS ${KITSUNE_CUDA_LIBRARY_DIR} NO_DEFAULT_PATH) target_link_libraries(${KITRT} PUBLIC ${LIB_NVTOOLS_EXT}) endif() endif() +# The KITSUNE_HIP_* variables are defined in ../CMakeLists.txt (i.e. in +# kitsune/CMakeLists.txt). 
They are passed in via CMake because the Kitsune +# runtime is treated as an "ExternalProject". +# # FIXME: hip::host depends on clangrt_builtin, but that is not caught by the # build system currently and the library which causes a build failure if the # library is not built. Not sure how this is to be fixed, but adding the note # here as a reminder that it needs to be done. if (KITSUNE_HIP_ENABLE) - message(STATUS "Kitsune runtime adding HIP components to build...") # FIXME: It may be necessary to hardcode these paths to rocm, but there ought # to be better ways of doing this. find_package(hip REQUIRED @@ -179,19 +176,19 @@ if (KITSUNE_HIP_ENABLE) add_subdirectory(hip) endif() -# FIXME: Do we need to add the Cuda and Hip directories to the rpath as well? -list(APPEND CMAKE_INSTALL_RPATH ${CMAKE_INSTALL_PREFIX}/lib) +# Append (do not overwrite) so the CUDA INSTALL_RPATH entry added above is kept. +set_property(TARGET ${KITRT} APPEND PROPERTY + INSTALL_RPATH ${CMAKE_INSTALL_PREFIX}/lib) install(TARGETS kitrt - LIBRARY DESTINATION lib COMPONENT kitrt - ARCHIVE DESTINATION lib COMPONENT kitrt) + DESTINATION ${CLANG_RESOURCE_DIR}/lib) # KITSUNE FIXME: Do we really need to install the runtime headers? Do we intend # this to be used by third-parties? install(FILES ${KITRT_HDRS} - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/kitrt) + DESTINATION ${CLANG_RESOURCE_DIR}/include/kitrt) # KITSUNE FIXME: Do we really need to install the headers for the targets?
install(DIRECTORY cuda hip realm - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/kitrt/ + DESTINATION ${CLANG_RESOURCE_DIR}/include/kitrt FILES_MATCHING PATTERN "*.h") diff --git a/kitsune/runtime/cuda/kitcuda.cpp b/kitsune/runtime/cuda/kitcuda.cpp index 77aa5cdab427061..c43f9fa253f581a 100644 --- a/kitsune/runtime/cuda/kitcuda.cpp +++ b/kitsune/runtime/cuda/kitcuda.cpp @@ -97,7 +97,7 @@ extern "C" { bool __kitcuda_initialize() { KIT_NVTX_PUSH("kitcuda: initialize", KIT_NVTX_INIT); if (_kitcuda_initialized) { - if (__kitrt_verbose_mode()) + if (__kitrt_verbose_mode()) fprintf(stderr, "kitcuda: warning, multiple initialization calls!\n"); return true; } diff --git a/kitsune/test/cuda/config.cpp b/kitsune/test/cuda/config.cpp index 6001fdc42ead385..8a9b239895bc973 100644 --- a/kitsune/test/cuda/config.cpp +++ b/kitsune/test/cuda/config.cpp @@ -17,7 +17,7 @@ // CHECK-CUSTOM-SAME: "-D" "some_preprocessor_flag" // CHECK-CUSTOM-SAME: "-D_tapir_cuda_target" // CHECK-CUSTOM-SAME: "-Wsome_compiler_flag" -// CHECK-CUSTOM: /{{[^ ]*}}ld +// CHECK-CUSTOM: {{^[ ]*"[^"]+/[l]?}}ld{{[.]?[^ ]*}}" // CHECK-CUSTOM-SAME: "-some_linker_flag" // CHECK-CUSTOM-SAME: -lcudart // CHECK-CUSTOM-SAME: -lcuda diff --git a/kitsune/test/cuda/default-args.cpp b/kitsune/test/cuda/default-args.cpp index 9f24559d7e22817..4c7074de06c61e9 100644 --- a/kitsune/test/cuda/default-args.cpp +++ b/kitsune/test/cuda/default-args.cpp @@ -3,7 +3,10 @@ // CHECK: "-cc1" // CHECK-SAME: "-D_tapir_cuda_target" -// CHECK: /{{[^ ]*}}ld +// The link line may have some optional space at the start of the line followed +// by the absolute path to the linker in quotes. The linker name itself could +// be lld, but we also allow matches to ld.gold, ld.bfd etc. 
+// CHECK: {{^[ ]*"[^"]+/[l]?}}ld{{[.]?[^ ]*}}" // CHECK-SAME: -lcudart // CHECK-SAME: -lcuda // CHECK-SAME: -lkitrt diff --git a/kitsune/test/cuda/ftapir.cpp b/kitsune/test/cuda/ftapir.cpp index beb3f362eea1472..e53063a1a0e593f 100644 --- a/kitsune/test/cuda/ftapir.cpp +++ b/kitsune/test/cuda/ftapir.cpp @@ -9,6 +9,6 @@ // The option value is case sensitive. // RUN: not %kitxx -fsyntax-only -ftapir=Cuda %s 2>&1 | FileCheck %s -check-prefix=CHECK-BAD-TARGET -// RUN: %kitxx -### -ftapir=Cuda %s 2>&1 | FileCheck %s -check-prefix=CHECK-BAD-TARGET +// RUN: not %kitxx -### -ftapir=Cuda %s 2>&1 | FileCheck %s -check-prefix=CHECK-BAD-TARGET // CHECK-BAD-TARGET: invalid value '{{.+}}' in '-ftapir={{.+}}' diff --git a/kitsune/test/hip/config.cpp b/kitsune/test/hip/config.cpp index 12cccd1c2902749..a5dee3d9d3542c7 100644 --- a/kitsune/test/hip/config.cpp +++ b/kitsune/test/hip/config.cpp @@ -16,5 +16,5 @@ // CHECK-CUSTOM-SAME: "-D" "_tapir_hip_target" // CHECK-CUSTOM-SAME: "-D" "some_preprocessor_flag" // CHECK-CUSTOM-SAME: "-Wsome_compiler_flag" -// CHECK-CUSTOM: /{{[^ ]*}}ld +// CHECK-CUSTOM: {{^[ ]*"[^"]+/[l]?}}ld{{[.]?[^ ]*}}" // CHECK-CUSTOM-SAME: "-some_linker_flag" diff --git a/kitsune/test/hip/default-args.cpp b/kitsune/test/hip/default-args.cpp index 43a31a95fdd17e8..2eebd2452771280 100644 --- a/kitsune/test/hip/default-args.cpp +++ b/kitsune/test/hip/default-args.cpp @@ -3,7 +3,10 @@ // CHECK: "-cc1" // CHECK-SAME: "-D_tapir_hip_target" -// CHECK: /{{[^ ]*}}ld" +// The link line may have some optional space at the start of the line followed +// by the absolute path to the linker in quotes. The linker name itself could +// be lld, but we also allow matches to ld.gold, ld.bfd etc. 
+// CHECK: {{^[ ]*"[^"]+/[l]?}}ld{{[.]?[^ ]*}}" // CHECK-SAME: -lamdhip64 // CHECK-SAME: -lhip::host // CHECK-SAME: -lkitrt diff --git a/kitsune/test/kokkos/config.cpp b/kitsune/test/kokkos/config.cpp new file mode 100644 index 000000000000000..89d32b5aea9dd98 --- /dev/null +++ b/kitsune/test/kokkos/config.cpp @@ -0,0 +1,19 @@ +// Check that the default target-specific configuration file is always found. +// RUN: %kitxx -### -fkokkos %s 2>&1 | FileCheck %s -check-prefix=CHECK-DEFAULT-CONFIG + +// Check that providing a custom config directory without a target-specific +// configuration file is ok. +// RUN: %kitxx -### -fkokkos --config-kitsune-dir=%S/../ %s 2>&1 | FileCheck %s -check-prefix=CHECK-CUSTOM-NOEXIST + +// Check that providing a custom config directory with a target-specific +// configuration file leads to the file being found and the contents used. +// RUN: %kitxx -### -fkokkos --config-kitsune-dir=%S %s 2>&1 | FileCheck %s -check-prefix=CHECK-CUSTOM + +// CHECK-DEFAULT-CONFIG: Configuration file: {{.*}}/kokkos.cfg +// CHECK-CUSTOM-NOEXIST-NOT: Configuration file: {{.*}}/kokkos.cfg +// CHECK-CUSTOM: Configuration file: {{.*}}/kokkos/kokkos.cfg +// CHECK-CUSTOM: "-cc1" +// CHECK-CUSTOM-SAME: "-D" "some_preprocessor_flag" +// CHECK-CUSTOM-SAME: "-Wsome_compiler_flag" +// CHECK-CUSTOM: {{^[ ]*"[^"]+/[l]?}}ld{{[.]?[^ ]*}}" +// CHECK-CUSTOM-SAME: "-some_linker_flag" diff --git a/kitsune/test/kokkos/default-args.cpp b/kitsune/test/kokkos/default-args.cpp index 4923ff06632ae4d..c15196675aa4cfa 100644 --- a/kitsune/test/kokkos/default-args.cpp +++ b/kitsune/test/kokkos/default-args.cpp @@ -1,9 +1,11 @@ -// RUN: %kitxx -### -ftapir=kokkos %s 2>&1 | FileCheck %s +// RUN: %kitxx -### -fkokkos %s 2>&1 | FileCheck %s // CHECK: "-cc1" // CHECK-SAME: -I{{[^ ]*}}/include/kokkos -// CHECK: /{{[^ ]*}}ld -// CHECK-SAME: -lkokkos +// The link line may have some optional space at the start of the line followed +// by the absolute path to the linker in quotes. 
The linker name itself could +// be lld, but we also allow matches to ld.gold, ld.bfd etc. +// CHECK-NEXT: {{^[ ]*"[^"]+/[l]?}}ld{{[.]?[^ ]*}}" // CHECK-SAME: -lkokkoscore // CHECK-SAME: -lkitrt diff --git a/kitsune/test/kokkos/functor.cpp b/kitsune/test/kokkos/functor.cpp index 3c7107218992718..16e924586b245ab 100644 --- a/kitsune/test/kokkos/functor.cpp +++ b/kitsune/test/kokkos/functor.cpp @@ -1,6 +1,8 @@ -// TODO: Include an actual run line and add proper checks. // REQUIRES: kitsune-kokkos -// RUN: false +// RUN: %kitxx -fkokkos -fkokkos-no-init -ftapir=opencilk -S -emit-llvm -o - %s | FileCheck %s +// XFAIL: * +// There is currently a bug in Kitsune which causes it to crash. +// KITSUNE FIXME: Fix this bug so the compiler does not crash. // Very simple test of kokkos that uses a functor. In a nutshell, // given the potential for different compilation units, kitsune does diff --git a/kitsune/test/kokkos/kokkos.cfg b/kitsune/test/kokkos/kokkos.cfg new file mode 100644 index 000000000000000..f0b3f6ad374cc0b --- /dev/null +++ b/kitsune/test/kokkos/kokkos.cfg @@ -0,0 +1,3 @@ +-Dsome_preprocessor_flag +-Wsome_compiler_flag +-Wl,-some_linker_flag \ No newline at end of file diff --git a/kitsune/test/kokkos/lambda.cpp b/kitsune/test/kokkos/lambda.cpp index 4e08dbfe6cbdb5a..f2bcbfe659197b3 100644 --- a/kitsune/test/kokkos/lambda.cpp +++ b/kitsune/test/kokkos/lambda.cpp @@ -1,6 +1,7 @@ // TODO: Include an actual run line and add proper checks. // REQUIRES: kitsune-kokkos // RUN: false +// XFAIL: * // Very simple test of kokkos with two common forms of the // parallel_for construct.
We should be able to transform diff --git a/kitsune/test/kokkos/target-attr.cpp b/kitsune/test/kokkos/target-attr.cpp index fce7c7ecd980388..fe5ed6820bf7ce6 100644 --- a/kitsune/test/kokkos/target-attr.cpp +++ b/kitsune/test/kokkos/target-attr.cpp @@ -28,7 +28,7 @@ int main(int argc, char *argv[]) { A[i] = i; }); - [[tapir::target("cuda")]] // expected-warning {{tapir target attribute on unsupported statement}} + [[tapir::target("cuda")]] // expected-error {{tapir target attribute on unsupported statement}} if (argc == 1) { forall(int i = 0; i < 1024; ++i) Kokkos::parallel_for(1024, KOKKOS_LAMBDA(const int i) { diff --git a/kitsune/test/opencilk/config.cpp b/kitsune/test/opencilk/config.cpp index bace9307abc21cb..30e515c0f854ae8 100644 --- a/kitsune/test/opencilk/config.cpp +++ b/kitsune/test/opencilk/config.cpp @@ -15,5 +15,5 @@ // CHECK-CUSTOM: "-cc1" // CHECK-CUSTOM-SAME: "-D" "some_preprocessor_flag" // CHECK-CUSTOM-SAME: "-Wsome_compiler_flag" -// CHECK-CUSTOM: /{{[^ ]*}}ld +// CHECK-CUSTOM: {{^[ ]*"[^"]+/[l]?}}ld{{[.]?[^ ]*}}" // CHECK-CUSTOM-SAME: "-some_linker_flag" diff --git a/kitsune/test/opencilk/default-args.cpp b/kitsune/test/opencilk/default-args.cpp index f9cc421147b805a..aff3b667e12b9b0 100644 --- a/kitsune/test/opencilk/default-args.cpp +++ b/kitsune/test/opencilk/default-args.cpp @@ -1,5 +1,8 @@ // RUN: %kitxx -### -ftapir=opencilk %s 2>&1 | FileCheck %s -// CHECK: /{{[^ ]*}}ld +// The link line may have some optional space at the start of the line followed +// by the absolute path to the linker in quotes. The linker name itself could +// be lld, but we also allow matches to ld.gold, ld.bfd etc. +// CHECK: {{^[ ]*"[^"]+/[l]?}}ld{{[.]?[^ ]*}}" // CHECK-SAME: -lopencilk // CHECK-SAME: -lkitrt diff --git a/kitsune/test/opencilk/ftapir.cpp b/kitsune/test/opencilk/ftapir.cpp index 2064d42ede5bfd1..3f9c9df4ee7b406 100644 --- a/kitsune/test/opencilk/ftapir.cpp +++ b/kitsune/test/opencilk/ftapir.cpp @@ -9,6 +9,6 @@ // The option value is case sensitive. 
// RUN: not %kitxx -fsyntax-only -ftapir=OpenCilk %s 2>&1 | FileCheck %s -check-prefix=CHECK-BAD-TARGET -// RUN: %kitxx -### -ftapir=OpenCilk %s 2>&1 | FileCheck %s -check-prefix=CHECK-BAD-TARGET +// RUN: not %kitxx -### -ftapir=OpenCilk %s 2>&1 | FileCheck %s -check-prefix=CHECK-BAD-TARGET // CHECK-BAD-TARGET: invalid value '{{.+}}' in '-ftapir={{.+}}' diff --git a/kitsune/test/openmp/config.cpp b/kitsune/test/openmp/config.cpp index adafea2889811f1..0231b58063961ab 100644 --- a/kitsune/test/openmp/config.cpp +++ b/kitsune/test/openmp/config.cpp @@ -15,5 +15,5 @@ // CHECK-CUSTOM: "-cc1" // CHECK-CUSTOM-SAME: "-D" "some_preprocessor_flag" // CHECK-CUSTOM-SAME: "-Wsome_compiler_flag" -// CHECK-CUSTOM: /{{[^ ]*}}ld +// CHECK-CUSTOM: {{^[ ]*"[^"]+/[l]?}}ld{{[.]?[^ ]*}}" // CHECK-CUSTOM-SAME: "-some_linker_flag" diff --git a/kitsune/test/openmp/default-args.cpp b/kitsune/test/openmp/default-args.cpp index 6c29dcedc1445a6..3c2b03a10881452 100644 --- a/kitsune/test/openmp/default-args.cpp +++ b/kitsune/test/openmp/default-args.cpp @@ -1,5 +1,8 @@ // RUN: %kitxx -### -ftapir=openmp %s 2>&1 | FileCheck %s -// CHECK: /{{[^ ]*}}ld +// The link line may have some optional space at the start of the line followed +// by the absolute path to the linker in quotes. The linker name itself could +// be lld, but we also allow matches to ld.gold, ld.bfd etc. 
+// CHECK: {{^[ ]*"[^"]+/[l]?}}ld{{[.]?[^ ]*}}" // CHECK-SAME: -lomp // CHECK-SAME: -lkitrt diff --git a/kitsune/test/qthreads/config.cpp b/kitsune/test/qthreads/config.cpp index 72207dd939d8377..583522e1dafc2f8 100644 --- a/kitsune/test/qthreads/config.cpp +++ b/kitsune/test/qthreads/config.cpp @@ -15,5 +15,5 @@ // CHECK-CUSTOM: "-cc1" // CHECK-CUSTOM-SAME: "-D" "some_preprocessor_flag" // CHECK-CUSTOM-SAME: "-Wsome_compiler_flag" -// CHECK-CUSTOM: /{{[^ ]*}}ld +// CHECK-CUSTOM: {{^[ ]*"[^"]+/[l]?}}ld{{[.]?[^ ]*}}" // CHECK-CUSTOM-SAME: "-some_linker_flag" diff --git a/kitsune/test/qthreads/default-args.cpp b/kitsune/test/qthreads/default-args.cpp index 0422a197dec7aa8..06c7f7bec789ef4 100644 --- a/kitsune/test/qthreads/default-args.cpp +++ b/kitsune/test/qthreads/default-args.cpp @@ -1,5 +1,8 @@ // RUN: %kitxx -### -ftapir=qthreads %s 2>&1 | FileCheck %s -// CHECK: /{{[^ ]*}}ld +// The link line may have some optional space at the start of the line followed +// by the absolute path to the linker in quotes. The linker name itself could +// be lld, but we also allow matches to ld.gold, ld.bfd etc. 
+// CHECK: {{^[ ]*"[^"]+/[l]?}}ld{{[.]?[^ ]*}}" // CHECK-SAME: -lqthreads // CHECK-SAME: -lkitrt diff --git a/kitsune/test/realm/config.cpp b/kitsune/test/realm/config.cpp index 2964b9f92965361..8864e0566ac903b 100644 --- a/kitsune/test/realm/config.cpp +++ b/kitsune/test/realm/config.cpp @@ -15,5 +15,5 @@ // CHECK-CUSTOM: "-cc1" // CHECK-CUSTOM-SAME: "-D" "some_preprocessor_flag" // CHECK-CUSTOM-SAME: "-Wsome_compiler_flag" -// CHECK-CUSTOM: /{{[^ ]*}}ld +// CHECK-CUSTOM: {{^[ ]*"[^"]+/[l]?}}ld{{[.]?[^ ]*}}" // CHECK-CUSTOM-SAME: "-some_linker_flag" diff --git a/kitsune/test/realm/default-args.cpp b/kitsune/test/realm/default-args.cpp index 8820e25aabc9078..51a524523500f79 100644 --- a/kitsune/test/realm/default-args.cpp +++ b/kitsune/test/realm/default-args.cpp @@ -1,6 +1,9 @@ // RUN: %kitxx -### -ftapir=realm %s 2>&1 | FileCheck %s -// CHECK: /{{[^ ]*}}ld +// The link line may have some optional space at the start of the line followed +// by the absolute path to the linker in quotes. The linker name itself could +// be lld, but we also allow matches to ld.gold, ld.bfd etc. +// CHECK: {{^[ ]*"[^"]+/[l]?}}ld{{[.]?[^ ]*}}" // CHECK-SAME: -lrealm // CHECK-SAME: -lrealm-abi // CHECK-SAME: -lkitrt diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 7526a93c73b457e..547786c5f3d3367 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1757,11 +1757,11 @@ def int_tapir_loop_grainsize // lowering transforms this intrinsic into ordinary frameaddress // intrinsics. 
def int_task_frameaddress - : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrWillReturn]>; + : Intrinsic<[llvm_anyptr_ty], [llvm_i32_ty], [IntrWillReturn]>; ///===-------------------------- Other Intrinsics --------------------------===// // -// TODO: We should introduce a new memory kind fo traps (and other side effects +// TODO: We should introduce a new memory kind fo traps (and other side effects // we only model to keep things alive). def int_trap : Intrinsic<[], [], [IntrNoReturn, IntrCold, IntrInaccessibleMemOnly, IntrWriteMem]>, ClangBuiltin<"__builtin_trap">; diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h index def3871b8864e44..c1f97b6f421035e 100644 --- a/llvm/include/llvm/LinkAllPasses.h +++ b/llvm/include/llvm/LinkAllPasses.h @@ -91,7 +91,6 @@ namespace { (void) llvm::createLazyValueInfoPass(); (void) llvm::createLoopExtractorPass(); (void) llvm::createLoopSimplifyPass(); - (void) llvm::createLoopSimplifyCFGPass(); (void) llvm::createLoopSpawningTIPass(); (void) llvm::createLoopStrengthReducePass(); (void) llvm::createLoopStripMinePass(); diff --git a/llvm/include/llvm/Transforms/Scalar/SROA.h b/llvm/include/llvm/Transforms/Scalar/SROA.h index b0217b51e13132e..c03cdf48fb1c686 100644 --- a/llvm/include/llvm/Transforms/Scalar/SROA.h +++ b/llvm/include/llvm/Transforms/Scalar/SROA.h @@ -20,115 +20,12 @@ namespace llvm { class Function; -class LLVMContext; -class PHINode; -class SelectInst; -class TaskInfo; -class Use; - -/// A private "module" namespace for types and utilities used by SROA. These -/// are implementation details and should not be used by clients. -namespace LLVM_LIBRARY_VISIBILITY sroa { - -class AllocaSliceRewriter; -class AllocaSlices; -class Partition; -class SROALegacyPass; - -class SelectHandSpeculativity { - unsigned char Storage = 0; // None are speculatable by default. - using TrueVal = Bitfield::Element; // Low 0'th bit. - using FalseVal = Bitfield::Element; // Low 1'th bit. 
-public: - SelectHandSpeculativity() = default; - SelectHandSpeculativity &setAsSpeculatable(bool isTrueVal); - bool isSpeculatable(bool isTrueVal) const; - bool areAllSpeculatable() const; - bool areAnySpeculatable() const; - bool areNoneSpeculatable() const; - // For interop as int half of PointerIntPair. - explicit operator intptr_t() const { return static_cast(Storage); } - explicit SelectHandSpeculativity(intptr_t Storage_) : Storage(Storage_) {} -}; -static_assert(sizeof(SelectHandSpeculativity) == sizeof(unsigned char)); - -using PossiblySpeculatableLoad = - PointerIntPair; -using UnspeculatableStore = StoreInst *; -using RewriteableMemOp = - std::variant; -using RewriteableMemOps = SmallVector; - -} // end namespace sroa enum class SROAOptions : bool { ModifyCFG, PreserveCFG }; class SROAPass : public PassInfoMixin { const SROAOptions PreserveCFG; - LLVMContext *C = nullptr; - DomTreeUpdater *DTU = nullptr; - AssumptionCache *AC = nullptr; - TaskInfo *TI = nullptr; - - /// Worklist of alloca instructions to simplify. - /// - /// Each alloca in the function is added to this. Each new alloca formed gets - /// added to it as well to recursively simplify unless that alloca can be - /// directly promoted. Finally, each time we rewrite a use of an alloca other - /// the one being actively rewritten, we add it back onto the list if not - /// already present to ensure it is re-visited. - SmallSetVector Worklist; - - /// A collection of instructions to delete. - /// We try to batch deletions to simplify code and make things a bit more - /// efficient. We also make sure there is no dangling pointers. - SmallVector DeadInsts; - - /// Post-promotion worklist. - /// - /// Sometimes we discover an alloca which has a high probability of becoming - /// viable for SROA after a round of promotion takes place. In those cases, - /// the alloca is enqueued here for re-processing. 
- /// - /// Note that we have to be very careful to clear allocas out of this list in - /// the event they are deleted. - SmallSetVector PostPromotionWorklist; - - /// A collection of alloca instructions we can directly promote. - std::vector PromotableAllocas; - - /// A worklist of PHIs to speculate prior to promoting allocas. - /// - /// All of these PHIs have been checked for the safety of speculation and by - /// being speculated will allow promoting allocas currently in the promotable - /// queue. - SmallSetVector SpeculatablePHIs; - - /// A worklist of select instructions to rewrite prior to promoting - /// allocas. - SmallMapVector SelectsToRewrite; - - /// Select instructions that use an alloca and are subsequently loaded can be - /// rewritten to load both input pointers and then select between the result, - /// allowing the load of the alloca to be promoted. - /// From this: - /// %P2 = select i1 %cond, ptr %Alloca, ptr %Other - /// %V = load , ptr %P2 - /// to: - /// %V1 = load , ptr %Alloca -> will be mem2reg'd - /// %V2 = load , ptr %Other - /// %V = select i1 %cond, %V1, %V2 - /// - /// We can do this to a select if its only uses are loads - /// and if either the operand to the select can be loaded unconditionally, - /// or if we are allowed to perform CFG modifications. - /// If found an intervening bitcast with a single use of the load, - /// allow the promotion. - static std::optional - isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG); ->>>>>>> 05e7e8a2c040 (This is a squash of the OpenCilk repo onto LLVM 17's release branch. All credit) - public: /// If \p PreserveCFG is set, then the pass is not allowed to modify CFG /// in any way, even if it would update CFG analyses. 
@@ -139,25 +36,6 @@ class SROAPass : public PassInfoMixin { void printPipeline(raw_ostream &OS, function_ref MapClassName2PassName); - -private: - friend class sroa::AllocaSliceRewriter; - friend class sroa::SROALegacyPass; - - /// Helper used by both the public run method and by the legacy pass. - PreservedAnalyses runImpl(Function &F, DomTreeUpdater &RunDTU, - AssumptionCache &RunAC, TaskInfo &RunTI); - PreservedAnalyses runImpl(Function &F, DominatorTree &RunDT, - AssumptionCache &RunAC, TaskInfo &RunTI); - - bool presplitLoadsAndStores(AllocaInst &AI, sroa::AllocaSlices &AS); - AllocaInst *rewritePartition(AllocaInst &AI, sroa::AllocaSlices &AS, - sroa::Partition &P); - bool splitAlloca(AllocaInst &AI, sroa::AllocaSlices &AS); - std::pair runOnAlloca(AllocaInst &AI); - void clobberUse(Use &U); - bool deleteDeadInstructions(SmallPtrSetImpl &DeletedAllocas); - bool promoteAllocas(Function &F); }; } // end namespace llvm diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp index decc4f19483d7cf..e389fa772d077a6 100644 --- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -1459,7 +1459,7 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, LocationSize PNSize, // Given that O1 != O2, return NoAlias if they can not alias. static AliasResult UnderlyingNoAlias(const Value *O1, const Value *O2, - AAQueryInfo &AAQI) { + AAQueryInfo &AAQI, DominatorTree *DT) { assert(O1 != O2 && "identical arguments to UnderlyingNoAlias"); // If V1/V2 point to two different objects, we know that we have no alias. @@ -1493,10 +1493,10 @@ static AliasResult UnderlyingNoAlias(const Value *O1, const Value *O2, // location if that memory location doesn't escape. Or it may pass a // nocapture value to other functions as long as they don't capture it. 
if (isEscapeSource(O1) && - AAQI.CI->isNotCapturedBeforeOrAt(O2, cast(O1))) + AAQI.CI->isNotCapturedBefore(O2, cast(O1), /*OrAt*/false)) return AliasResult::NoAlias; if (isEscapeSource(O2) && - AAQI.CI->isNotCapturedBeforeOrAt(O1, cast(O2))) + AAQI.CI->isNotCapturedBefore(O1, cast(O2), /*OrAt*/false)) return AliasResult::NoAlias; return AliasResult::MayAlias; @@ -1557,11 +1557,6 @@ static inline TapirFnBehavior clearStrand(const TapirFnBehavior TFB) { return TapirFnBehavior(static_cast(TFB) & ~static_cast(TapirFnBehavior::Strand)); } -static inline TapirFnBehavior unionTapirFnBehavior(const TapirFnBehavior TFB1, - const TapirFnBehavior TFB2) { - return TapirFnBehavior(static_cast(TFB1) | - static_cast(TFB2)); -} static inline TapirFnBehavior intersectTapirFnBehavior(const TapirFnBehavior TFB1, const TapirFnBehavior TFB2) { @@ -1668,7 +1663,7 @@ BasicAAResult::checkInjectiveArguments(const Value *V1, const Value *O1, if (O1 == U2) // 1 return AliasResult::MayAlias; if (isViewSet(Behavior2)) // 2 - return UnderlyingNoAlias(O1, U2, AAQI); + return UnderlyingNoAlias(O1, U2, AAQI, getDT(AAQI)); return AliasResult::MayAlias; } if (!A2) { @@ -1677,7 +1672,7 @@ BasicAAResult::checkInjectiveArguments(const Value *V1, const Value *O1, if (U1 == O2) // 1 return AliasResult::MayAlias; if (isViewSet(Behavior1)) // 2 - return UnderlyingNoAlias(U1, O2, AAQI); + return UnderlyingNoAlias(U1, O2, AAQI, getDT(AAQI)); return AliasResult::MayAlias; } @@ -1695,9 +1690,9 @@ BasicAAResult::checkInjectiveArguments(const Value *V1, const Value *O1, // void *f(void *p) { return p; } // could not be declared injective. 
BasicAAResult::DecomposedGEP DecompGEP1 = - DecomposeGEPExpression(A1, DL, &AC, DT); + DecomposeGEPExpression(A1, DL, &AC, getDT(AAQI)); BasicAAResult::DecomposedGEP DecompGEP2 = - DecomposeGEPExpression(A2, DL, &AC, DT); + DecomposeGEPExpression(A2, DL, &AC, getDT(AAQI)); if (DecompGEP1.VarIndices.empty() && DecompGEP2.VarIndices.empty() && isValueEqualInPotentialCycles(DecompGEP1.Base, DecompGEP2.Base, AAQI)) return DecompGEP1.Offset == DecompGEP2.Offset @@ -1706,7 +1701,7 @@ BasicAAResult::checkInjectiveArguments(const Value *V1, const Value *O1, return AliasResult::MayAlias; } - return UnderlyingNoAlias(U1, U2, AAQI); + return UnderlyingNoAlias(U1, U2, AAQI, getDT(AAQI)); } /// Provides a bunch of ad-hoc rules to disambiguate in common cases, such as @@ -1763,6 +1758,11 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size, else if (InjectiveResult == AliasResult::MustAlias) return AliasResult::MayAlias; + if (O1 != O2) { + // If V1/V2 point to two different objects, we know that we have no alias. + if (isIdentifiedObject(O1) && isIdentifiedObject(O2)) + return AliasResult::NoAlias; + // Function arguments can't alias with things that are known to be // unambigously identified at the function level. if ((isa(O1) && isIdentifiedFunctionLocal(O2)) || @@ -1786,9 +1786,6 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size, return AliasResult::NoAlias; } - if (O1 != O2 && UnderlyingNoAlias(O1, O2, AAQI) == AliasResult::NoAlias) - return AliasResult::NoAlias; - // If the size of one access is larger than the entire object on the other // side, then we know such behavior is undefined and can assume no alias. 
bool NullIsValidLocation = NullPointerIsDefined(&F); diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index f8eb49235010ed5..9d22bc05f79c021 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -144,7 +144,6 @@ add_llvm_component_library(LLVMAnalysis ValueLatticeUtils.cpp ValueTracking.cpp VectorUtils.cpp - VFABIDemangling.cpp WorkSpanAnalysis.cpp ${GeneratedMLSources} diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index e6353aa510ed216..cf857243b4d0708 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1944,7 +1944,7 @@ getDependenceDistanceStrideAndSize( const AccessAnalysis::MemAccessInfo &B, Instruction *BInst, const DenseMap &Strides, const DenseMap> &UnderlyingObjects, - PredicatedScalarEvolution &PSE, const Loop *InnermostLoop) { + PredicatedScalarEvolution &PSE, TaskInfo& TI, const Loop *InnermostLoop) { auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout(); auto &SE = *PSE.getSE(); auto [APtr, AIsWrite] = A; @@ -1959,8 +1959,8 @@ getDependenceDistanceStrideAndSize( // Under certain assumptions, Tapir can guarantee that there are no // loop-carried dependencies. - if (EnableDRFAA && isLogicallyParallelViaTapir(InnermostLoop, TI)) - return Dependence::NoDep; + if (EnableDRFAA && isLogicallyParallelViaTapir(InnermostLoop, &TI)) + return MemoryDepChecker::Dependence::NoDep; // We cannot check pointers in different address spaces. if (APtr->getType()->getPointerAddressSpace() != @@ -2025,7 +2025,7 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent( // Get the dependence distance, stride, type size and what access writes for // the dependence between A and B. 
auto Res = getDependenceDistanceStrideAndSize( - A, InstMap[AIdx], B, InstMap[BIdx], Strides, UnderlyingObjects, PSE, + A, InstMap[AIdx], B, InstMap[BIdx], Strides, UnderlyingObjects, PSE, *TI, InnermostLoop); if (std::holds_alternative(Res)) return std::get(Res); diff --git a/llvm/lib/Analysis/TapirRaceDetect.cpp b/llvm/lib/Analysis/TapirRaceDetect.cpp index aa8919a99f2aa54..5eef7720ed80907 100644 --- a/llvm/lib/Analysis/TapirRaceDetect.cpp +++ b/llvm/lib/Analysis/TapirRaceDetect.cpp @@ -531,9 +531,9 @@ static bool checkInstructionForRace(const Instruction *I, return false; // Ignore CSI and Cilksan functions - if (Called->hasName() && (Called->getName().startswith("__csi") || - Called->getName().startswith("__csan") || - Called->getName().startswith("__cilksan"))) + if (Called->hasName() && (Called->getName().starts_with("__csi") || + Called->getName().starts_with("__csan") || + Called->getName().starts_with("__cilksan"))) return false; } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 1aae947e722c9aa..e943fce978d1821 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3378,20 +3378,19 @@ void SelectionDAGBuilder::visitDetach(const DetachInst &I) { // Update machine-CFG edges. MachineBasicBlock *Detached = FuncInfo.MBBMap[I.getSuccessor(0)]; - //MachineBasicBlock *Continue = FuncInfo.MBBMap[I.getSuccessor(1)]; + // MachineBasicBlock *Continue = FuncInfo.MBBMap[I.getSuccessor(1)]; // Update machine-CFG edges. DetachMBB->addSuccessor(Detached); // If this is not a fall-through branch or optimizations are switched off, // emit the branch. 
- if (Detached != NextBlock(DetachMBB) || TM.getOptLevel() == CodeGenOpt::None) - DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), - MVT::Other, getControlRoot(), - DAG.getBasicBlock(Detached))); + if (Detached != NextBlock(DetachMBB) || + TM.getOptLevel() == CodeGenOptLevel::None) + DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, + getControlRoot(), DAG.getBasicBlock(Detached))); return; - } void SelectionDAGBuilder::visitReattach(const ReattachInst &I) { @@ -3405,10 +3404,10 @@ void SelectionDAGBuilder::visitReattach(const ReattachInst &I) { // If this is not a fall-through branch or optimizations are switched off, // emit the branch. - if (Continue != NextBlock(ReattachMBB) || TM.getOptLevel() == CodeGenOpt::None) - DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), - MVT::Other, getControlRoot(), - DAG.getBasicBlock(Continue))); + if (Continue != NextBlock(ReattachMBB) || + TM.getOptLevel() == CodeGenOptLevel::None) + DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, + getControlRoot(), DAG.getBasicBlock(Continue))); return; } @@ -3424,10 +3423,10 @@ void SelectionDAGBuilder::visitSync(const SyncInst &I) { // If this is not a fall-through branch or optimizations are switched off, // emit the branch. 
- if (Continue != NextBlock(SyncMBB) || TM.getOptLevel() == CodeGenOpt::None) - DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), - MVT::Other, getControlRoot(), - DAG.getBasicBlock(Continue))); + if (Continue != NextBlock(SyncMBB) || + TM.getOptLevel() == CodeGenOptLevel::None) + DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, + getControlRoot(), DAG.getBasicBlock(Continue))); return; } diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index f920f873e90c59f..bd4094c59efc31a 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -6232,6 +6232,8 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { "llvm.ptrmask intrinsic second argument bitwidth must match " "pointer index type size of first argument", &Call); + break; + } case Intrinsic::syncregion_start: { SmallVector DetachUsers; for (const User *U : Call.users()) diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index c78e58fcb80c6b8..418fce704173e8b 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1153,8 +1153,8 @@ Expected parseGlobalMergeOptions(StringRef Params) { /// Tests whether a pass name starts with a valid prefix for a default pipeline /// alias. static bool startsWithDefaultPipelineAliasPrefix(StringRef Name) { - return Name.startswith("default") || Name.startswith("thinlto") || - Name.startswith("lto") || Name.startswith("tapir-lowering"); + return Name.starts_with("default") || Name.starts_with("thinlto") || + Name.starts_with("lto") || Name.starts_with("tapir-lowering"); } /// Tests whether registered callbacks will accept a given pass name. 
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 0b6f936ad676eec..bea7ae894eeeb6e 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -84,9 +84,8 @@ MODULE_PASS("internalize", InternalizePass()) MODULE_PASS("invalidate", InvalidateAllAnalysesPass()) MODULE_PASS("iroutliner", IROutlinerPass()) MODULE_PASS("jmc-instrumenter", JMCInstrumenterPass()) -MODULE_PASS("lower-emutls", LowerEmuTLSPass()) -MODULE_PASS("print-ir-similarity", IRSimilarityAnalysisPrinterPass(dbgs())) MODULE_PASS("loop-spawning", LoopSpawningPass()) +MODULE_PASS("lower-emutls", LowerEmuTLSPass()) MODULE_PASS("lower-global-dtors", LowerGlobalDtorsPass()) MODULE_PASS("lower-ifunc", LowerIFuncPass()) MODULE_PASS("lowertypetests", LowerTypeTestsPass()) @@ -270,8 +269,8 @@ FUNCTION_ANALYSIS("opt-remark-emit", OptimizationRemarkEmitterAnalysis()) FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) FUNCTION_ANALYSIS("phi-values", PhiValuesAnalysis()) FUNCTION_ANALYSIS("postdomtree", PostDominatorTreeAnalysis()) -FUNCTION_ANALYSIS("regions", RegionInfoAnalysis()) FUNCTION_ANALYSIS("race-detect", TapirRaceDetect()) +FUNCTION_ANALYSIS("regions", RegionInfoAnalysis()) FUNCTION_ANALYSIS("scalar-evolution", ScalarEvolutionAnalysis()) FUNCTION_ANALYSIS("should-not-run-function-passes", ShouldNotRunFunctionPassesAnalysis()) @@ -283,8 +282,6 @@ FUNCTION_ANALYSIS("targetir", TM ? 
TM->getTargetIRAnalysis() : TargetIRAnalysis()) FUNCTION_ANALYSIS("targetlibinfo", TargetLibraryAnalysis()) FUNCTION_ANALYSIS("tasks", TaskAnalysis()) -FUNCTION_ANALYSIS("verify", VerifierAnalysis()) -FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) FUNCTION_ANALYSIS("uniformity", UniformityInfoAnalysis()) FUNCTION_ANALYSIS("verify", VerifierAnalysis()) @@ -293,11 +290,11 @@ FUNCTION_ANALYSIS("verify", VerifierAnalysis()) FUNCTION_ANALYSIS(NAME, CREATE_PASS) #endif FUNCTION_ALIAS_ANALYSIS("basic-aa", BasicAA()) +FUNCTION_ALIAS_ANALYSIS("drf-aa", DRFAA()) FUNCTION_ALIAS_ANALYSIS("objc-arc-aa", objcarc::ObjCARCAA()) FUNCTION_ALIAS_ANALYSIS("scev-aa", SCEVAA()) FUNCTION_ALIAS_ANALYSIS("scoped-noalias-aa", ScopedNoAliasAA()) FUNCTION_ALIAS_ANALYSIS("tbaa", TypeBasedAA()) -FUNCTION_ALIAS_ANALYSIS("drf-aa", DRFAA()) #undef FUNCTION_ALIAS_ANALYSIS #undef FUNCTION_ANALYSIS @@ -334,16 +331,12 @@ FUNCTION_PASS("dot-dom", DomPrinter()) FUNCTION_PASS("dot-dom-only", DomOnlyPrinter()) FUNCTION_PASS("dot-post-dom", PostDomPrinter()) FUNCTION_PASS("dot-post-dom-only", PostDomOnlyPrinter()) +FUNCTION_PASS("drf-scoped-noalias", DRFScopedNoAliasPass()) FUNCTION_PASS("dse", DSEPass()) FUNCTION_PASS("dwarf-eh-prepare", DwarfEHPreparePass(TM)) FUNCTION_PASS("expand-large-div-rem", ExpandLargeDivRemPass(TM)) FUNCTION_PASS("expand-large-fp-convert", ExpandLargeFpConvertPass(TM)) FUNCTION_PASS("expand-memcmp", ExpandMemCmpPass(TM)) -FUNCTION_PASS("drf-scoped-noalias", DRFScopedNoAliasPass()) -FUNCTION_PASS("view-dom", DomViewer()) -FUNCTION_PASS("view-dom-only", DomOnlyViewer()) -FUNCTION_PASS("view-post-dom", PostDomViewer()) -FUNCTION_PASS("view-post-dom-only", PostDomOnlyViewer()) FUNCTION_PASS("fix-irreducible", FixIrreduciblePass()) FUNCTION_PASS("flattencfg", FlattenCFGPass()) FUNCTION_PASS("float2int", Float2IntPass()) @@ -375,6 +368,7 @@ FUNCTION_PASS("loop-fusion", LoopFusePass()) FUNCTION_PASS("loop-load-elim", LoopLoadEliminationPass()) 
FUNCTION_PASS("loop-simplify", LoopSimplifyPass()) FUNCTION_PASS("loop-sink", LoopSinkPass()) +FUNCTION_PASS("loop-stripmine", LoopStripMinePass()) FUNCTION_PASS("loop-versioning", LoopVersioningPass()) FUNCTION_PASS("lower-constant-intrinsics", LowerConstantIntrinsicsPass()) FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass()) @@ -393,16 +387,6 @@ FUNCTION_PASS("move-auto-init", MoveAutoInitPass()) FUNCTION_PASS("nary-reassociate", NaryReassociatePass()) FUNCTION_PASS("newgvn", NewGVNPass()) FUNCTION_PASS("no-op-function", NoOpFunctionPass()) -FUNCTION_PASS("jump-threading", JumpThreadingPass()) -FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass()) -FUNCTION_PASS("kcfi", KCFIPass()) -FUNCTION_PASS("lcssa", LCSSAPass()) -FUNCTION_PASS("loop-data-prefetch", LoopDataPrefetchPass()) -FUNCTION_PASS("loop-load-elim", LoopLoadEliminationPass()) -FUNCTION_PASS("loop-fusion", LoopFusePass()) -FUNCTION_PASS("loop-distribute", LoopDistributePass()) -FUNCTION_PASS("loop-versioning", LoopVersioningPass()) -FUNCTION_PASS("loop-stripmine", LoopStripMinePass()) FUNCTION_PASS("objc-arc", ObjCARCOptPass()) FUNCTION_PASS("objc-arc-contract", ObjCARCContractPass()) FUNCTION_PASS("objc-arc-expand", ObjCARCExpandPass()) @@ -438,21 +422,11 @@ FUNCTION_PASS("print", LoopPrinterPass(dbgs())) FUNCTION_PASS("print", MemorySSAWalkerPrinterPass(dbgs())) FUNCTION_PASS("print", PhiValuesPrinterPass(dbgs())) FUNCTION_PASS("print", PostDominatorTreePrinterPass(dbgs())) -FUNCTION_PASS("print", RegionInfoPrinterPass(dbgs())) -FUNCTION_PASS("print", ScalarEvolutionPrinterPass(dbgs())) -FUNCTION_PASS("print", StackSafetyPrinterPass(dbgs())) FUNCTION_PASS("print", TapirRaceDetectPrinterPass(dbgs())) FUNCTION_PASS("print", RegionInfoPrinterPass(dbgs())) FUNCTION_PASS("print", ScalarEvolutionPrinterPass(dbgs())) FUNCTION_PASS("print", StackSafetyPrinterPass(dbgs())) -FUNCTION_PASS("print", LoopAccessInfoPrinterPass(dbgs())) FUNCTION_PASS("print", TaskPrinterPass(dbgs())) -// 
TODO: rename to print after NPM switch -FUNCTION_PASS("print-alias-sets", AliasSetsPrinterPass(dbgs())) -FUNCTION_PASS("print-cfg-sccs", CFGSCCPrinterPass(dbgs())) -FUNCTION_PASS("print-predicateinfo", PredicateInfoPrinterPass(dbgs())) -FUNCTION_PASS("print-mustexecute", MustExecutePrinterPass(dbgs())) -FUNCTION_PASS("print-memderefs", MemDerefPrinterPass(dbgs())) FUNCTION_PASS("print", UniformityInfoPrinterPass(dbgs())) FUNCTION_PASS("reassociate", ReassociatePass()) FUNCTION_PASS("redundant-dbg-inst-elim", RedundantDbgInstEliminationPass()) @@ -460,12 +434,11 @@ FUNCTION_PASS("reg2mem", RegToMemPass()) FUNCTION_PASS("safe-stack", SafeStackPass(TM)) FUNCTION_PASS("scalarize-masked-mem-intrin", ScalarizeMaskedMemIntrinPass()) FUNCTION_PASS("scalarizer", ScalarizerPass()) -FUNCTION_PASS("separate-const-offset-from-gep", SeparateConstOffsetFromGEPPass()) -FUNCTION_PASS("serialize-small-tasks", SerializeSmallTasksPass()) FUNCTION_PASS("sccp", SCCPPass()) FUNCTION_PASS("select-optimize", SelectOptimizePass(TM)) FUNCTION_PASS("separate-const-offset-from-gep", SeparateConstOffsetFromGEPPass()) +FUNCTION_PASS("serialize-small-tasks", SerializeSmallTasksPass()) FUNCTION_PASS("sink", SinkingPass()) FUNCTION_PASS("sjlj-eh-prepare", SjLjEHPreparePass(TM)) FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass()) @@ -474,12 +447,12 @@ FUNCTION_PASS("stack-protector", StackProtectorPass(TM)) FUNCTION_PASS("strip-gc-relocates", StripGCRelocates()) FUNCTION_PASS("structurizecfg", StructurizeCFGPass()) FUNCTION_PASS("tailcallelim", TailCallElimPass()) +FUNCTION_PASS("task-canonicalize", TaskCanonicalizePass()) +FUNCTION_PASS("task-simplify", TaskSimplifyPass()) FUNCTION_PASS("tlshoist", TLSVariableHoistPass()) FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass()) FUNCTION_PASS("trigger-verifier-error", TriggerVerifierErrorPass()) FUNCTION_PASS("tsan", ThreadSanitizerPass()) -FUNCTION_PASS("task-canonicalize", TaskCanonicalizePass()) -FUNCTION_PASS("task-simplify", 
TaskSimplifyPass()) FUNCTION_PASS("typepromotion", TypePromotionPass(TM)) FUNCTION_PASS("unify-loop-exits", UnifyLoopExitsPass()) FUNCTION_PASS("vector-combine", VectorCombinePass()) @@ -651,6 +624,7 @@ LOOP_PASS("print", DDGAnalysisPrinterPass(dbgs())) LOOP_PASS("print", IVUsersPrinterPass(dbgs())) LOOP_PASS("print", LoopCachePrinterPass(dbgs())) LOOP_PASS("print", LoopNestPrinterPass(dbgs())) +LOOP_PASS("tapir-indvars", TapirIndVarSimplifyPass()) #undef LOOP_PASS #ifndef LOOP_PASS_WITH_PARAMS diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index b456a2af0d9142a..26fa6eafb9d6914 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -658,7 +658,7 @@ struct AddressSanitizer { UseAfterScope(UseAfterScope || ClUseAfterScope), UseAfterReturn(ClUseAfterReturn.getNumOccurrences() ? ClUseAfterReturn : UseAfterReturn), - SSGI(SSGI), TI(TI), + TI(TI), SSGI(SSGI), InstrumentationWithCallsThreshold( ClInstrumentationWithCallsThreshold.getNumOccurrences() > 0 ? 
ClInstrumentationWithCallsThreshold diff --git a/llvm/lib/Transforms/Instrumentation/CilkSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/CilkSanitizer.cpp index 011007d82e3fa0a..6a443c5f29fd7d5 100644 --- a/llvm/lib/Transforms/Instrumentation/CilkSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/CilkSanitizer.cpp @@ -44,9 +44,9 @@ #include "llvm/InitializePasses.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/ModRef.h" +#include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Instrumentation/CSI.h" -#include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/EscapeEnumerator.h" #include "llvm/Transforms/Utils/Local.h" @@ -89,31 +89,25 @@ STATISTIC(NumSunkInstrumentedReads, STATISTIC(NumSunkInstrumentedWrites, "Number of writes whose instrumentation has been coalesced and sunk"); -static cl::opt - EnableStaticRaceDetection( - "enable-static-race-detection", cl::init(true), cl::Hidden, - cl::desc("Enable static detection of determinacy races.")); +static cl::opt EnableStaticRaceDetection( + "enable-static-race-detection", cl::init(true), cl::Hidden, + cl::desc("Enable static detection of determinacy races.")); -static cl::opt - AssumeRaceFreeLibraryFunctions( - "assume-race-free-lib", cl::init(false), cl::Hidden, - cl::desc("Assume library functions are race free.")); +static cl::opt AssumeRaceFreeLibraryFunctions( + "assume-race-free-lib", cl::init(false), cl::Hidden, + cl::desc("Assume library functions are race free.")); -static cl::opt - IgnoreInaccessibleMemory( - "ignore-inaccessible-memory", cl::init(false), cl::Hidden, - cl::desc("Ignore inaccessible memory when checking for races.")); +static cl::opt IgnoreInaccessibleMemory( + "ignore-inaccessible-memory", cl::init(false), cl::Hidden, + cl::desc("Ignore inaccessible memory when checking for races.")); -static cl::opt - AssumeNoExceptions( - 
"cilksan-assume-no-exceptions", cl::init(false), cl::Hidden, - cl::desc("Assume that ordinary calls cannot throw exceptions.")); +static cl::opt AssumeNoExceptions( + "cilksan-assume-no-exceptions", cl::init(false), cl::Hidden, + cl::desc("Assume that ordinary calls cannot throw exceptions.")); -static cl::opt - MaxUsesToExploreCapture( - "cilksan-max-uses-to-explore-capture", cl::init(unsigned(-1)), - cl::Hidden, - cl::desc("Maximum number of uses to explore for a capture query.")); +static cl::opt MaxUsesToExploreCapture( + "cilksan-max-uses-to-explore-capture", cl::init(unsigned(-1)), cl::Hidden, + cl::desc("Maximum number of uses to explore for a capture query.")); static cl::opt MAAPChecks("cilksan-maap-checks", cl::init(true), cl::Hidden, @@ -123,11 +117,10 @@ static cl::opt LoopHoisting( "cilksan-loop-hoisting", cl::init(true), cl::Hidden, cl::desc("Enable or disable hoisting instrumentation out of loops.")); -static cl::opt - IgnoreSanitizeCilkAttr( - "ignore-sanitize-cilk-attr", cl::init(false), cl::Hidden, - cl::desc("Ignore the 'sanitize_cilk' attribute when choosing what to " - "instrument.")); +static cl::opt IgnoreSanitizeCilkAttr( + "ignore-sanitize-cilk-attr", cl::init(false), cl::Hidden, + cl::desc("Ignore the 'sanitize_cilk' attribute when choosing what to " + "instrument.")); static cl::opt ClCilksanBCPath( "cilksan-bc-path", cl::init(""), cl::Hidden, @@ -186,9 +179,8 @@ class ObjectTable : public ForensicTable { static StructType *getSourceLocStructType(LLVMContext &C); /// Append the line and file information to the table. 
- void add(uint64_t ID, int32_t Line = -1, - StringRef Filename = "", StringRef Directory = "", - StringRef Name = ""); + void add(uint64_t ID, int32_t Line = -1, StringRef Filename = "", + StringRef Directory = "", StringRef Name = ""); }; namespace { @@ -202,10 +194,10 @@ struct CilkSanitizerImpl : public CSIImpl { : CilkSanImpl(CilkSanImpl), TI(TI), LI(LI), DT(DT), DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy), TLI(TLI) {} - bool InstrumentSimpleInstructions( - SmallVectorImpl &Instructions); - bool InstrumentAnyMemIntrinsics( - SmallVectorImpl &MemIntrinsics); + bool + InstrumentSimpleInstructions(SmallVectorImpl &Instructions); + bool + InstrumentAnyMemIntrinsics(SmallVectorImpl &MemIntrinsics); bool InstrumentCalls(SmallVectorImpl &Calls); bool InstrumentAncillaryInstructions( SmallPtrSetImpl &Allocas, @@ -236,10 +228,10 @@ struct CilkSanitizerImpl : public CSIImpl { DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy), TLI(TLI) {} void InsertArgMAAPs(Function &F, Value *FuncId); - bool InstrumentSimpleInstructions( - SmallVectorImpl &Instructions); - bool InstrumentAnyMemIntrinsics( - SmallVectorImpl &MemIntrinsics); + bool + InstrumentSimpleInstructions(SmallVectorImpl &Instructions); + bool + InstrumentAnyMemIntrinsics(SmallVectorImpl &MemIntrinsics); bool InstrumentCalls(SmallVectorImpl &Calls); void GetDetachesForCoalescedInstrumentation( SmallPtrSetImpl &LoopInstToHoist, @@ -265,14 +257,13 @@ struct CilkSanitizerImpl : public CSIImpl { // caller or some ancestor may read or write the referenced memory in // parallel and whether the caller can provide any noalias guarantee on that // memory location. - enum class MAAPValue : uint8_t - { - NoAccess = 0, - Mod = 1, - Ref = 2, - ModRef = Mod | Ref, - NoAlias = 4, - }; + enum class MAAPValue : uint8_t { + NoAccess = 0, + Mod = 1, + Ref = 2, + ModRef = Mod | Ref, + NoAlias = 4, + }; static unsigned RaceTypeToFlagVal(RaceInfo::RaceType RT); // Get the MAAP value for specific instruction and operand. 
Value *getMAAPValue(Instruction *I, IRBuilder<> &IRB, @@ -282,8 +273,8 @@ struct CilkSanitizerImpl : public CSIImpl { // Helper method to determine noalias MAAP bit. Value *getNoAliasMAAPValue(Instruction *I, IRBuilder<> &IRB, unsigned OperandNum, MemoryLocation Loc, - const RaceInfo::RaceData &RD, - const Value *Obj, Value *MAAPVal); + const RaceInfo::RaceData &RD, const Value *Obj, + Value *MAAPVal); // Synthesize a check of the MAAP to determine whether the MAAP means we can // skip executing instrumentation for the given instruction. Value *getMAAPCheck(Instruction *I, IRBuilder<> &IRB, @@ -405,8 +396,7 @@ struct CilkSanitizerImpl : public CSIImpl { return Callee; } template - FunctionCallee getHookFunction(StringRef Name, Type *RetTy, - ArgsTy... Args) { + FunctionCallee getHookFunction(StringRef Name, Type *RetTy, ArgsTy... Args) { return getHookFunction(Name, AttributeList{}, RetTy, Args...); } @@ -465,8 +455,8 @@ struct CilkSanitizerImpl : public CSIImpl { LoopInfo &LI); bool instrumentSync(SyncInst *SI, unsigned SyncRegNum); void instrumentTapirLoop(Loop &L, TaskInfo &TI, - DenseMap &SyncRegNums, - ScalarEvolution *SE = nullptr); + DenseMap &SyncRegNums, + ScalarEvolution *SE = nullptr); bool instrumentAlloca(Instruction *I, TaskInfo &TI); bool instrumentFunctionUsingRI(Function &F); @@ -481,10 +471,8 @@ struct CilkSanitizerImpl : public CSIImpl { return instrumentAnyMemIntrinAcc(I, OperandNum, IRB); } - bool instrumentLoadOrStoreHoisted(Instruction *I, - Value *Addr, - Value *RangeVal, - IRBuilder<> &IRB, + bool instrumentLoadOrStoreHoisted(Instruction *I, Value *Addr, + Value *RangeVal, IRBuilder<> &IRB, uint64_t LocalId); private: @@ -543,8 +531,7 @@ struct CilkSanitizerImpl : public CSIImpl { const TargetLibraryInfo *TLI) const; // Cached results of calls to getUnderlyingObjects. 
- using BaseObjMapTy = - DenseMap>; + using BaseObjMapTy = DenseMap>; mutable BaseObjMapTy BaseObjects; SmallVectorImpl &lookupBaseObjects(const Value *Addr, LoopInfo *LI) const { @@ -579,8 +566,8 @@ struct CilkSanitizerImpl : public CSIImpl { if (isa(Ptr)) MayBeCapturedCache.lookup(Ptr); else - MayBeCapturedCache[Ptr] = PointerMayBeCaptured(Ptr, true, false, - MaxUsesToExploreCapture); + MayBeCapturedCache[Ptr] = + PointerMayBeCaptured(Ptr, true, false, MaxUsesToExploreCapture); } return MayBeCapturedCache[Ptr]; } @@ -589,56 +576,8 @@ struct CilkSanitizerImpl : public CSIImpl { AttributeList AL = AttributeList()); }; -/// CilkSanitizer: instrument the code in module to find races. -struct CilkSanitizerLegacyPass : public ModulePass { - static char ID; // Pass identification, replacement for typeid. - CilkSanitizerLegacyPass(bool CallsMayThrow = !AssumeNoExceptions, - bool JitMode = false) - : ModulePass(ID), JitMode(JitMode), CallsMayThrow(CallsMayThrow) { - initializeCilkSanitizerLegacyPassPass(*PassRegistry::getPassRegistry()); - } - StringRef getPassName() const override { return "CilkSanitizer"; } - void getAnalysisUsage(AnalysisUsage &AU) const override; - bool runOnModule(Module &M) override; - - bool JitMode = false; - bool CallsMayThrow = true; -}; } // end anonymous namespace -char CilkSanitizerLegacyPass::ID = 0; - -INITIALIZE_PASS_BEGIN( - CilkSanitizerLegacyPass, "csan", - "CilkSanitizer: detects determinacy races in Cilk programs.", - false, false) -INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) -INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) -INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) -INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TapirRaceDetectWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TaskInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) -INITIALIZE_PASS_END( - CilkSanitizerLegacyPass, "csan", - "CilkSanitizer: detects determinacy races in Cilk programs.", - false, false) - -void CilkSanitizerLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addPreserved(); - AU.addRequired(); -} - uint64_t ObjectTable::add(Instruction &I, Value *Obj) { uint64_t ID = getId(&I); if (isa(Obj)) { @@ -678,7 +617,7 @@ uint64_t ObjectTable::add(Instruction &I, Value *Obj) { // Next, if this is an alloca instruction, look for a llvm.dbg.declare // intrinsic. if (AllocaInst *AI = dyn_cast(Obj)) { - TinyPtrVector DbgDeclares = FindDbgDeclareUses(AI); + TinyPtrVector DbgDeclares = findDbgDeclares(AI); if (!DbgDeclares.empty()) { auto *LV = DbgDeclares.front()->getVariable(); add(ID, LV->getLine(), LV->getFilename(), LV->getDirectory(), @@ -714,9 +653,8 @@ StructType *ObjectTable::getSourceLocStructType(LLVMContext &C) { /* File */ PointerType::get(IntegerType::get(C, 8), 0)); } -void ObjectTable::add(uint64_t ID, int32_t Line, - StringRef Filename, StringRef Directory, - StringRef Name) { +void ObjectTable::add(uint64_t ID, int32_t Line, StringRef Filename, + StringRef Directory, StringRef Name) { assert(LocalIdToSourceLocationMap.find(ID) == LocalIdToSourceLocationMap.end() && "Id already exists in FED table."); @@ -762,8 +700,8 @@ Constant *ObjectTable::insertIntoModule(Module &M) const { ArrayType *TableArrayType = ArrayType::get(TableType, TableEntries.size()); Constant *Table = ConstantArray::get(TableArrayType, TableEntries); GlobalVariable *GV = - new GlobalVariable(M, TableArrayType, false, GlobalValue::InternalLinkage, - Table, CsiUnitObjTableName); + new GlobalVariable(M, TableArrayType, false, GlobalValue::InternalLinkage, + Table, CsiUnitObjTableName); return ConstantExpr::getGetElementPtr(GV->getValueType(), 
GV, GepArgs); } @@ -831,21 +769,21 @@ void CilkSanitizerImpl::initializeCsanObjectTables() { } // Create a struct type to match the unit_obj_entry_t type in csanrt.c. -StructType *CilkSanitizerImpl::getUnitObjTableType( - LLVMContext &C, PointerType *EntryPointerType) { +StructType * +CilkSanitizerImpl::getUnitObjTableType(LLVMContext &C, + PointerType *EntryPointerType) { return StructType::get(IntegerType::get(C, 64), EntryPointerType); } Constant *CilkSanitizerImpl::objTableToUnitObjTable( Module &M, StructType *UnitObjTableType, ObjectTable &ObjTable) { Constant *NumEntries = - ConstantInt::get(IntegerType::get(M.getContext(), 64), ObjTable.size()); + ConstantInt::get(IntegerType::get(M.getContext(), 64), ObjTable.size()); // Constant *BaseIdPtr = // ConstantExpr::getPointerCast(FedTable.baseId(), - // Type::getInt8PtrTy(M.getContext(), 0)); + // PointerType::get(M.getContext(), 0)); Constant *InsertedTable = ObjTable.insertIntoModule(M); - return ConstantStruct::get(UnitObjTableType, NumEntries, - InsertedTable); + return ConstantStruct::get(UnitObjTableType, NumEntries, InsertedTable); } void CilkSanitizerImpl::collectUnitObjectTables() { @@ -853,8 +791,7 @@ void CilkSanitizerImpl::collectUnitObjectTables() { StructType *UnitObjTableType = getUnitObjTableType(C, ObjectTable::getPointerType(C)); - UnitObjTables.push_back( - objTableToUnitObjTable(M, UnitObjTableType, LoadObj)); + UnitObjTables.push_back(objTableToUnitObjTable(M, UnitObjTableType, LoadObj)); UnitObjTables.push_back( objTableToUnitObjTable(M, UnitObjTableType, StoreObj)); UnitObjTables.push_back( @@ -872,7 +809,7 @@ CallInst *CilkSanitizerImpl::createRTUnitInitCall(IRBuilder<> &IRB) { getUnitObjTableType(C, ObjectTable::getPointerType(C)); // Lookup __csanrt_unit_init - SmallVector InitArgTypes({IRB.getInt8PtrTy(), + SmallVector InitArgTypes({IRB.getPtrTy(), PointerType::get(UnitFedTableType, 0), PointerType::get(UnitObjTableType, 0), InitCallsiteToFunction->getType()}); @@ -885,16 +822,16 @@ 
CallInst *CilkSanitizerImpl::createRTUnitInitCall(IRBuilder<> &IRB) { ArrayType *UnitFedTableArrayType = ArrayType::get(UnitFedTableType, UnitFedTables.size()); Constant *FEDTable = ConstantArray::get(UnitFedTableArrayType, UnitFedTables); - GlobalVariable *FEDGV = new GlobalVariable(M, UnitFedTableArrayType, false, - GlobalValue::InternalLinkage, FEDTable, - CsiUnitFedTableArrayName); + GlobalVariable *FEDGV = new GlobalVariable( + M, UnitFedTableArrayType, false, GlobalValue::InternalLinkage, FEDTable, + CsiUnitFedTableArrayName); ArrayType *UnitObjTableArrayType = ArrayType::get(UnitObjTableType, UnitObjTables.size()); Constant *ObjTable = ConstantArray::get(UnitObjTableArrayType, UnitObjTables); - GlobalVariable *ObjGV = new GlobalVariable(M, UnitObjTableArrayType, false, - GlobalValue::InternalLinkage, ObjTable, - CsiUnitObjTableArrayName); + GlobalVariable *ObjGV = new GlobalVariable( + M, UnitObjTableArrayType, false, GlobalValue::InternalLinkage, ObjTable, + CsiUnitObjTableArrayName); Constant *Zero = ConstantInt::get(IRB.getInt32Ty(), 0); Value *GepArgs[] = {Zero, Zero}; @@ -903,9 +840,9 @@ CallInst *CilkSanitizerImpl::createRTUnitInitCall(IRBuilder<> &IRB) { return IRB.CreateCall( RTUnitInit, {IRB.CreateGlobalStringPtr(M.getName()), - ConstantExpr::getGetElementPtr(FEDGV->getValueType(), FEDGV, GepArgs), - ConstantExpr::getGetElementPtr(ObjGV->getValueType(), ObjGV, GepArgs), - InitCallsiteToFunction}); + ConstantExpr::getGetElementPtr(FEDGV->getValueType(), FEDGV, GepArgs), + ConstantExpr::getGetElementPtr(ObjGV->getValueType(), ObjGV, GepArgs), + InitCallsiteToFunction}); } // Initialize all instrumentation hooks that are specific to CilkSanitizer. 
@@ -925,7 +862,7 @@ void CilkSanitizerImpl::initializeCsanHooks() { Type *DetachPropertyTy = CsiDetachProperty::getType(C); Type *DetContPropertyTy = CsiDetachContinueProperty::getType(C); Type *RetType = IRB.getVoidTy(); - Type *AddrType = IRB.getInt8PtrTy(); + Type *AddrType = IRB.getPtrTy(); Type *NumBytesType = IRB.getInt32Ty(); Type *LargeNumBytesType = IntptrTy; Type *IDType = IRB.getInt64Ty(); @@ -953,7 +890,7 @@ void CilkSanitizerImpl::initializeCsanHooks() { FnAttrs = FnAttrs.addParamAttribute(C, 1, Attribute::NoCapture); FnAttrs = FnAttrs.addParamAttribute(C, 1, Attribute::ReadNone); CsanRead = getHookFunction("__csan_load", FnAttrs, RetType, IDType, - AddrType, NumBytesType, LoadPropertyTy); + AddrType, NumBytesType, LoadPropertyTy); } { AttributeList FnAttrs; @@ -1053,8 +990,7 @@ void CilkSanitizerImpl::initializeCsanHooks() { getHookFunction("__cilksan_disable_checking", RetType); } { - CsanEnableChecking = getHookFunction("__cilksan_enable_checking", - RetType); + CsanEnableChecking = getHookFunction("__cilksan_enable_checking", RetType); } Type *MAAPTy = IRB.getInt8Ty(); @@ -1070,9 +1006,7 @@ void CilkSanitizerImpl::initializeCsanHooks() { HookFn->setMemoryEffects(HookFn->getMemoryEffects() | MemoryEffects::argMemOnly(ModRefInfo::ModRef)); } - { - SetMAAP = getHookFunction("__csan_set_MAAP", RetType, MAAPTy, IDType); - } + { SetMAAP = getHookFunction("__csan_set_MAAP", RetType, MAAPTy, IDType); } { CsanBeforeLoop = getHookFunction("__csan_before_loop", IRB.getVoidTy(), @@ -1091,13 +1025,14 @@ void CilkSanitizerImpl::initializeCsanHooks() { CsiAfterAllocaFn->setDoesNotThrow(); } -static BasicBlock *SplitOffPreds( - BasicBlock *BB, SmallVectorImpl &Preds, DominatorTree *DT, - LoopInfo *LI) { +static BasicBlock *SplitOffPreds(BasicBlock *BB, + SmallVectorImpl &Preds, + DominatorTree *DT, LoopInfo *LI) { if (BB->isLandingPad()) { + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); SmallVector NewBBs; SplitLandingPadPredecessors(BB, Preds, 
".csi-split-lp", ".csi-split", - NewBBs, DT, LI); + NewBBs, &DTU, LI); return NewBBs[1]; } @@ -1167,14 +1102,14 @@ static void setupBlock(BasicBlock *BB, DominatorTree *DT, LoopInfo *LI, } NumPredTypes = static_cast(!DetachPreds.empty()) + - static_cast(!TFResumePreds.empty()) + - static_cast(!SyncPreds.empty()) + - static_cast(!SyncUnwindPreds.empty()) + - static_cast(!AllocFnPreds.empty()) + - static_cast(!FreeFnPreds.empty()) + - static_cast(LibCallPreds.size()) + - static_cast(!InvokePreds.empty()) + - static_cast(HasOtherPredTypes); + static_cast(!TFResumePreds.empty()) + + static_cast(!SyncPreds.empty()) + + static_cast(!SyncUnwindPreds.empty()) + + static_cast(!AllocFnPreds.empty()) + + static_cast(!FreeFnPreds.empty()) + + static_cast(LibCallPreds.size()) + + static_cast(!InvokePreds.empty()) + + static_cast(HasOtherPredTypes); // Splitting predecessors works differently for landingpads versus normal // basic blocks. If the block is not a landingpad, split off every type of @@ -1257,15 +1192,15 @@ static bool shouldInstrumentReadWriteFromAddress(const Module *M, Value *Addr) { StringRef SectionName = GV->getSection(); // Check if the global is in the PGO counters section. auto OF = Triple(M->getTargetTriple()).getObjectFormat(); - if (SectionName.endswith( + if (SectionName.ends_with( getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo*/ false))) return false; } // Check if the global is private gcov data. 
- if (GV->getName().startswith("__llvm_gcov") || - GV->getName().startswith("__llvm_gcda")) + if (GV->getName().starts_with("__llvm_gcov") || + GV->getName().starts_with("__llvm_gcda")) return false; } @@ -1327,7 +1262,7 @@ bool CilkSanitizerImpl::MightHaveDetachedUse(const Value *V, // assert(AllocTask && "Null task for instruction."); if (!AllocTask) { LLVM_DEBUG(dbgs() << "MightHaveDetachedUse: No task found for given value " - << *V << "\n"); + << *V << "\n"); return false; } @@ -1457,7 +1392,7 @@ bool CilkSanitizerImpl::unknownObjectUses(const Value *Addr, LoopInfo *LI, void CilkSanitizerImpl::chooseInstructionsToInstrument( SmallVectorImpl &Local, SmallVectorImpl &All, const TaskInfo &TI, LoopInfo &LI, const TargetLibraryInfo *TLI) { - SmallSet WriteTargets; + SmallSet WriteTargets; // Iterate from the end. for (Instruction *I : reverse(Local)) { if (StoreInst *Store = dyn_cast(I)) { @@ -1481,11 +1416,9 @@ void CilkSanitizerImpl::chooseInstructionsToInstrument( continue; } } - Value *Addr = isa(*I) - ? cast(I)->getPointerOperand() - : cast(I)->getPointerOperand(); - if (LocalBaseObj(Addr, &LI, TLI) && - !PossibleRaceByCapture(Addr, TI, &LI)) { + Value *Addr = isa(*I) ? cast(I)->getPointerOperand() + : cast(I)->getPointerOperand(); + if (LocalBaseObj(Addr, &LI, TLI) && !PossibleRaceByCapture(Addr, TI, &LI)) { // The variable is addressable but not captured, so it cannot be // referenced from a different thread and participate in a data race // (see llvm/Analysis/CaptureTracking.h for details). 
@@ -1529,9 +1462,9 @@ bool CilkSanitizerImpl::simpleCallCannotRace(const Instruction &I) { bool CilkSanitizerImpl::shouldIgnoreCall(const Instruction &I) { if (const CallBase *Call = dyn_cast(&I)) if (const Function *Called = Call->getCalledFunction()) - if (Called->hasName() && (Called->getName().startswith("__csi") || - Called->getName().startswith("__csan") || - Called->getName().startswith("__cilksan"))) + if (Called->hasName() && (Called->getName().starts_with("__csi") || + Called->getName().starts_with("__csan") || + Called->getName().starts_with("__cilksan"))) return true; return false; } @@ -1545,8 +1478,7 @@ Value *CilkSanitizerImpl::GetCalleeFuncID(const Function *Callee, // Unknown targets (i.e., indirect calls) are always unknown. return IRB.getInt64(CsiCallsiteUnknownTargetId); - std::string GVName = - CsiFuncIdVariablePrefix + Callee->getName().str(); + std::string GVName = CsiFuncIdVariablePrefix + Callee->getName().str(); GlobalVariable *FuncIdGV = M.getNamedGlobal(GVName); Type *FuncIdGVTy = IRB.getInt64Ty(); if (!FuncIdGV) { @@ -1625,8 +1557,7 @@ bool CilkSanitizerImpl::SimpleInstrumentor::InstrumentCalls( LocalResult |= CilkSanImpl.instrumentIntrinsicCall(I, /*MAAPVals*/ nullptr); else if (isLibCall(*I, TLI)) - LocalResult |= - CilkSanImpl.instrumentLibCall(I, /*MAAPVals*/ nullptr); + LocalResult |= CilkSanImpl.instrumentLibCall(I, /*MAAPVals*/ nullptr); else LocalResult |= CilkSanImpl.instrumentCallsite(I, /*MAAPVals*/ nullptr); if (LocalResult) { @@ -1745,8 +1676,8 @@ void CilkSanitizerImpl::Instrumentor::getDetachesForInstruction( } } -unsigned CilkSanitizerImpl::Instrumentor::RaceTypeToFlagVal( - RaceInfo::RaceType RT) { +unsigned +CilkSanitizerImpl::Instrumentor::RaceTypeToFlagVal(RaceInfo::RaceType RT) { unsigned FlagVal = static_cast(MAAPValue::NoAccess); if (RaceInfo::isLocalRace(RT) || RaceInfo::isOpaqueRace(RT)) FlagVal = static_cast(MAAPValue::ModRef); @@ -1925,10 +1856,10 @@ bool CilkSanitizerImpl::Instrumentor::InstrumentCalls( // 
Get current race data for this call. RaceInfo::RaceType CallRT = RI.getRaceType(I); LLVM_DEBUG({ - dbgs() << "Call " << *I << ": "; - RaceInfo::printRaceType(CallRT, dbgs()); - dbgs() << "\n"; - }); + dbgs() << "Call " << *I << ": "; + RaceInfo::printRaceType(CallRT, dbgs()); + dbgs() << "\n"; + }); // Get update race data, if it's available. RaceInfo::RaceType FuncRT = CallRT; @@ -1938,20 +1869,20 @@ bool CilkSanitizerImpl::Instrumentor::InstrumentCalls( FuncRT = CilkSanImpl.FunctionRaceType[CF]; LLVM_DEBUG({ - dbgs() << " FuncRT: "; - RaceInfo::printRaceType(FuncRT, dbgs()); - dbgs() << "\n"; - }); + dbgs() << " FuncRT: "; + RaceInfo::printRaceType(FuncRT, dbgs()); + dbgs() << "\n"; + }); // Propagate information about opaque races from function to call. if (!RaceInfo::isOpaqueRace(FuncRT)) CallRT = RaceInfo::clearOpaqueRace(CallRT); LLVM_DEBUG({ - dbgs() << " New CallRT: "; - RaceInfo::printRaceType(CallRT, dbgs()); - dbgs() << "\n"; - }); + dbgs() << " New CallRT: "; + RaceInfo::printRaceType(CallRT, dbgs()); + dbgs() << "\n"; + }); // If this instruction cannot race, see if we can suppress it if (!RaceInfo::isRace(CallRT)) { @@ -1964,13 +1895,13 @@ bool CilkSanitizerImpl::Instrumentor::InstrumentCalls( if (!CB->doesNotAccessMemory()) LocalResult |= CilkSanImpl.suppressCallsite(I); continue; - // } else { - // GetDetaches |= CilkSanImpl.instrumentCallsite(I); - // // SmallPtrSet Objects; - // // RI.getObjectsFor(I, Objects); - // // for (Value *Obj : Objects) { - // // CilkSanImpl.ObjectMRForRace[Obj] = ModRefInfo::ModRef; - // // } + // } else { + // GetDetaches |= CilkSanImpl.instrumentCallsite(I); + // // SmallPtrSet Objects; + // // RI.getObjectsFor(I, Objects); + // // for (Value *Obj : Objects) { + // // CilkSanImpl.ObjectMRForRace[Obj] = ModRefInfo::ModRef; + // // } } // continue; } @@ -2014,9 +1945,9 @@ bool CilkSanitizerImpl::Instrumentor::InstrumentCalls( MAAPVal = getMAAPValue(I, IRB, OpIdx, MAAPValue::NoAccess, /*CheckArgs*/ false); LLVM_DEBUG({ - 
dbgs() << " Op: " << *CB->getArgOperand(OpIdx) << "\n"; - dbgs() << " MAAP value: " << *MAAPVal << "\n"; - }); + dbgs() << " Op: " << *CB->getArgOperand(OpIdx) << "\n"; + dbgs() << " MAAP value: " << *MAAPVal << "\n"; + }); MAAPVals.push_back(MAAPVal); ++OpIdx; } @@ -2095,9 +2026,8 @@ static MemoryLocation getMemoryLocation(Instruction *I, unsigned OperandNum, // Evaluate the noalias value in the MAAP for Obj, and intersect that result // with the noalias information for other objects. Value *CilkSanitizerImpl::Instrumentor::getNoAliasMAAPValue( - Instruction *I, IRBuilder<> &IRB, unsigned OperandNum, - MemoryLocation Loc, const RaceInfo::RaceData &RD, const Value *Obj, - Value *ObjNoAliasFlag) { + Instruction *I, IRBuilder<> &IRB, unsigned OperandNum, MemoryLocation Loc, + const RaceInfo::RaceData &RD, const Value *Obj, Value *ObjNoAliasFlag) { AAResults *AA = RI.getAA(); for (const RaceInfo::RaceData &OtherRD : RI.getRaceData(I)) { @@ -2125,12 +2055,12 @@ Value *CilkSanitizerImpl::Instrumentor::getNoAliasMAAPValue( // then we don't have "no alias". if (Obj == OtherObj) { LLVM_DEBUG({ - dbgs() << "getNoAliasMAAPValue: Matching objects found:\n"; - dbgs() << " Obj: " << *Obj << "\n"; - dbgs() << " I: " << *I << "\n"; - dbgs() << " Operands " << OperandNum << ", " << OtherRD.OperandNum - << "\n"; - }); + dbgs() << "getNoAliasMAAPValue: Matching objects found:\n"; + dbgs() << " Obj: " << *Obj << "\n"; + dbgs() << " I: " << *I << "\n"; + dbgs() << " Operands " << OperandNum << ", " << OtherRD.OperandNum + << "\n"; + }); return getMAAPIRValue(IRB, 0); } @@ -2141,7 +2071,8 @@ Value *CilkSanitizerImpl::Instrumentor::getNoAliasMAAPValue( if (isa(OtherObj)) continue; - // // If the other object is something we can't reason about locally, then we + // // If the other object is something we can't reason about locally, then + // we // // give up. 
// if (!isa(OtherObj)) // return getMAAPIRValue(IRB, 0); @@ -2149,10 +2080,10 @@ Value *CilkSanitizerImpl::Instrumentor::getNoAliasMAAPValue( // Otherwise, check if the other object might alias this one. if (AA->alias(Loc, MemoryLocation::getBeforeOrAfter(OtherObj))) { LLVM_DEBUG({ - dbgs() << "getNoAliasMAAPValue: Possible aliasing between:\n"; - dbgs() << " Obj: " << *Obj << "\n"; - dbgs() << " OtherObj: " << *OtherObj << "\n"; - }); + dbgs() << "getNoAliasMAAPValue: Possible aliasing between:\n"; + dbgs() << " Obj: " << *Obj << "\n"; + dbgs() << " OtherObj: " << *OtherObj << "\n"; + }); return getMAAPIRValue(IRB, 0); } } @@ -2576,7 +2507,7 @@ bool CilkSanitizerImpl::Instrumentor::PerformDelayedInstrumentation() { // Handle delayed memory intrinsics for (auto &MemIntrinOp : DelayedMemIntrinsics) { Instruction *I = MemIntrinOp.first; - assert((RI.mightRaceViaAncestor(I) || RI.mightRaceLocally(I)) && + assert((RI.mightRaceViaAncestor(I) || RI.mightRaceLocally(I)) && "Delayed instrumentation is not local race or race via ancestor"); unsigned OperandNum = MemIntrinOp.second; IRBuilder<> IRB(I); @@ -2768,8 +2699,8 @@ static const SCEV *getRuntimeTripCount(Loop &L, ScalarEvolution *SE, // Helper function to find where in the given basic block to insert coalesced // instrumentation. -static Instruction *getLoopBlockInsertPt(BasicBlock *BB, FunctionCallee LoopHook, - bool AfterHook) { +static Instruction * +getLoopBlockInsertPt(BasicBlock *BB, FunctionCallee LoopHook, bool AfterHook) { // BasicBlock *PreheaderBB = L->getLoopPreheader(); for (Instruction &I : *BB) if (CallBase *CB = dyn_cast(&I)) @@ -2790,7 +2721,8 @@ static Instruction *getLoopBlockInsertPt(BasicBlock *BB, FunctionCallee LoopHook } // TODO: Maybe to avoid confusion with CilkSanImpl.Options.InstrumentLoops -// (which is unrelated to this), rename this to involve the word "hoist" or something. +// (which is unrelated to this), rename this to involve the word "hoist" or +// something. 
bool CilkSanitizerImpl::Instrumentor::InstrumentLoops( SmallPtrSetImpl &LoopInstToHoist, SmallPtrSetImpl &LoopInstToSink, @@ -2853,7 +2785,7 @@ bool CilkSanitizerImpl::Instrumentor::InstrumentLoops( SCEVExpander Expander(*SE, DL, "cilksan"); Value *AddrVal = - Expander.expandCodeFor(Addr, Type::getInt8PtrTy(Ctx), InsertPt); + Expander.expandCodeFor(Addr, PointerType::get(Ctx, 0), InsertPt); Value *RangeVal = Expander.expandCodeFor(RangeExpr, Type::getInt64Ty(Ctx), InsertPt); HoistedHookArgs[I] = std::make_pair(AddrVal, RangeVal); @@ -2864,7 +2796,7 @@ bool CilkSanitizerImpl::Instrumentor::InstrumentLoops( DenseMap, std::pair> SunkHookArgs; // Map to track which loops we have already created counters for - SmallMapVector LoopToCounterMap; + SmallMapVector LoopToCounterMap; // Compute arguments for coalesced instrumentation sunk after the loop. for (Instruction *I : LoopInstToSink) { // Get the loop @@ -2939,7 +2871,7 @@ bool CilkSanitizerImpl::Instrumentor::InstrumentLoops( getLoopBlockInsertPt(ExitBB, CilkSanImpl.CsanAfterLoop, /*AfterHook*/ true); Value *AddrVal = - Expander.expandCodeFor(Addr, Type::getInt8PtrTy(Ctx), InsertPt); + Expander.expandCodeFor(Addr, PointerType::get(Ctx, 0), InsertPt); Value *RangeVal = Expander.expandCodeFor(RangeExpr, Type::getInt64Ty(Ctx), InsertPt); @@ -3184,13 +3116,14 @@ bool CilkSanitizerImpl::instrumentFunctionUsingRI(Function &F) { if (F.empty() || shouldNotInstrumentFunction(F) || !CheckSanitizeCilkAttr(F)) { LLVM_DEBUG({ - dbgs() << "Skipping " << F.getName() << "\n"; - if (F.empty()) - dbgs() << " Empty function\n"; - else if (shouldNotInstrumentFunction(F)) - dbgs() << " Function should not be instrumented\n"; - else if (!CheckSanitizeCilkAttr(F)) - dbgs() << " Function lacks sanitize_cilk attribute\n";}); + dbgs() << "Skipping " << F.getName() << "\n"; + if (F.empty()) + dbgs() << " Empty function\n"; + else if (shouldNotInstrumentFunction(F)) + dbgs() << " Function should not be instrumented\n"; + else if 
(!CheckSanitizeCilkAttr(F)) + dbgs() << " Function lacks sanitize_cilk attribute\n"; + }); return false; } @@ -3302,14 +3235,16 @@ bool CilkSanitizerImpl::instrumentFunctionUsingRI(Function &F) { // the start is available at loop entry. LoopInstToHoist.insert(&Inst); CanCoalesce = true; - LLVM_DEBUG(dbgs() << "Can hoist instrumentation for " << Inst << "\n"); + LLVM_DEBUG(dbgs() << "Can hoist instrumentation for " << Inst + << "\n"); } else if (!isa( SE.getConstantMaxBackedgeTakenCount(L))) { // Can sink if stride <= size and the tripcount is unknown but // guaranteed to be finite. LoopInstToSink.insert(&Inst); CanCoalesce = true; - LLVM_DEBUG(dbgs() << "Can sink instrumentation for " << Inst << "\n"); + LLVM_DEBUG(dbgs() << "Can sink instrumentation for " << Inst + << "\n"); } else { LLVM_DEBUG(dbgs() << "Can't hoist or sink instrumentation for " @@ -3361,9 +3296,9 @@ bool CilkSanitizerImpl::instrumentFunctionUsingRI(Function &F) { } } - // Record this function call as either an allocation function, a call to - // free (or delete), a memory intrinsic, or an ordinary real function - // call. + // Record this function call as either an allocation function, a call + // to free (or delete), a memory intrinsic, or an ordinary real + // function call. if (isAllocFn(&Inst, TLI)) AllocationFnCalls.insert(&Inst); else if (isFreeFn(&Inst, TLI)) @@ -3402,7 +3337,7 @@ bool CilkSanitizerImpl::instrumentFunctionUsingRI(Function &F) { // Map each detach instruction with the sync instructions that could sync it. for (SyncInst *Sync : Syncs) for (const Task *MPT : - MPTasks.TaskList[TI.getSpindleFor(Sync->getParent())]) + MPTasks.TaskList[TI.getSpindleFor(Sync->getParent())]) DetachToSync[MPT->getDetach()].push_back(Sync); // Record objects involved in races @@ -3425,9 +3360,8 @@ bool CilkSanitizerImpl::instrumentFunctionUsingRI(Function &F) { // Instrument ancillary instructions including allocas, allocation-function // calls, free calls, detaches, and syncs. 
- Result |= FuncI.InstrumentAncillaryInstructions(Allocas, AllocationFnCalls, - FreeCalls, SyncRegNums, - SRCounters, DL); + Result |= FuncI.InstrumentAncillaryInstructions( + Allocas, AllocationFnCalls, FreeCalls, SyncRegNums, SRCounters, DL); } else { Instrumentor FuncI(*this, RI, TI, LI, DT, TLI); @@ -3448,9 +3382,8 @@ bool CilkSanitizerImpl::instrumentFunctionUsingRI(Function &F) { // Instrument ancillary instructions including allocas, allocation-function // calls, free calls, detaches, and syncs. - Result |= FuncI.InstrumentAncillaryInstructions(Allocas, AllocationFnCalls, - FreeCalls, SyncRegNums, - SRCounters, DL); + Result |= FuncI.InstrumentAncillaryInstructions( + Allocas, AllocationFnCalls, FreeCalls, SyncRegNums, SRCounters, DL); // Hoist and sink instrumentation when possible (applies to all loops, // not just Tapir loops) @@ -3476,10 +3409,10 @@ bool CilkSanitizerImpl::instrumentFunctionUsingRI(Function &F) { // pointer. Value *FrameAddr = IRB.CreateCall(Intrinsic::getDeclaration(&M, Intrinsic::frameaddress, - IRB.getInt8PtrTy()), + IRB.getPtrTy()), {IRB.getInt32(0)}); - Value *StackSave = - IRB.CreateCall(Intrinsic::getDeclaration(&M, Intrinsic::stacksave)); + Value *StackSave = IRB.CreateCall(Intrinsic::getDeclaration( + &M, Intrinsic::stacksave, {IRB.getPtrTy()})); CallInst *EntryCall = IRB.CreateCall(CsanFuncEntry, {FuncId, FrameAddr, StackSave, FuncEntryProp.getValue(IRB)}); @@ -3501,7 +3434,8 @@ bool CilkSanitizerImpl::instrumentFunctionUsingRI(Function &F) { while (IRBuilder<> *AtExit = EE.Next()) { if (InstrumentationSet & SERIESPARALLEL) { uint64_t ExitLocalId = FunctionExitFED.add(*AtExit->GetInsertPoint()); - Value *ExitCsiId = FunctionExitFED.localToGlobalId(ExitLocalId, *AtExit); + Value *ExitCsiId = + FunctionExitFED.localToGlobalId(ExitLocalId, *AtExit); CsiFuncExitProperty FuncExitProp; FuncExitProp.setMaySpawn(MaySpawn); FuncExitProp.setEHReturn(isa(AtExit->GetInsertPoint())); @@ -3558,9 +3492,8 @@ bool 
CilkSanitizerImpl::instrumentFunctionUsingRI(Function &F) { bool CilkSanitizerImpl::instrumentLoadOrStore(Instruction *I, IRBuilder<> &IRB) { bool IsWrite = isa(*I); - Value *Addr = IsWrite - ? cast(I)->getPointerOperand() - : cast(I)->getPointerOperand(); + Value *Addr = IsWrite ? cast(I)->getPointerOperand() + : cast(I)->getPointerOperand(); Type *Ty = IsWrite ? cast(I)->getValueOperand()->getType() : I->getType(); @@ -3581,9 +3514,8 @@ bool CilkSanitizerImpl::instrumentLoadOrStore(Instruction *I, if (!(InstrumentationSet & SHADOWMEMORY)) return true; - const Align Alignment = IsWrite - ? cast(I)->getAlign() - : cast(I)->getAlign(); + const Align Alignment = + IsWrite ? cast(I)->getAlign() : cast(I)->getAlign(); CsiLoadStoreProperty Prop; Prop.setAlignment(Alignment); Prop.setIsAtomic(I->isAtomic()); @@ -3595,10 +3527,8 @@ bool CilkSanitizerImpl::instrumentLoadOrStore(Instruction *I, assert(LocalId == StoreObjId && "Store received different ID's in FED and object tables."); Value *CsiId = StoreFED.localToGlobalId(LocalId, IRB); - Value *Args[] = {CsiId, - IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), - IRB.getInt32(NumBytesAccessed), - Prop.getValue(IRB)}; + Value *Args[] = {CsiId, IRB.CreatePointerCast(Addr, IRB.getPtrTy()), + IRB.getInt32(NumBytesAccessed), Prop.getValue(IRB)}; Instruction *Call = IRB.CreateCall(CsanWrite, Args); IRB.SetInstDebugLocation(Call); NumInstrumentedWrites++; @@ -3609,10 +3539,8 @@ bool CilkSanitizerImpl::instrumentLoadOrStore(Instruction *I, assert(LocalId == LoadObjId && "Load received different ID's in FED and object tables."); Value *CsiId = LoadFED.localToGlobalId(LocalId, IRB); - Value *Args[] = {CsiId, - IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), - IRB.getInt32(NumBytesAccessed), - Prop.getValue(IRB)}; + Value *Args[] = {CsiId, IRB.CreatePointerCast(Addr, IRB.getPtrTy()), + IRB.getInt32(NumBytesAccessed), Prop.getValue(IRB)}; Instruction *Call = IRB.CreateCall(CsanRead, Args); IRB.SetInstDebugLocation(Call); 
NumInstrumentedReads++; @@ -3656,10 +3584,8 @@ bool CilkSanitizerImpl::instrumentAtomic(Instruction *I, IRBuilder<> &IRB) { assert(LocalId == StoreObjId && "Store received different ID's in FED and object tables."); Value *CsiId = StoreFED.localToGlobalId(LocalId, IRB); - Value *Args[] = {CsiId, - IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), - IRB.getInt32(NumBytesAccessed), - Prop.getValue(IRB)}; + Value *Args[] = {CsiId, IRB.CreatePointerCast(Addr, IRB.getPtrTy()), + IRB.getInt32(NumBytesAccessed), Prop.getValue(IRB)}; Instruction *Call = IRB.CreateCall(CsanWrite, Args); IRB.SetInstDebugLocation(Call); NumInstrumentedWrites++; @@ -3777,8 +3703,8 @@ bool CilkSanitizerImpl::instrumentIntrinsicCall( // Save the stack pointer, if we haven't already if (!SavedStack) - SavedStack = - IRB.CreateCall(Intrinsic::getDeclaration(&M, Intrinsic::stacksave)); + SavedStack = IRB.CreateCall(Intrinsic::getDeclaration( + &M, Intrinsic::stacksave, {IRB.getPtrTy()})); // Spill the argument onto the stack AllocaInst *ArgSpill = IRB.CreateAlloca(ArgTy); @@ -3790,15 +3716,16 @@ bool CilkSanitizerImpl::instrumentIntrinsicCall( } FunctionType *BeforeHookTy = FunctionType::get( IRB.getVoidTy(), BeforeHookParamTys, Called->isVarArg()); - FunctionCallee BeforeIntrinCallHook = getOrInsertSynthesizedHook( - ("__csan_" + Buf).str(), BeforeHookTy); + FunctionCallee BeforeIntrinCallHook = + getOrInsertSynthesizedHook(("__csan_" + Buf).str(), BeforeHookTy); // Insert the hook before the call insertHookCall(I, BeforeIntrinCallHook, BeforeHookParamVals); // If we previously saved the stack pointer, restore it if (SavedStack) - IRB.CreateCall(Intrinsic::getDeclaration(&M, Intrinsic::stackrestore), + IRB.CreateCall(Intrinsic::getDeclaration(&M, Intrinsic::stackrestore, + {IRB.getPtrTy()}), {SavedStack}); return true; } @@ -3833,8 +3760,8 @@ bool CilkSanitizerImpl::instrumentIntrinsicCall( // Save the stack pointer, if we haven't already if (!SavedStack) - SavedStack = - 
IRB.CreateCall(Intrinsic::getDeclaration(&M, Intrinsic::stacksave)); + SavedStack = IRB.CreateCall(Intrinsic::getDeclaration( + &M, Intrinsic::stacksave, {IRB.getPtrTy()})); // Spill the return value onto the stack AllocaInst *RetSpill = IRB.CreateAlloca(RetTy); @@ -3857,8 +3784,8 @@ bool CilkSanitizerImpl::instrumentIntrinsicCall( // Save the stack pointer, if we haven't already if (!SavedStack) - SavedStack = - IRB.CreateCall(Intrinsic::getDeclaration(&M, Intrinsic::stacksave)); + SavedStack = IRB.CreateCall(Intrinsic::getDeclaration( + &M, Intrinsic::stacksave, {IRB.getPtrTy()})); // Spill the argument onto the stack AllocaInst *ArgSpill = IRB.CreateAlloca(ArgTy); @@ -3878,7 +3805,8 @@ bool CilkSanitizerImpl::instrumentIntrinsicCall( insertHookCall(&*Iter, AfterIntrinCallHook, AfterHookParamVals); if (SavedStack) { - IRB.CreateCall(Intrinsic::getDeclaration(&M, Intrinsic::stackrestore), + IRB.CreateCall(Intrinsic::getDeclaration(&M, Intrinsic::stackrestore, + {IRB.getPtrTy()}), {SavedStack}); } return true; @@ -3939,9 +3867,9 @@ bool CilkSanitizerImpl::instrumentLibCall(Instruction *I, // Otherwise, insert the hook after the intrinsic. // Synthesize the after hook for this function. - SmallVector AfterHookParamTys( - {IDType, /*callee func_id*/ IDType, - /*Num MAAPVal*/ IRB.getInt8Ty(), CsiCallProperty::getType(Ctx)}); + SmallVector AfterHookParamTys({IDType, /*callee func_id*/ IDType, + /*Num MAAPVal*/ IRB.getInt8Ty(), + CsiCallProperty::getType(Ctx)}); SmallVector AfterHookParamVals( {CallsiteId, FuncId, NumMVVal, PropVal}); SmallVector AfterHookDefaultVals( @@ -3967,9 +3895,9 @@ bool CilkSanitizerImpl::instrumentLibCall(Instruction *I, // There are two "after" positions for invokes: the normal block and the // exception block. 
InvokeInst *II = cast(I); - insertHookCallInSuccessorBB( - II->getNormalDest(), II->getParent(), AfterLibCallHook, - AfterHookParamVals, AfterHookDefaultVals); + insertHookCallInSuccessorBB(II->getNormalDest(), II->getParent(), + AfterLibCallHook, AfterHookParamVals, + AfterHookDefaultVals); // Don't insert any instrumentation in the exception block. } else { // Simple call instruction; there is only one "after" position. @@ -4013,8 +3941,8 @@ bool CilkSanitizerImpl::instrumentCallsite(Instruction *I, Value *DefaultPropVal = Prop.getValue(IRB); Prop.setIsIndirect(!Called); Value *PropVal = Prop.getValue(IRB); - insertHookCall(I, CsanBeforeCallsite, {CallsiteId, FuncId, NumMVVal, - PropVal}); + insertHookCall(I, CsanBeforeCallsite, + {CallsiteId, FuncId, NumMVVal, PropVal}); BasicBlock::iterator Iter(I); if (IsInvoke) { @@ -4066,10 +3994,10 @@ bool CilkSanitizerImpl::suppressCallsite(Instruction *I) { // There are two "after" positions for invokes: the normal block and the // exception block. InvokeInst *II = cast(I); - insertHookCallInSuccessorBB( - II->getNormalDest(), II->getParent(), CsanEnableChecking, {}, {}); - insertHookCallInSuccessorBB( - II->getUnwindDest(), II->getParent(), CsanEnableChecking, {}, {}); + insertHookCallInSuccessorBB(II->getNormalDest(), II->getParent(), + CsanEnableChecking, {}, {}); + insertHookCallInSuccessorBB(II->getUnwindDest(), II->getParent(), + CsanEnableChecking, {}, {}); } else { // Simple call instruction; there is only one "after" position. 
Iter++; @@ -4116,7 +4044,7 @@ bool CilkSanitizerImpl::instrumentAnyMemIntrinAcc(Instruction *I, "Store received different ID's in FED and object tables."); Value *CsiId = StoreFED.localToGlobalId(StoreId, IRB); - Value *Args[] = {CsiId, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), + Value *Args[] = {CsiId, IRB.CreatePointerCast(Addr, IRB.getPtrTy()), IRB.CreateIntCast(M->getLength(), IntptrTy, false), Prop.getValue(IRB)}; Instruction *Call = IRB.CreateCall(CsanLargeWrite, Args); @@ -4142,7 +4070,7 @@ bool CilkSanitizerImpl::instrumentAnyMemIntrinAcc(Instruction *I, "Load received different ID's in FED and object tables."); Value *CsiId = LoadFED.localToGlobalId(LoadId, IRB); - Value *Args[] = {CsiId, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), + Value *Args[] = {CsiId, IRB.CreatePointerCast(Addr, IRB.getPtrTy()), IRB.CreateIntCast(M->getLength(), IntptrTy, false), Prop.getValue(IRB)}; Instruction *Call = IRB.CreateCall(CsanLargeRead, Args); @@ -4167,7 +4095,7 @@ bool CilkSanitizerImpl::instrumentAnyMemIntrinAcc(Instruction *I, "Store received different ID's in FED and object tables."); Value *CsiId = StoreFED.localToGlobalId(LocalId, IRB); - Value *Args[] = {CsiId, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), + Value *Args[] = {CsiId, IRB.CreatePointerCast(Addr, IRB.getPtrTy()), IRB.CreateIntCast(M->getLength(), IntptrTy, false), Prop.getValue(IRB)}; Instruction *Call = IRB.CreateCall(CsanLargeWrite, Args); @@ -4178,11 +4106,11 @@ bool CilkSanitizerImpl::instrumentAnyMemIntrinAcc(Instruction *I, return false; } -static void getTaskExits( - DetachInst *DI, SmallVectorImpl &TaskReturns, - SmallVectorImpl &TaskResumes, - SmallVectorImpl &SharedEHExits, - TaskInfo &TI) { +static void getTaskExits(DetachInst *DI, + SmallVectorImpl &TaskReturns, + SmallVectorImpl &TaskResumes, + SmallVectorImpl &SharedEHExits, + TaskInfo &TI) { BasicBlock *DetachedBlock = DI->getDetached(); Task *T = TI.getTaskFor(DetachedBlock); BasicBlock *ContinueBlock = 
DI->getContinue(); @@ -4206,7 +4134,7 @@ static void getTaskExits( for (Spindle *S : depth_first>(T->getEntrySpindle())) { if (S->isSharedEH()) { if (llvm::any_of(predecessors(S), - [](const Spindle *Pred){ return !Pred->isSharedEH(); })) + [](const Spindle *Pred) { return !Pred->isSharedEH(); })) SharedEHExits.push_back(S); continue; } @@ -4263,14 +4191,15 @@ bool CilkSanitizerImpl::instrumentDetach(DetachInst *DI, unsigned SyncRegNum, Prop.setIsTapirLoopBody(TapirLoopBody); Prop.setNumSyncReg(NumSyncRegs); // Get the frame and stack pointers. - Value *FrameAddr = IRB.CreateCall( - Intrinsic::getDeclaration(&M, Intrinsic::task_frameaddress), - {IRB.getInt32(0)}); + Value *FrameAddr = + IRB.CreateCall(Intrinsic::getDeclaration( + &M, Intrinsic::task_frameaddress, {IRB.getPtrTy()}), + {IRB.getInt32(0)}); Value *StackSave = IRB.CreateCall( - Intrinsic::getDeclaration(&M, Intrinsic::stacksave)); - Instruction *Call = IRB.CreateCall(CsanTaskEntry, - {TaskID, DetachID, FrameAddr, - StackSave, Prop.getValue(IRB)}); + Intrinsic::getDeclaration(&M, Intrinsic::stacksave, {IRB.getPtrTy()})); + Instruction *Call = + IRB.CreateCall(CsanTaskEntry, {TaskID, DetachID, FrameAddr, StackSave, + Prop.getValue(IRB)}); IRB.SetInstDebugLocation(Call); // Instrument the exit points of the detached tasks. @@ -4388,9 +4317,9 @@ bool CilkSanitizerImpl::instrumentSync(SyncInst *SI, unsigned SyncRegNum) { return true; } -void CilkSanitizerImpl::instrumentTapirLoop(Loop &L, TaskInfo &TI, - DenseMap &SyncRegNums, - ScalarEvolution *SE) { +void CilkSanitizerImpl::instrumentTapirLoop( + Loop &L, TaskInfo &TI, DenseMap &SyncRegNums, + ScalarEvolution *SE) { // Only insert instrumentation if requested if (!(InstrumentationSet & SERIESPARALLEL)) return; @@ -4435,8 +4364,8 @@ void CilkSanitizerImpl::instrumentTapirLoop(Loop &L, TaskInfo &TI, } // Insert before-loop hook. 
- insertHookCall(&*IRB.GetInsertPoint(), CsanBeforeLoop, {LoopCsiId, TripCount, - LoopPropVal}); + insertHookCall(&*IRB.GetInsertPoint(), CsanBeforeLoop, + {LoopCsiId, TripCount, LoopPropVal}); // Insert after-loop hooks. for (BasicBlock *BB : ExitBlocks) { @@ -4478,9 +4407,8 @@ bool CilkSanitizerImpl::instrumentAlloca(Instruction *I, TaskInfo &TI) { uint64_t Size = DL.getTypeAllocSize(AI->getAllocatedType()); Value *SizeVal = IRB.getInt64(Size); if (AI->isArrayAllocation()) - SizeVal = IRB.CreateMul(SizeVal, - IRB.CreateZExtOrBitCast(AI->getArraySize(), - IRB.getInt64Ty())); + SizeVal = IRB.CreateMul( + SizeVal, IRB.CreateZExtOrBitCast(AI->getArraySize(), IRB.getInt64Ty())); BasicBlock::iterator Iter(I); if (!AllocaInEntryBlock) { @@ -4490,7 +4418,7 @@ bool CilkSanitizerImpl::instrumentAlloca(Instruction *I, TaskInfo &TI) { Iter = IRB.GetInsertPoint(); } - Type *AddrType = IRB.getInt8PtrTy(); + Type *AddrType = IRB.getPtrTy(); Value *Addr = IRB.CreatePointerCast(I, AddrType); insertHookCall(&*Iter, CsiAfterAlloca, {CsiId, Addr, SizeVal, PropVal}); @@ -4503,8 +4431,7 @@ static Value *getHeapObject(Value *I) { unsigned NumOfBitCastUses = 0; // Determine if CallInst has a bitcast use. - for (Value::user_iterator UI = I->user_begin(), E = I->user_end(); - UI != E;) + for (Value::user_iterator UI = I->user_begin(), E = I->user_end(); UI != E;) if (BitCastInst *BCI = dyn_cast(*UI++)) { // Look for a dbg.value intrinsic for this bitcast. SmallVector DbgValues; @@ -4596,9 +4523,9 @@ bool CilkSanitizerImpl::instrumentAllocFnLibCall(Instruction *I, // There are two "after" positions for invokes: the normal block and the // exception block. 
InvokeInst *II = cast(I); - insertHookCallInSuccessorBB( - II->getNormalDest(), II->getParent(), AfterLibCallHook, - AfterHookParamVals, AfterHookDefaultVals); + insertHookCallInSuccessorBB(II->getNormalDest(), II->getParent(), + AfterLibCallHook, AfterHookParamVals, + AfterHookDefaultVals); // Don't insert any instrumentation in the exception block. } else { // Simple call instruction; there is only one "after" position. @@ -4626,14 +4553,15 @@ bool CilkSanitizerImpl::instrumentAllocationFn(Instruction *I, IRBuilder<> IRB(I); SmallVector AllocFnArgs; - if (!getAllocFnArgs(I, AllocFnArgs, IntptrTy, IRB.getInt8PtrTy(), *TLI)) { + if (!getAllocFnArgs(I, AllocFnArgs, IntptrTy, IRB.getPtrTy(), *TLI)) { return instrumentAllocFnLibCall(I, TLI); } - SmallVector DefaultAllocFnArgs( - {/* Allocated size */ Constant::getNullValue(IntptrTy), - /* Number of elements */ Constant::getNullValue(IntptrTy), - /* Alignment */ Constant::getNullValue(IntptrTy), - /* Old pointer */ Constant::getNullValue(IRB.getInt8PtrTy()),}); + SmallVector DefaultAllocFnArgs({ + /* Allocated size */ Constant::getNullValue(IntptrTy), + /* Number of elements */ Constant::getNullValue(IntptrTy), + /* Alignment */ Constant::getNullValue(IntptrTy), + /* Old pointer */ Constant::getNullValue(IRB.getPtrTy()), + }); Value *DefaultID = getDefaultID(IRB); uint64_t LocalId = AllocFnFED.add(*I); @@ -4659,14 +4587,14 @@ bool CilkSanitizerImpl::instrumentAllocationFn(Instruction *I, BasicBlock *NormalBB = II->getNormalDest(); unsigned SuccNum = GetSuccessorNumber(II->getParent(), NormalBB); if (isCriticalEdge(II, SuccNum)) - NormalBB = SplitCriticalEdge(II, SuccNum, - CriticalEdgeSplittingOptions(&DT)); + NormalBB = + SplitCriticalEdge(II, SuccNum, CriticalEdgeSplittingOptions(&DT)); // Insert hook into normal destination. 
{ IRB.SetInsertPoint(&*NormalBB->getFirstInsertionPt()); SmallVector AfterAllocFnArgs; AfterAllocFnArgs.push_back(AllocFnId); - AfterAllocFnArgs.push_back(IRB.CreatePointerCast(I, IRB.getInt8PtrTy())); + AfterAllocFnArgs.push_back(IRB.CreatePointerCast(I, IRB.getPtrTy())); AfterAllocFnArgs.append(AllocFnArgs.begin(), AllocFnArgs.end()); insertHookCall(&*IRB.GetInsertPoint(), CsanAfterAllocFn, AfterAllocFnArgs); @@ -4677,16 +4605,15 @@ bool CilkSanitizerImpl::instrumentAllocationFn(Instruction *I, // destination. SmallVector AfterAllocFnArgs, DefaultAfterAllocFnArgs; AfterAllocFnArgs.push_back(AllocFnId); - AfterAllocFnArgs.push_back(Constant::getNullValue(IRB.getInt8PtrTy())); + AfterAllocFnArgs.push_back(Constant::getNullValue(IRB.getPtrTy())); AfterAllocFnArgs.append(AllocFnArgs.begin(), AllocFnArgs.end()); DefaultAfterAllocFnArgs.push_back(DefaultID); - DefaultAfterAllocFnArgs.push_back( - Constant::getNullValue(IRB.getInt8PtrTy())); + DefaultAfterAllocFnArgs.push_back(Constant::getNullValue(IRB.getPtrTy())); DefaultAfterAllocFnArgs.append(DefaultAllocFnArgs.begin(), DefaultAllocFnArgs.end()); - insertHookCallInSuccessorBB( - II->getUnwindDest(), II->getParent(), CsanAfterAllocFn, - AfterAllocFnArgs, DefaultAfterAllocFnArgs); + insertHookCallInSuccessorBB(II->getUnwindDest(), II->getParent(), + CsanAfterAllocFn, AfterAllocFnArgs, + DefaultAfterAllocFnArgs); } } else { // Simple call instruction; there is only one "after" position. 
@@ -4694,7 +4621,7 @@ bool CilkSanitizerImpl::instrumentAllocationFn(Instruction *I, IRB.SetInsertPoint(&*Iter); SmallVector AfterAllocFnArgs; AfterAllocFnArgs.push_back(AllocFnId); - AfterAllocFnArgs.push_back(IRB.CreatePointerCast(I, IRB.getInt8PtrTy())); + AfterAllocFnArgs.push_back(IRB.CreatePointerCast(I, IRB.getPtrTy())); AfterAllocFnArgs.append(AllocFnArgs.begin(), AllocFnArgs.end()); insertHookCall(&*Iter, CsanAfterAllocFn, AfterAllocFnArgs); } @@ -4736,64 +4663,24 @@ bool CilkSanitizerImpl::instrumentFree(Instruction *I, return true; } -bool CilkSanitizerLegacyPass::runOnModule(Module &M) { - if (skipModule(M)) - return false; - - CallGraph *CG = &getAnalysis().getCallGraph(); - auto GetTLI = [this](Function &F) -> TargetLibraryInfo & { - return this->getAnalysis().getTLI(F); - }; - auto GetDomTree = [this](Function &F) -> DominatorTree & { - return this->getAnalysis(F).getDomTree(); +PreservedAnalyses CilkSanitizerPass::run(Module &M, ModuleAnalysisManager &AM) { + auto &FAM = AM.getResult(M).getManager(); + auto &CG = AM.getResult(M); + auto GetDT = [&FAM](Function &F) -> DominatorTree & { + return FAM.getResult(F); }; - auto GetTaskInfo = [this](Function &F) -> TaskInfo & { - return this->getAnalysis(F).getTaskInfo(); + auto GetTI = [&FAM](Function &F) -> TaskInfo & { + return FAM.getResult(F); }; - auto GetLoopInfo = [this](Function &F) -> LoopInfo & { - return this->getAnalysis(F).getLoopInfo(); + auto GetLI = [&FAM](Function &F) -> LoopInfo & { + return FAM.getResult(F); }; - auto GetRaceInfo = [this](Function &F) -> RaceInfo & { - return this->getAnalysis(F).getRaceInfo(); + auto GetRI = [&FAM](Function &F) -> RaceInfo & { + return FAM.getResult(F); }; - auto GetSE = [this](Function &F) -> ScalarEvolution & { - return this->getAnalysis(F).getSE(); + auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & { + return FAM.getResult(F); }; - - bool Changed = - CilkSanitizerImpl(M, CG, GetDomTree, nullptr, GetLoopInfo, nullptr, - GetTLI, nullptr, 
CallsMayThrow, JitMode) - .setup(true); - Changed |= - CilkSanitizerImpl(M, CG, GetDomTree, GetTaskInfo, GetLoopInfo, - GetRaceInfo, GetTLI, GetSE, CallsMayThrow, JitMode) - .run(); - return Changed; -} - -PreservedAnalyses CilkSanitizerPass::run(Module &M, ModuleAnalysisManager &AM) { - auto &FAM = AM.getResult(M).getManager(); - auto &CG = AM.getResult(M); - auto GetDT = - [&FAM](Function &F) -> DominatorTree & { - return FAM.getResult(F); - }; - auto GetTI = - [&FAM](Function &F) -> TaskInfo & { - return FAM.getResult(F); - }; - auto GetLI = - [&FAM](Function &F) -> LoopInfo & { - return FAM.getResult(F); - }; - auto GetRI = - [&FAM](Function &F) -> RaceInfo & { - return FAM.getResult(F); - }; - auto GetTLI = - [&FAM](Function &F) -> TargetLibraryInfo & { - return FAM.getResult(F); - }; auto GetSE = [&FAM](Function &F) -> ScalarEvolution & { return FAM.getResult(F); }; diff --git a/llvm/lib/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.cpp index dab90267f9af374..85a8adeabbc2a2c 100644 --- a/llvm/lib/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.cpp @@ -18,6 +18,7 @@ #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ScalarEvolution.h" @@ -637,7 +638,7 @@ void CSIImpl::initializeAllocaHooks() { LLVMContext &C = M.getContext(); IRBuilder<> IRB(C); Type *IDType = IRB.getInt64Ty(); - Type *AddrType = IRB.getInt8PtrTy(); + Type *AddrType = IRB.getPtrTy(); Type *PropType = CsiAllocaProperty::getType(C); CsiAfterAlloca = M.getOrInsertFunction("__csi_after_alloca", IRB.getVoidTy(), @@ -650,7 +651,7 @@ void CSIImpl::initializeAllocFnHooks() { IRBuilder<> IRB(C); Type *RetType = 
IRB.getVoidTy(); Type *IDType = IRB.getInt64Ty(); - Type *AddrType = IRB.getInt8PtrTy(); + Type *AddrType = IRB.getPtrTy(); Type *LargeNumBytesType = IntptrTy; Type *AllocFnPropType = CsiAllocFnProperty::getType(C); Type *FreePropType = CsiFreeProperty::getType(C); @@ -680,7 +681,7 @@ void CSIImpl::initializeLoadStoreHooks() { Type *LoadPropertyTy = CsiLoadStoreProperty::getType(C); Type *StorePropertyTy = CsiLoadStoreProperty::getType(C); Type *RetType = IRB.getVoidTy(); - Type *AddrType = IRB.getInt8PtrTy(); + Type *AddrType = IRB.getPtrTy(); Type *NumBytesType = IRB.getInt32Ty(); CsiBeforeRead = M.getOrInsertFunction("__csi_before_load", RetType, @@ -703,14 +704,14 @@ void CSIImpl::initializeMemIntrinsicsHooks() { LLVMContext &C = M.getContext(); IRBuilder<> IRB(C); - MemmoveFn = M.getOrInsertFunction("memmove", IRB.getInt8PtrTy(), - IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), + MemmoveFn = M.getOrInsertFunction("memmove", IRB.getPtrTy(), + IRB.getPtrTy(), IRB.getPtrTy(), IntptrTy); - MemcpyFn = M.getOrInsertFunction("memcpy", IRB.getInt8PtrTy(), - IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), + MemcpyFn = M.getOrInsertFunction("memcpy", IRB.getPtrTy(), + IRB.getPtrTy(), IRB.getPtrTy(), IntptrTy); - MemsetFn = M.getOrInsertFunction("memset", IRB.getInt8PtrTy(), - IRB.getInt8PtrTy(), IRB.getInt32Ty(), + MemsetFn = M.getOrInsertFunction("memset", IRB.getPtrTy(), + IRB.getPtrTy(), IRB.getInt32Ty(), IntptrTy); } @@ -764,9 +765,10 @@ static BasicBlock *SplitOffPreds(BasicBlock *BB, SmallVectorImpl &Preds, DominatorTree *DT, LoopInfo *LI) { if (BB->isLandingPad()) { + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); SmallVector NewBBs; SplitLandingPadPredecessors(BB, Preds, ".csi-split-lp", ".csi-split", - NewBBs, DT, LI); + NewBBs, &DTU, LI); return NewBBs[1]; } @@ -1077,7 +1079,7 @@ void CSIImpl::instrumentLoadOrStore(Instruction *I, Type *Ty = IsWrite ? 
cast(I)->getValueOperand()->getType() : I->getType(); int NumBytes = getNumBytesAccessed(Ty, DL); - Type *AddrType = IRB.getInt8PtrTy(); + Type *AddrType = IRB.getPtrTy(); if (NumBytes == -1) return; // size that we don't recognize @@ -1119,7 +1121,7 @@ bool CSIImpl::instrumentMemIntrinsic(Instruction *I) { if (MemSetInst *M = dyn_cast(I)) { Instruction *Call = IRB.CreateCall( MemsetFn, - {IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()), + {IRB.CreatePointerCast(M->getArgOperand(0), IRB.getPtrTy()), IRB.CreateIntCast(M->getArgOperand(1), IRB.getInt32Ty(), false), IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false)}); setInstrumentationDebugLoc(I, Call); @@ -1128,8 +1130,8 @@ bool CSIImpl::instrumentMemIntrinsic(Instruction *I) { } else if (MemTransferInst *M = dyn_cast(I)) { Instruction *Call = IRB.CreateCall( isa(M) ? MemcpyFn : MemmoveFn, - {IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()), - IRB.CreatePointerCast(M->getArgOperand(1), IRB.getInt8PtrTy()), + {IRB.CreatePointerCast(M->getArgOperand(0), IRB.getPtrTy()), + IRB.CreatePointerCast(M->getArgOperand(1), IRB.getPtrTy()), IRB.CreateIntCast(M->getArgOperand(2), IntptrTy, false)}); setInstrumentationDebugLoc(I, Call); I->eraseFromParent(); @@ -1603,7 +1605,7 @@ void CSIImpl::instrumentAlloca(Instruction *I, TaskInfo &TI) { Iter = IRB.GetInsertPoint(); } - Type *AddrType = IRB.getInt8PtrTy(); + Type *AddrType = IRB.getPtrTy(); Value *Addr = IRB.CreatePointerCast(I, AddrType); insertHookCall(&*Iter, CsiAfterAlloca, {CsiId, Addr, SizeVal, PropVal}); } @@ -1665,12 +1667,12 @@ void CSIImpl::instrumentAllocFn(Instruction *I, DominatorTree *DT, Value *AllocFnId = AllocFnFED.localToGlobalId(LocalId, IRB); SmallVector AllocFnArgs; - getAllocFnArgs(I, AllocFnArgs, IntptrTy, IRB.getInt8PtrTy(), *TLI); + getAllocFnArgs(I, AllocFnArgs, IntptrTy, IRB.getPtrTy(), *TLI); SmallVector DefaultAllocFnArgs({ /* Allocated size */ Constant::getNullValue(IntptrTy), /* Number of elements */ 
Constant::getNullValue(IntptrTy), /* Alignment */ Constant::getNullValue(IntptrTy), - /* Old pointer */ Constant::getNullValue(IRB.getInt8PtrTy()), + /* Old pointer */ Constant::getNullValue(IRB.getPtrTy()), }); CsiAllocFnProperty Prop; @@ -1697,7 +1699,7 @@ void CSIImpl::instrumentAllocFn(Instruction *I, DominatorTree *DT, IRB.SetInsertPoint(&*NormalBB->getFirstInsertionPt()); SmallVector AfterAllocFnArgs; AfterAllocFnArgs.push_back(AllocFnId); - AfterAllocFnArgs.push_back(IRB.CreatePointerCast(I, IRB.getInt8PtrTy())); + AfterAllocFnArgs.push_back(IRB.CreatePointerCast(I, IRB.getPtrTy())); AfterAllocFnArgs.append(AllocFnArgs.begin(), AllocFnArgs.end()); insertHookCall(&*IRB.GetInsertPoint(), CsiAfterAllocFn, AfterAllocFnArgs); } @@ -1707,11 +1709,11 @@ void CSIImpl::instrumentAllocFn(Instruction *I, DominatorTree *DT, // destination. SmallVector AfterAllocFnArgs, DefaultAfterAllocFnArgs; AfterAllocFnArgs.push_back(AllocFnId); - AfterAllocFnArgs.push_back(Constant::getNullValue(IRB.getInt8PtrTy())); + AfterAllocFnArgs.push_back(Constant::getNullValue(IRB.getPtrTy())); AfterAllocFnArgs.append(AllocFnArgs.begin(), AllocFnArgs.end()); DefaultAfterAllocFnArgs.push_back(DefaultID); DefaultAfterAllocFnArgs.push_back( - Constant::getNullValue(IRB.getInt8PtrTy())); + Constant::getNullValue(IRB.getPtrTy())); DefaultAfterAllocFnArgs.append(DefaultAllocFnArgs.begin(), DefaultAllocFnArgs.end()); insertHookCallInSuccessorBB(II->getUnwindDest(), II->getParent(), @@ -1724,7 +1726,7 @@ void CSIImpl::instrumentAllocFn(Instruction *I, DominatorTree *DT, IRB.SetInsertPoint(&*Iter); SmallVector AfterAllocFnArgs; AfterAllocFnArgs.push_back(AllocFnId); - AfterAllocFnArgs.push_back(IRB.CreatePointerCast(I, IRB.getInt8PtrTy())); + AfterAllocFnArgs.push_back(IRB.CreatePointerCast(I, IRB.getPtrTy())); AfterAllocFnArgs.append(AllocFnArgs.begin(), AllocFnArgs.end()); insertHookCall(&*Iter, CsiAfterAllocFn, AfterAllocFnArgs); } @@ -2017,7 +2019,7 @@ void CSIImpl::initializeCsi() { // Create a 
struct type to match the unit_fed_entry_t type in csirt.c. StructType *CSIImpl::getUnitFedTableType(LLVMContext &C, PointerType *EntryPointerType) { - return StructType::get(IntegerType::get(C, 64), Type::getInt8PtrTy(C, 0), + return StructType::get(IntegerType::get(C, 64), PointerType::get(C, 0), EntryPointerType); } @@ -2027,7 +2029,7 @@ Constant *CSIImpl::fedTableToUnitFedTable(Module &M, Constant *NumEntries = ConstantInt::get(IntegerType::get(M.getContext(), 64), FedTable.size()); Constant *BaseIdPtr = ConstantExpr::getPointerCast( - FedTable.baseId(), Type::getInt8PtrTy(M.getContext(), 0)); + FedTable.baseId(), PointerType::get(M.getContext(), 0)); Constant *InsertedTable = FedTable.insertIntoModule(M); return ConstantStruct::get(UnitFedTableType, NumEntries, BaseIdPtr, InsertedTable); @@ -2083,7 +2085,7 @@ Constant *CSIImpl::sizeTableToUnitSizeTable(Module &M, ConstantInt::get(IntegerType::get(M.getContext(), 64), SzTable.size()); // Constant *BaseIdPtr = // ConstantExpr::getPointerCast(FedTable.baseId(), - // Type::getInt8PtrTy(M.getContext(), 0)); + // PointerType::get(M.getContext(), 0)); Constant *InsertedTable = SzTable.insertIntoModule(M); return ConstantStruct::get(UnitSizeTableType, NumEntries, InsertedTable); } @@ -2106,7 +2108,7 @@ CallInst *CSIImpl::createRTUnitInitCall(IRBuilder<> &IRB) { getUnitSizeTableType(C, SizeTable::getPointerType(C)); // Lookup __csirt_unit_init - SmallVector InitArgTypes({IRB.getInt8PtrTy(), + SmallVector InitArgTypes({IRB.getPtrTy(), PointerType::get(UnitFedTableType, 0), PointerType::get(UnitSizeTableType, 0), InitCallsiteToFunction->getType()}); diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp index 20bedf242bebc70..a43e14a63303458 100644 --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -766,7 +766,7 @@ PreservedAnalyses GVNPass::run(Function &F, FunctionAnalysisManager &AM) { auto &TI = AM.getResult(F); auto *MSSA = AM.getCachedResult(F); auto 
&ORE = AM.getResult(F); - bool Changed = runImpl(F, AC, DT, TLI, AA, MemDep, LI, &ORE, TI, + bool Changed = runImpl(F, AC, DT, TLI, AA, MemDep, LI, &ORE, &TI, MSSA ? &MSSA->getMSSA() : nullptr); if (!Changed) return PreservedAnalyses::all(); @@ -3370,8 +3370,6 @@ class llvm::gvn::GVNLegacyPass : public FunctionPass { if (skipFunction(F)) return false; - auto *LIWP = getAnalysisIfAvailable(); - auto *TIWP = getAnalysisIfAvailable(); auto *MSSAWP = getAnalysisIfAvailable(); return Impl.runImpl( diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index 0c64952e1005a2d..b2957ad1dfa87a8 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -178,6 +178,7 @@ class SROA { LLVMContext *const C; DomTreeUpdater *const DTU; AssumptionCache *const AC; + TaskInfo *TI; const bool PreserveCFG; /// Worklist of alloca instructions to simplify. @@ -238,9 +239,9 @@ class SROA { isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG); public: - SROA(LLVMContext *C, DomTreeUpdater *DTU, AssumptionCache *AC, + SROA(LLVMContext *C, DomTreeUpdater *DTU, AssumptionCache *AC, TaskInfo *TI, SROAOptions PreserveCFG_) - : C(C), DTU(DTU), AC(AC), + : C(C), DTU(DTU), AC(AC), TI(TI), PreserveCFG(PreserveCFG_ == SROAOptions::PreserveCFG) {} /// Main run method used by both the SROAPass and by the legacy pass. 
diff --git a/llvm/lib/Transforms/Tapir/CudaABI.cpp b/llvm/lib/Transforms/Tapir/CudaABI.cpp index e98298ac845b6a2..60f3175d4a1e394 100644 --- a/llvm/lib/Transforms/Tapir/CudaABI.cpp +++ b/llvm/lib/Transforms/Tapir/CudaABI.cpp @@ -59,6 +59,7 @@ // #include "llvm/Transforms/Tapir/CudaABI.h" +#include "kitsune/Config/config.h" #include "llvm/ADT/Twine.h" #include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/IR/Constants.h" @@ -97,7 +98,6 @@ #include "llvm/Transforms/Tapir/TapirLoopInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/TapirUtils.h" -#include "llvm/Transforms/Vectorize.h" using namespace llvm; @@ -310,10 +310,12 @@ std::string PTXVersionFromCudaVersion() { .Case("11.6", "+ptx76") .Case("11.7", "+ptx77") .Case("11.8", "+ptx78") - .Case("12.0", "+ptx78") - .Case("12.1", "+ptx78") - .Case("12.2", "+ptx78") - .Case("12.3", "+ptx78") + .Case("12.0", "+ptx83") + .Case("12.1", "+ptx83") + .Case("12.2", "+ptx83") + .Case("12.3", "+ptx83") + .Case("12.4", "+ptx83") + .Case("12.5", "+ptx83") .Default(""); if (PTXVersionStr == "") { @@ -354,9 +356,9 @@ CudaLoop::CudaLoop(Module &M, Module &KernelModule, const std::string &KN, Type *Int32Ty = Type::getInt32Ty(Ctx); Type *Int64Ty = Type::getInt64Ty(Ctx); Type *VoidTy = Type::getVoidTy(Ctx); - PointerType *VoidPtrTy = Type::getInt8PtrTy(Ctx); + PointerType *VoidPtrTy = PointerType::getUnqual(Ctx); PointerType *VoidPtrPtrTy = VoidPtrTy->getPointerTo(); - PointerType *CharPtrTy = Type::getInt8PtrTy(Ctx); + PointerType *CharPtrTy = PointerType::getUnqual(Ctx); // Thread index values -- equivalent to Cuda's builtins: threadIdx.[x,y,z]. 
CUThreadIdxX = Intrinsic::getDeclaration(&KernelModule, @@ -925,10 +927,7 @@ void CudaLoop::transformForPTX(Function &F) { for (auto I = inst_begin(&F); I != inst_end(&F); I++) { if (auto CI = dyn_cast(&*I)) { if (FPMathOperator *FPO = dyn_cast(CI)) { - // LLVM_DEBUG(dbgs() << "\tCall is for a FP math operation: " << *FPO); - if (FPO->isFast()) { - // LLVM_DEBUG(dbgs() << " [fast]\n"); - FastMathFlags FMF = FPO->getFastMathFlags(); + if (FPO->isFast()) { enableFast = true; } else { // LLVM_DEBUG(dbgs() << " [std/full precision]\n"); @@ -1016,7 +1015,7 @@ void CudaLoop::processOutlinedLoopCall(TapirLoopInfo &TL, TaskOutlineInfo &TOI, LLVM_DEBUG(dbgs() << "\t*- code gen packing of " << OrderedInputs.size() << " kernel args.\n"); - PointerType *VoidPtrTy = Type::getInt8PtrTy(Ctx); + PointerType *VoidPtrTy = PointerType::getUnqual(Ctx); ArrayType *ArrayTy = ArrayType::get(VoidPtrTy, OrderedInputs.size()); Value *ArgArray = EntryBuilder.CreateAlloca(ArrayTy); unsigned int i = 0; @@ -1111,7 +1110,7 @@ void CudaLoop::processOutlinedLoopCall(TapirLoopInfo &TL, TaskOutlineInfo &TOI, ConstantInt::get(Int64Ty, InstMix.num_iops)); AllocaInst *AI = NewBuilder.CreateAlloca(KernelInstMixTy); - StoreInst *SI = NewBuilder.CreateStore(InstructionMix, AI); + NewBuilder.CreateStore(InstructionMix, AI); LLVM_DEBUG(dbgs() << "\t*- code gen kernel launch....\n"); NewBuilder.CreateCall(KitCudaLaunchFn, {DummyFBPtr, KNameParam, argsPtr, @@ -1173,7 +1172,7 @@ CudaABI::CudaABI(Module &M) PTXTargetMachine = PTXTarget->createTargetMachine( TT.getTriple(), GPUArch, PTXVersionStr.c_str(), TargetOptions(), - Reloc::PIC_, CodeModel::Large, CodeGenOpt::Aggressive); + Reloc::PIC_, CodeModel::Large, CodeGenOptLevel::Aggressive); KernelModule.setTargetTriple(TT.str()); KernelModule.setDataLayout(PTXTargetMachine->createDataLayout()); @@ -1195,22 +1194,24 @@ std::unique_ptr &CudaABI::getLibDeviceModule() { if (not LibDeviceModule) { LLVMContext &Ctx = KernelModule.getContext(); llvm::SMDiagnostic SMD; 
- // FIXME: Do not require CUDA_HOME to be set in the environment. The Cuda - // installation that was used to build Kitsune should be used instead - // because there is no guarantee that the cuda versions will match. If they - // don't, there is a chance of subtle problems. - std::optional CudaPath = sys::Process::FindInEnvPath( - "CUDA_HOME", "nvvm/libdevice/libdevice.10.bc"); - if (!CudaPath) { - CudaPath = sys::Process::FindInEnvPath("CUDA_PATH", - "nvvm/libdevice/libdevice.10.bc"); - if (!CudaPath) - report_fatal_error("Unable to load cuda libdevice.10.bc!"); - } - - LibDeviceModule = parseIRFile(*CudaPath, SMD, Ctx); + llvm::errs() << "libdevice: " << KITSUNE_CUDA_LIBDEVICE_BC << "\n"; + // KITSUNE FIXME: It might be useful during development to override the + // libdevice.10.bc function. We could do this with a command-line argument + // that gets passed to this transform. + // std::optional CudaPath = sys::Process::FindInEnvPath( + // "CUDA_HOME", "nvvm/libdevice/libdevice.10.bc"); + // if (!CudaPath) { + // CudaPath = sys::Process::FindInEnvPath("CUDA_PATH", + // "nvvm/libdevice/libdevice.10.bc"); + // if (!CudaPath) + // report_fatal_error("Unable to load cuda libdevice.10.bc!"); + // } + + llvm::StringRef LibDeviceBCFile = KITSUNE_CUDA_LIBDEVICE_BC; + LibDeviceModule = parseIRFile(LibDeviceBCFile, SMD, Ctx); if (not LibDeviceModule) - report_fatal_error("Failed to parse cuda libdevice.10.bc!"); + report_fatal_error(llvm::StringRef("Failed to parse: ") + + LibDeviceBCFile); } return LibDeviceModule; @@ -1298,10 +1299,12 @@ CudaABIOutputFile CudaABI::assemblePTXFile(CudaABIOutputFile &PTXFile) { std::error_code EC; // FIXME: Do not require ptxas to be in $PATH. Use the ptxas that is part of // cuda installation against which Kitsune was built. - auto PTXASExe = sys::findProgramByName("ptxas"); - if ((EC = PTXASExe.getError())) - report_fatal_error("'ptxas' not found. 
" - "Is a CUDA installation in your path?"); + // auto PTXASExe = sys::findProgramByName("ptxas"); + // if ((EC = PTXASExe.getError())) + // report_fatal_error("'ptxas' not found. " + // "Is a CUDA installation in your path?"); + + llvm::StringRef PTXASExe = KITSUNE_CUDA_PTXAS; SmallString<255> AsmFileName(PTXFile->getFilename()); sys::path::replace_extension(AsmFileName, ".s"); @@ -1314,7 +1317,7 @@ CudaABIOutputFile CudaABI::assemblePTXFile(CudaABIOutputFile &PTXFile) { // near the top of this file. // These can be passed to the transform via '-mllvm '. opt::ArgStringList PTXASArgList; - PTXASArgList.push_back(PTXASExe->c_str()); + PTXASArgList.push_back(PTXASExe.data()); // TODO: Do we need/want to add support for generating relocatable code? @@ -1375,7 +1378,7 @@ CudaABIOutputFile CudaABI::assemblePTXFile(CudaABIOutputFile &PTXFile) { // Finally we are ready to execute ptxas... std::string ErrMsg; bool ExecFailed; - int ExecStat = sys::ExecuteAndWait(*PTXASExe, PTXASArgs, std::nullopt, {}, + int ExecStat = sys::ExecuteAndWait(PTXASExe, PTXASArgs, std::nullopt, {}, 0, /* secs to wait -- 0 --> unlimited */ 0, /* memory limit -- 0 --> unlimited */ &ErrMsg, &ExecFailed); @@ -1408,8 +1411,8 @@ void CudaABI::finalizeLaunchCalls(Module &M, GlobalVariable *Fatbin) { LLVMContext &Ctx = M.getContext(); const DataLayout &DL = M.getDataLayout(); Type *VoidTy = Type::getVoidTy(Ctx); - PointerType *VoidPtrTy = Type::getInt8PtrTy(Ctx); - PointerType *CharPtrTy = Type::getInt8PtrTy(Ctx); + PointerType *VoidPtrTy = PointerType::getUnqual(Ctx); + PointerType *CharPtrTy = PointerType::getUnqual(Ctx); Type *Int64Ty = Type::getInt64Ty(Ctx); // Look up a global (device-side) symbol via a module @@ -1456,12 +1459,12 @@ void CudaABI::finalizeLaunchCalls(Module &M, GlobalVariable *Fatbin) { if (CallInst *CI = dyn_cast(&I)) { if (Function *CFn = CI->getCalledFunction()) { - if (CFn->getName().startswith("__kitrt_dummy_threads_per_blk")) { + if 
(CFn->getName().starts_with("__kitrt_dummy_threads_per_blk")) { LLVM_DEBUG(dbgs() << "\t\t\t* discovered a threads-per-block " "placeholder call.\n"); assert(ThreadsPerBlockCI == nullptr && "expected null pointer!"); ThreadsPerBlockCI = CI; - } else if (CFn->getName().startswith("__kitcuda_launch_kernel")) { + } else if (CFn->getName().starts_with("__kitcuda_launch_kernel")) { LLVM_DEBUG(dbgs() << "\t\t\t* patching launch: " << *CI << "\n"); Value *CFatbin; CFatbin = CastInst::CreateBitOrPointerCast(Fatbin, VoidPtrTy, @@ -1547,14 +1550,15 @@ CudaABIOutputFile CudaABI::createFatbinaryFile(CudaABIOutputFile &AsmFile) { LLVM_DEBUG(dbgs() << "\t- generatng fatbinary image file '" << FatbinFile->getFilename() << "'.\n"); - // TODO: LLVM docs suggest we shouldn't be using findProgramByName()... - auto FatbinaryExe = sys::findProgramByName("fatbinary"); - if ((EC = FatbinaryExe.getError())) - report_fatal_error("'fatbinary' not found. " - "Is a CUDA installation in your path?"); + // // TODO: LLVM docs suggest we shouldn't be using findProgramByName()... + // auto FatbinaryExe = sys::findProgramByName("fatbinary"); + // if ((EC = FatbinaryExe.getError())) + // report_fatal_error("'fatbinary' not found. 
" + // "Is a CUDA installation in your path?"); + llvm::StringRef FatbinaryExe = KITSUNE_CUDA_FATBINARY; opt::ArgStringList FatbinaryArgList; - FatbinaryArgList.push_back(FatbinaryExe->c_str()); + FatbinaryArgList.push_back(FatbinaryExe.data()); FatbinaryArgList.push_back("--64"); FatbinaryArgList.push_back("--create"); FatbinaryArgList.push_back(FatbinFilename.c_str()); @@ -1595,7 +1599,7 @@ CudaABIOutputFile CudaABI::createFatbinaryFile(CudaABIOutputFile &AsmFile) { std::string ErrMsg; bool ExecFailed; int ExecStat = - sys::ExecuteAndWait(*FatbinaryExe, FatbinaryArgs, std::nullopt, {}, + sys::ExecuteAndWait(FatbinaryExe, FatbinaryArgs, std::nullopt, {}, 0, /* secs to wait -- 0 --> unlimited */ 0, /* memory limit -- 0 --> unlimited */ &ErrMsg, &ExecFailed); @@ -1660,10 +1664,10 @@ void CudaABI::bindGlobalVariables(Value *Handle, IRBuilder<> &B) { Type *IntTy = Type::getInt32Ty(Ctx); Type *Int64Ty = Type::getInt64Ty(Ctx); Type *VoidTy = Type::getVoidTy(Ctx); - PointerType *VoidPtrTy = Type::getInt8PtrTy(Ctx); + PointerType *VoidPtrTy = PointerType::getUnqual(Ctx); PointerType *VoidPtrPtrTy = VoidPtrTy->getPointerTo(); Type *VarSizeTy = Int64Ty; - PointerType *CharPtrTy = Type::getInt8PtrTy(Ctx); + PointerType *CharPtrTy = PointerType::getUnqual(Ctx); FunctionCallee RegisterVarFn = M.getOrInsertFunction( "__cudaRegisterVar", VoidTy, VoidPtrPtrTy, CharPtrTy, CharPtrTy, @@ -1693,7 +1697,7 @@ Function *CudaABI::createCtor(GlobalVariable *Fatbinary, GlobalVariable *Wrapper) { LLVMContext &Ctx = M.getContext(); Type *VoidTy = Type::getVoidTy(Ctx); - PointerType *VoidPtrTy = Type::getInt8PtrTy(Ctx); + PointerType *VoidPtrTy = PointerType::getUnqual(Ctx); PointerType *VoidPtrPtrTy = VoidPtrTy->getPointerTo(); Type *IntTy = Type::getInt32Ty(Ctx); Type *BoolTy = Type::getInt8Ty(Ctx); @@ -1798,7 +1802,7 @@ Function *CudaABI::createDtor(GlobalVariable *FBHandle) { LLVMContext &Ctx = M.getContext(); const DataLayout &DL = M.getDataLayout(); Type *VoidTy = Type::getVoidTy(Ctx); 
- Type *VoidPtrTy = Type::getInt8PtrTy(Ctx); + Type *VoidPtrTy = PointerType::getUnqual(Ctx); Type *VoidPtrPtrTy = VoidPtrTy->getPointerTo(); FunctionCallee UnregisterFatbinFn = @@ -1862,7 +1866,7 @@ void CudaABI::registerFatbinary(GlobalVariable *Fatbinary) { LLVMContext &Ctx = M.getContext(); Type *VoidTy = Type::getVoidTy(Ctx); - PointerType *VoidPtrTy = Type::getInt8PtrTy(Ctx); + PointerType *VoidPtrTy = PointerType::getUnqual(Ctx); Type *IntTy = Type::getInt32Ty(Ctx); const DataLayout &DL = M.getDataLayout(); @@ -1956,7 +1960,7 @@ CudaABIOutputFile CudaABI::generatePTX() { pb.registerCGSCCAnalyses(cgam); pb.registerFunctionAnalyses(fam); pb.registerLoopAnalyses(lam); - PTXTargetMachine->registerPassBuilderCallbacks(pb); + PTXTargetMachine->registerPassBuilderCallbacks(pb, false); pb.crossRegisterProxies(lam, fam, cgam, mam); ModulePassManager mpm = pb.buildPerModuleDefaultPipeline(optLevel); mpm.addPass(VerifierPass()); @@ -1972,13 +1976,13 @@ CudaABIOutputFile CudaABI::generatePTX() { LLVM_DEBUG(dbgs() << "\t- PTX file: '" << PTXFileName << "'.\n"); legacy::PassManager PassMgr; if (PTXTargetMachine->addPassesToEmitFile(PassMgr, PTXFile->os(), nullptr, - CodeGenFileType::CGFT_AssemblyFile, + CodeGenFileType::AssemblyFile, false)) report_fatal_error("Cuda ABI transform -- PTX generation failed!"); PassMgr.run(KernelModule); LLVM_DEBUG(dbgs() << "\tkernel optimizations and code gen complete.\n\n"); LLVM_DEBUG(dbgs() << "\t\tPTX file: " << PTXFile->getFilename() << "\n"); - return std::move(PTXFile); + return PTXFile; } void CudaABI::postProcessModule() { @@ -2039,7 +2043,7 @@ void CudaABI::postProcessModule() { pb.registerCGSCCAnalyses(cgam); pb.registerFunctionAnalyses(fam); pb.registerLoopAnalyses(lam); - PTXTargetMachine->registerPassBuilderCallbacks(pb); + PTXTargetMachine->registerPassBuilderCallbacks(pb, false); pb.crossRegisterProxies(lam, fam, cgam, mam); ModulePassManager mpm = pb.buildPerModuleDefaultPipeline(optLevel); diff --git 
a/llvm/lib/Transforms/Tapir/HipABI.cpp b/llvm/lib/Transforms/Tapir/HipABI.cpp index 120204640e6d989..e36d3358c9c8d5a 100644 --- a/llvm/lib/Transforms/Tapir/HipABI.cpp +++ b/llvm/lib/Transforms/Tapir/HipABI.cpp @@ -114,7 +114,6 @@ #include "llvm/Transforms/Utils/AMDGPUEmitPrintf.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Mem2Reg.h" -#include "llvm/Transforms/Vectorize.h" using namespace llvm; @@ -631,7 +630,7 @@ void HipABI::transformConstants(Function *Fn) { new AddrSpaceCastInst(NewGEP, OldGEP->getType(), "", Call); Call->setArgOperand(argNo, asCast); } else - assert(nullptr && "unexpected use of gep"); + assert(false && "unexpected use of gep"); } OldGEP->eraseFromParent(); } @@ -644,9 +643,9 @@ void HipABI::transformArguments(Function *Fn) { FnArgTypes[A.getArgNo()] = A.getType(); if (isa(A.getType())) { LLVM_DEBUG(dbgs() << "\t\ttransforming argument: " << A << "\n"); - PointerType *OldPtrTy = dyn_cast(A.getType()); - PointerType *NewPtrTy = PointerType::getWithSamePointeeType( - OldPtrTy, HIPABI_GLOBAL_ADDR_SPACE); + PointerType *OldPtrTy = cast(A.getType()); + PointerType *NewPtrTy = + PointerType::get(OldPtrTy->getContext(), HIPABI_GLOBAL_ADDR_SPACE); // TODO: Better path here than mutate? 
A.mutateType(NewPtrTy); FnArgTypes[A.getArgNo()] = NewPtrTy; @@ -676,7 +675,7 @@ Value *HipLoop::emitWorkItemId(IRBuilder<> &Builder, int ItemIndex, int Low, LLVMContext &Ctx = KernelModule.getContext(); Type *Int32Ty = Type::getInt32Ty(Ctx); llvm::MDBuilder MDHelper(Ctx); - Constant *IndexVal = ConstantInt::get(Int32Ty, ItemIndex, ".x"); + Constant *IndexVal = ConstantInt::get(Int32Ty, ItemIndex); std::string WIName = "threadIdx."; switch (ItemIndex) { @@ -773,7 +772,7 @@ HipLoop::HipLoop(Module &M, Module &KModule, const std::string &Name, Type *Int32Ty = Type::getInt32Ty(Ctx); Type *Int64Ty = Type::getInt64Ty(Ctx); Type *VoidTy = Type::getVoidTy(Ctx); - PointerType *VoidPtrTy = Type::getInt8PtrTy(Ctx); + PointerType *VoidPtrTy = PointerType::getUnqual(Ctx); // We use ROCm/HSA/HIP entry points for various runtime calls. These calls // are often at a lower level vs. user-facing entry points. This follows @@ -1264,7 +1263,7 @@ std::unique_ptr HipABI::loadBCFile(const std::string &BCFile) { std::unique_ptr BCM = parseIRFile(BCFile, SMD, Ctx); if (not BCM) report_fatal_error("Failed to parse bitcode file!"); - return std::move(BCM); + return BCM; } bool HipABI::linkInModule(std::unique_ptr &Mod) { @@ -1335,7 +1334,7 @@ void HipLoop::processOutlinedLoopCall(TapirLoopInfo &TL, TaskOutlineInfo &TOI, LLVM_DEBUG(dbgs() << "\t*- code gen packing of " << OrderedInputs.size() << " kernel args.\n"); - PointerType *VoidPtrTy = Type::getInt8PtrTy(Ctx); + PointerType *VoidPtrTy = PointerType::getUnqual(Ctx); ArrayType *ArrayTy = ArrayType::get(VoidPtrTy, OrderedInputs.size()); Value *ArgArray = EntryBuilder.CreateAlloca(ArrayTy); unsigned int i = 0; @@ -1422,8 +1421,8 @@ HipABI::HipABI(Module &InputModule) LLVMContext &Ctx = InputModule.getContext(); Type *VoidTy = Type::getVoidTy(Ctx); - PointerType *VoidPtrTy = Type::getInt8PtrTy(Ctx); - PointerType *CharPtrTy = Type::getInt8PtrTy(Ctx); + PointerType *VoidPtrTy = PointerType::getUnqual(Ctx); + PointerType *CharPtrTy = 
PointerType::getUnqual(Ctx); Type *Int64Ty = Type::getInt64Ty(Ctx); Type *Int32Ty = Type::getInt32Ty(Ctx); KitHipGetGlobalSymbolFn = @@ -1456,19 +1455,19 @@ HipABI::HipABI(Module &InputModule) SmallString<255> NewModuleName(ArchString + KernelModule.getName().str()); sys::path::replace_extension(NewModuleName, ".amdgcn"); KernelModule.setSourceFileName(NewModuleName.c_str()); - llvm::CodeGenOpt::Level TMOptLevel; + llvm::CodeGenOptLevel TMOptLevel = CodeGenOptLevel::None; llvm::CodeModel::Model TMCodeModel = CodeModel::Model::Large; if (OptLevel == 0) - TMOptLevel = CodeGenOpt::Level::None; + TMOptLevel = CodeGenOptLevel::None; else if (OptLevel == 1) - TMOptLevel = CodeGenOpt::Level::Less; + TMOptLevel = CodeGenOptLevel::Less; else if (OptLevel == 2) - TMOptLevel = CodeGenOpt::Level::Default; + TMOptLevel = CodeGenOptLevel::Default; else if (OptLevel >= 3) - TMOptLevel = CodeGenOpt::Level::Aggressive; - std::string Features = ""; + TMOptLevel = CodeGenOptLevel::Aggressive; + std::string Features = ""; // TODO: feature is arch specific. need to cross-check. 
// NOTE: If the HSA_XNACK enviornment variable is not set this feature // can result in a crash that would appear to be an incorrect/corrupt @@ -1651,7 +1650,7 @@ void HipABI::finalizeLaunchCalls(Module &M, GlobalVariable *BundleBin) { LLVMContext &Ctx = M.getContext(); const DataLayout &DL = M.getDataLayout(); - PointerType *VoidPtrTy = Type::getInt8PtrTy(Ctx); + PointerType *VoidPtrTy = PointerType::getUnqual(Ctx); Type *Int64Ty = Type::getInt64Ty(Ctx); auto &FnList = M.getFunctionList(); @@ -1661,7 +1660,7 @@ void HipABI::finalizeLaunchCalls(Module &M, GlobalVariable *BundleBin) { if (CallInst *CI = dyn_cast(&I)) { if (Function *CFn = CI->getCalledFunction()) { - if (CFn->getName().startswith("__kithip_launch_kernel")) { + if (CFn->getName().starts_with("__kithip_launch_kernel")) { LLVM_DEBUG(dbgs() << "\t\t\t* patching launch: " << *CI << "\n"); Value *HipFatbin; HipFatbin = CastInst::CreateBitOrPointerCast( @@ -1756,7 +1755,7 @@ HipABIOutputFile HipABI::createTargetObj(const StringRef &ObjFileName) { pb.registerCGSCCAnalyses(cgam); pb.registerFunctionAnalyses(fam); pb.registerLoopAnalyses(lam); - AMDTargetMachine->registerPassBuilderCallbacks(pb); + AMDTargetMachine->registerPassBuilderCallbacks(pb, false); pb.crossRegisterProxies(lam, fam, cgam, mam); OptimizationLevel optLevels[] = { OptimizationLevel::O0, @@ -1778,14 +1777,14 @@ HipABIOutputFile HipABI::createTargetObj(const StringRef &ObjFileName) { legacy::PassManager PassMgr; if (AMDTargetMachine->addPassesToEmitFile(PassMgr, ObjFile->os(), nullptr, - CodeGenFileType::CGFT_ObjectFile, + CodeGenFileType::ObjectFile, false)) report_fatal_error("hipabi: AMDGPU target failed!"); PassMgr.run(KernelModule); LLVM_DEBUG(dbgs() << "\tkernel optimizations and code gen complete.\n\n"); LLVM_DEBUG(dbgs() << "\t\tobject file: " << ObjFile->getFilename() << "\n"); - return std::move(ObjFile); + return ObjFile; } HipABIOutputFile HipABI::linkTargetObj(const HipABIOutputFile &ObjFile, @@ -1850,7 +1849,7 @@ 
HipABIOutputFile HipABI::linkTargetObj(const HipABIOutputFile &ObjFile, if (ExecStat != 0) report_fatal_error("hipabi: 'ldd' failure - " + StringRef(ErrMsg)); - return std::move(LinkedObjFile); + return LinkedObjFile; } HipABIOutputFile HipABI::createBundleFile() { @@ -1930,9 +1929,9 @@ GlobalVariable *HipABI::embedBundle(HipABIOutputFile &BundleFile) { void HipABI::registerKernels(Value *HandlePtr, IRBuilder<> &B) { LLVMContext &Ctx = M.getContext(); Type *VoidTy = Type::getVoidTy(Ctx); - PointerType *VoidPtrTy = Type::getInt8PtrTy(Ctx); + PointerType *VoidPtrTy = PointerType::getUnqual(Ctx); PointerType *VoidPtrPtrTy = VoidPtrTy->getPointerTo(); - PointerType *CharPtrTy = Type::getInt8PtrTy(Ctx); + PointerType *CharPtrTy = PointerType::getUnqual(Ctx); Type *Int32Ty = Type::getInt32Ty(Ctx); llvm::Constant *NullPtr = llvm::ConstantPointerNull::get(VoidPtrTy); @@ -1978,10 +1977,10 @@ void HipABI::bindGlobalVariables(Value *Handle, IRBuilder<> &B) { const DataLayout &DL = M.getDataLayout(); Type *IntTy = Type::getInt32Ty(Ctx); Type *VoidTy = Type::getVoidTy(Ctx); - PointerType *VoidPtrTy = Type::getInt8PtrTy(Ctx); + PointerType *VoidPtrTy = PointerType::getUnqual(Ctx); PointerType *VoidPtrPtrTy = VoidPtrTy->getPointerTo(); Type *VarSizeTy = IntTy; - PointerType *CharPtrTy = Type::getInt8PtrTy(Ctx); + PointerType *CharPtrTy = PointerType::getUnqual(Ctx); FunctionCallee RegisterVarFn = M.getOrInsertFunction( "__hipRegisterManagedVar", @@ -2017,7 +2016,7 @@ Function *HipABI::createCtor(GlobalVariable *Bundle, GlobalVariable *Wrapper) { LLVMContext &Ctx = M.getContext(); Type *VoidTy = Type::getVoidTy(Ctx); - PointerType *VoidPtrTy = Type::getInt8PtrTy(Ctx); + PointerType *VoidPtrTy = PointerType::getUnqual(Ctx); PointerType *VoidPtrPtrTy = VoidPtrTy->getPointerTo(); Type *IntTy = Type::getInt32Ty(Ctx); @@ -2102,7 +2101,7 @@ Function *HipABI::createDtor(GlobalVariable *BundleHandle) { LLVMContext &Ctx = M.getContext(); const DataLayout &DL = M.getDataLayout(); Type 
*VoidTy = Type::getVoidTy(Ctx); - Type *VoidPtrTy = Type::getInt8PtrTy(Ctx); + Type *VoidPtrTy = PointerType::getUnqual(Ctx); Type *VoidPtrPtrTy = VoidPtrTy->getPointerTo(); FunctionCallee UnregisterFatbinFn = @@ -2138,7 +2137,7 @@ void HipABI::registerBundle(GlobalVariable *Bundle) { LLVMContext &Ctx = M.getContext(); const DataLayout &DL = M.getDataLayout(); Type *VoidTy = Type::getVoidTy(Ctx); - PointerType *VoidPtrTy = Type::getInt8PtrTy(Ctx); + PointerType *VoidPtrTy = PointerType::getUnqual(Ctx); Type *IntTy = Type::getInt32Ty(Ctx); StructType *WrapperTy = StructType::get(IntTy, // magic # @@ -2188,7 +2187,7 @@ void HipABI::postProcessModule() { Value *printf = KernelModule.getFunction("printf"); if (not printf) { LLVMContext &context = KernelModule.getContext(); - Type *paramTys[] = {Type::getInt8PtrTy(context)}; + Type *paramTys[] = {PointerType::getUnqual(context)}; Type *retTy = Type::getInt32Ty(context); FunctionType *funcTy = FunctionType::get(retTy, paramTys, false); FunctionCallee fce = KernelModule.getOrInsertFunction("printf", funcTy); @@ -2270,7 +2269,7 @@ void HipABI::postProcessModule() { pb.registerCGSCCAnalyses(cgam); pb.registerFunctionAnalyses(fam); pb.registerLoopAnalyses(lam); - AMDTargetMachine->registerPassBuilderCallbacks(pb); + AMDTargetMachine->registerPassBuilderCallbacks(pb, false); pb.crossRegisterProxies(lam, fam, cgam, mam); OptimizationLevel optLevels[] = { diff --git a/llvm/lib/Transforms/Tapir/LambdaABI.cpp b/llvm/lib/Transforms/Tapir/LambdaABI.cpp index 0ec5e900c7559e1..6a90a3cde646b65 100644 --- a/llvm/lib/Transforms/Tapir/LambdaABI.cpp +++ b/llvm/lib/Transforms/Tapir/LambdaABI.cpp @@ -185,7 +185,7 @@ void LambdaABI::prepareModule() { PointerType *StackFramePtrTy = PointerType::getUnqual(StackFrameTy); Type *VoidTy = Type::getVoidTy(C); - Type *VoidPtrTy = Type::getInt8PtrTy(C); + Type *VoidPtrTy = PointerType::getUnqual(C); // Define the types of the RTS functions. 
FunctionType *RTSFnTy = FunctionType::get(VoidTy, {StackFramePtrTy}, false); diff --git a/llvm/lib/Transforms/Tapir/LoopSpawningTI.cpp b/llvm/lib/Transforms/Tapir/LoopSpawningTI.cpp index 6fba6f9f84c8d5d..e69913867d79100 100644 --- a/llvm/lib/Transforms/Tapir/LoopSpawningTI.cpp +++ b/llvm/lib/Transforms/Tapir/LoopSpawningTI.cpp @@ -411,8 +411,8 @@ class LoopSpawningImpl { ScalarEvolution &SE, AssumptionCache &AC, TargetTransformInfo &TTI, TapirTargetID Target, OptimizationRemarkEmitter &ORE, std::map> &Targets) - : F(F), DT(DT), LI(LI), TI(TI), SE(SE), AC(AC), TTI(TTI), Target(Target), - ORE(ORE), Targets(Targets) {} + : F(F), DT(DT), LI(LI), TI(TI), SE(SE), AC(AC), TTI(TTI), ORE(ORE), + Targets(Targets) {} ~LoopSpawningImpl() { for (TapirLoopInfo *TL : TapirLoops) @@ -526,7 +526,6 @@ class LoopSpawningImpl { ScalarEvolution &SE; AssumptionCache &AC; TargetTransformInfo &TTI; - TapirTargetID Target; OptimizationRemarkEmitter &ORE; std::map> &Targets; @@ -907,7 +906,6 @@ LoopOutlineProcessor *LoopSpawningImpl::getOutlineProcessor(TapirLoopInfo *TL) { Loop *L = TL->getLoop(); TapirLoopHints Hints(L); TapirTargetID TLTID = (TapirTargetID)Hints.getLoopTarget(); - unsigned int ThreadsPerBlock = Hints.getThreadsPerBlock(); // get the LoopTarget from set of Targets if it exists, otherwise create it @@ -1405,10 +1403,12 @@ Function *LoopSpawningImpl::createHelperForTapirLoop( // loop with llvm.stacksave/llvm.stackrestore intrinsics. if (ContainsDynamicAllocas) { Module *M = Helper->getParent(); + LLVMContext& Ctx = M->getContext(); // Get the two intrinsics we care about. 
- Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); - Function *StackRestore = - Intrinsic::getDeclaration(M, Intrinsic::stackrestore); + Function *StackSave = Intrinsic::getDeclaration( + M, Intrinsic::stacksave, {PointerType::getUnqual(Ctx)}); + Function *StackRestore = Intrinsic::getDeclaration( + M, Intrinsic::stackrestore, {PointerType::getUnqual(Ctx)}); // Insert the llvm.stacksave. CallInst *SavedPtr = diff --git a/llvm/lib/Transforms/Tapir/LoopStripMine.cpp b/llvm/lib/Transforms/Tapir/LoopStripMine.cpp index 02e04f4fefbf0d3..e2ff76bcfcfbe3c 100644 --- a/llvm/lib/Transforms/Tapir/LoopStripMine.cpp +++ b/llvm/lib/Transforms/Tapir/LoopStripMine.cpp @@ -14,6 +14,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" @@ -656,9 +657,10 @@ static BasicBlock *NestDetachUnwindPredecessors( Value *InnerUDLPad; Type *OrigLPadTy = OrigLPad->getType(); if (EHCont->isLandingPad()) { + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy); SmallVector NewBBs; - SplitLandingPadPredecessors(EHCont, Preds, Suffix1, Suffix2, NewBBs, DT, LI, - MSSAU, PreserveLCSSA); + SplitLandingPadPredecessors(EHCont, Preds, Suffix1, Suffix2, NewBBs, &DTU, + LI, MSSAU, PreserveLCSSA); InnerUD = NewBBs[0]; OuterUD = NewBBs[1]; InnerUDLPad = InnerUD->getLandingPadInst(); diff --git a/llvm/lib/Transforms/Tapir/LoweringUtils.cpp b/llvm/lib/Transforms/Tapir/LoweringUtils.cpp index c1abce0bbe04464..b24ca046d41ed33 100644 --- a/llvm/lib/Transforms/Tapir/LoweringUtils.cpp +++ b/llvm/lib/Transforms/Tapir/LoweringUtils.cpp @@ -14,9 +14,9 @@ #include "llvm/Demangle/Demangle.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include 
"llvm/IR/IntrinsicInst.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include "llvm/Support/Timer.h" @@ -75,9 +75,8 @@ TapirTarget *llvm::getTapirTargetFromID(Module &M, TapirTargetID ID) { /// Helper function to find the inputs and outputs to task T, based only the /// blocks in T and no subtask of T. -static void -findTaskInputsOutputs(const Task *T, ValueSet &Inputs, ValueSet &Outputs, - const DominatorTree &DT) { +static void findTaskInputsOutputs(const Task *T, ValueSet &Inputs, + ValueSet &Outputs, const DominatorTree &DT) { NamedRegionTimer NRT("findTaskInputsOutputs", "Find task inputs and outputs", TimerGroupName, TimerGroupDescription, TimePassesIsEnabled); @@ -164,7 +163,8 @@ TaskValueSetMap llvm::findAllTaskInputs(Function &F, const DominatorTree &DT, TaskValueSetMap TaskInputs; for (Task *T : post_order(TI.getRootTask())) { // Skip the root task - if (T->isRootTask()) break; + if (T->isRootTask()) + break; LLVM_DEBUG(dbgs() << "Finding inputs/outputs for task@" << T->getEntry()->getName() << "\n"); @@ -226,16 +226,12 @@ static bool definedOutsideTaskFrame(const Value *V, const Spindle *TF, /// Get the set of inputs for the given task T, accounting for the taskframe of /// T, if it exists. 
-void llvm::getTaskFrameInputsOutputs(TFValueSetMap &TFInputs, - TFValueSetMap &TFOutputs, - const Spindle &TF, - const ValueSet *TaskInputs, - const TaskInfo &TI, - const DominatorTree &DT) { +void llvm::getTaskFrameInputsOutputs( + TFValueSetMap &TFInputs, TFValueSetMap &TFOutputs, const Spindle &TF, + const ValueSet *TaskInputs, const TaskInfo &TI, const DominatorTree &DT) { NamedRegionTimer NRT("getTaskFrameInputsOutputs", - "Find taskframe inputs and outputs", - TimerGroupName, TimerGroupDescription, - TimePassesIsEnabled); + "Find taskframe inputs and outputs", TimerGroupName, + TimerGroupDescription, TimePassesIsEnabled); const Task *T = TF.getTaskFromTaskFrame(); if (T) @@ -309,28 +305,28 @@ void llvm::getTaskFrameInputsOutputs(TFValueSetMap &TFInputs, // TODO: Add a test to exclude landingpads from detached-rethrows? LLVM_DEBUG({ - if (Instruction *OP = dyn_cast(*OI)) { - assert(!(T && T->encloses(OP->getParent())) && - "TaskFrame uses value defined in task."); - } - }); + if (Instruction *OP = dyn_cast(*OI)) { + assert(!(T && T->encloses(OP->getParent())) && + "TaskFrame uses value defined in task."); + } + }); // If this operand is not defined outside of the taskframe, then it's // an input. if (definedOutsideTaskFrame(*OI, &TF, TI)) TFInputs[&TF].insert(*OI); - } - // Examine all users of this instruction. - for (User *U : I.users()) { - // If we find a live use outside of the task, it's an output. - if (Instruction *UI = dyn_cast(U)) { - if (definedOutsideTaskFrame(UI, &TF, TI) && - DT.isReachableFromEntry(UI->getParent())) - TFOutputs[&TF].insert(&I); + } + // Examine all users of this instruction. + for (User *U : I.users()) { + // If we find a live use outside of the task, it's an output. + if (Instruction *UI = dyn_cast(U)) { + if (definedOutsideTaskFrame(UI, &TF, TI) && + DT.isReachableFromEntry(UI->getParent())) + TFOutputs[&TF].insert(&I); + } } } } } - } } /// Determine the inputs for all taskframes in this function. 
Returns a map @@ -369,11 +365,9 @@ void llvm::findAllTaskFrameInputs( /// location as the Reference compiler and other compilers that lower parallel /// constructs in the front end. This location is NOT the correct place, /// however, for handling tasks that are spawned inside of a serial loop. -std::pair -llvm::createTaskArgsStruct(const ValueSet &Inputs, Task *T, - Instruction *StorePt, Instruction *LoadPt, - bool staticStruct, ValueToValueMapTy &InputsMap, - Loop *TapirL) { +std::pair llvm::createTaskArgsStruct( + const ValueSet &Inputs, Task *T, Instruction *StorePt, Instruction *LoadPt, + bool staticStruct, ValueToValueMapTy &InputsMap, Loop *TapirL) { assert(T && T->getParentTask() && "Expected spawned task."); SmallPtrSet TaskFrameBlocks; if (Spindle *TFCreateSpindle = T->getTaskFrameCreateSpindle()) { @@ -408,7 +402,7 @@ llvm::createTaskArgsStruct(const ValueSet &Inputs, Task *T, std::sort(InputsToSort.begin(), InputsToSort.end(), [&DL](const Value *A, const Value *B) { return DL.getTypeSizeInBits(A->getType()) > - DL.getTypeSizeInBits(B->getType()); + DL.getTypeSizeInBits(B->getType()); }); } @@ -517,7 +511,7 @@ void llvm::fixupInputSet(Function &F, const ValueSet &Inputs, ValueSet &Fixed) { std::sort(InputsToSort.begin(), InputsToSort.end(), [&DL](const Value *A, const Value *B) { return DL.getTypeSizeInBits(A->getType()) > - DL.getTypeSizeInBits(B->getType()); + DL.getTypeSizeInBits(B->getType()); }); // Add the remaining inputs. @@ -529,16 +523,17 @@ void llvm::fixupInputSet(Function &F, const ValueSet &Inputs, ValueSet &Fixed) { /// Organize the inputs to task \p T, given in \p TaskInputs, to create an /// appropriate set of inputs, \p HelperInputs, to pass to the outlined /// function for \p T. 
-Instruction *llvm::fixupHelperInputs( - Function &F, Task *T, ValueSet &TaskInputs, ValueSet &HelperArgs, - Instruction *StorePt, Instruction *LoadPt, - TapirTarget::ArgStructMode useArgStruct, - ValueToValueMapTy &InputsMap, Loop *TapirL) { +Instruction *llvm::fixupHelperInputs(Function &F, Task *T, ValueSet &TaskInputs, + ValueSet &HelperArgs, Instruction *StorePt, + Instruction *LoadPt, + TapirTarget::ArgStructMode useArgStruct, + ValueToValueMapTy &InputsMap, + Loop *TapirL) { if (TapirTarget::ArgStructMode::None != useArgStruct) { std::pair ArgsStructInfo = - createTaskArgsStruct(TaskInputs, T, StorePt, LoadPt, - TapirTarget::ArgStructMode::Static == useArgStruct, - InputsMap, TapirL); + createTaskArgsStruct(TaskInputs, T, StorePt, LoadPt, + TapirTarget::ArgStructMode::Static == useArgStruct, + InputsMap, TapirL); HelperArgs.insert(ArgsStructInfo.first); return ArgsStructInfo.second; } @@ -661,9 +656,9 @@ void llvm::getTaskBlocks(Task *T, std::vector &TaskBlocks, /// function. The parameter \p Inputs specified the inputs to the helper /// function. The map \p VMap is updated with the mapping of instructions in /// \p T to instructions in the new helper function. -Function *llvm::createHelperForTask( - Function &F, Task *T, ValueSet &Args, Module *DestM, - ValueToValueMapTy &VMap, Type *ReturnType, OutlineAnalysis &OA) { +Function *llvm::createHelperForTask(Function &F, Task *T, ValueSet &Args, + Module *DestM, ValueToValueMapTy &VMap, + Type *ReturnType, OutlineAnalysis &OA) { // Collect all basic blocks in this task. std::vector TaskBlocks; // Reattach instructions and detached rethrows in this task might need special @@ -676,7 +671,7 @@ Function *llvm::createHelperForTask( getTaskBlocks(T, TaskBlocks, ReattachBlocks, TaskResumeBlocks, SharedEHEntries, &OA.DT); - SmallVector Returns; // Ignore returns cloned. + SmallVector Returns; // Ignore returns cloned. 
ValueSet Outputs; DetachInst *DI = T->getDetach(); @@ -813,9 +808,11 @@ static BasicBlock *getTaskFrameContinue(Spindle *TF) { /// function. The parameter \p Inputs specified the inputs to the helper /// function. The map \p VMap is updated with the mapping of instructions in \p /// TF to instructions in the new helper function. -Function *llvm::createHelperForTaskFrame( - Function &F, Spindle *TF, ValueSet &Args, Module *DestM, - ValueToValueMapTy &VMap, Type *ReturnType, OutlineAnalysis &OA) { +Function *llvm::createHelperForTaskFrame(Function &F, Spindle *TF, + ValueSet &Args, Module *DestM, + ValueToValueMapTy &VMap, + Type *ReturnType, + OutlineAnalysis &OA) { // Collect all basic blocks in this task. std::vector TaskBlocks; // Reattach instructions and detached rethrows in this task might need special @@ -826,78 +823,78 @@ Function *llvm::createHelperForTaskFrame( // rewritten in the cloned helper. SmallPtrSet SharedEHEntries; { - NamedRegionTimer NRT("getTaskFrameBlocks", "Get taskframe blocks", - TimerGroupName, TimerGroupDescription, - TimePassesIsEnabled); - // Get taskframe blocks - for (Spindle *S : TF->taskframe_spindles()) { - // Skip spindles that are placeholders. - if (isPlaceholderSuccessor(S->getEntry())) - continue; - - LLVM_DEBUG(dbgs() << "Adding blocks in taskframe spindle " << *S << "\n"); + NamedRegionTimer NRT("getTaskFrameBlocks", "Get taskframe blocks", + TimerGroupName, TimerGroupDescription, + TimePassesIsEnabled); + // Get taskframe blocks + for (Spindle *S : TF->taskframe_spindles()) { + // Skip spindles that are placeholders. + if (isPlaceholderSuccessor(S->getEntry())) + continue; - // Some canonicalization methods, e.g., loop canonicalization, will - // introduce a basic block after a detached-rethrow that branches to the - // successor of the EHContinuation entry. 
- for (BasicBlock *Pred : predecessors(S->getEntry())) { - assert(!endsTaskFrame(Pred, TF->getTaskFrameCreate()) && - "Taskframe spindle after taskframe.end"); - if (isDetachedRethrow(Pred->getTerminator())) - SharedEHEntries.insert(S->getEntry()); - if (isSuccessorOfDetachedRethrow(Pred)) - SharedEHEntries.insert(S->getEntry()); - } + LLVM_DEBUG(dbgs() << "Adding blocks in taskframe spindle " << *S << "\n"); - // Terminate landingpads might be shared between a taskframe and its parent. - // It's safe to clone these blocks, but we need to be careful about PHI - // nodes. - if (S != TF) { - for (Spindle *PredS : predecessors(S)) { - if (!TF->taskFrameContains(PredS)) { - LLVM_DEBUG( - dbgs() - << "Taskframe spindle has predecessor outside of taskframe: " - << *S << "\n"); + // Some canonicalization methods, e.g., loop canonicalization, will + // introduce a basic block after a detached-rethrow that branches to the + // successor of the EHContinuation entry. + for (BasicBlock *Pred : predecessors(S->getEntry())) { + assert(!endsTaskFrame(Pred, TF->getTaskFrameCreate()) && + "Taskframe spindle after taskframe.end"); + if (isDetachedRethrow(Pred->getTerminator())) + SharedEHEntries.insert(S->getEntry()); + if (isSuccessorOfDetachedRethrow(Pred)) SharedEHEntries.insert(S->getEntry()); - break; + } + + // Terminate landingpads might be shared between a taskframe and its + // parent. It's safe to clone these blocks, but we need to be careful + // about PHI nodes. 
+ if (S != TF) { + for (Spindle *PredS : predecessors(S)) { + if (!TF->taskFrameContains(PredS)) { + LLVM_DEBUG( + dbgs() + << "Taskframe spindle has predecessor outside of taskframe: " + << *S << "\n"); + SharedEHEntries.insert(S->getEntry()); + break; + } } } - } - for (BasicBlock *B : S->blocks()) { - LLVM_DEBUG(dbgs() << "Adding taskframe block " << B->getName() << "\n"); - TaskBlocks.push_back(B); + for (BasicBlock *B : S->blocks()) { + LLVM_DEBUG(dbgs() << "Adding taskframe block " << B->getName() << "\n"); + TaskBlocks.push_back(B); - // Record any blocks that end the taskframe. - if (endsTaskFrame(B)) { - LLVM_DEBUG(dbgs() << "Recording taskframe.end block " << B->getName() - << "\n"); - TFEndBlocks.insert(B); - } - if (isTaskFrameResume(B->getTerminator())) { - LLVM_DEBUG(dbgs() << "Recording taskframe.resume block " << B->getName() - << "\n"); - TFResumeBlocks.insert(B); - } + // Record any blocks that end the taskframe. + if (endsTaskFrame(B)) { + LLVM_DEBUG(dbgs() << "Recording taskframe.end block " << B->getName() + << "\n"); + TFEndBlocks.insert(B); + } + if (isTaskFrameResume(B->getTerminator())) { + LLVM_DEBUG(dbgs() << "Recording taskframe.resume block " + << B->getName() << "\n"); + TFResumeBlocks.insert(B); + } - // Terminate landingpads might be shared between a taskframe and its - // parent. It's safe to clone these blocks, but we need to be careful - // about PHI nodes. - if ((B != S->getEntry()) && B->isLandingPad()) { - for (BasicBlock *Pred : predecessors(B)) { - if (!S->contains(Pred)) { - LLVM_DEBUG(dbgs() << "Block within taskframe spindle has " - "predecessor outside of spindle.\n"); - SharedEHEntries.insert(B); + // Terminate landingpads might be shared between a taskframe and its + // parent. It's safe to clone these blocks, but we need to be careful + // about PHI nodes. 
+ if ((B != S->getEntry()) && B->isLandingPad()) { + for (BasicBlock *Pred : predecessors(B)) { + if (!S->contains(Pred)) { + LLVM_DEBUG(dbgs() << "Block within taskframe spindle has " + "predecessor outside of spindle.\n"); + SharedEHEntries.insert(B); + } } } } } - } } // end timed region - SmallVector Returns; // Ignore returns cloned. + SmallVector Returns; // Ignore returns cloned. ValueSet Outputs; Value *TFCreate = TF->getTaskFrameCreate(); @@ -941,8 +938,7 @@ Function *llvm::createHelperForTaskFrame( // Move allocas in cloned taskframe entry block to entry of helper function. BasicBlock *ClonedTFEntry = cast(VMap[Header]); - MoveStaticAllocasInBlock(&Helper->getEntryBlock(), ClonedTFEntry, - TaskEnds); + MoveStaticAllocasInBlock(&Helper->getEntryBlock(), ClonedTFEntry, TaskEnds); // We do not need to add new llvm.stacksave/llvm.stackrestore intrinsics, // because calling and returning from the helper will automatically manage @@ -975,11 +971,13 @@ Function *llvm::createHelperForTaskFrame( /// Inputs. The map \p VMap is updated with the mapping of instructions in \p /// TF to instructions in the new helper function. Information about the helper /// function is returned as a TaskOutlineInfo structure. 
-TaskOutlineInfo llvm::outlineTaskFrame( - Spindle *TF, ValueSet &Inputs, SmallVectorImpl &HelperInputs, - Module *DestM, ValueToValueMapTy &VMap, - TapirTarget::ArgStructMode useArgStruct, Type *ReturnType, - ValueToValueMapTy &InputMap, OutlineAnalysis &OA) { +TaskOutlineInfo llvm::outlineTaskFrame(Spindle *TF, ValueSet &Inputs, + SmallVectorImpl &HelperInputs, + Module *DestM, ValueToValueMapTy &VMap, + TapirTarget::ArgStructMode useArgStruct, + Type *ReturnType, + ValueToValueMapTy &InputMap, + OutlineAnalysis &OA) { if (Task *T = TF->getTaskFromTaskFrame()) return outlineTask(T, Inputs, HelperInputs, DestM, VMap, useArgStruct, ReturnType, InputMap, OA); @@ -1002,17 +1000,18 @@ TaskOutlineInfo llvm::outlineTaskFrame( HelperInputs.push_back(V); // Clone the blocks into a helper function. - Function *Helper = createHelperForTaskFrame(F, TF, HelperArgs, DestM, VMap, - ReturnType, OA); + Function *Helper = + createHelperForTaskFrame(F, TF, HelperArgs, DestM, VMap, ReturnType, OA); Instruction *ClonedTF = cast(VMap[TF->getTaskFrameCreate()]); - return TaskOutlineInfo(Helper, Entry, nullptr, ClonedTF, Inputs, - ArgsStart, StorePt, nullptr, Continue, Unwind); + return TaskOutlineInfo(Helper, Entry, nullptr, ClonedTF, Inputs, ArgsStart, + StorePt, nullptr, Continue, Unwind); } /// Replaces the spawned task \p T, with associated TaskOutlineInfo \p Out, with /// a call or invoke to the outlined helper function created for \p T. Instruction *llvm::replaceTaskFrameWithCallToOutline( - Spindle *TF, TaskOutlineInfo &Out, SmallVectorImpl &OutlineInputs) { + Spindle *TF, TaskOutlineInfo &Out, + SmallVectorImpl &OutlineInputs) { if (Task *T = TF->getTaskFromTaskFrame()) // Remove any dependencies from T's exception-handling code to T's parent. unlinkTaskEHFromParent(T); @@ -1072,11 +1071,12 @@ Instruction *llvm::replaceTaskFrameWithCallToOutline( /// Inputs. The map \p VMap is updated with the mapping of instructions in \p T /// to instructions in the new helper function. 
Information about the helper /// function is returned as a TaskOutlineInfo structure. -TaskOutlineInfo llvm::outlineTask( - Task *T, ValueSet &Inputs, SmallVectorImpl &HelperInputs, - Module *DestM, ValueToValueMapTy &VMap, - TapirTarget::ArgStructMode useArgStruct, Type *ReturnType, - ValueToValueMapTy &InputMap, OutlineAnalysis &OA) { +TaskOutlineInfo llvm::outlineTask(Task *T, ValueSet &Inputs, + SmallVectorImpl &HelperInputs, + Module *DestM, ValueToValueMapTy &VMap, + TapirTarget::ArgStructMode useArgStruct, + Type *ReturnType, ValueToValueMapTy &InputMap, + OutlineAnalysis &OA) { assert(!T->isRootTask() && "Cannot outline the root task."); Function &F = *T->getEntry()->getParent(); DetachInst *DI = T->getDetach(); @@ -1107,14 +1107,13 @@ TaskOutlineInfo llvm::outlineTask( HelperInputs.push_back(V); // Clone the blocks into a helper function. - Function *Helper = createHelperForTask(F, T, HelperArgs, DestM, VMap, - ReturnType, OA); + Function *Helper = + createHelperForTask(F, T, HelperArgs, DestM, VMap, ReturnType, OA); Value *ClonedTFCreate = TFCreate ? VMap[TFCreate] : nullptr; - return TaskOutlineInfo(Helper, T->getEntry(), - dyn_cast_or_null(VMap[DI]), - dyn_cast_or_null(ClonedTFCreate), Inputs, - ArgsStart, StorePt, T->getDetach()->getSyncRegion(), - DI->getContinue(), Unwind); + return TaskOutlineInfo( + Helper, T->getEntry(), dyn_cast_or_null(VMap[DI]), + dyn_cast_or_null(ClonedTFCreate), Inputs, ArgsStart, StorePt, + T->getDetach()->getSyncRegion(), DI->getContinue(), Unwind); } //----------------------------------------------------------------------------// @@ -1124,7 +1123,8 @@ TaskOutlineInfo llvm::outlineTask( /// blocks in a function. 
static bool definedOutsideBlocks(const Value *V, SmallPtrSetImpl &Blocks) { - if (isa(V)) return true; + if (isa(V)) + return true; if (const Instruction *I = dyn_cast(V)) return !Blocks.count(I->getParent()); return false; @@ -1163,7 +1163,8 @@ ValueSet llvm::getTapirLoopInputs(TapirLoopInfo *TL, ValueSet &TaskInputs) { for (BasicBlock *BB : BlocksToCheck) { for (Instruction &II : *BB) { // Skip the condition of this loop, since we will process that specially. - if (TL->getCondition() == &II) continue; + if (TL->getCondition() == &II) + continue; // Examine all operands of this instruction. for (User::op_iterator OI = II.op_begin(), OE = II.op_end(); OI != OE; ++OI) { @@ -1171,10 +1172,10 @@ ValueSet llvm::getTapirLoopInputs(TapirLoopInfo *TL, ValueSet &TaskInputs) { if (SyncRegion == *OI) continue; LLVM_DEBUG({ - if (Instruction *OP = dyn_cast(*OI)) - assert(!T->encloses(OP->getParent()) && - "Loop control uses value defined in body task."); - }); + if (Instruction *OP = dyn_cast(*OI)) + assert(!T->encloses(OP->getParent()) && + "Loop control uses value defined in body task."); + }); // If this operand is not defined in the header or latch, it's an input. if (definedOutsideBlocks(*OI, BlocksToCheck)) LoopInputs.insert(*OI); @@ -1187,9 +1188,9 @@ ValueSet llvm::getTapirLoopInputs(TapirLoopInfo *TL, ValueSet &TaskInputs) { /// Replaces the Tapir loop \p TL, with associated TaskOutlineInfo \p Out, with /// a call or invoke to the outlined helper function created for \p TL. -Instruction *llvm::replaceLoopWithCallToOutline( - TapirLoopInfo *TL, TaskOutlineInfo &Out, - SmallVectorImpl &OutlineInputs) { +Instruction * +llvm::replaceLoopWithCallToOutline(TapirLoopInfo *TL, TaskOutlineInfo &Out, + SmallVectorImpl &OutlineInputs) { // Remove any dependencies from the detach unwind of T code to T's parent. 
unlinkTaskEHFromParent(TL->getTask()); @@ -1266,7 +1267,7 @@ void TapirTarget::lowerTaskFrameAddrCall(CallInst *TaskFrameAddrCall) { // By default, replace calls to task_frameaddress with ordinary calls to the // frameaddress intrinsic. TaskFrameAddrCall->setCalledFunction(Intrinsic::getDeclaration( - &M, Intrinsic::frameaddress, PointerType::getInt8PtrTy(M.getContext()))); + &M, Intrinsic::frameaddress, PointerType::get(M.getContext(), 0))); } void TapirTarget::lowerTapirRTCalls(SmallVectorImpl &TapirRTCalls, @@ -1283,18 +1284,15 @@ bool TapirTarget::processOrdinaryFunction(Function &F, BasicBlock *TFEntry) { return false; } - /// @brief Wite the given module to a file as readable IR. /// @param M - the module to save. /// @param Filename - optional file name (empty string uses module name). -void llvm::saveModuleToFile(const Module *M, - const std::string &FileName, - const std::string &Extension) { +void llvm::saveModuleToFile(const Module *M, const std::string &FileName, + const std::string &Extension) { std::error_code EC; SmallString<256> IRFileName; if (FileName.empty()) - IRFileName = Twine(sys::path::filename(M->getName())).str() - + Extension; + IRFileName = Twine(sys::path::filename(M->getName())).str() + Extension; else IRFileName = Twine(FileName).str() + Extension; @@ -1310,9 +1308,8 @@ void llvm::saveModuleToFile(const Module *M, /// @brief Write the given function to a file as readable IR. /// @param Fn - the function to save. /// @param Filename - optional file name (empty string uses function name). 
-void llvm::saveFunctionToFile(const Function *Fn, - const std::string &FileName, - const std::string &Extension) { +void llvm::saveFunctionToFile(const Function *Fn, const std::string &FileName, + const std::string &Extension) { std::error_code EC; SmallString<256> IRFileName; if (FileName.empty()) { diff --git a/llvm/lib/Transforms/Tapir/OMPTaskABI.cpp b/llvm/lib/Transforms/Tapir/OMPTaskABI.cpp index c0fdab7440b8443..184895705821ab1 100644 --- a/llvm/lib/Transforms/Tapir/OMPTaskABI.cpp +++ b/llvm/lib/Transforms/Tapir/OMPTaskABI.cpp @@ -193,7 +193,7 @@ void OMPTaskABI::prepareModule() { PointerType *StackFramePtrTy = PointerType::getUnqual(StackFrameTy); Type *VoidTy = Type::getVoidTy(C); - Type *VoidPtrTy = Type::getInt8PtrTy(C); + Type *VoidPtrTy = PointerType::getUnqual(C); // Define the types of the RTS functions. FunctionType *RTSFnTy = FunctionType::get(VoidTy, {StackFramePtrTy}, false); diff --git a/llvm/lib/Transforms/Tapir/OpenCilkABI.cpp b/llvm/lib/Transforms/Tapir/OpenCilkABI.cpp index b4b8d601d28dcf3..b91a076d6512ccb 100644 --- a/llvm/lib/Transforms/Tapir/OpenCilkABI.cpp +++ b/llvm/lib/Transforms/Tapir/OpenCilkABI.cpp @@ -210,7 +210,7 @@ void OpenCilkABI::prepareModule() { PointerType *StackFramePtrTy = PointerType::getUnqual(StackFrameTy); Type *VoidTy = Type::getVoidTy(C); - Type *VoidPtrTy = Type::getInt8PtrTy(C); + Type *VoidPtrTy = PointerType::getUnqual(C); // Define the types of the CilkRTS functions. 
FunctionType *CilkRTSFnTy = @@ -220,7 +220,7 @@ void OpenCilkABI::prepareModule() { FunctionType *CilkRTSEnterLandingpadFnTy = FunctionType::get(VoidTy, {StackFramePtrTy, Int32Ty}, false); FunctionType *CilkRTSPauseFrameFnTy = FunctionType::get( - VoidTy, {StackFramePtrTy, PointerType::getInt8PtrTy(C)}, false); + VoidTy, {StackFramePtrTy, PointerType::getUnqual(C)}, false); FunctionType *Grainsize8FnTy = FunctionType::get(Int8Ty, {Int8Ty}, false); FunctionType *Grainsize16FnTy = FunctionType::get(Int16Ty, {Int16Ty}, false); FunctionType *Grainsize32FnTy = FunctionType::get(Int32Ty, {Int32Ty}, false); @@ -647,7 +647,7 @@ BasicBlock *OpenCilkABI::GetDefaultSyncLandingpad(Function &F, Value *SF, LLVMContext &C = F.getContext(); const Twine Name = "default_sync_lpad"; BasicBlock *CleanupBB = BasicBlock::Create(C, Name, &F); - Type *ExnTy = StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C)); + Type *ExnTy = StructType::get(PointerType::getUnqual(C), Type::getInt32Ty(C)); IRBuilder<> Builder(CleanupBB); Builder.SetCurrentDebugLocation(Loc); diff --git a/llvm/lib/Transforms/Tapir/QthreadsABI.cpp b/llvm/lib/Transforms/Tapir/QthreadsABI.cpp index 15e7bac3e474e8e..6fe51f6b6c3dce3 100644 --- a/llvm/lib/Transforms/Tapir/QthreadsABI.cpp +++ b/llvm/lib/Transforms/Tapir/QthreadsABI.cpp @@ -1,4 +1,4 @@ -//===- QthreadsABI.cpp - Lower Tapir into Qthreads runtime system calls -----------===// +//===- QthreadsABI.cpp - Lower Tapir into Qthreads runtime system calls ---===// // // The LLVM Compiler Infrastructure // @@ -20,15 +20,14 @@ #include "llvm/Transforms/Tapir/Outline.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/TapirUtils.h" -#include "llvm/Support/CommandLine.h" using namespace llvm; #define DEBUG_TYPE "qthreadsabi" -static cl::opt UseCopyargs( - "qthreads-use-fork-copyargs", cl::init(false), cl::Hidden, - cl::desc("Use copyargs variant of fork")); +static cl::opt UseCopyargs("qthreads-use-fork-copyargs", cl::init(false), + 
cl::Hidden, + cl::desc("Use copyargs variant of fork")); // Accessors for opaque Qthreads RTS functions FunctionCallee QthreadsABI::get_qthread_num_workers() { @@ -51,13 +50,15 @@ FunctionCallee QthreadsABI::get_qthread_fork_copyargs() { const DataLayout &DL = M.getDataLayout(); AttributeList AL; // TODO: Set appropriate function attributes. - FunctionType *FTy = FunctionType::get( - Type::getInt32Ty(C), - { QthreadFTy, // qthread_f f - Type::getInt8PtrTy(C), // const void *arg - DL.getIntPtrType(C), // size_t arg_size - Type::getInt64PtrTy(C) // aligned_t *ret - }, false); + FunctionType *FTy = + FunctionType::get(Type::getInt32Ty(C), + { + QthreadFTy, // qthread_f f + PointerType::getUnqual(C), // const void *arg + DL.getIntPtrType(C), // size_t arg_size + PointerType::getUnqual(C) // aligned_t *ret + }, + false); QthreadForkCopyargs = M.getOrInsertFunction("qthread_fork_copyargs", FTy, AL); return QthreadForkCopyargs; @@ -70,8 +71,7 @@ FunctionCallee QthreadsABI::get_qthread_initialize() { LLVMContext &C = M.getContext(); AttributeList AL; // TODO: Set appropriate function attributes. - FunctionType *FTy = FunctionType::get( - Type::getInt32Ty(C), {}, false); + FunctionType *FTy = FunctionType::get(Type::getInt32Ty(C), {}, false); QthreadInitialize = M.getOrInsertFunction("qthread_initialize", FTy, AL); return QthreadInitialize; @@ -85,14 +85,15 @@ FunctionCallee QthreadsABI::get_qt_sinc_create() { const DataLayout &DL = M.getDataLayout(); AttributeList AL; // TODO: Set appropriate function attributes. 
- FunctionType *FTy = FunctionType::get( - Type::getInt8PtrTy(C), - { DL.getIntPtrType(C), // size_t size - Type::getInt8PtrTy(C), // void *initval - Type::getInt8PtrTy(C), // void *op - DL.getIntPtrType(C) // size_t expect - }, - false); + FunctionType *FTy = + FunctionType::get(PointerType::getUnqual(C), + { + DL.getIntPtrType(C), // size_t size + PointerType::getUnqual(C), // void *initval + PointerType::getUnqual(C), // void *op + DL.getIntPtrType(C) // size_t expect + }, + false); QtSincCreate = M.getOrInsertFunction("qt_sinc_create", FTy, AL); return QtSincCreate; @@ -106,12 +107,13 @@ FunctionCallee QthreadsABI::get_qt_sinc_expect() { const DataLayout &DL = M.getDataLayout(); AttributeList AL; // TODO: Set appropriate function attributes. - FunctionType *FTy = FunctionType::get( - Type::getVoidTy(C), - { Type::getInt8PtrTy(C), // sync_t *s - DL.getIntPtrType(C) // size_t incr - }, - false); + FunctionType *FTy = + FunctionType::get(Type::getVoidTy(C), + { + PointerType::getUnqual(C), // sync_t *s + DL.getIntPtrType(C) // size_t incr + }, + false); QtSincExpect = M.getOrInsertFunction("qt_sinc_expect", FTy, AL); return QtSincExpect; @@ -124,12 +126,13 @@ FunctionCallee QthreadsABI::get_qt_sinc_submit() { LLVMContext &C = M.getContext(); AttributeList AL; // TODO: Set appropriate function attributes. - FunctionType *FTy = FunctionType::get( - Type::getVoidTy(C), - { Type::getInt8PtrTy(C), // sync_t *s - Type::getInt8PtrTy(C) // void *val - }, - false); + FunctionType *FTy = + FunctionType::get(Type::getVoidTy(C), + { + PointerType::getUnqual(C), // sync_t *s + PointerType::getUnqual(C) // void *val + }, + false); QtSincSubmit = M.getOrInsertFunction("qt_sinc_submit", FTy, AL); return QtSincSubmit; @@ -142,12 +145,13 @@ FunctionCallee QthreadsABI::get_qt_sinc_wait() { LLVMContext &C = M.getContext(); AttributeList AL; // TODO: Set appropriate function attributes. 
- FunctionType *FTy = FunctionType::get( - Type::getVoidTy(C), - { Type::getInt8PtrTy(C), // sync_t *s - Type::getInt8PtrTy(C) // void *target - }, - false); + FunctionType *FTy = + FunctionType::get(Type::getVoidTy(C), + { + PointerType::getUnqual(C), // sync_t *s + PointerType::getUnqual(C) // void *target + }, + false); QtSincWait = M.getOrInsertFunction("qt_sinc_wait", FTy, AL); return QtSincWait; @@ -160,11 +164,12 @@ FunctionCallee QthreadsABI::get_qt_sinc_destroy() { LLVMContext &C = M.getContext(); AttributeList AL; // TODO: Set appropriate function attributes. - FunctionType *FTy = FunctionType::get( - Type::getVoidTy(C), - { Type::getInt8PtrTy(C), // sync_t *s - }, - false); + FunctionType *FTy = + FunctionType::get(Type::getVoidTy(C), + { + PointerType::getUnqual(C), // sync_t *s + }, + false); QtSincDestroy = M.getOrInsertFunction("qt_sinc_destroy", FTy, AL); return QtSincDestroy; @@ -173,8 +178,8 @@ FunctionCallee QthreadsABI::get_qt_sinc_destroy() { QthreadsABI::QthreadsABI(Module &M) : TapirTarget(M) { LLVMContext &C = M.getContext(); // Initialize any types we need for lowering. - QthreadFTy = PointerType::getUnqual( - FunctionType::get(Type::getInt64Ty(C), { Type::getInt8PtrTy(C) }, false)); + QthreadFTy = PointerType::getUnqual(FunctionType::get( + Type::getInt64Ty(C), {PointerType::getUnqual(C)}, false)); } /// Lower a call to get the grainsize of this Tapir loop. 
@@ -195,10 +200,10 @@ Value *QthreadsABI::lowerGrainsizeCall(CallInst *GrainsizeCall) { Limit->getType(), false); // Compute ceil(limit / 8 * workers) = // (limit + 8 * workers - 1) / (8 * workers) - Value *SmallLoopVal = - Builder.CreateUDiv(Builder.CreateSub(Builder.CreateAdd(Limit, WorkersX8), - ConstantInt::get(Limit->getType(), 1)), - WorkersX8); + Value *SmallLoopVal = Builder.CreateUDiv( + Builder.CreateSub(Builder.CreateAdd(Limit, WorkersX8), + ConstantInt::get(Limit->getType(), 1)), + WorkersX8); // Compute min Value *LargeLoopVal = ConstantInt::get(Limit->getType(), 2048); Value *Cmp = Builder.CreateICmpULT(LargeLoopVal, SmallLoopVal); @@ -211,22 +216,21 @@ Value *QthreadsABI::lowerGrainsizeCall(CallInst *GrainsizeCall) { Value *QthreadsABI::getOrCreateSinc(Value *SyncRegion, Function *F) { LLVMContext &C = M.getContext(); - Value* sinc; - if((sinc = SyncRegionToSinc[SyncRegion])) + Value *sinc; + if ((sinc = SyncRegionToSinc[SyncRegion])) return sinc; else { - Value* zero = ConstantInt::get(Type::getInt64Ty(C), 0); - Value* null = Constant::getNullValue(Type::getInt8PtrTy(C)); - std::vector createArgs = {zero, null, null, zero}; + Value *zero = ConstantInt::get(Type::getInt64Ty(C), 0); + Value *null = Constant::getNullValue(PointerType::getUnqual(C)); + std::vector createArgs = {zero, null, null, zero}; sinc = CallInst::Create(get_qt_sinc_create(), createArgs, "", F->getEntryBlock().getTerminator()); SyncRegionToSinc[SyncRegion] = sinc; // Make sure we destroy the sinc at all exit points to prevent memory leaks - for(BasicBlock &BB : *F) { - if(isa(BB.getTerminator())){ - CallInst::Create(get_qt_sinc_destroy(), {sinc}, "", - BB.getTerminator()); + for (BasicBlock &BB : *F) { + if (isa(BB.getTerminator())) { + CallInst::Create(get_qt_sinc_destroy(), {sinc}, "", BB.getTerminator()); } } @@ -237,9 +241,9 @@ Value *QthreadsABI::getOrCreateSinc(Value *SyncRegion, Function *F) { void QthreadsABI::lowerSync(SyncInst &SI) { IRBuilder<> builder(&SI); auto F = 
SI.getParent()->getParent(); - auto& C = M.getContext(); - auto null = Constant::getNullValue(Type::getInt8PtrTy(C)); - Value* SR = SI.getSyncRegion(); + auto &C = M.getContext(); + auto null = Constant::getNullValue(PointerType::getUnqual(C)); + Value *SR = SI.getSyncRegion(); auto sinc = getOrCreateSinc(SR, F); std::vector args = {sinc, null}; auto sincwait = get_qt_sinc_wait(); @@ -265,18 +269,18 @@ void QthreadsABI::processSubTaskCall(TaskOutlineInfo &TOI, DominatorTree &DT) { // To match the Qthreads ABI, we replace the existing call with a call to // qthreads_fork_copyargs. IRBuilder<> CallerIRBuilder(ReplCall); - Value *OutlinedFnPtr = CallerIRBuilder.CreatePointerBitCastOrAddrSpaceCast( - Outlined, QthreadFTy); + Value *OutlinedFnPtr = + CallerIRBuilder.CreatePointerBitCastOrAddrSpaceCast(Outlined, QthreadFTy); AllocaInst *CallerArgStruct = cast(ReplCall->getArgOperand(0)); Type *ArgsTy = CallerArgStruct->getAllocatedType(); - Value *ArgStructPtr = CallerIRBuilder.CreateBitCast(CallerArgStruct, - Type::getInt8PtrTy(C)); - Constant *Null = Constant::getNullValue(Type::getInt64PtrTy(C)); - ConstantInt *ArgSize = ConstantInt::get(DL.getIntPtrType(C), - DL.getTypeAllocSize(ArgsTy)); - CallInst *Call = CallerIRBuilder.CreateCall( - get_qthread_fork_copyargs(), { OutlinedFnPtr, ArgStructPtr, - ArgSize, Null }); + Value *ArgStructPtr = + CallerIRBuilder.CreateBitCast(CallerArgStruct, PointerType::getUnqual(C)); + Constant *Null = Constant::getNullValue(PointerType::getUnqual(C)); + ConstantInt *ArgSize = + ConstantInt::get(DL.getIntPtrType(C), DL.getTypeAllocSize(ArgsTy)); + CallInst *Call = + CallerIRBuilder.CreateCall(get_qthread_fork_copyargs(), + {OutlinedFnPtr, ArgStructPtr, ArgSize, Null}); Call->setDebugLoc(ReplCall->getDebugLoc()); TOI.replaceReplCall(Call); ReplCall->eraseFromParent(); @@ -316,18 +320,18 @@ bool QthreadsABI::preProcessFunction(Function &F, TaskInfo &TI, // Add an expect increment before spawning IRBuilder<> preSpawnB(detB); - Value* one = 
ConstantInt::get(Type::getInt64Ty(C), 1); - std::vector expectArgs = {sinc, one}; + Value *one = ConstantInt::get(Type::getInt64Ty(C), 1); + std::vector expectArgs = {sinc, one}; CallInst::Create(get_qt_sinc_expect(), expectArgs, "", Detach); // Add a submit to end of task body - for(Spindle *S : T->spindles()){ - for(BasicBlock *B : S->blocks()){ - if(T->isTaskExiting(B)){ + for (Spindle *S : T->spindles()) { + for (BasicBlock *B : S->blocks()) { + if (T->isTaskExiting(B)) { IRBuilder<> footerB(B->getTerminator()); - Value* SR = T->getDetach()->getSyncRegion(); + Value *SR = T->getDetach()->getSyncRegion(); auto sinc = getOrCreateSinc(SR, &F); - auto null = Constant::getNullValue(Type::getInt8PtrTy(C)); + auto null = Constant::getNullValue(PointerType::getUnqual(C)); footerB.CreateCall(get_qt_sinc_submit(), {sinc, null}); } } diff --git a/llvm/lib/Transforms/Tapir/RealmABI.cpp b/llvm/lib/Transforms/Tapir/RealmABI.cpp index 784dc63ff8c92e4..5764476495a6c8d 100644 --- a/llvm/lib/Transforms/Tapir/RealmABI.cpp +++ b/llvm/lib/Transforms/Tapir/RealmABI.cpp @@ -1,4 +1,5 @@ -//===- RealmABI.cpp - Lower Tapir into Realm runtime system calls -----------===// +//===- RealmABI.cpp - Lower Tapir into Realm runtime system calls +//-----------===// // // The LLVM Compiler Infrastructure // @@ -20,25 +21,29 @@ #include "llvm/Transforms/Tapir/Outline.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/TapirUtils.h" -#include #include +#include using namespace llvm; #define DEBUG_TYPE "realmabi" -void RealmABI::preProcessOutlinedTask(llvm::Function&, llvm::Instruction*, llvm::Instruction*, bool, BasicBlock*){} -void RealmABI::postProcessOutlinedTask(llvm::Function&, llvm::Instruction*, llvm::Instruction*, bool, BasicBlock*){} -void RealmABI::preProcessRootSpawner(llvm::Function&, BasicBlock *TFEntry){} -void RealmABI::postProcessRootSpawner(llvm::Function&, BasicBlock *TFEntry){} +void RealmABI::preProcessOutlinedTask(llvm::Function &, 
llvm::Instruction *, + llvm::Instruction *, bool, BasicBlock *) { +} +void RealmABI::postProcessOutlinedTask(llvm::Function &, llvm::Instruction *, + llvm::Instruction *, bool, + BasicBlock *) {} +void RealmABI::preProcessRootSpawner(llvm::Function &, BasicBlock *TFEntry) {} +void RealmABI::postProcessRootSpawner(llvm::Function &, BasicBlock *TFEntry) {} FunctionCallee RealmABI::get_realmGetNumProcs() { - if(RealmGetNumProcs) + if (RealmGetNumProcs) return RealmGetNumProcs; LLVMContext &C = M.getContext(); AttributeList AL; - std::vector TypeArray; + std::vector TypeArray; // TODO: Set appropriate function attributes. FunctionType *FTy = FunctionType::get(Type::getInt64Ty(C), {}, false); @@ -46,106 +51,98 @@ FunctionCallee RealmABI::get_realmGetNumProcs() { return RealmGetNumProcs; } -static StructType* getBarrierType(LLVMContext &C){ +static StructType *getBarrierType(LLVMContext &C) { auto eventTy = StructType::get(Type::getInt64Ty(C)); return StructType::get(eventTy, Type::getInt64Ty(C)); } -FunctionCallee RealmABI::get_createRealmBarrier(){ - if(CreateBar) +FunctionCallee RealmABI::get_createRealmBarrier() { + if (CreateBar) return CreateBar; LLVMContext &C = M.getContext(); AttributeList AL; - FunctionType *FTy = FunctionType::get( - getBarrierType(C), {}, false); + FunctionType *FTy = FunctionType::get(getBarrierType(C), {}, false); CreateBar = M.getOrInsertFunction("createRealmBarrier", FTy, AL); return CreateBar; } -FunctionCallee RealmABI::get_destroyRealmBarrier(){ - if(DestroyBar) +FunctionCallee RealmABI::get_destroyRealmBarrier() { + if (DestroyBar) return DestroyBar; LLVMContext &C = M.getContext(); AttributeList AL; FunctionType *FTy = FunctionType::get( - Type::getInt8Ty(C), {PointerType::getUnqual(getBarrierType(C))}, false); + Type::getInt8Ty(C), {PointerType::getUnqual(getBarrierType(C))}, false); DestroyBar = M.getOrInsertFunction("destroyRealmBarrier", FTy, AL); return DestroyBar; } FunctionCallee RealmABI::get_realmSpawn() { - if(RealmSpawn) + 
if (RealmSpawn) return RealmSpawn; LLVMContext &C = M.getContext(); const DataLayout &DL = M.getDataLayout(); AttributeList AL; - Type* TypeArray[] = { - PointerType::getUnqual(getBarrierType(C)), - RealmFTy, // RealmFTy fxn - Type::getInt8PtrTy(C), // void *args - DL.getIntPtrType(C)}; // size_t argsize + Type *TypeArray[] = {PointerType::getUnqual(getBarrierType(C)), + RealmFTy, // RealmFTy fxn + PointerType::get(C, 0), // void *args + DL.getIntPtrType(C)}; // size_t argsize // TODO: Set appropriate function attributes. - FunctionType *FTy = FunctionType::get( - Type::getInt32Ty(C), // returns int - TypeArray, - false); + FunctionType *FTy = FunctionType::get(Type::getInt32Ty(C), // returns int + TypeArray, false); RealmSpawn = M.getOrInsertFunction("realmSpawn", FTy, AL); return RealmSpawn; } FunctionCallee RealmABI::get_realmSync() { - if(RealmSync) + if (RealmSync) return RealmSync; LLVMContext &C = M.getContext(); AttributeList AL; - Type* TypeArray[] = { PointerType::getUnqual(getBarrierType(C)) }; + Type *TypeArray[] = {PointerType::getUnqual(getBarrierType(C))}; // TODO: Set appropriate function attributes. - FunctionType *FTy = FunctionType::get(Type::getInt8Ty(C), - TypeArray, - false); + FunctionType *FTy = FunctionType::get(Type::getInt8Ty(C), TypeArray, false); RealmSync = M.getOrInsertFunction("realmSync", FTy, AL); return RealmSync; } FunctionCallee RealmABI::get_realmInitRuntime() { - if(RealmInitRuntime) + if (RealmInitRuntime) return RealmInitRuntime; LLVMContext &C = M.getContext(); AttributeList AL; - Type* TypeArray[] = { Type::getInt32Ty(C), // int argc - PointerType::getUnqual(Type::getInt8PtrTy(C))}; // char **argv + Type *TypeArray[] = { + Type::getInt32Ty(C), // int argc + PointerType::getUnqual(PointerType::getUnqual(C))}; // char **argv // TODO: Set appropriate function attributes. 
- FunctionType *FTy = FunctionType::get( - Type::getInt32Ty(C), // returns int - TypeArray, - false); + FunctionType *FTy = FunctionType::get(Type::getInt32Ty(C), // returns int + TypeArray, false); RealmInitRuntime = M.getOrInsertFunction("realmInitRuntime", FTy, AL); return RealmInitRuntime; } FunctionCallee RealmABI::get_realmFinalize() { - if(RealmFinalize) + if (RealmFinalize) return RealmFinalize; LLVMContext &C = M.getContext(); AttributeList AL; - std::vector TypeArray; + std::vector TypeArray; // TODO: Set appropriate function attributes. - FunctionType *FTy = FunctionType::get(Type::getInt8PtrTy(C), - TypeArray, - false); + FunctionType *FTy = + FunctionType::get(PointerType::getUnqual(C), TypeArray, false); RealmFinalize = M.getOrInsertFunction("realmFinalize", FTy, AL); return RealmFinalize; } @@ -156,12 +153,12 @@ RealmABI::RealmABI(Module &M) : TapirTarget(M) { LLVMContext &C = M.getContext(); // Initialize any types we need for lowering. // NOTE: RealmFTy is NOT the same as a Realm::Processor::TaskFuncPtr - RealmFTy = PointerType::getUnqual( - FunctionType::get(Type::getInt64Ty(C), { Type::getInt8PtrTy(C) }, false)); + RealmFTy = PointerType::getUnqual(FunctionType::get( + Type::getInt64Ty(C), {PointerType::getUnqual(C)}, false)); } RealmABI::~RealmABI() { - //call something that deletes the context struct + // call something that deletes the context struct } /// Lower a call to get the grainsize of this Tapir loop. 
@@ -173,10 +170,10 @@ RealmABI::~RealmABI() { Value *RealmABI::lowerGrainsizeCall(CallInst *GrainsizeCall) { Value *Limit = GrainsizeCall->getArgOperand(0); IRBuilder<> Builder(GrainsizeCall); - Value *Workers = Builder.CreateIntCast(Builder.CreateCall(get_realmGetNumProcs()), - Limit->getType(), false); + Value *Workers = Builder.CreateIntCast( + Builder.CreateCall(get_realmGetNumProcs()), Limit->getType(), false); Value *Ceiling = Builder.CreateSub(Builder.CreateAdd(Limit, Workers), - ConstantInt::get(Workers->getType(), 1)); + ConstantInt::get(Workers->getType(), 1)); Value *Grainsize = Builder.CreateUDiv(Ceiling, Workers); // Replace uses of grainsize intrinsic call with this grainsize value. GrainsizeCall->replaceAllUsesWith(Grainsize); @@ -185,20 +182,21 @@ Value *RealmABI::lowerGrainsizeCall(CallInst *GrainsizeCall) { Value *RealmABI::getOrCreateBarrier(Value *SyncRegion, Function *F) { LLVMContext &C = M.getContext(); - Value* barrier; - if((barrier = SyncRegionToBarrier[SyncRegion])) + Value *barrier; + if ((barrier = SyncRegionToBarrier[SyncRegion])) return barrier; else { IRBuilder<> builder(F->getEntryBlock().getFirstNonPHIOrDbg()); - AllocaInst* ab = builder.CreateAlloca(getBarrierType(C)); + AllocaInst *ab = builder.CreateAlloca(getBarrierType(C)); barrier = ab; Value *barrierVal = builder.CreateCall(get_createRealmBarrier(), {}, ""); builder.CreateAlignedStore(barrierVal, barrier, MaybeAlign(ab->getAlign())); SyncRegionToBarrier[SyncRegion] = barrier; - // Make sure we destroy the barrier at all exit points to prevent memory leaks - for(BasicBlock &BB : *F) { - if(isa(BB.getTerminator())){ + // Make sure we destroy the barrier at all exit points to prevent memory + // leaks + for (BasicBlock &BB : *F) { + if (isa(BB.getTerminator())) { CallInst::Create(get_destroyRealmBarrier(), {barrier}, "", BB.getTerminator()); } @@ -211,7 +209,7 @@ Value *RealmABI::getOrCreateBarrier(Value *SyncRegion, Function *F) { void RealmABI::lowerSync(SyncInst &SI) { 
IRBuilder<> builder(&SI); auto F = SI.getParent()->getParent(); - Value* SR = SI.getSyncRegion(); + Value *SR = SI.getSyncRegion(); auto barrier = getOrCreateBarrier(SR, F); std::vector args = {barrier}; builder.CreateCall(get_realmSync(), args); @@ -226,8 +224,8 @@ void RealmABI::processSubTaskCall(TaskOutlineInfo &TOI, DominatorTree &DT) { Instruction *ReplStart = TOI.ReplStart; CallBase *ReplCall = cast(TOI.ReplCall); BasicBlock *CallBlock = ReplStart->getParent(); - Value* SR = TOI.SR; - if(!SR){ + Value *SR = TOI.SR; + if (!SR) { // If there's no syncregion, we leave it as a function call return; } @@ -243,16 +241,16 @@ void RealmABI::processSubTaskCall(TaskOutlineInfo &TOI, DominatorTree &DT) { // To match the kitsune-rt Realm wrapper, we replace the existing call with // a call to realmSpawn IRBuilder<> CallerIRBuilder(ReplCall); - Value *OutlinedFnPtr = CallerIRBuilder.CreatePointerBitCastOrAddrSpaceCast( - Outlined, RealmFTy); + Value *OutlinedFnPtr = + CallerIRBuilder.CreatePointerBitCastOrAddrSpaceCast(Outlined, RealmFTy); AllocaInst *CallerArgStruct = cast(ReplCall->getArgOperand(0)); Type *ArgsTy = CallerArgStruct->getAllocatedType(); - Value *ArgStructPtr = CallerIRBuilder.CreateBitCast(CallerArgStruct, - Type::getInt8PtrTy(C)); - ConstantInt *ArgSize = ConstantInt::get(DL.getIntPtrType(C), - DL.getTypeAllocSize(ArgsTy)); + Value *ArgStructPtr = + CallerIRBuilder.CreateBitCast(CallerArgStruct, PointerType::getUnqual(C)); + ConstantInt *ArgSize = + ConstantInt::get(DL.getIntPtrType(C), DL.getTypeAllocSize(ArgsTy)); CallInst *Call = CallerIRBuilder.CreateCall( - get_realmSpawn(), { barrier, OutlinedFnPtr, ArgStructPtr, ArgSize}); + get_realmSpawn(), {barrier, OutlinedFnPtr, ArgStructPtr, ArgSize}); Call->setDebugLoc(ReplCall->getDebugLoc()); TOI.replaceReplCall(Call); ReplCall->eraseFromParent(); @@ -282,12 +280,12 @@ void RealmABI::postProcessFunction(Function &F, bool OutliningTapirLoops) { LLVMContext &C = M->getContext(); IRBuilder<> 
builder(F.getEntryBlock().getFirstNonPHIOrDbg()); - //default values of 0 and nullptr - //TODO: handle the case where main actually has an argc and argv - Value* zero = ConstantInt::get(Type::getInt32Ty(C), 0); - Value* null = Constant::getNullValue(PointerType::getUnqual(Type::getInt8PtrTy(C))); + // default values of 0 and nullptr + // TODO: handle the case where main actually has an argc and argv + Value *zero = ConstantInt::get(Type::getInt32Ty(C), 0); + Value *null = Constant::getNullValue(PointerType::getUnqual(C)); - Value* initArgs[2]; + Value *initArgs[2]; initArgs[0] = zero; initArgs[1] = null; diff --git a/llvm/lib/Transforms/Tapir/TapirGPUUtils.cpp b/llvm/lib/Transforms/Tapir/TapirGPUUtils.cpp index b0e3b721c93b42d..7a0a7508e10718e 100644 --- a/llvm/lib/Transforms/Tapir/TapirGPUUtils.cpp +++ b/llvm/lib/Transforms/Tapir/TapirGPUUtils.cpp @@ -17,6 +17,7 @@ // changes here as well. // //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Tapir/TapirGPUUtils.h" #include "llvm/ADT/SmallVector.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" @@ -26,7 +27,6 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/Module.h" #include "llvm/Support/SmallVectorMemoryBuffer.h" -#include "llvm/Transforms/Tapir/TapirGPUUtils.h" #include using namespace llvm; @@ -76,7 +76,7 @@ void appendToGlobalCtors(Module &M, Constant *C, int Priority, Constant *Data) { // the new ctor to the list. SmallVector CurrentCtors; StructType *EltTy = StructType::get( - IRB.getInt32Ty(), PointerType::getUnqual(FnTy), IRB.getInt8PtrTy()); + IRB.getInt32Ty(), PointerType::getUnqual(FnTy), IRB.getPtrTy()); if (GlobalVariable *GVCtor = M.getNamedGlobal("llvm.global_ctors")) { if (Constant *Init = GVCtor->getInitializer()) { unsigned N = Init->getNumOperands(); @@ -89,13 +89,13 @@ void appendToGlobalCtors(Module &M, Constant *C, int Priority, Constant *Data) { // Build a 3 field global_ctor entry. // We don't take a comdat key. 
- Constant* CSVals[3]; + Constant *CSVals[3]; CSVals[0] = IRB.getInt32(Priority); CSVals[1] = C; - CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getInt8PtrTy()) - : Constant::getNullValue(IRB.getInt8PtrTy()); + CSVals[2] = Data ? ConstantExpr::getPointerCast(Data, IRB.getPtrTy()) + : Constant::getNullValue(IRB.getPtrTy()); Constant *RuntimeCtorInit = ConstantStruct::get( - EltTy, ArrayRef(CSVals, EltTy->getNumElements())); + EltTy, ArrayRef(CSVals, EltTy->getNumElements())); CurrentCtors.push_back(RuntimeCtorInit); diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index ef43c373354bdb5..639ff867a557014 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -3125,9 +3125,6 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, } } else if (InlinedFunctionInfo.ContainsDynamicAllocas) { Module *M = Caller->getParent(); - // Get the two intrinsics we care about. - Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); - Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore); // Insert the llvm.stacksave. CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin()) diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index e3594584d62f628..c305068a64d6a9b 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1582,28 +1582,13 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB, SmallVector SuccIterPairs; for (auto *Succ : successors(BB)) { BasicBlock::iterator SuccItr = Succ->begin(); + while (isTaskFrameCreate(&*SuccItr)) + ++SuccItr; if (isa(*SuccItr)) return false; SuccIterPairs.push_back(SuccIterPair(SuccItr, 0)); } - // Skip taskframe.create calls. 
- while (isTaskFrameCreate(I1)) - I1 = &*BB1_Itr++; - while (isTaskFrameCreate(I2)) - I2 = &*BB2_Itr++; - if (isa(I1)) - return false; - - BasicBlock *BIParent = BI->getParent(); - - bool Changed = false; - - auto _ = make_scope_exit([&]() { - if (Changed) - ++NumHoistCommonCode; - }); - // Check if only hoisting terminators is allowed. This does not add new // instructions to the hoist location. if (EqTermsOnly) { @@ -1650,10 +1635,14 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB, if (!AllDbgInstsAreIdentical) { while (isa(I1)) I1 = &*++BB1ItrPair.first; + while (isTaskFrameCreate(I1)) + I1 = &*++BB1ItrPair.first; for (auto &SuccIter : OtherSuccIterRange) { Instruction *I2 = &*SuccIter; while (isa(I2)) I2 = &*++SuccIter; + while (isTaskFrameCreate(I2)) + I2 = &*++SuccIter; } } @@ -1743,24 +1732,6 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB, } ++NumSkipped; } - - // KITSUNE FIXME: I have no idea if this is correct. - I1 = &*BB1_Itr++; - I2 = &*BB2_Itr++; - // Skip debug info if it is not identical. - DbgInfoIntrinsic *DBI1 = dyn_cast(I1); - DbgInfoIntrinsic *DBI2 = dyn_cast(I2); - if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { - while (isa(I1)) - I1 = &*BB1_Itr++; - while (isa(I2)) - I2 = &*BB2_Itr++; - } - // Skip taskframe.create calls. 
- while (isTaskFrameCreate(I1)) - I1 = &*BB1_Itr++; - while (isTaskFrameCreate(I2)) - I2 = &*BB2_Itr++; } } diff --git a/llvm/lib/Transforms/Utils/TapirUtils.cpp b/llvm/lib/Transforms/Utils/TapirUtils.cpp index 9ff3bf8e069ca23..5bb299aa371889f 100644 --- a/llvm/lib/Transforms/Utils/TapirUtils.cpp +++ b/llvm/lib/Transforms/Utils/TapirUtils.cpp @@ -236,7 +236,7 @@ static bool isUsedByLifetimeMarker(Value *V) { static bool hasLifetimeMarkers(AllocaInst *AI) { Type *Ty = AI->getType(); Type *Int8PtrTy = - Type::getInt8PtrTy(Ty->getContext(), Ty->getPointerAddressSpace()); + PointerType::get(Ty->getContext(), Ty->getPointerAddressSpace()); if (Ty == Int8PtrTy) return isUsedByLifetimeMarker(AI); @@ -861,6 +861,7 @@ void llvm::SerializeDetach(DetachInst *DI, BasicBlock *ParentEntry, BasicBlock *Unwind = DI->getUnwindDest(); Value *SyncRegion = DI->getSyncRegion(); Module *M = Spawner->getModule(); + LLVMContext& Ctx = M->getContext(); // If the spawned task has a taskframe, serialize the taskframe. SmallVector ToErase; @@ -892,9 +893,10 @@ void llvm::SerializeDetach(DetachInst *DI, BasicBlock *ParentEntry, // code with llvm.stacksave/llvm.stackrestore intrinsics. if (ContainsDynamicAllocas) { // Get the two intrinsics we care about. - Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); - Function *StackRestore = - Intrinsic::getDeclaration(M, Intrinsic::stackrestore); + Function *StackSave = Intrinsic::getDeclaration( + M, Intrinsic::stacksave, {PointerType::getUnqual(Ctx)}); + Function *StackRestore = Intrinsic::getDeclaration( + M, Intrinsic::stackrestore, {PointerType::getUnqual(Ctx)}); // Insert the llvm.stacksave. CallInst *SavedPtr = IRBuilder<>(TaskEntry, TaskEntry->begin()) @@ -2203,7 +2205,7 @@ void llvm::promoteCallsInTasksToInvokes(Function &F, const Twine Name) { // Create a cleanup block. 
LLVMContext &C = F.getContext(); BasicBlock *CleanupBB = BasicBlock::Create(C, Name, &F); - Type *ExnTy = StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C)); + Type *ExnTy = StructType::get(PointerType::getUnqual(C), Type::getInt32Ty(C)); LandingPadInst *LPad = LandingPadInst::Create(ExnTy, 1, Name + ".lpad", CleanupBB); @@ -2305,7 +2307,7 @@ void llvm::TapirLoopHints::getHintsFromMetadata() { /// Checks string hint with one operand and set value if valid. void llvm::TapirLoopHints::setHint(StringRef Name, Metadata *Arg) { - if (!Name.startswith(Prefix())) + if (!Name.starts_with(Prefix())) return; Name = Name.substr(Prefix().size(), StringRef::npos); @@ -2345,7 +2347,7 @@ bool llvm::TapirLoopHints::matchesHintMetadataName( return false; for (auto H : HintTypes) - if (Name->getString().endswith(H.Name)) + if (Name->getString().ends_with(H.Name)) return true; return false; } @@ -2494,7 +2496,7 @@ MDNode *llvm::CopyNonTapirLoopMetadata(MDNode *LoopID, MDNode *OrigLoopID) { return nullptr; StringRef AttrName = cast(NameMD)->getString(); // Skip tapir.loop metadata - if (!AttrName.startswith("tapir.loop")) + if (!AttrName.starts_with("tapir.loop")) MDs.push_back(Op); } diff --git a/llvm/lib/Transforms/Utils/TaskSimplify.cpp b/llvm/lib/Transforms/Utils/TaskSimplify.cpp index 795780acad63a96..104bba9c0d50ba6 100644 --- a/llvm/lib/Transforms/Utils/TaskSimplify.cpp +++ b/llvm/lib/Transforms/Utils/TaskSimplify.cpp @@ -428,9 +428,11 @@ bool llvm::simplifyTaskFrames(TaskInfo &TI, DominatorTree &DT) { for (Instruction *TFCreate : TaskFramesToConvert) { LLVM_DEBUG(dbgs() << "Converting taskframe " << *TFCreate << "\n"); Module *M = TFCreate->getModule(); - Function *StackSave = Intrinsic::getDeclaration(M, Intrinsic::stacksave); + Type* PtrType = PointerType::getUnqual(M->getContext()); + Function *StackSave = + Intrinsic::getDeclaration(M, Intrinsic::stacksave, {PtrType}); Function *StackRestore = - Intrinsic::getDeclaration(M, Intrinsic::stackrestore); + 
Intrinsic::getDeclaration(M, Intrinsic::stackrestore, {PtrType}); // Save the stack at the point of the taskframe.create. CallInst *SavedPtr = diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 005e6f327d0a198..8c48d85a4346f47 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -552,15 +552,3 @@ if(runtimes) set_property(GLOBAL APPEND PROPERTY LLVM_ALL_ADDITIONAL_TEST_TARGETS runtimes ${RUNTIMES_TEST_DEPENDS}) endif() endif() - -# KITSUNE FIXME: Clean this up once all the tapir targets are built and run -# correctly. -# get_cmake_property(vars VARIABLES) -# list(FILTER vars INCLUDE REGEX "^KIT") -# list(SORT vars) -# foreach (var ${vars}) -# message(STATUS "${var}=${${var}}") -# endforeach() -# message(STATUS "bindir: ${LLVM_RUNTIME_OUTPUT_INTDIR}") -# message(FATAL_ERROR "Done") -# add_subdirectory(${CMAKE_SOURCE_DIR}/../kitrt kitrt) diff --git a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll index 3bbbebe377d9eb3..6392a67b54b5eb2 100644 --- a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll +++ b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll @@ -197,18 +197,18 @@ define i1 @test_conditional2(i32 %a, i32 %b, ptr %c) { ; CHECK-NEXT: mov w22, #2 ; =0x2 ; CHECK-NEXT: LBB3_6: ; %for.cond ; CHECK-NEXT: ; =>This Loop Header: Depth=1 -; CHECK-NEXT: ; Child Loop BB3_7 Depth 2 +; CHECK-NEXT: ; Child Loop BB3_7 Depth 2 ; CHECK-NEXT: cbz w22, LBB3_9 ; CHECK-NEXT: LBB3_7: ; %for.body -; CHECK-NEXT: ; Parent Loop BB3_6 Depth=1 -; CHECK-NEXT: ; => This Inner Loop Header: Depth=2 +; CHECK-NEXT: ; Parent Loop BB3_6 Depth=1 +; CHECK-NEXT: ; => This Inner Loop Header: Depth=2 ; CHECK-NEXT: sub w22, w22, #1 ; CHECK-NEXT: orr w9, w21, w20 ; CHECK-NEXT: ldr w10, [x19, w22, sxtw #2] ; CHECK-NEXT: cmp w9, w10 ; CHECK-NEXT: b.eq LBB3_6 ; CHECK-NEXT: ; %bb.8: ; %if.then -; CHECK-NEXT: ; in Loop: Header=BB3_6 Depth=1 +; CHECK-NEXT: ; in Loop: Header=BB3_7 Depth=2 ; CHECK-NEXT: str w9, [x19, w22, 
sxtw #2] ; CHECK-NEXT: bl _foo ; CHECK-NEXT: mov w8, wzr @@ -241,10 +241,10 @@ define i1 @test_conditional2(i32 %a, i32 %b, ptr %c) { ; OUTLINE-ATOMICS-NEXT: cset w8, eq ; OUTLINE-ATOMICS-NEXT: LBB3_1: ; %for.cond ; OUTLINE-ATOMICS-NEXT: ; =>This Loop Header: Depth=1 -; OUTLINE-ATOMICS-NEXT: ; Child Loop BB3_2 Depth 2 +; OUTLINE-ATOMICS-NEXT: ; Child Loop BB3_2 Depth 2 ; OUTLINE-ATOMICS-NEXT: cbz w22, LBB3_4 ; OUTLINE-ATOMICS-NEXT: LBB3_2: ; %for.body -; OUTLINE-ATOMICS-NEXT: ; Parent Loop BB3_1 Depth=1 +; OUTLINE-ATOMICS-NEXT: ; Parent Loop BB3_1 Depth=1 ; OUTLINE-ATOMICS-NEXT: ; => This Inner Loop Header: Depth=2 ; OUTLINE-ATOMICS-NEXT: sub w22, w22, #1 ; OUTLINE-ATOMICS-NEXT: orr w9, w21, w20 @@ -252,7 +252,7 @@ define i1 @test_conditional2(i32 %a, i32 %b, ptr %c) { ; OUTLINE-ATOMICS-NEXT: cmp w9, w10 ; OUTLINE-ATOMICS-NEXT: b.eq LBB3_1 ; OUTLINE-ATOMICS-NEXT: ; %bb.3: ; %if.then -; OUTLINE-ATOMICS-NEXT: ; in Loop: Header=BB3_1 Depth=1 +; OUTLINE-ATOMICS-NEXT: ; in Loop: Header=BB3_2 Depth=2 ; OUTLINE-ATOMICS-NEXT: str w9, [x19, w22, sxtw #2] ; OUTLINE-ATOMICS-NEXT: bl _foo ; OUTLINE-ATOMICS-NEXT: mov w8, wzr diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index 7957bae8515af2c..63d2e127cb70d6f 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -200,8 +200,6 @@ ; GCN-O1-NEXT: Expand Atomic instructions ; GCN-O1-NEXT: AMDGPU Promote Alloca ; GCN-O1-NEXT: Dominator Tree Construction -; GCN-O1-NEXT: Tapir Task Information -; GCN-O1-NEXT: SROA ; GCN-O1-NEXT: Cycle Info Analysis ; GCN-O1-NEXT: Uniformity Analysis ; GCN-O1-NEXT: AMDGPU IR optimizations @@ -479,8 +477,6 @@ ; GCN-O1-OPTS-NEXT: Expand Atomic instructions ; GCN-O1-OPTS-NEXT: AMDGPU Promote Alloca ; GCN-O1-OPTS-NEXT: Dominator Tree Construction -; GCN-O1-OPTS-NEXT: Tapir Task Information -; GCN-O1-OPTS-NEXT: SROA ; GCN-O1-OPTS-NEXT: Natural Loop Information ; GCN-O1-OPTS-NEXT: Canonicalize natural loops ; 
GCN-O1-OPTS-NEXT: Lazy Branch Probability Analysis @@ -788,8 +784,6 @@ ; GCN-O2-NEXT: Expand Atomic instructions ; GCN-O2-NEXT: AMDGPU Promote Alloca ; GCN-O2-NEXT: Dominator Tree Construction -; GCN-O2-NEXT: Tapir Task Information -; GCN-O2-NEXT: SROA ; GCN-O2-NEXT: Natural Loop Information ; GCN-O2-NEXT: Split GEPs to a variadic base and a constant offset for better CSE ; GCN-O2-NEXT: Scalar Evolution Analysis @@ -801,6 +795,7 @@ ; GCN-O2-NEXT: Cycle Info Analysis ; GCN-O2-NEXT: Uniformity Analysis ; GCN-O2-NEXT: AMDGPU IR optimizations +; GCN-O2-NEXT: Tapir Task Information ; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O2-NEXT: Function Alias Analysis Results ; GCN-O2-NEXT: Memory SSA @@ -1101,8 +1096,6 @@ ; GCN-O3-NEXT: Expand Atomic instructions ; GCN-O3-NEXT: AMDGPU Promote Alloca ; GCN-O3-NEXT: Dominator Tree Construction -; GCN-O3-NEXT: Tapir Task Information -; GCN-O3-NEXT: SROA ; GCN-O3-NEXT: Natural Loop Information ; GCN-O3-NEXT: Split GEPs to a variadic base and a constant offset for better CSE ; GCN-O3-NEXT: Scalar Evolution Analysis @@ -1120,6 +1113,7 @@ ; GCN-O3-NEXT: Cycle Info Analysis ; GCN-O3-NEXT: Uniformity Analysis ; GCN-O3-NEXT: AMDGPU IR optimizations +; GCN-O3-NEXT: Tapir Task Information ; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl) ; GCN-O3-NEXT: Function Alias Analysis Results ; GCN-O3-NEXT: Memory SSA diff --git a/llvm/test/CodeGen/X86/sse1.ll b/llvm/test/CodeGen/X86/sse1.ll index aa0da89d0cd72e7..d344b37c586a673 100644 --- a/llvm/test/CodeGen/X86/sse1.ll +++ b/llvm/test/CodeGen/X86/sse1.ll @@ -93,12 +93,12 @@ define <4 x float> @vselect(ptr%p, <4 x i32> %q) { ; X64-NEXT: .LBB1_4: ; X64-NEXT: movss {{.*#+}} xmm2 = [3.0E+0,0.0E+0,0.0E+0,0.0E+0] ; X64-NEXT: testl %r8d, %r8d -; X64-NEXT: jne .LBB1_8 -; X64-NEXT: .LBB1_7: -; X64-NEXT: movss {{.*#+}} xmm3 = [4.0E+0,0.0E+0,0.0E+0,0.0E+0] +; X64-NEXT: je .LBB1_7 +; X64-NEXT: .LBB1_8: # %entry +; X64-NEXT: xorps %xmm3, %xmm3 ; X64-NEXT: unpcklps {{.*#+}} xmm2 = 
xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; X64-NEXT: testl %esi, %esi -; X64-NEXT: je .LBB1_10 +; X64-NEXT: je .LBB1_10 ; X64-NEXT: jmp .LBB1_11 ; X64-NEXT: .LBB1_1: ; X64-NEXT: movss {{.*#+}} xmm1 = [2.0E+0,0.0E+0,0.0E+0,0.0E+0] @@ -109,7 +109,7 @@ define <4 x float> @vselect(ptr%p, <4 x i32> %q) { ; X64-NEXT: testl %r8d, %r8d ; X64-NEXT: jne .LBB1_8 ; X64-NEXT: .LBB1_7: -; X64-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; X64-NEXT: movss {{.*#+}} xmm3 = [4.0E+0,0.0E+0,0.0E+0,0.0E+0] ; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; X64-NEXT: testl %esi, %esi ; X64-NEXT: jne .LBB1_11 diff --git a/llvm/test/Other/new-pm-defaults.ll b/llvm/test/Other/new-pm-defaults.ll index 1cbfcb79d94e8ee..a2b61aa50eb3dd1 100644 --- a/llvm/test/Other/new-pm-defaults.ll +++ b/llvm/test/Other/new-pm-defaults.ll @@ -122,9 +122,9 @@ ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-EP-PEEPHOLE-NEXT: Running pass: NoOpFunctionPass ; CHECK-O-NEXT: Running pass: SimplifyCFGPass +; CHECK-O-NEXT: Running pass: TaskSimplifyPass ; CHECK-O-NEXT: Running pass: AlwaysInlinerPass ; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis -; CHECK-O-NEXT: Running pass: TaskSimplifyPass ; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass ; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA diff --git a/llvm/test/Other/new-pm-lto-defaults.ll b/llvm/test/Other/new-pm-lto-defaults.ll index 7cce66c1102755d..d5628bc64b4525d 100644 --- a/llvm/test/Other/new-pm-lto-defaults.ll +++ b/llvm/test/Other/new-pm-lto-defaults.ll @@ -130,8 +130,8 @@ ; CHECK-O3-NEXT: Running pass: SLPVectorizerPass on foo ; CHECK-OS-NEXT: Running pass: SLPVectorizerPass on foo ; CHECK-O23SZ-NEXT: Running pass: VectorCombinePass on foo -; CHECK-O23SZ-NEXT: Running pass: InferAlignmentPass on foo ; CHECK-O23SZ-NEXT: Running pass: EarlyCSEPass on foo +; CHECK-O23SZ-NEXT: Running pass: InferAlignmentPass on foo ; 
CHECK-O23SZ-NEXT: Running pass: InstCombinePass on foo ; CHECK-O23SZ-NEXT: Running pass: LoopSimplifyPass ; CHECK-O23SZ-NEXT: Running pass: LCSSAPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll index 969dcf7aa6ebca7..c15d451c6c18b67 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-defaults.ll @@ -62,6 +62,7 @@ ; CHECK-O-NEXT: Running analysis: TypeBasedAA ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: SimplifyCFGPass +; CHECK-O-NEXT: Running pass: TaskSimplifyPass ; CHECK-O-NEXT: Running pass: AlwaysInlinerPass ; CHECK-PRELINK-O-NEXT: Running analysis: ProfileSummaryAnalysis ; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass diff --git a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll index b8384f0b9d60159..dbc58b9cbc7a56d 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-pgo-defaults.ll @@ -51,8 +51,8 @@ ; CHECK-O-NEXT: Running analysis: LoopAnalysis on foo ; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis on foo ; CHECK-O-NEXT: Running pass: SimplifyCFGPass -; CHECK-O-NEXT: Running pass: AlwaysInlinerPass ; CHECK-O-NEXT: Running pass: TaskSimplifyPass +; CHECK-O-NEXT: Running pass: AlwaysInlinerPass ; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass ; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA diff --git a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll index 8f946689b21a06f..f8aa6d86657a560 100644 --- a/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-postlink-samplepgo-defaults.ll @@ -59,8 +59,8 @@ ; CHECK-O-NEXT: Running analysis: 
LoopAnalysis on foo ; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis on foo ; CHECK-O-NEXT: Running pass: SimplifyCFGPass on foo -; CHECK-O-NEXT: Running pass: AlwaysInlinerPass ; CHECK-O-NEXT: Running pass: TaskSimplifyPass on foo +; CHECK-O-NEXT: Running pass: AlwaysInlinerPass ; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass ; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA diff --git a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll index e4cd8964df70cdb..caae1be226cb795 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-defaults.ll @@ -93,9 +93,9 @@ ; CHECK-O-NEXT: Running analysis: TypeBasedAA ; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy ; CHECK-O-NEXT: Running pass: SimplifyCFGPass +; CHECK-O-NEXT: Running pass: TaskSimplifyPass ; CHECK-O-NEXT: Running pass: AlwaysInlinerPass ; CHECK-O-NEXT: Running analysis: ProfileSummaryAnalysis -; CHECK-O-NEXT: Running pass: TaskSimplifyPass ; CHECK-O-NEXT: Running pass: ModuleInlinerWrapperPass ; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA diff --git a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll index 9e97a4cc20c7759..b058863ea886e57 100644 --- a/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll +++ b/llvm/test/Other/new-pm-thinlto-prelink-samplepgo-defaults.ll @@ -64,8 +64,8 @@ ; CHECK-O-NEXT: Running analysis: LoopAnalysis on foo ; CHECK-O-NEXT: Running analysis: PostDominatorTreeAnalysis on foo ; CHECK-O-NEXT: Running pass: SimplifyCFGPass on foo -; CHECK-O-NEXT: Running pass: AlwaysInlinerPass ; CHECK-O-NEXT: Running pass: TaskSimplifyPass on foo +; CHECK-O-NEXT: Running pass: AlwaysInlinerPass ; CHECK-O-NEXT: Running pass: 
ModuleInlinerWrapperPass ; CHECK-O-NEXT: Running analysis: InlineAdvisorAnalysis ; CHECK-O-NEXT: Running pass: RequireAnalysisPass<{{.*}}GlobalsAA diff --git a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll index 53fa915842699eb..b66c42ff976ecf3 100644 --- a/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll +++ b/llvm/test/Transforms/MergeFunc/call-and-invoke-with-ranges.ll @@ -74,6 +74,9 @@ lpad: resume { ptr, i32 } zeroinitializer } +; CHECK-LABEL: @invoke_with_same_range() +; CHECK: tail call i8 @invoke_with_range() + define i8 @call_with_same_range() { ; CHECK-LABEL: @call_with_same_range ; CHECK: tail call i8 @call_with_range @@ -82,9 +85,6 @@ define i8 @call_with_same_range() { ret i8 %out } -; CHECK-LABEL: @invoke_with_same_range() -; CHECK: tail call i8 @invoke_with_range() - declare i8 @dummy(); declare i32 @__gxx_personality_v0(...) diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll index cf4dfc0b76ac98e..ac41592e8eab1b2 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/hoisting-sinking-required-for-vectorization.ll @@ -168,9 +168,8 @@ define void @loop2(ptr %A, ptr %B, ptr %C, float %x) { ; CHECK-NEXT: [[PREDPHI:%.*]] = fadd <4 x float> [[TMP6]], [[TMP10]] ; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP3]], <4 x float> , <4 x float> [[WIDE_LOAD11]] ; CHECK-NEXT: [[PREDPHI12:%.*]] = fadd <4 x float> [[TMP7]], [[TMP11]] -; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP8]], i64 4 ; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP8]], align 4, !alias.scope [[META9]], !noalias [[META11]] -; CHECK-NEXT: store <4 x float> [[PREDPHI12]], ptr [[TMP12]], align 4, !alias.scope [[META9]], !noalias 
[[META11]] +; CHECK-NEXT: store <4 x float> [[PREDPHI12]], ptr [[TMP9]], align 4, !alias.scope [[META9]], !noalias [[META11]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 ; CHECK-NEXT: br i1 [[TMP13]], label [[EXIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] diff --git a/llvm/test/Transforms/Tapir/CilkSanitizer/cilksan-aarch64-neon.ll b/llvm/test/Transforms/Tapir/CilkSanitizer/cilksan-aarch64-neon.ll index 0ae8087ef069d45..e71d567c9829b2d 100644 --- a/llvm/test/Transforms/Tapir/CilkSanitizer/cilksan-aarch64-neon.ll +++ b/llvm/test/Transforms/Tapir/CilkSanitizer/cilksan-aarch64-neon.ll @@ -140,16 +140,16 @@ for.body14.i.i: ; preds = %for.body14.i.i, %fo br i1 %cmp13.not.i.i, label %for.end44.i.loopexit.i, label %for.body14.i.i, !llvm.loop !18 ; CHECK: %vld1xN.i.i.i.i.i = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0(ptr %[[ARG:.+]]) -; CHECK-NEXT: %[[STACKSAVE:.+]] = call ptr @llvm.stacksave() +; CHECK-NEXT: %[[STACKSAVE:.+]] = call ptr @llvm.stacksave ; CHECK-NEXT: %[[SPILL:.+]] = alloca { <4 x float>, <4 x float> } ; CHECK-NEXT: store { <4 x float>, <4 x float> } %vld1xN.i.i.i.i.i, ptr %[[SPILL]] ; CHECK-NEXT: call void @__csan_llvm_aarch64_neon_ld1x2_v4f32_p0( ; CHECK: ptr %[[SPILL]], ; CHECK: ptr %[[ARG]]) -; CHECK-NEXT: call void @llvm.stackrestore(ptr %[[STACKSAVE]]) +; CHECK-NEXT: call void @llvm.stackrestore{{.*}}(ptr %[[STACKSAVE]]) ; CHECK: tail call void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float> %vrndz1.i.i.i.i.i.i.i, <4 x float> %vrndz1.i9.i.i.i.i.i.i, ptr %[[ARG2:.+]]) -; CHECK-NEXT: %[[STACKSAVE2:.+]] = call ptr @llvm.stacksave() +; CHECK-NEXT: %[[STACKSAVE2:.+]] = call ptr @llvm.stacksave ; CHECK-NEXT: %[[SPILL2A:.+]] = alloca <4 x float> ; CHECK-NEXT: store <4 x float> %vrndz1.i.i.i.i.i.i.i, ptr %[[SPILL2A]] ; CHECK-NEXT: %[[SPILL2B:.+]] = alloca <4 x float> @@ -158,7 +158,7 @@ for.body14.i.i: ; preds = %for.body14.i.i, %fo ; 
CHECK: ptr %[[SPILL2A]], ; CHECK: ptr %[[SPILL2B]], ; CHECK: ptr %[[ARG2]]) -; CHECK-NEXT: call void @llvm.stackrestore(ptr %[[STACKSAVE2]]) +; CHECK-NEXT: call void @llvm.stackrestore{{.*}}(ptr %[[STACKSAVE2]]) for.end44.i.loopexit.i: ; preds = %for.body14.i.i %cmp45.i.i = icmp slt i64 %add43.i.i, %params3 diff --git a/llvm/test/Transforms/Tapir/CilkSanitizer/finddbgvalues.ll b/llvm/test/Transforms/Tapir/CilkSanitizer/finddbgvalues.ll index dff6be867467769..8f3251bb082cf74 100644 --- a/llvm/test/Transforms/Tapir/CilkSanitizer/finddbgvalues.ll +++ b/llvm/test/Transforms/Tapir/CilkSanitizer/finddbgvalues.ll @@ -2,6 +2,13 @@ ; @llvm.dbg.value() call with complex constant metadata node. ; ; RUN: opt < %s -passes="cilksan" -S | FileCheck %s +; +; KITSUNE FIXME: +; constexprs with the opcode "and" have been removed in LLVM. This test should +; be modified to handle some complex constant expression that is still +; supported. +; +; XFAIL: * %"class.std::__1::basic_string" = type { %"class.std::__1::__compressed_pair.67" } %"class.std::__1::__compressed_pair.67" = type { %"struct.std::__1::__compressed_pair_elem.68" } diff --git a/llvm/test/Transforms/Tapir/dead-tapir-intrinsics.ll b/llvm/test/Transforms/Tapir/dead-tapir-intrinsics.ll index b9c988ecc5b7145..5bfce010fbc63d3 100644 --- a/llvm/test/Transforms/Tapir/dead-tapir-intrinsics.ll +++ b/llvm/test/Transforms/Tapir/dead-tapir-intrinsics.ll @@ -36,9 +36,9 @@ define dso_local void @_Z6parentl(i64 %x) #1 personality i8* bitcast (i32 (...)* ; CHECK-NOTFSIMPLIFY: call token @llvm.taskframe.create() ; CHECK-NOTFSIMPLIFY: call token @llvm.taskframe.create() ; CHECK-NOTFSIMPLIFY: call token @llvm.taskframe.create() -; CHECK-TFSIMPLIFY: call ptr @llvm.stacksave() -; CHECK-TFSIMPLIFY: call ptr @llvm.stacksave() -; CHECK-TFSIMPLIFY: call ptr @llvm.stacksave() +; CHECK-TFSIMPLIFY: call ptr @llvm.stacksave +; CHECK-TFSIMPLIFY: call ptr @llvm.stacksave +; CHECK-TFSIMPLIFY: call ptr @llvm.stacksave ; CHECK-TFSIMPLIFY-NOT: call
token @llvm.taskframe.create() ; CHECK: call token @llvm.taskframe.create() ; CHECK detach within %[[SYNCREG]] diff --git a/llvm/test/Transforms/Tapir/loop-stripmine.ll b/llvm/test/Transforms/Tapir/loop-stripmine.ll index d599d3ab4109e63..d54fc9df82baaaa 100644 --- a/llvm/test/Transforms/Tapir/loop-stripmine.ll +++ b/llvm/test/Transforms/Tapir/loop-stripmine.ll @@ -371,11 +371,11 @@ declare dso_local i32 @foo(double*, i32) local_unnamed_addr #5 ; CHECK: br label %[[EPILBODY:.+]], !dbg !51 ; CHECK: [[EPILBODY]]: -; CHECK-NEXT: call ptr @llvm.stacksave() +; CHECK-NEXT: call ptr @llvm.stacksave ; CHECK: alloca double ; CHECK: call void @llvm.lifetime.start ; CHECK: call void @llvm.lifetime.end -; CHECK: call void @llvm.stackrestore( +; CHECK: call void @llvm.stackrestore{{.*}}( ; CHECK: [[STRPLOOPDETACHENTRY]]: ; CHECK: %[[NEWSYNCREG:.+]] = call token @llvm.syncregion.start() @@ -392,11 +392,11 @@ declare dso_local i32 @foo(double*, i32) local_unnamed_addr #5 ; CHECK: br label %[[STRPLOOPINNERBODY:.+]], !dbg !51 ; CHECK: [[STRPLOOPINNERBODY]]: -; CHECK: call ptr @llvm.stacksave() +; CHECK: call ptr @llvm.stacksave ; CHECK: alloca double ; CHECK: call void @llvm.lifetime.start ; CHECK: call void @llvm.lifetime.end -; CHECK: call void @llvm.stackrestore( +; CHECK: call void @llvm.stackrestore{{.*}}( attributes #0 = { argmemonly nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { argmemonly nounwind } diff --git a/llvm/test/Transforms/Tapir/slp-vectorize-long-bb.ll b/llvm/test/Transforms/Tapir/slp-vectorize-long-bb.ll index ef291bd88c77419..74748177c85d573 100644 
--- a/llvm/test/Transforms/Tapir/slp-vectorize-long-bb.ll +++ b/llvm/test/Transforms/Tapir/slp-vectorize-long-bb.ll @@ -13,336 +13,336 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK: and <4 x i64> %{{.+}}, ; CHECK: load <4 x i64>, ptr ; CHECK-NEXT: and <4 x i64> %{{.+}}, -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK: load <4 x i64>, ptr ; CHECK: and <4 x i64> %{{.+}}, ; CHECK: load <4 x i64>, ptr ; CHECK-NEXT: and <4 x i64> %{{.+}}, -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT': store <4 x i64> ; CHECK: load <4 x i64>, ptr ; CHECK: and <4 x i64> %{{.+}}, ; CHECK: load <4 x i64>, ptr ; CHECK-NEXT: and <4 x i64> %{{.+}}, -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK: load <4 x i64>, ptr ; CHECK: and <4 x i64> %{{.+}}, ; CHECK: load <4 x i64>, ptr ; CHECK-NEXT: and <4 x i64> %{{.+}}, -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK: load <4 x i64>, ptr ; CHECK: and <4 x i64> %{{.+}}, ; CHECK: load <4 x i64>, ptr ; CHECK-NEXT: and <4 x i64> %{{.+}}, -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK: load <4 x i64>, ptr ; CHECK: and <4 x i64> %{{.+}}, ; CHECK: load <4 x i64>, ptr ; CHECK-NEXT: and <4 x i64> %{{.+}}, -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK: load <4 x i64>, ptr ; CHECK: and <4 x i64> %{{.+}}, ; CHECK: load <4 x i64>, ptr ; CHECK-NEXT: and <4 x i64> %{{.+}}, -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK: load <4 x i64>, ptr ; CHECK: and <4 x i64> %{{.+}}, ; CHECK: load <4 x i64>, ptr ; CHECK-NEXT: and <4 x i64> %{{.+}}, -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; 
CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: 
store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x 
i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; 
CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: 
and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK: load <2 x i64> @@ -354,7 +354,7 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK: load <4 x i64> ; CHECK-NEXT: shufflevector <4 x i64> ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, @@ -362,7 +362,7 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: shufflevector <4 x i64> ; CHECK-NEXT: and <4 x i64> ; CHECK-NEXT: shufflevector <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, @@ -370,7 +370,7 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: shufflevector <4 x i64> ; CHECK-NEXT: and <4 x i64> ; CHECK-NEXT: shufflevector <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, @@ -378,7 +378,7 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: shufflevector <4 x i64> ; CHECK-NEXT: and <4 x i64> ; CHECK-NEXT: shufflevector <4 x i64> -; CHECK-NEXT: 
or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, @@ -386,7 +386,7 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: shufflevector <4 x i64> ; CHECK-NEXT: and <4 x i64> ; CHECK-NEXT: shufflevector <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, @@ -394,7 +394,7 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: shufflevector <4 x i64> ; CHECK-NEXT: and <4 x i64> ; CHECK-NEXT: shufflevector <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, @@ -402,7 +402,7 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: shufflevector <4 x i64> ; CHECK-NEXT: and <4 x i64> ; CHECK-NEXT: shufflevector <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, @@ -410,7 +410,7 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: shufflevector <4 x i64> ; CHECK-NEXT: and <4 x i64> ; CHECK-NEXT: shufflevector <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, @@ -418,7 +418,7 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: shufflevector <4 x i64> ; CHECK-NEXT: and <4 x i64> ; CHECK-NEXT: shufflevector <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, @@ -426,7 +426,7 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: shufflevector <4 x i64> ; CHECK-NEXT: and <4 x i64> ; CHECK-NEXT: shufflevector <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, @@ -434,7 +434,7 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: 
shufflevector <4 x i64> ; CHECK-NEXT: and <4 x i64> ; CHECK-NEXT: shufflevector <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, @@ -442,7 +442,7 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: shufflevector <4 x i64> ; CHECK-NEXT: and <4 x i64> ; CHECK-NEXT: shufflevector <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, @@ -450,7 +450,7 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: shufflevector <4 x i64> ; CHECK-NEXT: and <4 x i64> ; CHECK-NEXT: shufflevector <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, @@ -458,7 +458,7 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: shufflevector <4 x i64> ; CHECK-NEXT: and <4 x i64> ; CHECK-NEXT: shufflevector <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, @@ -467,14 +467,14 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: shufflevector <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: shufflevector <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: shufflevector <2 x i64> ; CHECK-NEXT: shufflevector <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: shufflevector <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK: load <2 x i64> @@ -490,7 +490,7 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: load <4 x i64> ; CHECK-NEXT: shufflevector <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> @@ -498,7 
+498,7 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: shufflevector <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: shufflevector <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> @@ -506,7 +506,7 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: shufflevector <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: shufflevector <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> @@ -514,7 +514,7 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: shufflevector <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: shufflevector <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> @@ -522,7 +522,7 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: shufflevector <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: shufflevector <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> @@ -530,7 +530,7 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: shufflevector <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: shufflevector <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> @@ -538,7 +538,7 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: shufflevector <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: shufflevector <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> @@ -546,7 +546,7 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: shufflevector <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: shufflevector <4 x i64> -; CHECK-NEXT: or <4 x i64> +; 
CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> @@ -554,7 +554,7 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: shufflevector <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: shufflevector <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> @@ -562,7 +562,7 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: shufflevector <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: shufflevector <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> @@ -570,7 +570,7 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: shufflevector <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: shufflevector <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> @@ -578,7 +578,7 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: shufflevector <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: shufflevector <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> @@ -586,7 +586,7 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: shufflevector <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: shufflevector <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> @@ -594,7 +594,7 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: shufflevector <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: shufflevector <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: and <4 x i64> @@ -603,14 +603,14 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK-NEXT: shufflevector <4 x i64> ; 
CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: shufflevector <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: insertelement <4 x i64> ; CHECK-NEXT: and <4 x i64> %{{.+}}, ; CHECK-NEXT: shufflevector <4 x i64> ; CHECK-NEXT: and <4 x i64> -; CHECK-NEXT: or <4 x i64> +; CHECK-NEXT: or disjoint <4 x i64> ; CHECK-NEXT: store <4 x i64> ; CHECK-NEXT: ret void diff --git a/llvm/tools/gold/gold-plugin.cpp b/llvm/tools/gold/gold-plugin.cpp index 3e6a5a9392c3272..33a4eac89190447 100644 --- a/llvm/tools/gold/gold-plugin.cpp +++ b/llvm/tools/gold/gold-plugin.cpp @@ -229,12 +229,12 @@ namespace options { return StringSwitch(tapirTarget) .Case("none", TapirTargetID::None) .Case("serial", TapirTargetID::Serial) - .Case("cheetah", TapirTargetID::Cheetah) - .Case("cilkplus", TapirTargetID::Cilk) .Case("cuda", TapirTargetID::Cuda) + .Case("hip", TapirTargetID::Hip) .Case("opencilk", TapirTargetID::OpenCilk) .Case("openmp", TapirTargetID::OpenMP) .Case("qthreads", TapirTargetID::Qthreads) + .Case("realm", TapirTargetID::Realm) .Default(TapirTargetID::Last_TapirTargetID); } diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index ca22338a3cc2f39..42117f719dfbc7a 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -50,29 +50,6 @@ find_package(Clang PATHS "${LLVM_BINARY_DIR}" NO_DEFAULT_PATH NO_CMAKE_FIND_ROOT set(LLVM_THIRD_PARTY_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../third-party") -# function(get_cheetah_path path) -# foreach(entry ${runtimes}) -# get_filename_component(projName ${entry} NAME) -# if("${projName}" MATCHES "cheetah") -# set(${path} ${entry} PARENT_SCOPE) -# return() -# endif() -# endforeach() -# endfunction() - -# # Some of the runtimes will conditionally use cheetah. To make this -# # work smoothly we ensure that cheetah is added early in the list of -# # sub-projects. This allows other sub-projects to have checks like -# # `if(TARGET cheetah)`. 
-# get_cheetah_path(cheetah_path) -# message(FATAL_ERROR "cheetah_path: ${cheetah_path}") -# if(cheetah_path) -# list(REMOVE_ITEM runtimes ${cheetah_path}) -# if(NOT DEFINED LLVM_BUILD_CHEETAH OR LLVM_BUILD_CHEETAH) -# list(INSERT runtimes 0 ${cheetah_path}) -# endif() -# endif() - # KITSUNE FIXME: Do we care about compiler-rt? If it is only used by cilktools, # we probably don't. function(get_compiler_rt_path path)