diff --git a/llvm/include/llvm/Analysis/StructuralHash.h b/llvm/include/llvm/Analysis/StructuralHash.h index 9f33c69aed345c..4c9f063bc7d2c8 100644 --- a/llvm/include/llvm/Analysis/StructuralHash.h +++ b/llvm/include/llvm/Analysis/StructuralHash.h @@ -13,15 +13,22 @@ namespace llvm { +enum class StructuralHashOptions { + None, /// Hash with opcode only. + Detailed, /// Hash with opcode and operands. + CallTargetIgnored, /// Ignore call target operand when computing hash. +}; + /// Printer pass for StructuralHashes class StructuralHashPrinterPass : public PassInfoMixin { raw_ostream &OS; - bool EnableDetailedStructuralHash; + const StructuralHashOptions Options; public: - explicit StructuralHashPrinterPass(raw_ostream &OS, bool Detailed) - : OS(OS), EnableDetailedStructuralHash(Detailed) {} + explicit StructuralHashPrinterPass(raw_ostream &OS, + StructuralHashOptions Options) + : OS(OS), Options(Options) {} PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); diff --git a/llvm/include/llvm/IR/StructuralHash.h b/llvm/include/llvm/IR/StructuralHash.h index e2e192cc9501b3..071575137ff572 100644 --- a/llvm/include/llvm/IR/StructuralHash.h +++ b/llvm/include/llvm/IR/StructuralHash.h @@ -14,7 +14,9 @@ #ifndef LLVM_IR_STRUCTURALHASH_H #define LLVM_IR_STRUCTURALHASH_H +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/StableHashing.h" +#include "llvm/IR/Instruction.h" #include namespace llvm { @@ -35,6 +37,49 @@ stable_hash StructuralHash(const Function &F, bool DetailedHash = false); /// composed the module hash. stable_hash StructuralHash(const Module &M, bool DetailedHash = false); +/// The pair of an instruction index and a operand index. +using IndexPair = std::pair; + +/// A map from an instruction index to an instruction pointer. +using IndexInstrMap = MapVector; + +/// A map from an IndexPair to a stable hash. +using IndexOperandHashMapType = DenseMap; + +/// A function that takes an instruction and an operand index and returns true +/// if the operand should be ignored in the function hash computation. +using IgnoreOperandFunc = std::function; + +struct FunctionHashInfo { + /// A hash value representing the structural content of the function + stable_hash FunctionHash; + /// A mapping from instruction indices to instruction pointers + std::unique_ptr IndexInstruction; + /// A mapping from pairs of instruction indices and operand indices + /// to the hashes of the operands. This can be used to analyze or + /// reconstruct the differences in ignored operands + std::unique_ptr IndexOperandHashMap; + + FunctionHashInfo(stable_hash FuntionHash, + std::unique_ptr IndexInstruction, + std::unique_ptr IndexOperandHashMap) + : FunctionHash(FuntionHash), + IndexInstruction(std::move(IndexInstruction)), + IndexOperandHashMap(std::move(IndexOperandHashMap)) {} +}; + +/// Computes a structural hash of a given function, considering the structure +/// and content of the function's instructions while allowing for selective +/// ignoring of certain operands based on custom criteria. This hash can be used +/// to identify functions that are structurally similar or identical, which is +/// useful in optimizations, deduplication, or analysis tasks. +/// \param F The function to hash. +/// \param IgnoreOp A callable that takes an instruction and an operand index, +/// and returns true if the operand should be ignored in the hash computation. +/// \return A FunctionHashInfo structure +FunctionHashInfo StructuralHashWithDifferences(const Function &F, + IgnoreOperandFunc IgnoreOp); + } // end namespace llvm #endif diff --git a/llvm/lib/Analysis/StructuralHash.cpp b/llvm/lib/Analysis/StructuralHash.cpp index 3a2341fe59ad9c..4f2e003148b606 100644 --- a/llvm/lib/Analysis/StructuralHash.cpp +++ b/llvm/lib/Analysis/StructuralHash.cpp @@ -21,14 +21,33 @@ using namespace llvm; PreservedAnalyses StructuralHashPrinterPass::run(Module &M, ModuleAnalysisManager &MAM) { OS << "Module Hash: " - << format("%016" PRIx64, StructuralHash(M, EnableDetailedStructuralHash)) + << format("%016" PRIx64, + StructuralHash(M, Options != StructuralHashOptions::None)) << "\n"; for (Function &F : M) { if (F.isDeclaration()) continue; - OS << "Function " << F.getName() << " Hash: " - << format("%016" PRIx64, StructuralHash(F, EnableDetailedStructuralHash)) - << "\n"; + if (Options == StructuralHashOptions::CallTargetIgnored) { + auto IgnoreOp = [&](const Instruction *I, unsigned OpndIdx) { + return I->getOpcode() == Instruction::Call && + isa(I->getOperand(OpndIdx)); + }; + auto FuncHashInfo = StructuralHashWithDifferences(F, IgnoreOp); + OS << "Function " << F.getName() + << " Hash: " << format("%016" PRIx64, FuncHashInfo.FunctionHash) + << "\n"; + for (auto &[IndexPair, OpndHash] : *FuncHashInfo.IndexOperandHashMap) { + auto [InstIndex, OpndIndex] = IndexPair; + OS << "\tIgnored Operand Hash: " << format("%016" PRIx64, OpndHash) + << " at (" << InstIndex << "," << OpndIndex << ")\n"; + } + } else { + OS << "Function " << F.getName() << " Hash: " + << format( + "%016" PRIx64, + StructuralHash(F, Options == StructuralHashOptions::Detailed)) + << "\n"; + } } return PreservedAnalyses::all(); } diff --git a/llvm/lib/IR/StructuralHash.cpp b/llvm/lib/IR/StructuralHash.cpp index 267a085c5af705..a51f9124af04da 100644 --- a/llvm/lib/IR/StructuralHash.cpp +++ b/llvm/lib/IR/StructuralHash.cpp @@ -34,14 +34,18 @@ class StructuralHashImpl { static constexpr stable_hash FunctionHeaderHash = 0x62642d6b6b2d6b72; static constexpr stable_hash GlobalHeaderHash = 23456; - // This will produce different values on 32-bit and 64-bit systens as - // hash_combine returns a size_t. However, this is only used for - // detailed hashing which, in-tree, only needs to distinguish between - // differences in functions. - // TODO: This is not stable. - template stable_hash hashArbitaryType(const T &V) { - return hash_combine(V); - } + /// IgnoreOp is a function that returns true if the operand should be ignored. + IgnoreOperandFunc IgnoreOp = nullptr; + /// A mapping from instruction indices to instruction pointers. + /// The index represents the position of an instruction based on the order in + /// which it is first encountered. + std::unique_ptr IndexInstruction = nullptr; + /// A mapping from pairs of instruction indices and operand indices + /// to the hashes of the operands. + std::unique_ptr IndexOperandHashMap = nullptr; + + /// Assign a unique ID to each Value in the order they are first seen. + DenseMap ValueToId; stable_hash hashType(Type *ValueType) { SmallVector Hashes; @@ -53,23 +57,95 @@ class StructuralHashImpl { public: StructuralHashImpl() = delete; - explicit StructuralHashImpl(bool DetailedHash) : DetailedHash(DetailedHash) {} + explicit StructuralHashImpl(bool DetailedHash, + IgnoreOperandFunc IgnoreOp = nullptr) + : DetailedHash(DetailedHash), IgnoreOp(IgnoreOp) { + if (IgnoreOp) { + IndexInstruction = std::make_unique(); + IndexOperandHashMap = std::make_unique(); + } + } + + stable_hash hashAPInt(const APInt &I) { + SmallVector Hashes; + Hashes.emplace_back(I.getBitWidth()); + auto RawVals = ArrayRef(I.getRawData(), I.getNumWords()); + Hashes.append(RawVals.begin(), RawVals.end()); + return stable_hash_combine(Hashes); + } + + stable_hash hashAPFloat(const APFloat &F) { + return hashAPInt(F.bitcastToAPInt()); + } + + stable_hash hashGlobalValue(const GlobalValue *GV) { + if (!GV->hasName()) + return 0; + return stable_hash_name(GV->getName()); + } + // Compute a hash for a Constant. This function is logically similar to + // FunctionComparator::cmpConstants() in FunctionComparator.cpp, but here + // we're interested in computing a hash rather than comparing two Constants. + // Some of the logic is simplified, e.g, we don't expand GEPOperator. stable_hash hashConstant(Constant *C) { SmallVector Hashes; - // TODO: hashArbitaryType() is not stable. - if (ConstantInt *ConstInt = dyn_cast(C)) { - Hashes.emplace_back(hashArbitaryType(ConstInt->getValue())); - } else if (ConstantFP *ConstFP = dyn_cast(C)) { - Hashes.emplace_back(hashArbitaryType(ConstFP->getValue())); - } else if (Function *Func = dyn_cast(C)) { - // Hashing the name will be deterministic as LLVM's hashing infrastructure - // has explicit support for hashing strings and will not simply hash - // the pointer. - Hashes.emplace_back(hashArbitaryType(Func->getName())); + + Type *Ty = C->getType(); + Hashes.emplace_back(hashType(Ty)); + + if (C->isNullValue()) { + Hashes.emplace_back(static_cast('N')); + return stable_hash_combine(Hashes); } - return stable_hash_combine(Hashes); + if (auto *G = dyn_cast(C)) { + Hashes.emplace_back(hashGlobalValue(G)); + return stable_hash_combine(Hashes); + } + + if (const auto *Seq = dyn_cast(C)) { + Hashes.emplace_back(xxh3_64bits(Seq->getRawDataValues())); + return stable_hash_combine(Hashes); + } + + switch (C->getValueID()) { + case Value::ConstantIntVal: { + const APInt &Int = cast(C)->getValue(); + Hashes.emplace_back(hashAPInt(Int)); + return stable_hash_combine(Hashes); + } + case Value::ConstantFPVal: { + const APFloat &APF = cast(C)->getValueAPF(); + Hashes.emplace_back(hashAPFloat(APF)); + return stable_hash_combine(Hashes); + } + case Value::ConstantArrayVal: + case Value::ConstantStructVal: + case Value::ConstantVectorVal: + case Value::ConstantExprVal: { + for (const auto &Op : C->operands()) { + auto H = hashConstant(cast(Op)); + Hashes.emplace_back(H); + } + return stable_hash_combine(Hashes); + } + case Value::BlockAddressVal: { + const BlockAddress *BA = cast(C); + auto H = hashGlobalValue(BA->getFunction()); + Hashes.emplace_back(H); + return stable_hash_combine(Hashes); + } + case Value::DSOLocalEquivalentVal: { + const auto *Equiv = cast(C); + auto H = hashGlobalValue(Equiv->getGlobalValue()); + Hashes.emplace_back(H); + return stable_hash_combine(Hashes); + } + default: + // Skip other types of constants for simplicity. + return stable_hash_combine(Hashes); + } } stable_hash hashValue(Value *V) { @@ -83,6 +159,10 @@ class StructuralHashImpl { if (Argument *Arg = dyn_cast(V)) Hashes.emplace_back(Arg->getArgNo()); + // Get an index (an insertion order) for the non-constant value. + auto [It, WasInserted] = ValueToId.try_emplace(V, ValueToId.size()); + Hashes.emplace_back(It->second); + return stable_hash_combine(Hashes); } @@ -107,8 +187,20 @@ class StructuralHashImpl { if (const auto *ComparisonInstruction = dyn_cast(&Inst)) Hashes.emplace_back(ComparisonInstruction->getPredicate()); - for (const auto &Op : Inst.operands()) - Hashes.emplace_back(hashOperand(Op)); + unsigned InstIdx = 0; + if (IndexInstruction) { + InstIdx = IndexInstruction->size(); + IndexInstruction->try_emplace(InstIdx, const_cast(&Inst)); + } + + for (const auto [OpndIdx, Op] : enumerate(Inst.operands())) { + auto OpndHash = hashOperand(Op); + if (IgnoreOp && IgnoreOp(&Inst, OpndIdx)) { + assert(IndexOperandHashMap); + IndexOperandHashMap->try_emplace({InstIdx, OpndIdx}, OpndHash); + } else + Hashes.emplace_back(OpndHash); + } return stable_hash_combine(Hashes); } @@ -188,6 +280,14 @@ class StructuralHashImpl { } uint64_t getHash() const { return Hash; } + + std::unique_ptr getIndexInstrMap() { + return std::move(IndexInstruction); + } + + std::unique_ptr getIndexPairOpndHashMap() { + return std::move(IndexOperandHashMap); + } }; } // namespace @@ -203,3 +303,12 @@ stable_hash llvm::StructuralHash(const Module &M, bool DetailedHash) { H.update(M); return H.getHash(); } + +FunctionHashInfo +llvm::StructuralHashWithDifferences(const Function &F, + IgnoreOperandFunc IgnoreOp) { + StructuralHashImpl H(/*DetailedHash=*/true, IgnoreOp); + H.update(F); + return FunctionHashInfo(H.getHash(), H.getIndexInstrMap(), + H.getIndexPairOpndHashMap()); +} diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index f5ce405ab8d961..d1f75dfb5350a0 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1175,9 +1175,17 @@ Expected parseMemProfUsePassOptions(StringRef Params) { return Result; } -Expected parseStructuralHashPrinterPassOptions(StringRef Params) { - return PassBuilder::parseSinglePassOption(Params, "detailed", - "StructuralHashPrinterPass"); +Expected +parseStructuralHashPrinterPassOptions(StringRef Params) { + if (Params.empty()) + return StructuralHashOptions::None; + if (Params == "detailed") + return StructuralHashOptions::Detailed; + if (Params == "call-target-ignored") + return StructuralHashOptions::CallTargetIgnored; + return make_error( + formatv("invalid structural hash printer parameter '{0}' ", Params).str(), + inconvertibleErrorCode()); } Expected parseWinEHPrepareOptions(StringRef Params) { diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 549c1359b5852c..017ae311c55eb4 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -220,10 +220,11 @@ MODULE_PASS_WITH_PARAMS( parseMSanPassOptions, "recover;kernel;eager-checks;track-origins=N") MODULE_PASS_WITH_PARAMS( "print", "StructuralHashPrinterPass", - [](bool EnableDetailedStructuralHash) { - return StructuralHashPrinterPass(dbgs(), EnableDetailedStructuralHash); + [](StructuralHashOptions Options) { + return StructuralHashPrinterPass(dbgs(), Options); }, - parseStructuralHashPrinterPassOptions, "detailed") + parseStructuralHashPrinterPassOptions, "detailed;call-target-ignored") + #undef MODULE_PASS_WITH_PARAMS #ifndef CGSCC_ANALYSIS diff --git a/llvm/test/Analysis/StructuralHash/structural-hash-printer.ll b/llvm/test/Analysis/StructuralHash/structural-hash-printer.ll index 5936199bf32f43..3c23b54d297369 100644 --- a/llvm/test/Analysis/StructuralHash/structural-hash-printer.ll +++ b/llvm/test/Analysis/StructuralHash/structural-hash-printer.ll @@ -1,17 +1,21 @@ ; RUN: opt -passes='print' -disable-output %s 2>&1 | FileCheck %s ; RUN: opt -passes='print' -disable-output %s 2>&1 | FileCheck %s -check-prefix=DETAILED-HASH +; RUN: opt -passes='print' -disable-output %s 2>&1 | FileCheck %s -check-prefix=CALLTARGETIGNORED-HASH ; Add a declaration so that we can test we skip it. -declare i64 @d1() +declare i64 @d1(i64) +declare i64 @e1(i64) define i64 @f1(i64 %a) { %b = add i64 %a, 1 - ret i64 %b + %c = call i64 @d1(i64 %b) + ret i64 %c } -define i32 @f2(i32 %a) { - %b = add i32 %a, 2 - ret i32 %b +define i64 @f2(i64 %a) { + %b = add i64 %a, 1 + %c = call i64 @e1(i64 %b) + ret i64 %c } ; CHECK: Module Hash: {{([a-f0-9]{16,})}} @@ -22,3 +26,13 @@ define i32 @f2(i32 %a) { ; DETAILED-HASH-NEXT: Function f1 Hash: [[DF1H:([a-f0-9]{16,})]] ; DETAILED-HASH-NOT: [[DF1H]] ; DETAILED-HASH-NEXT: Function f2 Hash: {{([a-f0-9]{16,})}} + +; When ignoring the call target, check if `f1` and `f2` produce the same function hash. +; The index for the call instruction is 1, and the index of the call target operand is 1. +; The ignored operand hashes for different call targets should be different. +; CALLTARGETIGNORED-HASH: Module Hash: {{([a-f0-9]{16,})}} +; CALLTARGETIGNORED-HASH-NEXT: Function f1 Hash: [[IF1H:([a-f0-9]{16,})]] +; CALLTARGETIGNORED-HASH-NEXT: Ignored Operand Hash: [[IO1H:([a-f0-9]{16,})]] at (1,1) +; CALLTARGETIGNORED-HASH-NEXT: Function f2 Hash: [[IF1H]] +; CALLTARGETIGNORED-HASH-NOT: [[IO1H]] +; CALLTARGETIGNORED-HASH-NEXT: Ignored Operand Hash: {{([a-f0-9]{16,})}} at (1,1) diff --git a/llvm/unittests/IR/StructuralHashTest.cpp b/llvm/unittests/IR/StructuralHashTest.cpp index 64e66aa5f97a6d..81c17120a1f6ff 100644 --- a/llvm/unittests/IR/StructuralHashTest.cpp +++ b/llvm/unittests/IR/StructuralHashTest.cpp @@ -10,6 +10,7 @@ #include "llvm/AsmParser/Parser.h" #include "llvm/IR/Module.h" #include "llvm/Support/SourceMgr.h" +#include "gmock/gmock-matchers.h" #include "gtest/gtest.h" #include @@ -18,6 +19,11 @@ using namespace llvm; namespace { +using testing::Contains; +using testing::Key; +using testing::Pair; +using testing::SizeIs; + std::unique_ptr parseIR(LLVMContext &Context, const char *IR) { SMDiagnostic Err; std::unique_ptr M = parseAssemblyString(IR, Err, Context); @@ -239,4 +245,59 @@ TEST(StructuralHashTest, ArgumentNumber) { EXPECT_EQ(StructuralHash(*M1), StructuralHash(*M2)); EXPECT_NE(StructuralHash(*M1, true), StructuralHash(*M2, true)); } + +TEST(StructuralHashTest, Differences) { + LLVMContext Ctx; + std::unique_ptr M1 = parseIR(Ctx, "define i64 @f(i64 %a) {\n" + " %c = add i64 %a, 1\n" + " %b = call i64 @f1(i64 %c)\n" + " ret i64 %b\n" + "}\n" + "declare i64 @f1(i64)"); + auto *F1 = M1->getFunction("f"); + std::unique_ptr M2 = parseIR(Ctx, "define i64 @g(i64 %a) {\n" + " %c = add i64 %a, 1\n" + " %b = call i64 @f2(i64 %c)\n" + " ret i64 %b\n" + "}\n" + "declare i64 @f2(i64)"); + auto *F2 = M2->getFunction("g"); + + // They are originally different when not ignoring any operand. + EXPECT_NE(StructuralHash(*F1, true), StructuralHash(*F2, true)); + EXPECT_NE(StructuralHashWithDifferences(*F1, nullptr).FunctionHash, + StructuralHashWithDifferences(*F2, nullptr).FunctionHash); + + // When we ignore the call target f1 vs f2, they have the same hash. + auto IgnoreOp = [&](const Instruction *I, unsigned OpndIdx) { + return I->getOpcode() == Instruction::Call && OpndIdx == 1; + }; + auto FuncHashInfo1 = StructuralHashWithDifferences(*F1, IgnoreOp); + auto FuncHashInfo2 = StructuralHashWithDifferences(*F2, IgnoreOp); + EXPECT_EQ(FuncHashInfo1.FunctionHash, FuncHashInfo2.FunctionHash); + + // There are total 3 instructions. + EXPECT_THAT(*FuncHashInfo1.IndexInstruction, SizeIs(3)); + EXPECT_THAT(*FuncHashInfo2.IndexInstruction, SizeIs(3)); + + // The only 1 operand (the call target) has been ignored. + EXPECT_THAT(*FuncHashInfo1.IndexOperandHashMap, SizeIs(1u)); + EXPECT_THAT(*FuncHashInfo2.IndexOperandHashMap, SizeIs(1u)); + + // The index pair of instruction and operand (1, 1) is a key in the map. + ASSERT_THAT(*FuncHashInfo1.IndexOperandHashMap, Contains(Key(Pair(1, 1)))); + ASSERT_THAT(*FuncHashInfo2.IndexOperandHashMap, Contains(Key(Pair(1, 1)))); + + // The indexed instruciton must be the call instruction as shown in the + // IgnoreOp above. + EXPECT_EQ(FuncHashInfo1.IndexInstruction->lookup(1)->getOpcode(), + Instruction::Call); + EXPECT_EQ(FuncHashInfo2.IndexInstruction->lookup(1)->getOpcode(), + Instruction::Call); + + // The ignored operand hashes (for f1 vs. f2) are different. + EXPECT_NE(FuncHashInfo1.IndexOperandHashMap->lookup({1, 1}), + FuncHashInfo2.IndexOperandHashMap->lookup({1, 1})); +} + } // end anonymous namespace