From 2db9b6a93f9fc9b943f52efed51264ef8760cec2 Mon Sep 17 00:00:00 2001 From: Maksim Panchenko Date: Mon, 13 Nov 2023 14:33:39 -0800 Subject: [PATCH] [BOLT] Make instruction size a first-class annotation (#72167) When NOP instructions are used to reserve space in the code, e.g. for patching, it becomes critical to preserve their original size while emitting the code. On x86, we rely on "Size" annotation for NOP instructions size, as the original instruction size is lost in the disassembly/assembly process. This change makes instruction size a first-class annotation and is affectively NFCI. A follow-up diff will use the annotation for code emission. --- bolt/include/bolt/Core/BinaryContext.h | 4 ++-- bolt/include/bolt/Core/MCPlus.h | 1 + bolt/include/bolt/Core/MCPlusBuilder.h | 6 ++++++ bolt/lib/Core/BinaryContext.cpp | 2 ++ bolt/lib/Core/BinaryFunction.cpp | 11 ++++++----- bolt/lib/Core/MCPlusBuilder.cpp | 11 +++++++++++ bolt/lib/Profile/DataReader.cpp | 3 ++- bolt/lib/Rewrite/RewriteInstance.cpp | 1 - 8 files changed, 30 insertions(+), 9 deletions(-) diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h index 1f27133644d4ab..a4e84cb93c093d 100644 --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -1239,8 +1239,8 @@ class BinaryContext { uint64_t computeInstructionSize(const MCInst &Inst, const MCCodeEmitter *Emitter = nullptr) const { - if (auto Size = MIB->getAnnotationWithDefault(Inst, "Size")) - return Size; + if (std::optional Size = MIB->getSize(Inst)) + return *Size; if (!Emitter) Emitter = this->MCE.get(); diff --git a/bolt/include/bolt/Core/MCPlus.h b/bolt/include/bolt/Core/MCPlus.h index f6ffd33513dd25..b6a9e73f2347e7 100644 --- a/bolt/include/bolt/Core/MCPlus.h +++ b/bolt/include/bolt/Core/MCPlus.h @@ -72,6 +72,7 @@ class MCAnnotation { kConditionalTailCall, /// CTC. kOffset, /// Offset in the function. kLabel, /// MCSymbol pointing to this instruction. + kSize, /// Size of the instruction. kGeneric /// First generic annotation. }; diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h index 41a5dd8399dfc2..687b49a3cbda43 100644 --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -1179,6 +1179,12 @@ class MCPlusBuilder { /// is emitted to MCStreamer. bool setLabel(MCInst &Inst, MCSymbol *Label) const; + /// Get instruction size specified via annotation. + std::optional getSize(const MCInst &Inst) const; + + /// Set instruction size. + void setSize(MCInst &Inst, uint32_t Size) const; + /// Return MCSymbol that represents a target of this instruction at a given /// operand number \p OpNum. If there's no symbol associated with /// the operand - return nullptr. diff --git a/bolt/lib/Core/BinaryContext.cpp b/bolt/lib/Core/BinaryContext.cpp index 00725a2ff2225a..fd4d09964b725e 100644 --- a/bolt/lib/Core/BinaryContext.cpp +++ b/bolt/lib/Core/BinaryContext.cpp @@ -1897,6 +1897,8 @@ void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction, } if (std::optional Offset = MIB->getOffset(Instruction)) OS << " # Offset: " << *Offset; + if (std::optional Size = MIB->getSize(Instruction)) + OS << " # Size: " << *Size; if (MCSymbol *Label = MIB->getLabel(Instruction)) OS << " # Label: " << *Label; diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index 17c2c0ca408d99..9bbdd0deaff2d3 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -1380,7 +1380,7 @@ bool BinaryFunction::disassemble() { // NOTE: disassembly loses the correct size information for noops on x86. // E.g. nopw 0x0(%rax,%rax,1) is 9 bytes, but re-encoded it's only // 5 bytes. Preserve the size info using annotations. - MIB->addAnnotation(Instruction, "Size", static_cast(Size)); + MIB->setSize(Instruction, Size); } addInstruction(Offset, std::move(Instruction)); @@ -4353,10 +4353,11 @@ MCInst *BinaryFunction::getInstructionAtOffset(uint64_t Offset) { } if (MCInst *LastInstr = BB->getLastNonPseudoInstr()) { - const uint32_t Size = - BC.MIB->getAnnotationWithDefault(*LastInstr, "Size"); - if (BB->getEndOffset() - Offset == Size) - return LastInstr; + if (std::optional Size = BC.MIB->getSize(*LastInstr)) { + if (BB->getEndOffset() - Offset == Size) { + return LastInstr; + } + } } return nullptr; diff --git a/bolt/lib/Core/MCPlusBuilder.cpp b/bolt/lib/Core/MCPlusBuilder.cpp index 8c8ebe7d9830d2..44e5f88d8950fc 100644 --- a/bolt/lib/Core/MCPlusBuilder.cpp +++ b/bolt/lib/Core/MCPlusBuilder.cpp @@ -279,6 +279,17 @@ bool MCPlusBuilder::setLabel(MCInst &Inst, MCSymbol *Label) const { return true; } +std::optional MCPlusBuilder::getSize(const MCInst &Inst) const { + if (std::optional Value = + getAnnotationOpValue(Inst, MCAnnotation::kSize)) + return static_cast(*Value); + return std::nullopt; +} + +void MCPlusBuilder::setSize(MCInst &Inst, uint32_t Size) const { + setAnnotationOpValue(Inst, MCAnnotation::kSize, Size); +} + bool MCPlusBuilder::hasAnnotation(const MCInst &Inst, unsigned Index) const { return (bool)getAnnotationOpValue(Inst, Index); } diff --git a/bolt/lib/Profile/DataReader.cpp b/bolt/lib/Profile/DataReader.cpp index 64873e0de4bab5..3d4db6feb51a72 100644 --- a/bolt/lib/Profile/DataReader.cpp +++ b/bolt/lib/Profile/DataReader.cpp @@ -698,7 +698,8 @@ bool DataReader::recordBranch(BinaryFunction &BF, uint64_t From, uint64_t To, if (!BC.MIB->isNoop(Instr)) break; - Offset += BC.MIB->getAnnotationWithDefault(Instr, "Size"); + if (std::optional Size = BC.MIB->getSize(Instr)) + Offset += *Size; } if (To == Offset) diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index af100447786bb0..ce3402c5827ae6 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -3212,7 +3212,6 @@ void RewriteInstance::buildFunctionsCFG() { // Create annotation indices to allow lock-free execution BC->MIB->getOrCreateAnnotationIndex("JTIndexReg"); BC->MIB->getOrCreateAnnotationIndex("NOP"); - BC->MIB->getOrCreateAnnotationIndex("Size"); ParallelUtilities::WorkFuncWithAllocTy WorkFun = [&](BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId) {