Skip to content

Commit

Permalink
[Safe Buffers] Serialize unsafe_buffer_usage pragmas (llvm#92031)
Browse files Browse the repository at this point in the history
The commit adds serialization and de-serialization implementations for
the stored regions. Basically, the serialized representation of the
regions of a PP is a (ordered) sequence of source location encodings.
For de-serialization, regions from loaded files are stored by their ASTs.
When later one queries if a loaded location L is in an opt-out
region, PP looks up the regions of the loaded AST where L is at.

(Background if helps: a pair of `#pragma clang unsafe_buffer_usage begin/end` pragmas marks a
warning-opt-out region. The begin and end locations (opt-out regions)
are stored in preprocessor instances (PP) and will be queried by the
`-Wunsafe-buffer-usage` analyzer.)

The reported issue at upstream: llvm#90501
rdar://124035402
  • Loading branch information
ziqingluo-90 authored Jun 14, 2024
1 parent 53dbc1f commit 2e7b95e
Show file tree
Hide file tree
Showing 12 changed files with 477 additions and 24 deletions.
5 changes: 5 additions & 0 deletions clang/include/clang/Basic/SourceManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -1676,6 +1676,11 @@ class SourceManager : public RefCountedBase<SourceManager> {
isInTheSameTranslationUnit(std::pair<FileID, unsigned> &LOffs,
std::pair<FileID, unsigned> &ROffs) const;

/// \param Loc a source location in a loaded AST (of a PCH/Module file).
/// \returns a FileID uniquely identifies the AST of a loaded
/// module/PCH where `Loc` is at.
FileID getUniqueLoadedASTFileID(SourceLocation Loc) const;

/// Determines whether the two decomposed source location is in the same TU.
bool isInTheSameTranslationUnitImpl(
const std::pair<FileID, unsigned> &LOffs,
Expand Down
52 changes: 47 additions & 5 deletions clang/include/clang/Lex/Preprocessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -2883,11 +2883,41 @@ class Preprocessor {
/// otherwise.
SourceLocation CurrentSafeBufferOptOutStart; // It is used to report the start location of an never-closed region.

// An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in one
// translation unit. Each region is represented by a pair of start and end
// locations. A region is "open" if its' start and end locations are
// identical.
SmallVector<std::pair<SourceLocation, SourceLocation>, 8> SafeBufferOptOutMap;
using SafeBufferOptOutRegionsTy =
SmallVector<std::pair<SourceLocation, SourceLocation>, 16>;
// An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in this
// translation unit. Each region is represented by a pair of start and
// end locations.
SafeBufferOptOutRegionsTy SafeBufferOptOutMap;

// The "-Wunsafe-buffer-usage" opt-out regions in loaded ASTs. We use the
// following structure to manage them by their ASTs.
struct {
// A map from unique IDs to region maps of loaded ASTs. The ID identifies a
// loaded AST. See `SourceManager::getUniqueLoadedASTID`.
llvm::DenseMap<FileID, SafeBufferOptOutRegionsTy> LoadedRegions;

// Returns a reference to the safe buffer opt-out regions of the loaded
// AST where `Loc` belongs to. (Construct if absent)
SafeBufferOptOutRegionsTy &
findAndConsLoadedOptOutMap(SourceLocation Loc, SourceManager &SrcMgr) {
return LoadedRegions[SrcMgr.getUniqueLoadedASTFileID(Loc)];
}

// Returns a reference to the safe buffer opt-out regions of the loaded
// AST where `Loc` belongs to. (This const function returns nullptr if
// absent.)
const SafeBufferOptOutRegionsTy *
lookupLoadedOptOutMap(SourceLocation Loc,
const SourceManager &SrcMgr) const {
FileID FID = SrcMgr.getUniqueLoadedASTFileID(Loc);
auto Iter = LoadedRegions.find(FID);

if (Iter == LoadedRegions.end())
return nullptr;
return &Iter->getSecond();
}
} LoadedSafeBufferOptOutMap;

public:
/// \return true iff the given `Loc` is in a "-Wunsafe-buffer-usage" opt-out
Expand Down Expand Up @@ -2918,6 +2948,18 @@ class Preprocessor {
/// opt-out region
bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc);

/// \return a sequence of SourceLocations representing ordered opt-out regions
/// specified by
/// `\#pragma clang unsafe_buffer_usage begin/end`s of this translation unit.
SmallVector<SourceLocation, 64> serializeSafeBufferOptOutMap() const;

/// \param SrcLocSeqs a sequence of SourceLocations deserialized from a
/// record of code `PP_UNSAFE_BUFFER_USAGE`.
/// \return true iff the `Preprocessor` has been updated; false `Preprocessor`
/// is same as itself before the call.
bool setDeserializedSafeBufferOptOutMap(
const SmallVectorImpl<SourceLocation> &SrcLocSeqs);

private:
/// Helper functions to forward lexing to the actual lexer. They all share the
/// same signature.
Expand Down
3 changes: 3 additions & 0 deletions clang/include/clang/Serialization/ASTBitCodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -694,6 +694,9 @@ enum ASTRecordTypes {
/// Record code for lexical and visible block for delayed namespace in
/// reduced BMI.
DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD = 68,

/// Record code for \#pragma clang unsafe_buffer_usage begin/end
PP_UNSAFE_BUFFER_USAGE = 69,
};

/// Record types used within a source manager block.
Expand Down
18 changes: 18 additions & 0 deletions clang/lib/Basic/SourceManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1915,6 +1915,24 @@ SourceManager::getDecomposedIncludedLoc(FileID FID) const {
return DecompLoc;
}

FileID SourceManager::getUniqueLoadedASTFileID(SourceLocation Loc) const {
assert(isLoadedSourceLocation(Loc) &&
"Must be a source location in a loaded PCH/Module file");

auto [FID, Ignore] = getDecomposedLoc(Loc);
// `LoadedSLocEntryAllocBegin` stores the sorted lowest FID of each loaded
// allocation. Later allocations have lower FileIDs. The call below is to find
// the lowest FID of a loaded allocation from any FID in the same allocation.
// The lowest FID is used to identify a loaded allocation.
const FileID *FirstFID =
llvm::lower_bound(LoadedSLocEntryAllocBegin, FID, std::greater<FileID>{});

assert(FirstFID &&
"The failure to find the first FileID of a "
"loaded AST from a loaded source location was unexpected.");
return *FirstFID;
}

bool SourceManager::isInTheSameTranslationUnitImpl(
const std::pair<FileID, unsigned> &LOffs,
const std::pair<FileID, unsigned> &ROffs) const {
Expand Down
110 changes: 91 additions & 19 deletions clang/lib/Lex/Preprocessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/Capacity.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
Expand Down Expand Up @@ -1483,26 +1484,56 @@ void Preprocessor::emitFinalMacroWarning(const Token &Identifier,
}

bool Preprocessor::isSafeBufferOptOut(const SourceManager &SourceMgr,
const SourceLocation &Loc) const {
// Try to find a region in `SafeBufferOptOutMap` where `Loc` is in:
auto FirstRegionEndingAfterLoc = llvm::partition_point(
SafeBufferOptOutMap,
[&SourceMgr,
&Loc](const std::pair<SourceLocation, SourceLocation> &Region) {
return SourceMgr.isBeforeInTranslationUnit(Region.second, Loc);
});
const SourceLocation &Loc) const {
// The lambda that tests if a `Loc` is in an opt-out region given one opt-out
// region map:
auto TestInMap = [&SourceMgr](const SafeBufferOptOutRegionsTy &Map,
const SourceLocation &Loc) -> bool {
// Try to find a region in `SafeBufferOptOutMap` where `Loc` is in:
auto FirstRegionEndingAfterLoc = llvm::partition_point(
Map, [&SourceMgr,
&Loc](const std::pair<SourceLocation, SourceLocation> &Region) {
return SourceMgr.isBeforeInTranslationUnit(Region.second, Loc);
});

if (FirstRegionEndingAfterLoc != Map.end()) {
// To test if the start location of the found region precedes `Loc`:
return SourceMgr.isBeforeInTranslationUnit(
FirstRegionEndingAfterLoc->first, Loc);
}
// If we do not find a region whose end location passes `Loc`, we want to
// check if the current region is still open:
if (!Map.empty() && Map.back().first == Map.back().second)
return SourceMgr.isBeforeInTranslationUnit(Map.back().first, Loc);
return false;
};

if (FirstRegionEndingAfterLoc != SafeBufferOptOutMap.end()) {
// To test if the start location of the found region precedes `Loc`:
return SourceMgr.isBeforeInTranslationUnit(FirstRegionEndingAfterLoc->first,
Loc);
}
// If we do not find a region whose end location passes `Loc`, we want to
// check if the current region is still open:
if (!SafeBufferOptOutMap.empty() &&
SafeBufferOptOutMap.back().first == SafeBufferOptOutMap.back().second)
return SourceMgr.isBeforeInTranslationUnit(SafeBufferOptOutMap.back().first,
Loc);
// What the following does:
//
// If `Loc` belongs to the local TU, we just look up `SafeBufferOptOutMap`.
// Otherwise, `Loc` is from a loaded AST. We look up the
// `LoadedSafeBufferOptOutMap` first to get the opt-out region map of the
// loaded AST where `Loc` is at. Then we find if `Loc` is in an opt-out
// region w.r.t. the region map. If the region map is absent, it means there
// is no opt-out pragma in that loaded AST.
//
// Opt-out pragmas in the local TU or a loaded AST is not visible to another
// one of them. That means if you put the pragmas around a `#include
// "module.h"`, where module.h is a module, it is not actually suppressing
// warnings in module.h. This is fine because warnings in module.h will be
// reported when module.h is compiled in isolation and nothing in module.h
// will be analyzed ever again. So you will not see warnings from the file
// that imports module.h anyway. And you can't even do the same thing for PCHs
// because they can only be included from the command line.

if (SourceMgr.isLocalSourceLocation(Loc))
return TestInMap(SafeBufferOptOutMap, Loc);

const SafeBufferOptOutRegionsTy *LoadedRegions =
LoadedSafeBufferOptOutMap.lookupLoadedOptOutMap(Loc, SourceMgr);

if (LoadedRegions)
return TestInMap(*LoadedRegions, Loc);
return false;
}

Expand Down Expand Up @@ -1551,6 +1582,47 @@ bool Preprocessor::isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc) {
return InSafeBufferOptOutRegion;
}

SmallVector<SourceLocation, 64>
Preprocessor::serializeSafeBufferOptOutMap() const {
assert(!InSafeBufferOptOutRegion &&
"Attempt to serialize safe buffer opt-out regions before file being "
"completely preprocessed");

SmallVector<SourceLocation, 64> SrcSeq;

for (const auto &[begin, end] : SafeBufferOptOutMap) {
SrcSeq.push_back(begin);
SrcSeq.push_back(end);
}
// Only `SafeBufferOptOutMap` gets serialized. No need to serialize
// `LoadedSafeBufferOptOutMap` because if this TU loads a pch/module, every
// pch/module in the pch-chain/module-DAG will be loaded one by one in order.
// It means that for each loading pch/module m, it just needs to load m's own
// `SafeBufferOptOutMap`.
return SrcSeq;
}

bool Preprocessor::setDeserializedSafeBufferOptOutMap(
const SmallVectorImpl<SourceLocation> &SourceLocations) {
if (SourceLocations.size() == 0)
return false;

assert(SourceLocations.size() % 2 == 0 &&
"ill-formed SourceLocation sequence");

auto It = SourceLocations.begin();
SafeBufferOptOutRegionsTy &Regions =
LoadedSafeBufferOptOutMap.findAndConsLoadedOptOutMap(*It, SourceMgr);

do {
SourceLocation Begin = *It++;
SourceLocation End = *It++;

Regions.emplace_back(Begin, End);
} while (It != SourceLocations.end());
return true;
}

ModuleLoader::~ModuleLoader() = default;

CommentHandler::~CommentHandler() = default;
Expand Down
11 changes: 11 additions & 0 deletions clang/lib/Serialization/ASTReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3583,6 +3583,17 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
break;
}

case PP_UNSAFE_BUFFER_USAGE: {
if (!Record.empty()) {
SmallVector<SourceLocation, 64> SrcLocs;
unsigned Idx = 0;
while (Idx < Record.size())
SrcLocs.push_back(ReadSourceLocation(F, Record, Idx));
PP.setDeserializedSafeBufferOptOutMap(SrcLocs);
}
break;
}

case PP_CONDITIONAL_STACK:
if (!Record.empty()) {
unsigned Idx = 0, End = Record.size() - 1;
Expand Down
7 changes: 7 additions & 0 deletions clang/lib/Serialization/ASTWriter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -926,6 +926,7 @@ void ASTWriter::WriteBlockInfoBlock() {
RECORD(PP_CONDITIONAL_STACK);
RECORD(DECLS_TO_CHECK_FOR_DEFERRED_DIAGS);
RECORD(PP_ASSUME_NONNULL_LOC);
RECORD(PP_UNSAFE_BUFFER_USAGE);

// SourceManager Block.
BLOCK(SOURCE_MANAGER_BLOCK);
Expand Down Expand Up @@ -2518,6 +2519,12 @@ void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) {
Record.clear();
}

// Write the safe buffer opt-out region map in PP
for (SourceLocation &S : PP.serializeSafeBufferOptOutMap())
AddSourceLocation(S, Record);
Stream.EmitRecord(PP_UNSAFE_BUFFER_USAGE, Record);
Record.clear();

// Enter the preprocessor block.
Stream.EnterSubblock(PREPROCESSOR_BLOCK_ID, 3);

Expand Down
Loading

0 comments on commit 2e7b95e

Please sign in to comment.