Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

PPU LLVM: Fix optimization of branch string #14620

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 38 additions & 15 deletions rpcs3/Emu/Cell/PPUTranslator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,7 @@ Function* PPUTranslator::Translate(const ppu_function& info)
if (!m_ir->GetInsertBlock()->getTerminator())
{
FlushRegisters();
CallFunction(m_addr);
CallFunction(m_addr, nullptr, m_ir->GetInsertBlock());
}
}

Expand Down Expand Up @@ -354,7 +354,7 @@ Value* PPUTranslator::RotateLeft(Value* arg, Value* n)
return m_ir->CreateOr(m_ir->CreateShl(arg, m_ir->CreateAnd(n, mask)), m_ir->CreateLShr(arg, m_ir->CreateAnd(m_ir->CreateNeg(n), mask)));
}

void PPUTranslator::CallFunction(u64 target, Value* indirect)
void PPUTranslator::CallFunction(u64 target, Value* indirect, BasicBlock* prev_block)
{
const auto type = m_function->getFunctionType();
const auto block = m_ir->GetInsertBlock();
Expand All @@ -372,21 +372,27 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect)

if (_target >= caddr && _target <= cend)
{
std::unordered_set<u64> passed_targets{_target};
std::unordered_set<u64> passed_targets;

u32 target_last = _target;

// Try to follow unconditional branches as long as there is no infinite loop
while (target_last != _target)
while (target_last != m_addr + base)
{
if (passed_targets.empty())
{
passed_targets.emplace(_target);
passed_targets.emplace(m_addr + base);
}

const ppu_opcode_t op{*ensure(m_info.get_ptr<u32>(target_last))};
const ppu_itype::type itype = g_ppu_itype.decode(op.opcode);

if (((itype == ppu_itype::BC && (op.bo & 0x14) == 0x14) || itype == ppu_itype::B) && !op.lk)
{
const u32 new_target = (op.aa ? 0 : target_last) + (itype == ppu_itype::B ? +op.bt24 : +op.bt14);

if (target_last >= caddr && target_last <= cend)
if (new_target >= caddr && new_target <= cend)
{
if (passed_targets.emplace(new_target).second)
{
Expand All @@ -401,11 +407,21 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect)

// Odd destination
}
else if (itype == ppu_itype::BCLR && (op.bo & 0x14) == 0x14 && !op.lk)
else if (itype == ppu_itype::BCLR && (op.bo & 0x14) == 0x14 && !op.lk && (prev_block || m_lr))
{
// Special case: empty function
// In this case the branch can be treated as BCLR because the previous CIA does not matter
indirect = RegLoad(m_lr);
indirect = m_lr;

if (!indirect)
{
// Emit the register load at the beginning of the common block
m_ir->SetInsertPoint(prev_block, prev_block->getFirstInsertionPt());
indirect = RegLoad(m_lr);

// Restore current insert point
m_ir->SetInsertPoint(block);
}
}

break;
Expand Down Expand Up @@ -629,7 +645,7 @@ Value* PPUTranslator::Trunc(Value* value, Type* type)
return type != value->getType() ? m_ir->CreateTrunc(value, type) : value;
}

void PPUTranslator::UseCondition(MDNode* hint, Value* cond)
void PPUTranslator::UseCondition(MDNode* hint, Value* cond, BasicBlock* prev_block)
{
FlushRegisters();

Expand All @@ -639,7 +655,7 @@ void PPUTranslator::UseCondition(MDNode* hint, Value* cond)
const auto next = BasicBlock::Create(m_context, "__next", m_function);
m_ir->CreateCondBr(cond, local, next, hint);
m_ir->SetInsertPoint(next);
CallFunction(m_addr + 4);
CallFunction(m_addr + 4, nullptr, prev_block);
m_ir->SetInsertPoint(local);
}
}
Expand Down Expand Up @@ -2024,19 +2040,22 @@ void PPUTranslator::BC(ppu_opcode_t op)
const s32 bt14 = op.bt14; // Workaround for VS 16.5
const u64 target = (op.aa ? 0 : m_addr) + bt14;

const auto block = m_ir->GetInsertBlock();

if (op.aa && m_reloc)
{
CompilationError("Branch with absolute address");
}

if (op.lk)
{
m_ir->CreateStore(GetAddr(+4), m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(&m_lr - m_locals)));
m_lr = GetAddr(+4);
m_ir->CreateStore(m_lr, m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(&m_lr - m_locals)));
}

UseCondition(CheckBranchProbability(op.bo), CheckBranchCondition(op.bo, op.bi));
UseCondition(CheckBranchProbability(op.bo), CheckBranchCondition(op.bo, op.bi), block);

CallFunction(target);
CallFunction(target, nullptr, block);
}

void PPUTranslator::SC(ppu_opcode_t op)
Expand Down Expand Up @@ -2074,6 +2093,8 @@ void PPUTranslator::B(ppu_opcode_t op)
const s32 bt24 = op.bt24; // Workaround for VS 16.5
const u64 target = (op.aa ? 0 : m_addr) + bt24;

const auto block = m_ir->GetInsertBlock();

if (op.aa && m_reloc)
{
CompilationError("Branch with absolute address");
Expand All @@ -2085,7 +2106,7 @@ void PPUTranslator::B(ppu_opcode_t op)
}

FlushRegisters();
CallFunction(target);
CallFunction(target, nullptr, block);
}

void PPUTranslator::MCRF(ppu_opcode_t op)
Expand All @@ -2103,7 +2124,8 @@ void PPUTranslator::BCLR(ppu_opcode_t op)

if (op.lk)
{
m_ir->CreateStore(GetAddr(+4), m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(&m_lr - m_locals)));
m_lr = GetAddr(+4);
m_ir->CreateStore(m_lr, m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(&m_lr - m_locals)));
}

UseCondition(CheckBranchProbability(op.bo), CheckBranchCondition(op.bo, op.bi));
Expand Down Expand Up @@ -2166,7 +2188,8 @@ void PPUTranslator::BCCTR(ppu_opcode_t op)

if (op.lk)
{
m_ir->CreateStore(GetAddr(+4), m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(&m_lr - m_locals)));
m_lr = GetAddr(+4);
m_ir->CreateStore(m_lr, m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(&m_lr - m_locals)));
}

UseCondition(CheckBranchProbability(op.bo | 0x4), CheckBranchCondition(op.bo | 0x4, op.bi));
Expand Down
4 changes: 2 additions & 2 deletions rpcs3/Emu/Cell/PPUTranslator.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ class PPUTranslator final : public cpu_translator
llvm::Value* RotateLeft(llvm::Value* arg, llvm::Value* n);

// Emit function call
void CallFunction(u64 target, llvm::Value* indirect = nullptr);
void CallFunction(u64 target, llvm::Value* indirect = nullptr, llvm::BasicBlock* prev_block = nullptr);

// Initialize global for writing
llvm::Value* RegInit(llvm::Value*& local);
Expand Down Expand Up @@ -292,7 +292,7 @@ class PPUTranslator final : public cpu_translator
llvm::MDNode* CheckBranchProbability(u32 bo);

// Branch to next instruction if condition failed, never branch on nullptr
void UseCondition(llvm::MDNode* hint, llvm::Value* = nullptr);
void UseCondition(llvm::MDNode* hint, llvm::Value* = nullptr, llvm::BasicBlock* prev_block = nullptr);

// Get memory pointer
llvm::Value* GetMemory(llvm::Value* addr);
Expand Down