Skip to content

Commit

Permalink
[X86] Resolve FIXME: Add FPCW as a rounding control register (llvm#82452
Browse files Browse the repository at this point in the history
)

To keep existing tests passing, an additional fix was required: we now
check whether the instruction following a wait-requiring instruction is a
call, and if so, insert the wait before it.
  • Loading branch information
AreaZR authored Mar 5, 2024
1 parent 81617f8 commit 3e40c96
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 29 deletions.
4 changes: 1 addition & 3 deletions llvm/lib/Target/X86/X86ISelLoweringCall.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -670,9 +670,7 @@ const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
}

ArrayRef<MCPhysReg> X86TargetLowering::getRoundingControlRegisters() const {
  // Registers that hold the dynamic floating-point rounding mode on x86:
  // the x87 FPU control word (FPCW) and the SSE control/status register
  // (MXCSR). Listing both ensures that instructions whose result depends on
  // the rounding mode are not reordered across writes to either register.
  static const MCPhysReg RCRegs[] = {X86::FPCW, X86::MXCSR};
  return RCRegs;
}

Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Target/X86/X86InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3443,6 +3443,11 @@ static bool isX87Reg(unsigned Reg) {

/// Check whether the given machine instruction is an X87 instruction.
bool X86::isX87Instruction(MachineInstr &MI) {
// Call defs X87 register, so we special case it here because
// otherwise calls are incorrectly flagged as x87 instructions
// as a result.
if (MI.isCall())
return false;
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg())
continue;
Expand Down
89 changes: 63 additions & 26 deletions llvm/test/CodeGen/X86/pr59305.ll
Original file line number Diff line number Diff line change
@@ -1,32 +1,69 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-pc-linux < %s | FileCheck %s
; RUN: llc -mtriple=x86_64-pc-linux < %s | FileCheck %s --check-prefix=X86-64
; RUN: llc -mtriple=i686-pc-linux < %s | FileCheck %s --check-prefix=X86

define double @foo(double %0) #0 {
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
; CHECK-NEXT: movl $1024, %edi # imm = 0x400
; CHECK-NEXT: callq fesetround@PLT
; CHECK-NEXT: movsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0]
; CHECK-NEXT: divsd (%rsp), %xmm1 # 8-byte Folded Reload
; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movl $1024, %edi # imm = 0x400
; CHECK-NEXT: callq fesetround@PLT
; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; CHECK-NEXT: divsd (%rsp), %xmm0 # 8-byte Folded Reload
; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movl $1024, %edi # imm = 0x400
; CHECK-NEXT: callq fesetround@PLT
; CHECK-NEXT: movsd {{.*#+}} xmm2 = [1.0E+0,0.0E+0]
; CHECK-NEXT: divsd (%rsp), %xmm2 # 8-byte Folded Reload
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero
; CHECK-NEXT: callq fma@PLT
; CHECK-NEXT: addq $24, %rsp
; CHECK-NEXT: retq
; X86-64-LABEL: foo:
; X86-64: # %bb.0:
; X86-64-NEXT: subq $24, %rsp
; X86-64-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
; X86-64-NEXT: movl $1024, %edi # imm = 0x400
; X86-64-NEXT: callq fesetround@PLT
; X86-64-NEXT: movsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0]
; X86-64-NEXT: divsd (%rsp), %xmm1 # 8-byte Folded Reload
; X86-64-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X86-64-NEXT: movl $1024, %edi # imm = 0x400
; X86-64-NEXT: callq fesetround@PLT
; X86-64-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0]
; X86-64-NEXT: divsd (%rsp), %xmm0 # 8-byte Folded Reload
; X86-64-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X86-64-NEXT: movl $1024, %edi # imm = 0x400
; X86-64-NEXT: callq fesetround@PLT
; X86-64-NEXT: movsd {{.*#+}} xmm2 = [1.0E+0,0.0E+0]
; X86-64-NEXT: divsd (%rsp), %xmm2 # 8-byte Folded Reload
; X86-64-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
; X86-64-NEXT: # xmm0 = mem[0],zero
; X86-64-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; X86-64-NEXT: # xmm1 = mem[0],zero
; X86-64-NEXT: callq fma@PLT
; X86-64-NEXT: addq $24, %rsp
; X86-64-NEXT: retq
;
; X86-LABEL: foo:
; X86: # %bb.0:
; X86-NEXT: subl $60, %esp
; X86-NEXT: fldl {{[0-9]+}}(%esp)
; X86-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
; X86-NEXT: wait
; X86-NEXT: movl $1024, (%esp) # imm = 0x400
; X86-NEXT: calll fesetround@PLT
; X86-NEXT: fld1
; X86-NEXT: fstl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
; X86-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
; X86-NEXT: fdivrp %st, %st(1)
; X86-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
; X86-NEXT: wait
; X86-NEXT: movl $1024, (%esp) # imm = 0x400
; X86-NEXT: calll fesetround@PLT
; X86-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
; X86-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
; X86-NEXT: fdivp %st, %st(1)
; X86-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill
; X86-NEXT: wait
; X86-NEXT: movl $1024, (%esp) # imm = 0x400
; X86-NEXT: calll fesetround@PLT
; X86-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
; X86-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
; X86-NEXT: fdivp %st, %st(1)
; X86-NEXT: fstpl {{[0-9]+}}(%esp)
; X86-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
; X86-NEXT: fstpl {{[0-9]+}}(%esp)
; X86-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload
; X86-NEXT: fstpl (%esp)
; X86-NEXT: wait
; X86-NEXT: calll fma
; X86-NEXT: addl $60, %esp
; X86-NEXT: retl
%2 = call i32 @fesetround(i32 noundef 1024)
%3 = call double @llvm.experimental.constrained.fdiv.f64(double 1.000000e+00, double %0, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0
%4 = call i32 @fesetround(i32 noundef 1024)
Expand Down

0 comments on commit 3e40c96

Please sign in to comment.