From 3e40c96d8970f8a52a1f711b4f28aec5cb13e89e Mon Sep 17 00:00:00 2001 From: AtariDreams <83477269+AtariDreams@users.noreply.github.com> Date: Mon, 4 Mar 2024 19:47:05 -0500 Subject: [PATCH] [X86] Resolve FIXME: Add FPCW as a rounding control register (#82452) To prevent tests from breaking, another fix had to be made: Now, we check if the instruction after a waiting instruction is a call, and if so, we insert the wait. --- llvm/lib/Target/X86/X86ISelLoweringCall.cpp | 4 +- llvm/lib/Target/X86/X86InstrInfo.cpp | 5 ++ llvm/test/CodeGen/X86/pr59305.ll | 89 +++++++++++++++------ 3 files changed, 69 insertions(+), 29 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp index be8275c92e11ae..c7ef11aede886a 100644 --- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp +++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp @@ -670,9 +670,7 @@ const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const { } ArrayRef<MCPhysReg> X86TargetLowering::getRoundingControlRegisters() const { - // FIXME: We should def X86::FPCW for x87 as well. But it affects a lot of lit - // tests at the moment, which is not what we expected. - static const MCPhysReg RCRegs[] = {X86::MXCSR}; + static const MCPhysReg RCRegs[] = {X86::FPCW, X86::MXCSR}; return RCRegs; } diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index 0f21880f6df90c..25be1e2f68339f 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -3443,6 +3443,11 @@ static bool isX87Reg(unsigned Reg) { /// check if the instruction is X87 instruction bool X86::isX87Instruction(MachineInstr &MI) { + // Call defs X87 register, so we special case it here because + // otherwise calls are incorrectly flagged as x87 instructions + // as a result. 
+ if (MI.isCall()) + return false; for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg()) continue; diff --git a/llvm/test/CodeGen/X86/pr59305.ll b/llvm/test/CodeGen/X86/pr59305.ll index c2f6d21a41d4dc..4172aa6204def2 100644 --- a/llvm/test/CodeGen/X86/pr59305.ll +++ b/llvm/test/CodeGen/X86/pr59305.ll @@ -1,32 +1,69 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=x86_64-pc-linux < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-pc-linux < %s | FileCheck %s --check-prefix=X86-64 +; RUN: llc -mtriple=i686-pc-linux < %s | FileCheck %s --check-prefix=X86 define double @foo(double %0) #0 { -; CHECK-LABEL: foo: -; CHECK: # %bb.0: -; CHECK-NEXT: subq $24, %rsp -; CHECK-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill -; CHECK-NEXT: movl $1024, %edi # imm = 0x400 -; CHECK-NEXT: callq fesetround@PLT -; CHECK-NEXT: movsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0] -; CHECK-NEXT: divsd (%rsp), %xmm1 # 8-byte Folded Reload -; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: movl $1024, %edi # imm = 0x400 -; CHECK-NEXT: callq fesetround@PLT -; CHECK-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0] -; CHECK-NEXT: divsd (%rsp), %xmm0 # 8-byte Folded Reload -; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: movl $1024, %edi # imm = 0x400 -; CHECK-NEXT: callq fesetround@PLT -; CHECK-NEXT: movsd {{.*#+}} xmm2 = [1.0E+0,0.0E+0] -; CHECK-NEXT: divsd (%rsp), %xmm2 # 8-byte Folded Reload -; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload -; CHECK-NEXT: # xmm0 = mem[0],zero -; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload -; CHECK-NEXT: # xmm1 = mem[0],zero -; CHECK-NEXT: callq fma@PLT -; CHECK-NEXT: addq $24, %rsp -; CHECK-NEXT: retq +; X86-64-LABEL: foo: +; X86-64: # %bb.0: +; X86-64-NEXT: subq $24, %rsp +; X86-64-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill +; X86-64-NEXT: movl $1024, %edi # imm = 0x400 +; X86-64-NEXT: callq fesetround@PLT +; 
X86-64-NEXT: movsd {{.*#+}} xmm1 = [1.0E+0,0.0E+0] +; X86-64-NEXT: divsd (%rsp), %xmm1 # 8-byte Folded Reload +; X86-64-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X86-64-NEXT: movl $1024, %edi # imm = 0x400 +; X86-64-NEXT: callq fesetround@PLT +; X86-64-NEXT: movsd {{.*#+}} xmm0 = [1.0E+0,0.0E+0] +; X86-64-NEXT: divsd (%rsp), %xmm0 # 8-byte Folded Reload +; X86-64-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X86-64-NEXT: movl $1024, %edi # imm = 0x400 +; X86-64-NEXT: callq fesetround@PLT +; X86-64-NEXT: movsd {{.*#+}} xmm2 = [1.0E+0,0.0E+0] +; X86-64-NEXT: divsd (%rsp), %xmm2 # 8-byte Folded Reload +; X86-64-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; X86-64-NEXT: # xmm0 = mem[0],zero +; X86-64-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload +; X86-64-NEXT: # xmm1 = mem[0],zero +; X86-64-NEXT: callq fma@PLT +; X86-64-NEXT: addq $24, %rsp +; X86-64-NEXT: retq +; +; X86-LABEL: foo: +; X86: # %bb.0: +; X86-NEXT: subl $60, %esp +; X86-NEXT: fldl {{[0-9]+}}(%esp) +; X86-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill +; X86-NEXT: wait +; X86-NEXT: movl $1024, (%esp) # imm = 0x400 +; X86-NEXT: calll fesetround@PLT +; X86-NEXT: fld1 +; X86-NEXT: fstl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill +; X86-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload +; X86-NEXT: fdivrp %st, %st(1) +; X86-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill +; X86-NEXT: wait +; X86-NEXT: movl $1024, (%esp) # imm = 0x400 +; X86-NEXT: calll fesetround@PLT +; X86-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload +; X86-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload +; X86-NEXT: fdivp %st, %st(1) +; X86-NEXT: fstpl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Spill +; X86-NEXT: wait +; X86-NEXT: movl $1024, (%esp) # imm = 0x400 +; X86-NEXT: calll fesetround@PLT +; X86-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload +; X86-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte 
Folded Reload +; X86-NEXT: fdivp %st, %st(1) +; X86-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload +; X86-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-NEXT: fldl {{[-0-9]+}}(%e{{[sb]}}p) # 8-byte Folded Reload +; X86-NEXT: fstpl (%esp) +; X86-NEXT: wait +; X86-NEXT: calll fma +; X86-NEXT: addl $60, %esp +; X86-NEXT: retl %2 = call i32 @fesetround(i32 noundef 1024) %3 = call double @llvm.experimental.constrained.fdiv.f64(double 1.000000e+00, double %0, metadata !"round.dynamic", metadata !"fpexcept.ignore") #0 %4 = call i32 @fesetround(i32 noundef 1024)