diff --git a/llvm/lib/Target/AArch64/SMEABIPass.cpp b/llvm/lib/Target/AArch64/SMEABIPass.cpp index 72e87a663fceb1..c813d92ec85b7a 100644 --- a/llvm/lib/Target/AArch64/SMEABIPass.cpp +++ b/llvm/lib/Target/AArch64/SMEABIPass.cpp @@ -112,6 +112,12 @@ bool SMEABI::updateNewZAFunctions(Module *M, Function *F, Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_za_enable); Builder.CreateCall(EnableZAIntr->getFunctionType(), EnableZAIntr); + // ZA state must be zeroed upon entry to a function with NewZA + Function *ZeroIntr = + Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_zero); + Builder.CreateCall(ZeroIntr->getFunctionType(), ZeroIntr, + Builder.getInt32(0xff)); + // Before returning, disable pstate.za for (BasicBlock &BB : *F) { Instruction *T = BB.getTerminator(); diff --git a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll index 848cc9903b3441..98a8769afea851 100644 --- a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll +++ b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll @@ -229,6 +229,7 @@ define double @za_new_caller_to_za_shared_callee(double %x) nounwind noinline o ; CHECK-COMMON-NEXT: b .LBB6_2 ; CHECK-COMMON-NEXT: .LBB6_2: // %entry ; CHECK-COMMON-NEXT: smstart za +; CHECK-COMMON-NEXT: zero {za} ; CHECK-COMMON-NEXT: bl za_shared_callee ; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000 ; CHECK-COMMON-NEXT: fmov d1, x8 diff --git a/llvm/test/CodeGen/AArch64/sme-new-za-function.ll b/llvm/test/CodeGen/AArch64/sme-new-za-function.ll index 54ef5fd432755f..0cee26dbb349ed 100644 --- a/llvm/test/CodeGen/AArch64/sme-new-za-function.ll +++ b/llvm/test/CodeGen/AArch64/sme-new-za-function.ll @@ -15,6 +15,7 @@ define void @private_za() "aarch64_pstate_za_new" { ; CHECK-NEXT: br label [[TMP0]] ; CHECK: 0: ; CHECK-NEXT: call void @llvm.aarch64.sme.za.enable() +; CHECK-NEXT: call void @llvm.aarch64.sme.zero(i32 255) ; CHECK-NEXT: call void @shared_za_callee() ; CHECK-NEXT: call void @llvm.aarch64.sme.za.disable() ; CHECK-NEXT: ret void @@ -35,6 +36,7 @@ define i32 @private_za_multiple_exit(i32 %a, i32 %b, i64 %cond) "aarch64_pstate_ ; CHECK-NEXT: br label [[ENTRY]] ; CHECK: entry: ; CHECK-NEXT: call void @llvm.aarch64.sme.za.enable() +; CHECK-NEXT: call void @llvm.aarch64.sme.zero(i32 255) ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[COND:%.*]], 1 ; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_ELSE:%.*]], label [[IF_END:%.*]] ; CHECK: if.else: