From cb76ac7de5096e2689d379544bfe00ecebf26c0f Mon Sep 17 00:00:00 2001 From: TB Schardl Date: Tue, 14 Jan 2025 20:35:51 -0500 Subject: [PATCH 1/3] [LoopSpawning] Fix generation of parallel divide-and-conquer function to avoid hoisting syncregions used within a taskframe inserted by loop-spawning outside of that taskframe. Improve logic for finding external uses of taskframes to fixup. --- llvm/lib/Transforms/Tapir/LoopSpawningTI.cpp | 21 +++--------- llvm/lib/Transforms/Utils/TapirUtils.cpp | 2 ++ .../Tapir/alloca-insert-split-taskframe.ll | 22 ++++-------- ...exception-spawn-in-parfor-loop-spawning.ll | 34 +++++++++++++++---- ...loop-spawning-nested-spawn-alloc-unwind.ll | 12 +++---- .../Tapir/loop-spawning-nested-spawn-alloc.ll | 2 +- 6 files changed, 47 insertions(+), 46 deletions(-) diff --git a/llvm/lib/Transforms/Tapir/LoopSpawningTI.cpp b/llvm/lib/Transforms/Tapir/LoopSpawningTI.cpp index c897623ebb49..4d4d0d4f6d82 100644 --- a/llvm/lib/Transforms/Tapir/LoopSpawningTI.cpp +++ b/llvm/lib/Transforms/Tapir/LoopSpawningTI.cpp @@ -688,23 +688,10 @@ void DACSpawning::implementDACIterSpawnOnHelper( SplitBlock(Preheader, &Preheader->front(), (DomTreeUpdater *)nullptr, nullptr, nullptr, Preheader->getName() + ".dac.head"); - // Move any syncregion_start's in DACHead into Preheader. - BasicBlock::iterator InsertPoint = Preheader->begin(); - for (BasicBlock::iterator I = DACHead->begin(), E = DACHead->end(); - I != E;) { - IntrinsicInst *II = dyn_cast(I++); - if (!II) - continue; - if (Intrinsic::syncregion_start != II->getIntrinsicID()) - continue; - - while (isa(I) && - Intrinsic::syncregion_start == - cast(I)->getIntrinsicID()) - ++I; - - Preheader->splice(InsertPoint, &*DACHead, II->getIterator(), I); - } + // Move the syncregion corresponding with the original loop into Preheader, + // so the new detach can use it. 
+ if (Instruction *SyncRegionI = dyn_cast(SyncRegion)) + SyncRegionI->moveBefore(&*Preheader->getFirstInsertionPt()); if (!Preheader->getTerminator()->getDebugLoc()) Preheader->getTerminator()->setDebugLoc( diff --git a/llvm/lib/Transforms/Utils/TapirUtils.cpp b/llvm/lib/Transforms/Utils/TapirUtils.cpp index e58b55afd4dc..6a90f70e7195 100644 --- a/llvm/lib/Transforms/Utils/TapirUtils.cpp +++ b/llvm/lib/Transforms/Utils/TapirUtils.cpp @@ -1816,6 +1816,8 @@ void llvm::fixupTaskFrameExternalUses(Spindle *TF, const TaskInfo &TI, // Examine all users of this instruction. for (Use &U : I.uses()) { + if (!DT.isReachableFromEntry(U)) + continue; // If we find a live use outside of the task, it's an output. if (Instruction *UI = dyn_cast(U.getUser())) { if (!taskFrameEncloses(TF, UI->getParent(), TI)) { diff --git a/llvm/test/Transforms/Tapir/alloca-insert-split-taskframe.ll b/llvm/test/Transforms/Tapir/alloca-insert-split-taskframe.ll index a2debb6481a2..a61ae1da8732 100644 --- a/llvm/test/Transforms/Tapir/alloca-insert-split-taskframe.ll +++ b/llvm/test/Transforms/Tapir/alloca-insert-split-taskframe.ll @@ -75,10 +75,9 @@ det.achd.i.i.ls2: ; preds = %invoke.cont61.tf.i. 
; CHECK-LABEL: define {{.*}}void @_Z6kmeansIFdRKN6parlay8sequenceIdNS0_9allocatorIdEELb0EEES6_EEDaRNS1_IS4_NS2_IS4_EELb0EEEiRT_d.outline_pfor.cond.i.i.i182.ls2(ptr %call.i.i.i.i31.i874.ls2) ; CHECK: invoke.cont61.tf.i.i.ls2: -; CHECK-NEXT: %[[FIXUP_ALLOCA:.+]] = alloca ptr ; CHECK-NEXT: call token @llvm.syncregion.start() ; CHECK-NEXT: call token @llvm.syncregion.start() -; CHECK-NEXT: call {{.*}}void @_Z6kmeansIFdRKN6parlay8sequenceIdNS0_9allocatorIdEELb0EEES6_EEDaRNS1_IS4_NS2_IS4_EELb0EEEiRT_d.outline_pfor.cond.i.i.i182.ls2.outline_invoke.cont61.tf.i.i.ls2.tf.otf0(ptr %call.i.i.i.i31.i874.ls2, ptr %[[FIXUP_ALLOCA]]) +; CHECK-NEXT: call {{.*}}void @_Z6kmeansIFdRKN6parlay8sequenceIdNS0_9allocatorIdEELb0EEES6_EEDaRNS1_IS4_NS2_IS4_EELb0EEEiRT_d.outline_pfor.cond.i.i.i182.ls2.outline_invoke.cont61.tf.i.i.ls2.tf.otf0(ptr %call.i.i.i.i31.i874.ls2) ; CHECK-NEXT: br label %sync.continue28.i.i1530.ls2.tfend ; CHECK: sync.continue28.i.i1530.ls2.tfend: @@ -89,12 +88,10 @@ det.achd.i.i.ls2: ; preds = %invoke.cont61.tf.i. 
; CHECK: call void @__cilkrts_detach( ; CHECK: ret void -; CHECK-LABEL: define {{.*}}void @_Z6kmeansIFdRKN6parlay8sequenceIdNS0_9allocatorIdEELb0EEES6_EEDaRNS1_IS4_NS2_IS4_EELb0EEEiRT_d.outline_pfor.cond.i.i.i182.ls2.outline_invoke.cont61.tf.i.i.ls2.tf.tf.otf1(ptr {{.*}}%call.i.i.i.i31.i874.ls2.otf1, -; CHECK: ptr {{.*}}%[[ARG:.+]]) +; CHECK-LABEL: define {{.*}}void @_Z6kmeansIFdRKN6parlay8sequenceIdNS0_9allocatorIdEELb0EEES6_EEDaRNS1_IS4_NS2_IS4_EELb0EEEiRT_d.outline_pfor.cond.i.i.i182.ls2.outline_invoke.cont61.tf.i.i.ls2.tf.tf.otf1(ptr {{.*}}%call.i.i.i.i31.i874.ls2.otf1) ; CHECK: invoke.cont61.tf.i.i.ls2.tf.tf.otf1: ; CHECK: %[[ADDR:.+]] = getelementptr %"struct.parlay::sequence_internal::sequence_base, false>::storage_impl::capacitated_buffer::header", ptr %call.i.i.i.i31.i874.ls2.otf1, i64 0, i32 1 -; CHECK-NEXT: store ptr %[[ADDR]], ptr %[[ARG]] ; CHECK-NEXT: call void @__cilkrts_enter_frame( ; CHECK-NEXT: %[[TAPIR_RT_START:.+]] = call token @llvm.tapir.runtime.start() ; CHECK-NEXT: call i32 @__cilk_prepare_spawn( @@ -136,22 +133,15 @@ det.achd.i.i.ls2: ; preds = %invoke.cont61.tf.i. 
; CHECK: ret void -; CHECK-LABEL: define {{.*}}void @_Z6kmeansIFdRKN6parlay8sequenceIdNS0_9allocatorIdEELb0EEES6_EEDaRNS1_IS4_NS2_IS4_EELb0EEEiRT_d.outline_pfor.cond.i.i.i182.ls2.outline_invoke.cont61.tf.i.i.ls2.tf.otf0(ptr align 1 %call.i.i.i.i31.i874.ls2.otf0, -; CHECK: ptr {{.*}}%[[ARG:.+]]) -; CHECK: %[[FIXUP_ALLOCA:.+]] = alloca ptr +; CHECK-LABEL: define {{.*}}void @_Z6kmeansIFdRKN6parlay8sequenceIdNS0_9allocatorIdEELb0EEES6_EEDaRNS1_IS4_NS2_IS4_EELb0EEEiRT_d.outline_pfor.cond.i.i.i182.ls2.outline_invoke.cont61.tf.i.i.ls2.tf.otf0(ptr align 1 %call.i.i.i.i31.i874.ls2.otf0) -; CHECK: call void @llvm.lifetime.start.p0(i64 8, ptr %[[FIXUP_ALLOCA]]) -; CHECK-NEXT: call void @__cilkrts_enter_frame( +; CHECK: call void @__cilkrts_enter_frame( ; CHECK-NEXT: %[[TAPIR_RT_START:.+]] = call token @llvm.tapir.runtime.start() ; CHECK-NEXT: call void @__cilk_parent_epilogue( ; CHECK-NEXT: call void @llvm.tapir.runtime.end(token %[[TAPIR_RT_START]]) -; CHECK-NEXT: call {{.*}}void @_Z6kmeansIFdRKN6parlay8sequenceIdNS0_9allocatorIdEELb0EEES6_EEDaRNS1_IS4_NS2_IS4_EELb0EEEiRT_d.outline_pfor.cond.i.i.i182.ls2.outline_invoke.cont61.tf.i.i.ls2.tf.tf.otf1(ptr %call.i.i.i.i31.i874.ls2.otf0, ptr %[[FIXUP_ALLOCA]]) - -; CHECK: %[[FIXUP_LOAD:.+]] = load ptr, ptr %[[FIXUP_ALLOCA]] -; CHECK-NEXT: store ptr %[[FIXUP_LOAD]], ptr %[[ARG]] -; CHECK-NEXT: call {{.*}}void @_Z6kmeansIFdRKN6parlay8sequenceIdNS0_9allocatorIdEELb0EEES6_EEDaRNS1_IS4_NS2_IS4_EELb0EEEiRT_d.outline_pfor.cond.i.i.i182.ls2.outline_invoke.cont.i.i.i.i.i266.i.i.ls2.tf.otf1() +; CHECK-NEXT: call {{.*}}void @_Z6kmeansIFdRKN6parlay8sequenceIdNS0_9allocatorIdEELb0EEES6_EEDaRNS1_IS4_NS2_IS4_EELb0EEEiRT_d.outline_pfor.cond.i.i.i182.ls2.outline_invoke.cont61.tf.i.i.ls2.tf.tf.otf1(ptr %call.i.i.i.i31.i874.ls2.otf0) -; CHECK: call void @llvm.lifetime.end.p0(i64 8, ptr %[[FIXUP_ALLOCA]]) +; CHECK: call {{.*}}void 
@_Z6kmeansIFdRKN6parlay8sequenceIdNS0_9allocatorIdEELb0EEES6_EEDaRNS1_IS4_NS2_IS4_EELb0EEEiRT_d.outline_pfor.cond.i.i.i182.ls2.outline_invoke.cont.i.i.i.i.i266.i.i.ls2.tf.otf1() ; CHECK: ret void diff --git a/llvm/test/Transforms/Tapir/exception-spawn-in-parfor-loop-spawning.ll b/llvm/test/Transforms/Tapir/exception-spawn-in-parfor-loop-spawning.ll index 94dd7fe5edad..c6b1d6d48c07 100644 --- a/llvm/test/Transforms/Tapir/exception-spawn-in-parfor-loop-spawning.ll +++ b/llvm/test/Transforms/Tapir/exception-spawn-in-parfor-loop-spawning.ll @@ -617,7 +617,6 @@ declare void @llvm.assume(i1) #6 declare i32 @llvm.tapir.loop.grainsize.i32(i32) #7 ; CHECK-LABEL: define internal fastcc void @_Z15parfor_trycatchi.outline_pfor.cond48.ls1( -; CHECK: %[[SYNCREG:.+]] = tail call token @llvm.syncregion.start() ; CHECK: %[[DACSYNCREG:.+]] = tail call token @llvm.syncregion.start() ; CHECK: detach within %[[DACSYNCREG]], label %[[DACSPAWN:.+]], label %[[DACCONT:.+]] @@ -626,6 +625,9 @@ declare i32 @llvm.tapir.loop.grainsize.i32(i32) #7 ; CHECK: call fastcc void @_Z15parfor_trycatchi.outline_pfor.cond48.ls1( ; CHECK-NEXT: reattach within %[[DACSYNCREG]], label %[[DACCONT]] +; CHECK: %[[LS_TF:.+]] = call token @llvm.taskframe.create() +; CHECK-NEXT: %[[SYNCREG:.+]] = tail call token @llvm.syncregion.start() + ; CHECK: pfor.body54.ls1: ; CHECK-NEXT: %[[TASKFRAME:.+]] = tail call token @llvm.taskframe.create() ; CHECK: detach within %[[SYNCREG]], label %det.achd56.ls1, label %det.cont69.ls1 unwind label %lpad66.ls1 @@ -656,12 +658,14 @@ declare i32 @llvm.tapir.loop.grainsize.i32(i32) #7 ; CHECK: [[INVOKECONT]]: ; CHECK-NEXT: reattach within %[[SYNCREG]] +; CHECK: call void @llvm.taskframe.end(token %[[LS_TF]]) +; CHECK-NEXT: sync within %[[DACSYNCREG]], + ; CHECK: [[UNREACHABLE]]: ; CHECK-NEXT: unreachable ; CHECK-LABEL: define internal fastcc void @_Z15parfor_trycatchi.outline_pfor.cond.ls1( -; CHECK: %[[SYNCREG:.+]] = tail call token @llvm.syncregion.start() ; CHECK: 
%[[DACSYNCREG:.+]] = tail call token @llvm.syncregion.start() ; CHECK: detach within %[[DACSYNCREG]], label %[[DACSPAWN:.+]], label %[[DACCONT:.+]] @@ -670,6 +674,9 @@ declare i32 @llvm.tapir.loop.grainsize.i32(i32) #7 ; CHECK: call fastcc void @_Z15parfor_trycatchi.outline_pfor.cond.ls1( ; CHECK-NEXT: reattach within %[[DACSYNCREG]], label %[[DACCONT]] +; CHECK: %[[LS_TF:.+]] = call token @llvm.taskframe.create() +; CHECK-NEXT: %[[SYNCREG:.+]] = tail call token @llvm.syncregion.start() + ; CHECK: pfor.body.ls1: ; CHECK-NEXT: %[[TASKFRAME:.+]] = tail call token @llvm.taskframe.create() ; CHECK: detach within %[[SYNCREG]], label %det.achd5.ls1, label %det.cont10.ls1 unwind label %lpad7.ls1 @@ -700,12 +707,14 @@ declare i32 @llvm.tapir.loop.grainsize.i32(i32) #7 ; CHECK: [[INVOKECONT]]: ; CHECK-NEXT: reattach within %[[SYNCREG]] +; CHECK: call void @llvm.taskframe.end(token %[[LS_TF]]) +; CHECK-NEXT: sync within %[[DACSYNCREG]], + ; CHECK: [[UNREACHABLE]]: ; CHECK-NEXT: unreachable ; CHECK-LABEL: define internal fastcc void @_Z27parfor_trycatch_destructorsi.outline_pfor.cond70.ls1( -; CHECK: %[[SYNCREG:.+]] = call token @llvm.syncregion.start() ; CHECK: %[[DACSYNCREG:.+]] = {{.*}}call token @llvm.syncregion.start() ; CHECK: detach within %[[DACSYNCREG]], label %[[DACSPAWN:.+]], label %[[DACCONT:.+]] unwind label %[[DACDU:.+]] @@ -717,9 +726,13 @@ declare i32 @llvm.tapir.loop.grainsize.i32(i32) #7 ; CHECK: [[DACINVOKECONT]]: ; CHECK-NEXT: reattach within %[[DACSYNCREG]], label %[[DACCONT]] +; CHECK: %[[LS_TF:.+]] = call token @llvm.taskframe.create() +; CHECK-NEXT: %[[B3:.+]] = alloca %class.Bar +; CHECK-NEXT: %[[SYNCREG:.+]] = call token @llvm.syncregion.start() + ; CHECK: pfor.body76.ls1: ; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %[[B3PTR:.+]]) -; CHECK-NEXT: invoke void @_ZN3BarC1Ev(ptr nonnull %[[B3:.+]]) +; CHECK-NEXT: invoke void @_ZN3BarC1Ev(ptr nonnull %[[B3]]) ; CHECK-NEXT: to label %[[B3CONSTRCONT:.+]] unwind label %lpad77.ls1 ; CHECK: 
lpad77.ls1: @@ -769,6 +782,9 @@ declare i32 @llvm.tapir.loop.grainsize.i32(i32) #7 ; CHECK: [[INVOKECONT]]: ; CHECK-NEXT: reattach within %[[SYNCREG]], label %det.cont95.ls1 +; CHECK: call void @llvm.taskframe.end(token %[[LS_TF]]) +; CHECK-NEXT: sync within %[[DACSYNCREG]], + ; CHECK: [[DACDU]]: ; CHECK-NEXT: landingpad ; CHECK-NEXT: cleanup @@ -784,7 +800,6 @@ declare i32 @llvm.tapir.loop.grainsize.i32(i32) #7 ; CHECK-LABEL: define internal fastcc void @_Z27parfor_trycatch_destructorsi.outline_pfor.cond.ls1( -; CHECK: %[[SYNCREG:.+]] = call token @llvm.syncregion.start() ; CHECK: %[[DACSYNCREG:.+]] = {{.*}}call token @llvm.syncregion.start() ; CHECK: detach within %[[DACSYNCREG]], label %[[DACSPAWN:.+]], label %[[DACCONT:.+]] unwind label %[[DACDU:.+]] @@ -796,9 +811,13 @@ declare i32 @llvm.tapir.loop.grainsize.i32(i32) #7 ; CHECK: [[DACINVOKECONT]]: ; CHECK-NEXT: reattach within %[[DACSYNCREG]], label %[[DACCONT]] +; CHECK: %[[LS_TF:.+]] = call token @llvm.taskframe.create() +; CHECK-NEXT: %[[B2:.+]] = alloca %class.Bar +; CHECK-NEXT: %[[SYNCREG:.+]] = call token @llvm.syncregion.start() + ; CHECK: pfor.body.ls1: ; CHECK: call void @llvm.lifetime.start.p0(i64 1, ptr nonnull %[[B2PTR:.+]]) -; CHECK-NEXT: invoke void @_ZN3BarC1Ev(ptr nonnull %[[B2:.+]]) +; CHECK-NEXT: invoke void @_ZN3BarC1Ev(ptr nonnull %[[B2]]) ; CHECK-NEXT: to label %[[B2CONSTRCONT:.+]] unwind label %lpad15.ls1 ; CHECK: lpad15.ls1: @@ -848,6 +867,9 @@ declare i32 @llvm.tapir.loop.grainsize.i32(i32) #7 ; CHECK: [[INVOKECONT]]: ; CHECK-NEXT: reattach within %[[SYNCREG]], label %det.cont33.ls1 +; CHECK: call void @llvm.taskframe.end(token %[[LS_TF]]) +; CHECK-NEXT: sync within %[[DACSYNCREG]], + ; CHECK: [[DACDU]]: ; CHECK-NEXT: landingpad ; CHECK-NEXT: cleanup diff --git a/llvm/test/Transforms/Tapir/loop-spawning-nested-spawn-alloc-unwind.ll b/llvm/test/Transforms/Tapir/loop-spawning-nested-spawn-alloc-unwind.ll index a0baf5e3d30b..1f448062c1c1 100644 --- 
a/llvm/test/Transforms/Tapir/loop-spawning-nested-spawn-alloc-unwind.ll +++ b/llvm/test/Transforms/Tapir/loop-spawning-nested-spawn-alloc-unwind.ll @@ -1015,12 +1015,6 @@ csi.cleanup389391393: ; preds = %for.body185.us ; CHECK: define internal fastcc void @_Z28ggml_compute_forward_mul_matPK19ggml_compute_paramsP11ggml_tensor.outline_pfor.cond.us.ls1( ; CHECK: pfor.cond.us.preheader.split.split.ls1: -; CHECK-NEXT: %[[NESTED_SPAWN_SYNCREG:.+]] = {{.*}}call token @llvm.syncregion.start() -; CHECK-NEXT: #dbg_value( -; CHECK-NEXT: #dbg_value( -; CHECK-NEXT: #dbg_value( -; CHECK-NEXT: #dbg_value( -; CHECK-NEXT: #dbg_value( ; CHECK-NEXT: %[[LOOP_DAC_SYNCREG:.+]] = {{.*}}call token @llvm.syncregion.start() ; CHECK: br label %[[LOOP_DAC_HEADER:.+]], !dbg @@ -1042,6 +1036,12 @@ csi.cleanup389391393: ; preds = %for.body185.us ; CHECK: call void @__csan_task( ; CHECK: %[[NEW_TF:.+]] = call token @llvm.taskframe.create(), !dbg ; CHECK-NEXT: %[[TMP:.+]] = alloca [32 x float] +; CHECK-NEXT: %[[NESTED_SPAWN_SYNCREG:.+]] = {{.*}}call token @llvm.syncregion.start() +; CHECK-NEXT: #dbg_value( +; CHECK-NEXT: #dbg_value( +; CHECK-NEXT: #dbg_value( +; CHECK-NEXT: #dbg_value( +; CHECK-NEXT: #dbg_value( ; CHECK: br label %[[NESTED_LOOP_HEADER:.+]], !dbg ; CHECK: [[NESTED_LOOP_HEADER]]: diff --git a/llvm/test/Transforms/Tapir/loop-spawning-nested-spawn-alloc.ll b/llvm/test/Transforms/Tapir/loop-spawning-nested-spawn-alloc.ll index ecfe17bb1a07..4853f41167dd 100644 --- a/llvm/test/Transforms/Tapir/loop-spawning-nested-spawn-alloc.ll +++ b/llvm/test/Transforms/Tapir/loop-spawning-nested-spawn-alloc.ll @@ -222,7 +222,6 @@ cleanup272: ; preds = %pfor.cond.cleanup26 ; CHECK: define internal fastcc void @ggml_compute_forward_mul_mat.outline_pfor.cond.us.us.ls1( ; CHECK: pfor.cond.us.us.preheader.ls1: -; CHECK-NEXT: %[[NESTED_SPAWN_SYNCREG:.+]] = {{.*}}call token @llvm.syncregion.start() ; CHECK-NEXT: %[[LOOP_DAC_SYNCREG:.+]] = {{.*}}call token @llvm.syncregion.start() ; CHECK: br label 
%[[LOOP_DAC_HEADER:.+]] @@ -240,6 +239,7 @@ cleanup272: ; preds = %pfor.cond.cleanup26 ; Check for a newly introduced taskframe that contains the static alloca. ; CHECK: %[[NEW_TF:.+]] = call token @llvm.taskframe.create() ; CHECK-NEXT: %[[TMP:.+]] = alloca [32 x float] +; CHECK-NEXT: %[[NESTED_SPAWN_SYNCREG:.+]] = {{.*}}call token @llvm.syncregion.start() ; CHECK-NEXT: br label %[[NESTED_LOOP_HEADER:.+]] ; CHECK: [[NESTED_LOOP_HEADER]]: From 8789ce788f0a6ecd35d9e9eef9e6652704d143d2 Mon Sep 17 00:00:00 2001 From: TB Schardl Date: Tue, 14 Jan 2025 20:39:44 -0500 Subject: [PATCH 2/3] [CSI] Handle ASan hooks specially when setting up functions for CSI or CilkSanitizer instrumentation. If ASan inserts hooks into spawned tasks with other exception-handling logic, it either needs to mark that these hooks do not throw or it needs to invoke these hooks with unwinding blocks that connect with the unwind blocks of the enclosing task. ASan does not currently do this and instead inserts these hooks naively. This change works around ASan's behavior by marking that calls to ASan hooks in spawned tasks do not throw. 
--- .../llvm/Transforms/Utils/TapirUtils.h | 8 ++- .../ComprehensiveStaticInstrumentation.cpp | 10 ++- llvm/lib/Transforms/Utils/TapirUtils.cpp | 40 ++++++++---- .../CilkSanitizer/csi-setup-asan-hook.ll | 65 +++++++++++++++++++ 4 files changed, 106 insertions(+), 17 deletions(-) create mode 100644 llvm/test/Transforms/Tapir/CilkSanitizer/csi-setup-asan-hook.ll diff --git a/llvm/include/llvm/Transforms/Utils/TapirUtils.h b/llvm/include/llvm/Transforms/Utils/TapirUtils.h index d40a63da486c..3ce34ed8d900 100644 --- a/llvm/include/llvm/Transforms/Utils/TapirUtils.h +++ b/llvm/include/llvm/Transforms/Utils/TapirUtils.h @@ -231,8 +231,12 @@ BasicBlock *CreateSubTaskUnwindEdge(Intrinsic::ID TermFunc, Value *Token, /// promoteCallsInTasksToInvokes - Traverse the control-flow graph of F to /// convert calls to invokes, recursively traversing tasks and taskframes to -/// insert appropriate detached.rethrow and taskframe.resume terminators. -void promoteCallsInTasksToInvokes(Function &F, const Twine Name = "cleanup"); +/// insert appropriate detached.rethrow and taskframe.resume terminators. The +/// optional \p IgnoreFunctionCheck parameter allows the caller to handle some +/// call sites in a custom manner. +void promoteCallsInTasksToInvokes( + Function &F, const Twine Name = "cleanup", + std::function IgnoreFunctionCheck = nullptr); /// eraseTaskFrame - Remove the specified taskframe and all uses of it. The /// given \p TaskFrame should correspond to a taskframe.create call. 
The diff --git a/llvm/lib/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.cpp index bcca4e4bbfc8..a36f011a3968 100644 --- a/llvm/lib/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.cpp @@ -746,7 +746,15 @@ void CSIImpl::setupCalls(Function &F) { if (F.doesNotThrow()) return; - promoteCallsInTasksToInvokes(F, "csi.cleanup"); + promoteCallsInTasksToInvokes(F, "csi.cleanup", [](CallBase *CB) { + if (const Function *F = CB->getCalledFunction()) { + if (F->getName().starts_with("__asan")) { + CB->setDoesNotThrow(); + return true; + } + } + return false; + }); } static BasicBlock *splitOffPreds(BasicBlock *BB, diff --git a/llvm/lib/Transforms/Utils/TapirUtils.cpp b/llvm/lib/Transforms/Utils/TapirUtils.cpp index 6a90f70e7195..fb2c1becfa8b 100644 --- a/llvm/lib/Transforms/Utils/TapirUtils.cpp +++ b/llvm/lib/Transforms/Utils/TapirUtils.cpp @@ -2007,9 +2007,9 @@ BasicBlock *llvm::CreateSubTaskUnwindEdge(Intrinsic::ID TermFunc, Value *Token, return NewUnwindEdge; } -static BasicBlock *maybePromoteCallInBlock(BasicBlock *BB, - BasicBlock *UnwindEdge, - const Value *TaskFrame) { +static BasicBlock *maybePromoteCallInBlock( + BasicBlock *BB, BasicBlock *UnwindEdge, const Value *TaskFrame, + std::function IgnoreFunctionCheck) { for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;) { Instruction *I = &*BBI++; @@ -2033,6 +2033,8 @@ static BasicBlock *maybePromoteCallInBlock(BasicBlock *BB, // We cannot transform calls with musttail tag. if (CI->isMustTailCall()) continue; + if (IgnoreFunctionCheck && IgnoreFunctionCheck(CI)) + continue; // We do not need to (and in fact, cannot) convert possibly throwing calls // to @llvm.experimental_deoptimize (resp. 
@llvm.experimental.guard) into @@ -2075,10 +2077,10 @@ static Instruction *getTaskFrameInstructionInBlock(BasicBlock *BB, // Recursively handle inlined tasks. static void promoteCallsInTasksHelper( - BasicBlock *EntryBlock, BasicBlock *UnwindEdge, - BasicBlock *Unreachable, Value *CurrentTaskFrame, - SmallVectorImpl *ParentWorklist, - SmallPtrSetImpl &Processed) { + BasicBlock *EntryBlock, BasicBlock *UnwindEdge, BasicBlock *Unreachable, + Value *CurrentTaskFrame, SmallVectorImpl *ParentWorklist, + SmallPtrSetImpl &Processed, + std::function IgnoreFunctionCheck) { SmallVector DetachesToReplace; SmallVector Worklist; // TODO: See if we need a global Visited set over all recursive calls, i.e., @@ -2092,8 +2094,8 @@ static void promoteCallsInTasksHelper( continue; // Promote any calls in the block to invokes. - while (BasicBlock *NewBB = - maybePromoteCallInBlock(BB, UnwindEdge, CurrentTaskFrame)) + while (BasicBlock *NewBB = maybePromoteCallInBlock( + BB, UnwindEdge, CurrentTaskFrame, IgnoreFunctionCheck)) BB = cast(NewBB->getTerminator())->getNormalDest(); Instruction *TFI = getTaskFrameInstructionInBlock(BB, CurrentTaskFrame); @@ -2115,7 +2117,8 @@ static void promoteCallsInTasksHelper( // Recursively check all blocks promoteCallsInTasksHelper(NewBB, TaskFrameUnwindEdge, Unreachable, - TFCreate, &Worklist, Processed); + TFCreate, &Worklist, Processed, + IgnoreFunctionCheck); // Remove the unwind edge for the taskframe if it is not needed. if (pred_empty(TaskFrameUnwindEdge)) @@ -2172,7 +2175,7 @@ static void promoteCallsInTasksHelper( // Recursively check all blocks in the detached task. promoteCallsInTasksHelper(DI->getDetached(), SubTaskUnwindEdge, Unreachable, CurrentTaskFrame, &Worklist, - Processed); + Processed, IgnoreFunctionCheck); // If the new unwind edge is not used, remove it. 
if (pred_empty(SubTaskUnwindEdge)) SubTaskUnwindEdge->eraseFromParent(); @@ -2180,9 +2183,16 @@ static void promoteCallsInTasksHelper( DetachesToReplace.push_back(DI); } else { - // Because this detach has an unwind destination, Any calls in the + // Because this detach has an unwind destination, any calls in the // spawned task that may throw should already be invokes. Hence there // is no need to promote calls in this task. + if (IgnoreFunctionCheck) { + // This recursive call should only apply IgnoreFunctionCheck to callsites. + promoteCallsInTasksHelper(DI->getDetached(), DI->getUnwindDest(), + Unreachable, CurrentTaskFrame, &Worklist, + Processed, IgnoreFunctionCheck); + } + if (Visited.insert(DI->getUnwindDest()).second) // If the detach-unwind isn't dead, add it to the worklist. Worklist.push_back(DI->getUnwindDest()); @@ -2225,7 +2235,9 @@ static FunctionCallee getDefaultPersonalityFn(Module *M) { FunctionType::get(Type::getInt32Ty(C), true)); } -void llvm::promoteCallsInTasksToInvokes(Function &F, const Twine Name) { +void llvm::promoteCallsInTasksToInvokes( + Function &F, const Twine Name, + std::function IgnoreFunctionCheck) { // Collect blocks to process, in order to handle unreachable blocks. 
SmallVector ToProcess; ToProcess.push_back(&F.getEntryBlock()); @@ -2256,7 +2268,7 @@ void llvm::promoteCallsInTasksToInvokes(Function &F, const Twine Name) { for (BasicBlock *BB : ToProcess) { if (!Processed.contains(BB)) promoteCallsInTasksHelper(BB, CleanupBB, UnreachableBlk, nullptr, nullptr, - Processed); + Processed, IgnoreFunctionCheck); } // Either finish inserting the cleanup block (and associated data) or remove diff --git a/llvm/test/Transforms/Tapir/CilkSanitizer/csi-setup-asan-hook.ll b/llvm/test/Transforms/Tapir/CilkSanitizer/csi-setup-asan-hook.ll new file mode 100644 index 000000000000..1431711cdf2f --- /dev/null +++ b/llvm/test/Transforms/Tapir/CilkSanitizer/csi-setup-asan-hook.ll @@ -0,0 +1,65 @@ +; Check that CSI-setup ignores __asan hooks when promoting calls to invokes. +; +; ASan should have inserted these hooks into tasks with proper attributes or +; control flow for exceptional returns, but it does not do so at this time. +; As a workaround, CSI will ignore these hooks when setting up a function for +; instrumentation. +; +; RUN: opt < %s -passes="csi-setup" -S | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @_Z16reduce_with_cilkPP6ScalarS1_S0_S0_mm() personality ptr null { +entry: + %syncreg = tail call token @llvm.syncregion.start() + unreachable + +pfor.cond.preheader: ; No predecessors! + detach within %syncreg, label %pfor.body.entry, label %pfor.inc241 unwind label %lpad240.loopexit + +pfor.body.entry: ; preds = %pfor.cond.preheader + %syncreg14.strpm.detachloop = call token @llvm.syncregion.start() + %syncreg14 = call token @llvm.syncregion.start() + br label %invoke.cont222 + +pfor.cond24.preheader.new: ; No predecessors! 
+ detach within %syncreg14.strpm.detachloop, label %invoke.cont174.strpm.outer, label %pfor.inc.strpm.outer + +invoke.cont174.strpm.outer: ; preds = %pfor.cond24.preheader.new + unreachable + +pfor.inc.strpm.outer: ; preds = %pfor.cond24.preheader.new + sync within %syncreg14.strpm.detachloop, label %invoke.cont222 + +invoke.cont222: ; preds = %pfor.inc.strpm.outer, %pfor.body.entry + call void @__asan_report(i64 0) + unreachable + +; CHECK: invoke.cont222: +; CHECK-NOT: invoke {{.*}}void @__asan_report( +; CHECK-NEXT: call void @__asan_report( +; CHECK-NEXT: unreachable + +pfor.inc241: ; preds = %pfor.cond.preheader + sync within %syncreg, label %sync.continue246 + +lpad240.loopexit: ; preds = %pfor.cond.preheader + %lpad.loopexit = landingpad { ptr, i32 } + cleanup + resume { ptr, i32 } zeroinitializer + +sync.continue246: ; preds = %pfor.inc241 + unreachable +} + +declare void @__asan_report(i64) local_unnamed_addr + +; Function Attrs: nounwind willreturn memory(argmem: readwrite) +declare token @llvm.syncregion.start() #0 + +; uselistorder directives +uselistorder ptr null, { 1, 2, 0 } +uselistorder ptr @llvm.syncregion.start, { 2, 1, 0 } + +attributes #0 = { nounwind willreturn memory(argmem: readwrite) } From 13688ae9ee498346fee08f5b0e551ec8b569a6dc Mon Sep 17 00:00:00 2001 From: TB Schardl Date: Fri, 31 Jan 2025 09:59:36 -0500 Subject: [PATCH 3/3] [CSI] Fix instrumentation around sync and sync.unwind instructions. Fix promotion of calls to invokes when potentially-throwing calls are inside tasks with unwind destinations. 
--- clang/test/Cilk/cilk-mixed-unwind-codegen.cpp | 37 +++ .../ComprehensiveStaticInstrumentation.cpp | 52 ++- llvm/lib/Transforms/Utils/TapirUtils.cpp | 59 ++-- .../csi-instrument-sync-with-unwind.ll | 69 ++++ .../csi-setup-mixed-task-lpad.ll | 185 +++++++++++ .../csi-sync-unwind-loopexit-multiple.ll | 313 ++++++++++++++++++ .../CilkSanitizer/csi-sync-unwind-loopexit.ll | 235 +++++++++++++ .../split-unreachable-predecessors.ll | 3 - .../mixed-predecessors-of-unreachable.ll | 91 +++++ 9 files changed, 995 insertions(+), 49 deletions(-) create mode 100644 clang/test/Cilk/cilk-mixed-unwind-codegen.cpp create mode 100644 llvm/test/Transforms/Tapir/CilkSanitizer/csi-instrument-sync-with-unwind.ll create mode 100644 llvm/test/Transforms/Tapir/CilkSanitizer/csi-setup-mixed-task-lpad.ll create mode 100644 llvm/test/Transforms/Tapir/CilkSanitizer/csi-sync-unwind-loopexit-multiple.ll create mode 100644 llvm/test/Transforms/Tapir/CilkSanitizer/csi-sync-unwind-loopexit.ll create mode 100644 llvm/test/Transforms/Tapir/mixed-predecessors-of-unreachable.ll diff --git a/clang/test/Cilk/cilk-mixed-unwind-codegen.cpp b/clang/test/Cilk/cilk-mixed-unwind-codegen.cpp new file mode 100644 index 000000000000..0dddd92d1419 --- /dev/null +++ b/clang/test/Cilk/cilk-mixed-unwind-codegen.cpp @@ -0,0 +1,37 @@ +// Check that Clang may generate functions calls that can throw with or without +// a landingpad in the same Cilk scope. 
+// +// RUN: %clang_cc1 -fopencilk -fcxx-exceptions -fexceptions -ftapir=none -triple x86_64-unknown-linux-gnu -std=c++11 -emit-llvm %s -o - | FileCheck %s +// expected-no-diagnostics + +int bar(int n); +void foo(int n) { + cilk_for (int i = 0; i < n; ++i) { + int w = bar(i); + throw bar(w); + } +} + +// CHECK-LABEL: define {{.*}}void @_Z3fooi(i32 {{.*}}%n) + +// Check for detach with an unwind destination +// CHECK: detach within %[[SYNCREG:.+]], label %[[PFOR_BODY_ENTRY:.+]], label %[[PFOR_INC:.+]] unwind label %[[DETACH_LPAD:.+]] + +// CHECK: [[PFOR_BODY_ENTRY]]: + +// Check for call to function bar that might throw. +// CHECK: call {{.*}}i32 @_Z3bari(i32 + +// Check for invoke of function bar +// CHECK: invoke noundef i32 @_Z3bari(i32 +// CHECK-NEXT: to label %[[INVOKE_CONT:.+]] unwind label %[[TASK_LPAD:.+]] + +// CHECK: [[INVOKE_CONT]]: +// CHECK: call void @__cxa_throw(ptr +// CHECK-NEXT: unreachable + +// CHECK: [[TASK_LPAD]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[SYNCREG]], { ptr, i32 } %{{.*}}) +// CHECK-NEXT: to label %[[UNREACHABLE:.+]] unwind label %[[DETACH_LPAD]] diff --git a/llvm/lib/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.cpp index a36f011a3968..7ac26a7a5572 100644 --- a/llvm/lib/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.cpp @@ -24,12 +24,14 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/EHPersonalities.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" 
#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" @@ -1138,10 +1140,19 @@ bool CSIImpl::instrumentMemIntrinsic(Instruction *I) { } void CSIImpl::instrumentBasicBlock(BasicBlock &BB, const TaskInfo &TI) { - IRBuilder<> IRB(&*BB.getFirstInsertionPt()); + Instruction *InsertPt = &*BB.getFirstInsertionPt(); bool IsEntry = isEntryBlock(BB, TI); if (IsEntry) - IRB.SetInsertPoint(getEntryBBInsertPt(BB)); + InsertPt = getEntryBBInsertPt(BB); + // Skip any sync.unwind intrinsics, which need to remain paired with + // corresponding syncs. + if (isSyncUnwind(InsertPt)) + InsertPt = InsertPt->getNextNode(); + // Skip any taskframe.end intrinsics, to keep the basic-block instrumentation + // in the same basic block. + if (isTapirIntrinsic(Intrinsic::taskframe_end, InsertPt)) + InsertPt = InsertPt->getNextNode(); + IRBuilder<> IRB(InsertPt); uint64_t LocalId = BasicBlockFED.add(BB); uint64_t BBSizeId = BBSize.add(BB, GetTTI ? &(*GetTTI)(*BB.getParent()) : nullptr); @@ -1235,8 +1246,24 @@ void CSIImpl::instrumentLoop(Loop &L, TaskInfo &TI, ScalarEvolution *SE) { insertHookCall(&*IRB.GetInsertPoint(), CsiLoopBodyEntry, {LoopCsiId, LoopPropVal}); + SmallPtrSet ExitingBlocksVisited; // Insert hooks at the ends of the exiting blocks. - for (BasicBlock *BB : ExitingBlocks) { + while (!ExitingBlocks.empty()) { + BasicBlock *BB = ExitingBlocks.pop_back_val(); + if (!ExitingBlocksVisited.insert(BB).second) + continue; + if (isSyncUnwind(BB->getTerminator())) { + // Insert the loopbody_exit hook before the sync instruction, rather than + // the sync.unwind. + // TODO: I don't think there's anything preventing a sync.unwind from + // having multiple sync-instruction predecessors, so all such predecessors + // need to be addressed. This logic should become simpler if sync itself + // is modified to have an unwind destination. 
+ for (BasicBlock *Pred : predecessors(BB)) + ExitingBlocks.push_back(Pred); + continue; + } + // Record properties of this loop exit CsiLoopExitProperty LoopExitProp; LoopExitProp.setIsLatch(L.isLoopLatch(BB)); @@ -1806,13 +1833,16 @@ CallInst *CSIImpl::insertHookCallInSuccessorBB(BasicBlock *Succ, BasicBlock *BB, ArrayRef HookArgs, ArrayRef DefaultArgs) { assert(HookFunction && "No hook function given."); + Instruction *InsertPt = &*Succ->getFirstInsertionPt(); + if (isSyncUnwind(InsertPt)) + InsertPt = InsertPt->getNextNode(); + // If this successor block has a unique predecessor, just insert the hook call // as normal. if (Succ->getUniquePredecessor()) { assert(Succ->getUniquePredecessor() == BB && "BB is not unique predecessor of successor block"); - return insertHookCall(&*Succ->getFirstInsertionPt(), HookFunction, - HookArgs); + return insertHookCall(InsertPt, HookFunction, HookArgs); } if (updateArgPHIs(Succ, BB, HookFunction, HookArgs, DefaultArgs)) @@ -1823,7 +1853,7 @@ CallInst *CSIImpl::insertHookCallInSuccessorBB(BasicBlock *Succ, BasicBlock *BB, for (PHINode *ArgPHI : ArgPHIs[Key]) SuccessorHookArgs.push_back(ArgPHI); - IRBuilder<> IRB(&*Succ->getFirstInsertionPt()); + IRBuilder<> IRB(InsertPt); // Insert the hook call, using the PHI as the CSI ID. CallInst *Call = IRB.CreateCall(HookFunction, SuccessorHookArgs); setInstrumentationDebugLoc(*Succ, (Instruction *)Call); @@ -2747,6 +2777,11 @@ void CSIImpl::instrumentFunction(Function &F) { for (BasicBlock *BB : BasicBlocks) instrumentBasicBlock(*BB, TI); + if (Options.InstrumentLoops) + // Recursively instrument all loops + for (Loop *L : LI) + instrumentLoop(*L, TI, SE); + // Instrument Tapir constructs. 
if (Options.InstrumentTapir) { if (Config->DoesFunctionRequireInstrumentationForPoint( @@ -2768,11 +2803,6 @@ void CSIImpl::instrumentFunction(Function &F) { for (Instruction *I : Allocas) instrumentAlloca(I, TI); - if (Options.InstrumentLoops) - // Recursively instrument all loops - for (Loop *L : LI) - instrumentLoop(*L, TI, SE); - // Do this work in a separate loop after copying the iterators so that we // aren't modifying the list as we're iterating. if (Options.InstrumentMemoryAccesses) diff --git a/llvm/lib/Transforms/Utils/TapirUtils.cpp b/llvm/lib/Transforms/Utils/TapirUtils.cpp index fb2c1becfa8b..f24ae286fcf3 100644 --- a/llvm/lib/Transforms/Utils/TapirUtils.cpp +++ b/llvm/lib/Transforms/Utils/TapirUtils.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/TapirUtils.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/LoopInfo.h" @@ -85,16 +86,13 @@ bool llvm::isSkippableTapirIntrinsic(const Instruction *I) { /// Returns true if the given basic block \p B is a placeholder successor of a /// taskframe.resume or detached.rethrow. bool llvm::isTapirPlaceholderSuccessor(const BasicBlock *B) { - for (const BasicBlock *Pred : predecessors(B)) { + return llvm::any_of(predecessors(B), [&](const BasicBlock *Pred) { if (!isDetachedRethrow(Pred->getTerminator()) && !isTaskFrameResume(Pred->getTerminator())) return false; - const InvokeInst *II = dyn_cast(Pred->getTerminator()); - if (B != II->getNormalDest()) - return false; - } - return true; + return B == II->getNormalDest(); + }); } /// Returns a taskframe.resume that uses the given taskframe, or nullptr if no @@ -2166,37 +2164,28 @@ static void promoteCallsInTasksHelper( // spawned task recursively. 
if (DetachInst *DI = dyn_cast<DetachInst>(BB->getTerminator())) { Processed.insert(BB); - if (!DI->hasUnwindDest()) { - // Create an unwind edge for the subtask, which is terminated with a - // detached-rethrow. - BasicBlock *SubTaskUnwindEdge = CreateSubTaskUnwindEdge( - Intrinsic::detached_rethrow, DI->getSyncRegion(), UnwindEdge, - Unreachable, DI); - // Recursively check all blocks in the detached task. - promoteCallsInTasksHelper(DI->getDetached(), SubTaskUnwindEdge, - Unreachable, CurrentTaskFrame, &Worklist, - Processed, IgnoreFunctionCheck); - // If the new unwind edge is not used, remove it. - if (pred_empty(SubTaskUnwindEdge)) - SubTaskUnwindEdge->eraseFromParent(); - else - DetachesToReplace.push_back(DI); - } else { - // Because this detach has an unwind destination, any calls in the - // spawned task that may throw should already be invokes. Hence there - // is no need to promote calls in this task. - if (IgnoreFunctionCheck) { - // This recursive call should only apply IgnoreFunctionCheck to callsites. - promoteCallsInTasksHelper(DI->getDetached(), DI->getUnwindDest(), - Unreachable, CurrentTaskFrame, &Worklist, - Processed, IgnoreFunctionCheck); - } + // Create an unwind edge for the subtask, which is terminated with a + // detached-rethrow. + BasicBlock *SubTaskUnwindEdge = CreateSubTaskUnwindEdge( + Intrinsic::detached_rethrow, DI->getSyncRegion(), + DI->hasUnwindDest() ? DI->getUnwindDest() : UnwindEdge, Unreachable, + DI); + // Recursively check all blocks in the detached task. + promoteCallsInTasksHelper(DI->getDetached(), SubTaskUnwindEdge, + Unreachable, CurrentTaskFrame, &Worklist, + Processed, IgnoreFunctionCheck); + + // If the new unwind edge is not used, remove it. + if (pred_empty(SubTaskUnwindEdge)) + SubTaskUnwindEdge->eraseFromParent(); + else if (!DI->hasUnwindDest()) + DetachesToReplace.push_back(DI); + + if (DI->hasUnwindDest() && Visited.insert(DI->getUnwindDest()).second) + // If the detach-unwind isn't dead, add it to the worklist.
+ Worklist.push_back(DI->getUnwindDest()); - if (Visited.insert(DI->getUnwindDest()).second) - // If the detach-unwind isn't dead, add it to the worklist. - Worklist.push_back(DI->getUnwindDest()); - } // Add the continuation to the worklist. if (isTaskFrameResume(UnwindEdge->getTerminator()) && (CurrentTaskFrame == getTaskFrameUsed(DI->getDetached()))) { diff --git a/llvm/test/Transforms/Tapir/CilkSanitizer/csi-instrument-sync-with-unwind.ll b/llvm/test/Transforms/Tapir/CilkSanitizer/csi-instrument-sync-with-unwind.ll new file mode 100644 index 000000000000..a3b2d5084faf --- /dev/null +++ b/llvm/test/Transforms/Tapir/CilkSanitizer/csi-instrument-sync-with-unwind.ll @@ -0,0 +1,69 @@ +; Check that CSI does not insert instrumentation between a sync and its corresponding sync.unwind. +; +; RUN: opt < %s -passes="csi-setup,csi" -csi-instrument-basic-blocks=false -S | FileCheck %s --check-prefixes=CHECK +; RUN: opt < %s -passes="csi-setup,csi" -S | FileCheck %s --check-prefixes=CHECK,CHECK-BB +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind willreturn memory(argmem: readwrite) +declare token @llvm.syncregion.start() #0 + +; Function Attrs: nounwind willreturn memory(argmem: readwrite) +declare token @llvm.taskframe.create() #0 + +; Function Attrs: willreturn memory(argmem: readwrite) +declare void @llvm.sync.unwind(token) #1 + +; Function Attrs: nounwind willreturn memory(argmem: readwrite) +declare void @llvm.taskframe.end(token) #0 + +define fastcc void @_Z28prove_sumcheck_cubic_batchedR16ProverTranscriptRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE14GoldilockField8FixedVecIP9DensePolyESD_RSB_SD_SD_SD_St4spanIKS9_Lm18446744073709551615EE.outline_pfor.cond522.ls2() personality ptr null { +pfor.cond522.preheader.ls2: + %syncreg529.ls2 = tail call token @llvm.syncregion.start() + br label %pfor.body.entry525.tf.tf.tf.tf.tf.tf.tf.tf.ls2 
+ +pfor.body.entry525.tf.tf.tf.tf.tf.tf.tf.tf.ls2: ; preds = %sync.continue578.ls2, %pfor.cond522.preheader.ls2 + %0 = tail call token @llvm.taskframe.create() + detach within %syncreg529.ls2, label %det.achd554.ls2, label %det.cont569.ls2 + +det.cont569.ls2: ; preds = %det.achd554.ls2, %pfor.body.entry525.tf.tf.tf.tf.tf.tf.tf.tf.ls2 + sync within %syncreg529.ls2, label %sync.continue578.ls2 + +sync.continue578.ls2: ; preds = %det.cont569.ls2 + tail call void @llvm.sync.unwind(token %syncreg529.ls2) #2 + tail call void @llvm.taskframe.end(token %0) + br i1 false, label %pfor.cond.cleanup599.ls2.tfend, label %pfor.body.entry525.tf.tf.tf.tf.tf.tf.tf.tf.ls2 + +det.achd554.ls2: ; preds = %pfor.body.entry525.tf.tf.tf.tf.tf.tf.tf.tf.ls2 + reattach within %syncreg529.ls2, label %det.cont569.ls2 + +pfor.cond.cleanup599.ls2.tfend: ; preds = %sync.continue578.ls2 + ret void +} + +; CHECK: define {{.*}}void @_Z28prove_sumcheck_cubic_batchedR16ProverTranscriptRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE14GoldilockField8FixedVecIP9DensePolyESD_RSB_SD_SD_SD_St4spanIKS9_Lm18446744073709551615EE.outline_pfor.cond522.ls2() + +; CHECK: %syncreg529.ls2 = {{.*}}call token @llvm.syncregion.start() +; CHECK: %[[TF:.+]] = {{.*}}call token @llvm.taskframe.create() + +; CHECK: sync within %syncreg529.ls2, label %[[SYNC_CONT:.+]] + +; CHECK: [[SYNC_CONT]]: +; CHECK-NOT: call void @__csi_ +; CHECK-NEXT: void @llvm.sync.unwind(token %syncreg529.ls2 +; CHECK: call void @__csi_after_sync( +; CHECK-BB-NOT: @__csi_bb_ +; CHECK: call void @llvm.taskframe.end(token %[[TF]]) + +; CHECK-BB: call void @__csi_bb_entry( + +; CHECK: call void @__csi_loopbody_exit( + +; CHECK: reattach within %syncreg529.ls2 + +; uselistorder directives +uselistorder ptr null, { 1, 2, 0 } + +attributes #0 = { nounwind willreturn memory(argmem: readwrite) } +attributes #1 = { willreturn memory(argmem: readwrite) } +attributes #2 = { nounwind } diff --git 
a/llvm/test/Transforms/Tapir/CilkSanitizer/csi-setup-mixed-task-lpad.ll b/llvm/test/Transforms/Tapir/CilkSanitizer/csi-setup-mixed-task-lpad.ll new file mode 100644 index 000000000000..72c4eb02e604 --- /dev/null +++ b/llvm/test/Transforms/Tapir/CilkSanitizer/csi-setup-mixed-task-lpad.ll @@ -0,0 +1,185 @@ +; Check that csi-setup properly promotes calls to invokes when a call that might throw is inside a task with a detach-unwind. +; +; RUN: opt < %s -passes="csi-setup" -S | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@_ZTIi = external constant ptr + +; Function Attrs: mustprogress noinline optnone sanitize_cilk uwtable +define dso_local void @_Z3fooi(i32 noundef %n) #0 personality ptr @__gxx_personality_v0 { +entry: + %n.addr = alloca i32, align 4 + %syncreg = call token @llvm.syncregion.start() + %__init = alloca i32, align 4 + %__limit = alloca i32, align 4 + %__begin = alloca i32, align 4 + %__end = alloca i32, align 4 + %exn.slot4 = alloca ptr, align 8 + %ehselector.slot5 = alloca i32, align 4 + store i32 %n, ptr %n.addr, align 4 + store i32 0, ptr %__init, align 4 + %0 = load i32, ptr %n.addr, align 4 + store i32 %0, ptr %__limit, align 4 + %1 = load i32, ptr %__init, align 4 + %2 = load i32, ptr %__limit, align 4 + %cmp = icmp slt i32 %1, %2 + br i1 %cmp, label %pfor.ph, label %pfor.end + +pfor.ph: ; preds = %entry + store i32 0, ptr %__begin, align 4 + %3 = load i32, ptr %__limit, align 4 + %4 = load i32, ptr %__init, align 4 + %sub = sub nsw i32 %3, %4 + store i32 %sub, ptr %__end, align 4 + br label %pfor.cond + +pfor.cond: ; preds = %pfor.inc, %pfor.ph + br label %pfor.detach + +pfor.detach: ; preds = %pfor.cond + %5 = load i32, ptr %__init, align 4 + %6 = load i32, ptr %__begin, align 4 + %add = add nsw i32 %5, %6 + detach within %syncreg, label %pfor.body.entry, label %pfor.inc unwind label %lpad3 + +pfor.body.entry: ; preds = 
%pfor.detach + %i = alloca i32, align 4 + %w = alloca i32, align 4 + %exn.slot = alloca ptr, align 8 + %ehselector.slot = alloca i32, align 4 + store i32 %add, ptr %i, align 4 + br label %pfor.body + +pfor.body: ; preds = %pfor.body.entry + %7 = load i32, ptr %i, align 4 + %call = call noundef i32 @_Z3bari(i32 noundef %7) + store i32 %call, ptr %w, align 4 + %exception = call ptr @__cxa_allocate_exception(i64 4) #4 + %8 = load i32, ptr %w, align 4 + %call1 = invoke noundef i32 @_Z3bari(i32 noundef %8) + to label %invoke.cont unwind label %lpad + +; CHECK: pfor.body: +; CHECK-NEXT: %[[ARG1:.+]] = load i32, ptr %i +; CHECK-NOT: call {{.*}}i32 @_Z3bari(i32 noundef %{{.*}}) +; CHECK: invoke {{.*}}i32 @_Z3bari(i32 noundef %[[ARG1]]) +; CHECK-NEXT: to label %[[CALL_NOEXC:.+]] unwind label %[[CSI_SETUP_LPAD:.+]] + +; CHECK: [[CALL_NOEXC]]: +; CHECK: %[[ARG2:.+]] = load i32, ptr %w +; CHECK-NEXT: invoke noundef i32 @_Z3bari(i32 noundef %[[ARG2]]) +; CHECK-NEXT: to label %invoke.cont unwind label %lpad + +invoke.cont: ; preds = %pfor.body + store i32 %call1, ptr %exception, align 16 + call void @__cxa_throw(ptr %exception, ptr @_ZTIi, ptr null) #5 + unreachable + +lpad: ; preds = %pfor.body + %9 = landingpad { ptr, i32 } + cleanup + %10 = extractvalue { ptr, i32 } %9, 0 + store ptr %10, ptr %exn.slot, align 8 + %11 = extractvalue { ptr, i32 } %9, 1 + store i32 %11, ptr %ehselector.slot, align 4 + call void @__cxa_free_exception(ptr %exception) #4 + %exn = load ptr, ptr %exn.slot, align 8 + %sel = load i32, ptr %ehselector.slot, align 4 + %lpad.val = insertvalue { ptr, i32 } undef, ptr %exn, 0 + %lpad.val2 = insertvalue { ptr, i32 } %lpad.val, i32 %sel, 1 + invoke void @llvm.detached.rethrow.sl_p0i32s(token %syncreg, { ptr, i32 } %lpad.val2) + to label %unreachable unwind label %lpad3 + +; CHECK: lpad: +; CHECK-NEXT: landingpad +; CHECK-NEXT: cleanup +; CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %syncreg, +; CHECK-NEXT: to label %unreachable unwind label 
%lpad3 + +pfor.preattach: ; No predecessors! + reattach within %syncreg, label %pfor.inc + +pfor.inc: ; preds = %pfor.preattach, %pfor.detach + %12 = load i32, ptr %__begin, align 4 + %inc = add nsw i32 %12, 1 + store i32 %inc, ptr %__begin, align 4 + %13 = load i32, ptr %__begin, align 4 + %14 = load i32, ptr %__end, align 4 + %cmp6 = icmp slt i32 %13, %14 + br i1 %cmp6, label %pfor.cond, label %pfor.cond.cleanup, !llvm.loop !6 + +pfor.cond.cleanup: ; preds = %pfor.inc + sync within %syncreg, label %sync.continue + +lpad3: ; preds = %lpad, %pfor.detach + %15 = landingpad { ptr, i32 } + cleanup + %16 = extractvalue { ptr, i32 } %15, 0 + store ptr %16, ptr %exn.slot4, align 8 + %17 = extractvalue { ptr, i32 } %15, 1 + store i32 %17, ptr %ehselector.slot5, align 4 + br label %eh.resume + +sync.continue: ; preds = %pfor.cond.cleanup + call void @llvm.sync.unwind(token %syncreg) + br label %pfor.end + +pfor.end: ; preds = %sync.continue, %entry + ret void + +eh.resume: ; preds = %lpad3 + %exn7 = load ptr, ptr %exn.slot4, align 8 + %sel8 = load i32, ptr %ehselector.slot5, align 4 + %lpad.val9 = insertvalue { ptr, i32 } poison, ptr %exn7, 0 + %lpad.val10 = insertvalue { ptr, i32 } %lpad.val9, i32 %sel8, 1 + resume { ptr, i32 } %lpad.val10 + +unreachable: ; preds = %lpad + unreachable + +; CHECK: [[CSI_SETUP_LPAD]]: +; CHECK-NEXT: landingpad +; CHECK-NEXT: cleanup +; CHECK-NEXT: invoke void @llvm.detached.rethrow.sl_p0i32s(token %syncreg, +; CHECK-NEXT: to label %[[CSI_SETUP_UNREACHABLE:.+]] unwind label %lpad3 +} + +; Function Attrs: nounwind willreturn memory(argmem: readwrite) +declare token @llvm.syncregion.start() #1 + +declare noundef i32 @_Z3bari(i32 noundef) #2 + +declare ptr @__cxa_allocate_exception(i64) + +declare i32 @__gxx_personality_v0(...) 
+ +declare void @__cxa_free_exception(ptr) + +declare void @__cxa_throw(ptr, ptr, ptr) + +; Function Attrs: willreturn memory(argmem: readwrite) +declare void @llvm.detached.rethrow.sl_p0i32s(token, { ptr, i32 }) #3 + +; Function Attrs: willreturn memory(argmem: readwrite) +declare void @llvm.sync.unwind(token) #3 + +attributes #0 = { mustprogress noinline optnone sanitize_cilk uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { nounwind willreturn memory(argmem: readwrite) } +attributes #2 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #3 = { willreturn memory(argmem: readwrite) } +attributes #4 = { nounwind } +attributes #5 = { noreturn } + +!llvm.module.flags = !{!0, !1, !2, !3, !4} +!llvm.ident = !{!5} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 8, !"PIC Level", i32 2} +!2 = !{i32 7, !"PIE Level", i32 2} +!3 = !{i32 7, !"uwtable", i32 2} +!4 = !{i32 7, !"frame-pointer", i32 2} +!5 = !{!"clang version 19.1.7 (git@github.com:OpenCilk/opencilk-project.git e929b19f1ca3426871e22a5843cc9e5725894576)"} +!6 = distinct !{!6, !7, !8} +!7 = !{!"llvm.loop.mustprogress"} +!8 = !{!"tapir.loop.spawn.strategy", i32 1} diff --git a/llvm/test/Transforms/Tapir/CilkSanitizer/csi-sync-unwind-loopexit-multiple.ll b/llvm/test/Transforms/Tapir/CilkSanitizer/csi-sync-unwind-loopexit-multiple.ll new file mode 100644 index 000000000000..4f12326695c5 --- /dev/null +++ b/llvm/test/Transforms/Tapir/CilkSanitizer/csi-sync-unwind-loopexit-multiple.ll @@ -0,0 +1,313 @@ +; Check that CSI loop instrumentation instruments around sync-unwind loop exits properly. 
+; +; RUN: opt < %s -passes="csi-setup,csi" -S | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +$__clang_call_terminate = comdat any + +; Function Attrs: mustprogress uwtable +define dso_local void @_Z3fooi(i32 noundef %n) local_unnamed_addr #0 personality ptr @__gxx_personality_v0 { +entry: + %syncreg = tail call token @llvm.syncregion.start() + %cmp = icmp sgt i32 %n, 0 + br i1 %cmp, label %pfor.cond, label %try.cont + +pfor.cond: ; preds = %entry, %pfor.inc + %__begin.0 = phi i32 [ %inc, %pfor.inc ], [ 0, %entry ] + detach within %syncreg, label %pfor.body.entry, label %pfor.inc unwind label %lpad69.loopexit + +pfor.body.entry: ; preds = %pfor.cond + %w = alloca i32, align 4 + %syncreg2 = tail call token @llvm.syncregion.start() + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %w) + %and = and i32 %__begin.0, 1 + %tobool.not = icmp eq i32 %and, 0 + %0 = tail call token @llvm.taskframe.create() + br i1 %tobool.not, label %if.else.tf.tf.tf.tf, label %if.then.tf.tf.tf.tf + +if.then.tf.tf.tf.tf: ; preds = %pfor.body.entry + detach within %syncreg2, label %det.achd, label %det.cont unwind label %lpad4 + +det.achd: ; preds = %if.then.tf.tf.tf.tf + %call = invoke noundef i32 @_Z3bari(i32 noundef %__begin.0) + to label %invoke.cont unwind label %lpad + +invoke.cont: ; preds = %det.achd + store i32 %call, ptr %w, align 4, !tbaa !5 + reattach within %syncreg2, label %det.cont + +det.cont: ; preds = %if.then.tf.tf.tf.tf, %invoke.cont + %add14 = add nuw nsw i32 %__begin.0, 1 + %call16 = invoke noundef i32 @_Z3bari(i32 noundef %add14) + to label %invoke.cont15 unwind label %lpad11.tfsplit.split-lp + +invoke.cont15: ; preds = %det.cont + sync within %syncreg2, label %sync.continue + +; CHECK: invoke.cont15: +; CHECK: call void @__csi_loopbody_exit( +; CHECK: call void @__csi_before_sync( +; CHECK-NEXT: sync within %syncreg2, label %sync.continue + 
+sync.continue: ; preds = %invoke.cont15 + invoke void @llvm.sync.unwind(token %syncreg2) + to label %invoke.cont17 unwind label %lpad11.tfsplit.split-lp + +; CHECK: sync.continue: +; CHECK-NOT: call +; CHECK-NEXT: invoke void @llvm.sync.unwind(token %syncreg2) +; CHECK-NEXT: to label %invoke.cont17 unwind label %[[CSI_LPAD_SPLIT:.+]] + +invoke.cont17: ; preds = %sync.continue + tail call void @llvm.taskframe.end(token %0) + br label %if.end + +; CHECK: invoke.cont17: +; CHECK-NEXT: call void @__csi_after_sync( +; CHECK-NEXT: call void @llvm.taskframe.end( + +lpad: ; preds = %det.achd + %1 = landingpad { ptr, i32 } + cleanup + invoke void @llvm.detached.rethrow.sl_p0i32s(token %syncreg2, { ptr, i32 } %1) + to label %unreachable unwind label %lpad4 + +lpad4: ; preds = %if.then.tf.tf.tf.tf, %lpad + %2 = landingpad { ptr, i32 } + cleanup + br label %lpad11 + +; CHECK: [[CSI_LPAD_SPLIT]]: +; CHECK-NEXT: landingpad +; CHECK-NEXT: cleanup +; CHECK-NEXT: call void @__csi_after_sync( + +lpad11.tfsplit.split-lp: ; preds = %det.cont, %sync.continue + %lpad.tfsplit.split-lp100 = landingpad { ptr, i32 } + cleanup + br label %lpad11 + +lpad11: ; preds = %lpad11.tfsplit.split-lp, %lpad4 + %lpad.phi101 = phi { ptr, i32 } [ %2, %lpad4 ], [ %lpad.tfsplit.split-lp100, %lpad11.tfsplit.split-lp ] + invoke void @llvm.taskframe.resume.sl_p0i32s(token %0, { ptr, i32 } %lpad.phi101) + to label %unreachable unwind label %lpad23.tfsplit + +lpad23.tfsplit: ; preds = %lpad11 + %lpad.tfsplit = landingpad { ptr, i32 } + cleanup + br label %lpad23 + +lpad23.tfsplit.split-lp.tfsplit: ; preds = %lpad48 + %lpad.tfsplit102 = landingpad { ptr, i32 } + cleanup + br label %lpad23 + +lpad23.tfsplit.split-lp.tfsplit.split-lp: ; preds = %if.end + %lpad.tfsplit.split-lp103 = landingpad { ptr, i32 } + cleanup + br label %lpad23 + +lpad23: ; preds = %lpad23.tfsplit.split-lp.tfsplit, %lpad23.tfsplit.split-lp.tfsplit.split-lp, %lpad23.tfsplit + %lpad.phi = phi { ptr, i32 } [ %lpad.tfsplit, %lpad23.tfsplit ], [ 
%lpad.tfsplit102, %lpad23.tfsplit.split-lp.tfsplit ], [ %lpad.tfsplit.split-lp103, %lpad23.tfsplit.split-lp.tfsplit.split-lp ] + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %w) + invoke void @llvm.detached.rethrow.sl_p0i32s(token %syncreg, { ptr, i32 } %lpad.phi) + to label %unreachable unwind label %lpad69.loopexit + +if.else.tf.tf.tf.tf: ; preds = %pfor.body.entry + detach within %syncreg2, label %det.achd28, label %det.cont42 unwind label %lpad39 + +det.achd28: ; preds = %if.else.tf.tf.tf.tf + %add27 = or disjoint i32 %__begin.0, 1 + %call33 = invoke noundef i32 @_Z3bari(i32 noundef %add27) + to label %invoke.cont32 unwind label %lpad29 + +invoke.cont32: ; preds = %det.achd28 + store i32 %call33, ptr %w, align 4, !tbaa !5 + reattach within %syncreg2, label %det.cont42 + +det.cont42: ; preds = %if.else.tf.tf.tf.tf, %invoke.cont32 + %call52 = invoke noundef i32 @_Z3bari(i32 noundef %__begin.0) + to label %invoke.cont51 unwind label %lpad48.tfsplit.split-lp + +invoke.cont51: ; preds = %det.cont42 + sync within %syncreg2, label %sync.continue53 + +; CHECK: invoke.cont51: +; CHECK: call void @__csi_loopbody_exit( +; CHECK: call void @__csi_before_sync( +; CHECK-NEXT: sync within %syncreg2, label %sync.continue53 + +sync.continue53: ; preds = %invoke.cont51 + invoke void @llvm.sync.unwind(token %syncreg2) + to label %invoke.cont54 unwind label %lpad48.tfsplit.split-lp + +; CHECK: sync.continue53: +; CHECK-NOT: call +; CHECK-NEXT: invoke void @llvm.sync.unwind(token %syncreg2) +; CHECK-NEXT: to label %invoke.cont54 unwind label %[[CSI_LPAD_SPLIT2:.+]] + +invoke.cont54: ; preds = %sync.continue53 + tail call void @llvm.taskframe.end(token %0) + br label %if.end + +; CHECK: invoke.cont54: +; CHECK-NEXT: call void @__csi_after_sync( +; CHECK-NEXT: call void @llvm.taskframe.end( + +lpad29: ; preds = %det.achd28 + %3 = landingpad { ptr, i32 } + cleanup + invoke void @llvm.detached.rethrow.sl_p0i32s(token %syncreg2, { ptr, i32 } %3) + to label %unreachable unwind 
label %lpad39 + +lpad39: ; preds = %if.else.tf.tf.tf.tf, %lpad29 + %4 = landingpad { ptr, i32 } + cleanup + br label %lpad48 + +; CHECK: [[CSI_LPAD_SPLIT2]]: +; CHECK-NEXT: landingpad +; CHECK-NEXT: cleanup +; CHECK-NEXT: call void @__csi_after_sync( + +lpad48.tfsplit.split-lp: ; preds = %det.cont42, %sync.continue53 + %lpad.tfsplit.split-lp = landingpad { ptr, i32 } + cleanup + br label %lpad48 + +lpad48: ; preds = %lpad48.tfsplit.split-lp, %lpad39 + %lpad.phi106 = phi { ptr, i32 } [ %4, %lpad39 ], [ %lpad.tfsplit.split-lp, %lpad48.tfsplit.split-lp ] + invoke void @llvm.taskframe.resume.sl_p0i32s(token %0, { ptr, i32 } %lpad.phi106) + to label %unreachable unwind label %lpad23.tfsplit.split-lp.tfsplit + +if.end: ; preds = %invoke.cont54, %invoke.cont17 + %w.0.load110 = load i32, ptr %w, align 4 + %call61 = invoke noundef i32 @_Z3bari(i32 noundef %w.0.load110) + to label %invoke.cont60 unwind label %lpad23.tfsplit.split-lp.tfsplit.split-lp + +invoke.cont60: ; preds = %if.end + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %w) + reattach within %syncreg, label %pfor.inc + +pfor.inc: ; preds = %pfor.cond, %invoke.cont60 + %inc = add nuw nsw i32 %__begin.0, 1 + %exitcond.not = icmp eq i32 %inc, %n + br i1 %exitcond.not, label %pfor.cond.cleanup, label %pfor.cond, !llvm.loop !9 + +pfor.cond.cleanup: ; preds = %pfor.inc + sync within %syncreg, label %sync.continue73 + +lpad69.loopexit: ; preds = %lpad23, %pfor.cond + %lpad.loopexit = landingpad { ptr, i32 } + catch ptr null + br label %lpad69 + +lpad69.loopexit.split-lp: ; preds = %sync.continue73 + %lpad.loopexit.split-lp = landingpad { ptr, i32 } + catch ptr null + br label %lpad69 + +lpad69: ; preds = %lpad69.loopexit.split-lp, %lpad69.loopexit + %lpad.phi109 = phi { ptr, i32 } [ %lpad.loopexit, %lpad69.loopexit ], [ %lpad.loopexit.split-lp, %lpad69.loopexit.split-lp ] + %5 = extractvalue { ptr, i32 } %lpad.phi109, 0 + %6 = tail call ptr @__cxa_begin_catch(ptr %5) #6 + %call83 = invoke noundef i32 @_Z3bari(i32 
noundef 0) + to label %invoke.cont82 unwind label %lpad81 + +sync.continue73: ; preds = %pfor.cond.cleanup + invoke void @llvm.sync.unwind(token %syncreg) + to label %try.cont unwind label %lpad69.loopexit.split-lp + +invoke.cont82: ; preds = %lpad69 + tail call void @__cxa_end_catch() + br label %try.cont + +try.cont: ; preds = %entry, %sync.continue73, %invoke.cont82 + ret void + +lpad81: ; preds = %lpad69 + %7 = landingpad { ptr, i32 } + cleanup + invoke void @__cxa_end_catch() + to label %eh.resume unwind label %terminate.lpad + +eh.resume: ; preds = %lpad81 + resume { ptr, i32 } %7 + +terminate.lpad: ; preds = %lpad81 + %8 = landingpad { ptr, i32 } + catch ptr null + %9 = extractvalue { ptr, i32 } %8, 0 + tail call void @__clang_call_terminate(ptr %9) #7 + unreachable + +unreachable: ; preds = %lpad23, %lpad48, %lpad29, %lpad11, %lpad + unreachable +} + +; Function Attrs: mustprogress nounwind willreturn memory(argmem: readwrite) +declare token @llvm.syncregion.start() #1 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #2 + +; Function Attrs: mustprogress nounwind willreturn memory(argmem: readwrite) +declare token @llvm.taskframe.create() #1 + +declare noundef i32 @_Z3bari(i32 noundef) local_unnamed_addr #3 + +declare i32 @__gxx_personality_v0(...) 
+ +; Function Attrs: mustprogress willreturn memory(argmem: readwrite) +declare void @llvm.detached.rethrow.sl_p0i32s(token, { ptr, i32 }) #4 + +; Function Attrs: mustprogress willreturn memory(argmem: readwrite) +declare void @llvm.taskframe.resume.sl_p0i32s(token, { ptr, i32 }) #4 + +; Function Attrs: mustprogress willreturn memory(argmem: readwrite) +declare void @llvm.sync.unwind(token) #4 + +; Function Attrs: mustprogress nounwind willreturn memory(argmem: readwrite) +declare void @llvm.taskframe.end(token) #1 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #2 + +declare ptr @__cxa_begin_catch(ptr) local_unnamed_addr + +declare void @__cxa_end_catch() local_unnamed_addr + +; Function Attrs: noinline noreturn nounwind uwtable +define linkonce_odr hidden void @__clang_call_terminate(ptr noundef %0) local_unnamed_addr #5 comdat { + %2 = tail call ptr @__cxa_begin_catch(ptr %0) #6 + tail call void @_ZSt9terminatev() #7 + unreachable +} + +declare void @_ZSt9terminatev() local_unnamed_addr + +attributes #0 = { mustprogress uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { mustprogress nounwind willreturn memory(argmem: readwrite) } +attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #3 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #4 = { mustprogress willreturn memory(argmem: readwrite) } +attributes #5 = { noinline noreturn nounwind uwtable "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" 
"tune-cpu"="generic" } +attributes #6 = { nounwind } +attributes #7 = { noreturn nounwind } + +!llvm.module.flags = !{!0, !1, !2, !3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 8, !"PIC Level", i32 2} +!2 = !{i32 7, !"PIE Level", i32 2} +!3 = !{i32 7, !"uwtable", i32 2} +!4 = !{!"clang version 19.1.7 (git@github.com:neboat/opencilk-project.git 8789ce788f0a6ecd35d9e9eef9e6652704d143d2)"} +!5 = !{!6, !6, i64 0} +!6 = !{!"int", !7, i64 0} +!7 = !{!"omnipotent char", !8, i64 0} +!8 = !{!"Simple C++ TBAA"} +!9 = distinct !{!9, !10, !11} +!10 = !{!"llvm.loop.mustprogress"} +!11 = !{!"tapir.loop.spawn.strategy", i32 1} diff --git a/llvm/test/Transforms/Tapir/CilkSanitizer/csi-sync-unwind-loopexit.ll b/llvm/test/Transforms/Tapir/CilkSanitizer/csi-sync-unwind-loopexit.ll new file mode 100644 index 000000000000..188797afedb9 --- /dev/null +++ b/llvm/test/Transforms/Tapir/CilkSanitizer/csi-sync-unwind-loopexit.ll @@ -0,0 +1,235 @@ +; Check that CSI loop instrumentation instruments around sync-unwind loop exits properly. 
+; +; RUN: opt < %s -passes="csi-setup,csi" -S | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +$__clang_call_terminate = comdat any + +; Function Attrs: mustprogress uwtable +define dso_local void @_Z3fooi(i32 noundef %n) local_unnamed_addr #0 personality ptr @__gxx_personality_v0 { +entry: + %syncreg = tail call token @llvm.syncregion.start() + %cmp = icmp sgt i32 %n, 0 + br i1 %cmp, label %pfor.cond, label %try.cont + +pfor.cond: ; preds = %entry, %pfor.inc + %__begin.0 = phi i32 [ %inc, %pfor.inc ], [ 0, %entry ] + detach within %syncreg, label %pfor.body.entry, label %pfor.inc unwind label %lpad35.loopexit + +pfor.body.entry: ; preds = %pfor.cond + %w = alloca i32, align 4 + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %w) + %0 = tail call token @llvm.taskframe.create() + %syncreg2 = tail call token @llvm.syncregion.start() + detach within %syncreg2, label %det.achd, label %det.cont unwind label %lpad4 + +det.achd: ; preds = %pfor.body.entry + %call = invoke noundef i32 @_Z3bari(i32 noundef %__begin.0) + to label %invoke.cont unwind label %lpad + +invoke.cont: ; preds = %det.achd + store i32 %call, ptr %w, align 4, !tbaa !5 + reattach within %syncreg2, label %det.cont + +det.cont: ; preds = %pfor.body.entry, %invoke.cont + %add14 = add nuw nsw i32 %__begin.0, 1 + %call16 = invoke noundef i32 @_Z3bari(i32 noundef %add14) + to label %invoke.cont15 unwind label %lpad11.tfsplit.split-lp + +invoke.cont15: ; preds = %det.cont + sync within %syncreg2, label %sync.continue + +sync.continue: ; preds = %invoke.cont15 + invoke void @llvm.sync.unwind(token %syncreg2) + to label %invoke.cont17 unwind label %lpad11.tfsplit.split-lp + +; CHECK: invoke.cont15: +; CHECK: call void @__csi_bb_exit( +; CHECK: call void @__csi_loopbody_exit( +; CHECK: call void @__csi_before_sync( +; CHECK: sync within %syncreg2, label %sync.continue + +; CHECK: 
sync.continue: +; CHECK-NOT: call +; CHECK-NEXT: invoke void @llvm.sync.unwind(token %syncreg2) +; CHECK-NEXT: to label %invoke.cont17 unwind label %[[CSI_LPAD_SPLIT:.+]] + +invoke.cont17: ; preds = %sync.continue + tail call void @llvm.taskframe.end(token %0) + %w.0.load67 = load i32, ptr %w, align 4 + %call27 = invoke noundef i32 @_Z3bari(i32 noundef %w.0.load67) + to label %invoke.cont26 unwind label %lpad23.tfsplit.split-lp + +; CHECK: invoke.cont17: +; CHECK-NEXT: call void @__csi_after_sync( +; CHECK-NEXT: call void @llvm.taskframe.end( + +invoke.cont26: ; preds = %invoke.cont17 + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %w) + reattach within %syncreg, label %pfor.inc + +pfor.inc: ; preds = %pfor.cond, %invoke.cont26 + %inc = add nuw nsw i32 %__begin.0, 1 + %exitcond.not = icmp eq i32 %inc, %n + br i1 %exitcond.not, label %pfor.cond.cleanup, label %pfor.cond, !llvm.loop !9 + +pfor.cond.cleanup: ; preds = %pfor.inc + sync within %syncreg, label %sync.continue39 + +lpad: ; preds = %det.achd + %1 = landingpad { ptr, i32 } + cleanup + invoke void @llvm.detached.rethrow.sl_p0i32s(token %syncreg2, { ptr, i32 } %1) + to label %unreachable unwind label %lpad4 + +lpad4: ; preds = %pfor.body.entry, %lpad + %2 = landingpad { ptr, i32 } + cleanup + br label %lpad11 + +; CHECK: [[CSI_LPAD_SPLIT]]: +; CHECK-NEXT: landingpad +; CHECK-NEXT: cleanup +; CHECK-NEXT: call void @__csi_after_sync( + +lpad11.tfsplit.split-lp: ; preds = %det.cont, %sync.continue + %lpad.tfsplit.split-lp63 = landingpad { ptr, i32 } + cleanup + br label %lpad11 + +lpad11: ; preds = %lpad11.tfsplit.split-lp, %lpad4 + %lpad.phi64 = phi { ptr, i32 } [ %2, %lpad4 ], [ %lpad.tfsplit.split-lp63, %lpad11.tfsplit.split-lp ] + invoke void @llvm.taskframe.resume.sl_p0i32s(token %0, { ptr, i32 } %lpad.phi64) + to label %unreachable unwind label %lpad23.tfsplit + +lpad23.tfsplit: ; preds = %lpad11 + %lpad.tfsplit = landingpad { ptr, i32 } + cleanup + br label %lpad23 + +lpad23.tfsplit.split-lp: ; preds 
= %invoke.cont17 + %lpad.tfsplit.split-lp = landingpad { ptr, i32 } + cleanup + br label %lpad23 + +lpad23: ; preds = %lpad23.tfsplit.split-lp, %lpad23.tfsplit + %lpad.phi = phi { ptr, i32 } [ %lpad.tfsplit, %lpad23.tfsplit ], [ %lpad.tfsplit.split-lp, %lpad23.tfsplit.split-lp ] + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %w) + invoke void @llvm.detached.rethrow.sl_p0i32s(token %syncreg, { ptr, i32 } %lpad.phi) + to label %unreachable unwind label %lpad35.loopexit + +lpad35.loopexit: ; preds = %lpad23, %pfor.cond + %lpad.loopexit = landingpad { ptr, i32 } + catch ptr null + br label %lpad35 + +lpad35.loopexit.split-lp: ; preds = %sync.continue39 + %lpad.loopexit.split-lp = landingpad { ptr, i32 } + catch ptr null + br label %lpad35 + +lpad35: ; preds = %lpad35.loopexit.split-lp, %lpad35.loopexit + %lpad.phi66 = phi { ptr, i32 } [ %lpad.loopexit, %lpad35.loopexit ], [ %lpad.loopexit.split-lp, %lpad35.loopexit.split-lp ] + %3 = extractvalue { ptr, i32 } %lpad.phi66, 0 + %4 = tail call ptr @__cxa_begin_catch(ptr %3) #6 + %call49 = invoke noundef i32 @_Z3bari(i32 noundef 0) + to label %invoke.cont48 unwind label %lpad47 + +sync.continue39: ; preds = %pfor.cond.cleanup + invoke void @llvm.sync.unwind(token %syncreg) + to label %try.cont unwind label %lpad35.loopexit.split-lp + +invoke.cont48: ; preds = %lpad35 + tail call void @__cxa_end_catch() + br label %try.cont + +try.cont: ; preds = %entry, %sync.continue39, %invoke.cont48 + ret void + +lpad47: ; preds = %lpad35 + %5 = landingpad { ptr, i32 } + cleanup + invoke void @__cxa_end_catch() + to label %eh.resume unwind label %terminate.lpad + +eh.resume: ; preds = %lpad47 + resume { ptr, i32 } %5 + +terminate.lpad: ; preds = %lpad47 + %6 = landingpad { ptr, i32 } + catch ptr null + %7 = extractvalue { ptr, i32 } %6, 0 + tail call void @__clang_call_terminate(ptr %7) #7 + unreachable + +unreachable: ; preds = %lpad23, %lpad11, %lpad + unreachable +} + +; Function Attrs: mustprogress nounwind willreturn 
memory(argmem: readwrite) +declare token @llvm.syncregion.start() #1 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #2 + +; Function Attrs: mustprogress nounwind willreturn memory(argmem: readwrite) +declare token @llvm.taskframe.create() #1 + +declare noundef i32 @_Z3bari(i32 noundef) local_unnamed_addr #3 + +declare i32 @__gxx_personality_v0(...) + +; Function Attrs: mustprogress willreturn memory(argmem: readwrite) +declare void @llvm.detached.rethrow.sl_p0i32s(token, { ptr, i32 }) #4 + +; Function Attrs: mustprogress willreturn memory(argmem: readwrite) +declare void @llvm.taskframe.resume.sl_p0i32s(token, { ptr, i32 }) #4 + +; Function Attrs: mustprogress willreturn memory(argmem: readwrite) +declare void @llvm.sync.unwind(token) #4 + +; Function Attrs: mustprogress nounwind willreturn memory(argmem: readwrite) +declare void @llvm.taskframe.end(token) #1 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #2 + +declare ptr @__cxa_begin_catch(ptr) local_unnamed_addr + +declare void @__cxa_end_catch() local_unnamed_addr + +; Function Attrs: noinline noreturn nounwind uwtable +define linkonce_odr hidden void @__clang_call_terminate(ptr noundef %0) local_unnamed_addr #5 comdat { + %2 = tail call ptr @__cxa_begin_catch(ptr %0) #6 + tail call void @_ZSt9terminatev() #7 + unreachable +} + +declare void @_ZSt9terminatev() local_unnamed_addr + +attributes #0 = { mustprogress uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { mustprogress nounwind willreturn memory(argmem: readwrite) } +attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn 
memory(argmem: readwrite) } +attributes #3 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #4 = { mustprogress willreturn memory(argmem: readwrite) } +attributes #5 = { noinline noreturn nounwind uwtable "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #6 = { nounwind } +attributes #7 = { noreturn nounwind } + +!llvm.module.flags = !{!0, !1, !2, !3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 8, !"PIC Level", i32 2} +!2 = !{i32 7, !"PIE Level", i32 2} +!3 = !{i32 7, !"uwtable", i32 2} +!4 = !{!"clang version 19.1.7 (git@github.com:neboat/opencilk-project.git 8789ce788f0a6ecd35d9e9eef9e6652704d143d2)"} +!5 = !{!6, !6, i64 0} +!6 = !{!"int", !7, i64 0} +!7 = !{!"omnipotent char", !8, i64 0} +!8 = !{!"Simple C++ TBAA"} +!9 = distinct !{!9, !10, !11, !12} +!10 = !{!"llvm.loop.mustprogress"} +!11 = !{!"tapir.loop.spawn.strategy", i32 1} +!12 = !{!"llvm.loop.unroll.disable"} diff --git a/llvm/test/Transforms/Tapir/CilkSanitizer/split-unreachable-predecessors.ll b/llvm/test/Transforms/Tapir/CilkSanitizer/split-unreachable-predecessors.ll index a83b48741a80..9a4e5343fa8b 100644 --- a/llvm/test/Transforms/Tapir/CilkSanitizer/split-unreachable-predecessors.ll +++ b/llvm/test/Transforms/Tapir/CilkSanitizer/split-unreachable-predecessors.ll @@ -116,9 +116,6 @@ cleanup.cont: ; preds = %cleanup, %cleanup unreachable: ; preds = %cleanup, %lpad unreachable -; CHECK: [[DR_UNREACHABLE]]: -; CHECK-NEXT: unreachable - ; CHECK: unreachable: ; CHECK: unreachable } diff --git a/llvm/test/Transforms/Tapir/mixed-predecessors-of-unreachable.ll b/llvm/test/Transforms/Tapir/mixed-predecessors-of-unreachable.ll new file mode 100644 index 000000000000..2825387ecf92 --- /dev/null +++ 
b/llvm/test/Transforms/Tapir/mixed-predecessors-of-unreachable.ll @@ -0,0 +1,91 @@ +; Check that loop simplification does not split placeholder successors of +; detached.rethrows when those unreachable blocks have other predecessors. +; +; RUN: opt < %s -passes="cilksan" -S | FileCheck %s + +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32" +target triple = "arm64-apple-macosx13.0.0" + +; Function Attrs: nounwind willreturn memory(argmem: readwrite) +declare token @llvm.syncregion.start() #0 + +; Function Attrs: willreturn memory(argmem: readwrite) +declare void @llvm.detached.rethrow.sl_p0i32s(token, { ptr, i32 }) #1 + +; Function Attrs: sanitize_cilk +define void @_ZN9LAMMPS_NS9StencilMD31SORT_LOCAL_ATOMS_ZOID_MANY_CUTSEv() #2 personality ptr null { +entry: + %syncreg = call token @llvm.syncregion.start() + br label %pfor.detach + +pfor.detach: ; preds = %pfor.detach, %entry + detach within %syncreg, label %pfor.body.entry, label %pfor.detach unwind label %lpad635 + +pfor.body.entry: ; preds = %pfor.detach + %syncreg51 = call token @llvm.syncregion.start() + br label %pfor.detach62 + +pfor.detach62: ; preds = %pfor.detach62, %pfor.body.entry + detach within %syncreg51, label %pfor.body.entry64, label %pfor.detach62 unwind label %lpad109 + +pfor.body.entry64: ; preds = %pfor.detach62 + br label %for.cond + +for.cond: ; preds = %for.cond, %pfor.body.entry64 + br label %for.cond + +lpad109: ; preds = %pfor.detach62 + %0 = landingpad { ptr, i32 } + cleanup + invoke void @llvm.detached.rethrow.sl_p0i32s(token %syncreg, { ptr, i32 } zeroinitializer) + to label %unreachable unwind label %lpad635 + +; CHECK: lpad109: +; CHECK-NEXT: landingpad +; CHECK-NEXT: cleanup +; CHECK-NEXT: invoke void @llvm.detached.rethrow.sl_p0i32s(token %syncreg, +; CHECK-NEXT: to label %unreachable unwind label %lpad635 + +pfor.body.entry140: ; No predecessors! 
+ %syncreg143 = call token @llvm.syncregion.start() + br label %pfor.detach157 + +pfor.detach157: ; preds = %pfor.preattach289, %pfor.detach157, %pfor.body.entry140 + detach within %syncreg143, label %pfor.body.entry159, label %pfor.detach157 unwind label %lpad295 + +pfor.body.entry159: ; preds = %pfor.detach157 + switch i32 0, label %unreachable [ + i32 0, label %pfor.preattach289 + i32 1, label %pfor.preattach289 + ] + +pfor.preattach289: ; preds = %pfor.body.entry159, %pfor.body.entry159 + reattach within %syncreg143, label %pfor.detach157 + +lpad295: ; preds = %pfor.detach157 + %1 = landingpad { ptr, i32 } + cleanup + invoke void @llvm.detached.rethrow.sl_p0i32s(token none, { ptr, i32 } zeroinitializer) + to label %unreachable unwind label %lpad635 + +; CHECK: lpad295: +; CHECK-NEXT: landingpad +; CHECK-NEXT: cleanup +; CHECK-NEXT: invoke void @llvm.detached.rethrow.sl_p0i32s(token none, +; CHECK-NEXT: to label %unreachable unwind label %lpad635 + +lpad635: ; preds = %lpad295, %lpad109, %pfor.detach + %2 = landingpad { ptr, i32 } + cleanup + resume { ptr, i32 } zeroinitializer + +unreachable: ; preds = %lpad295, %pfor.body.entry159, %lpad109 + unreachable +} + +; uselistorder directives +uselistorder ptr null, { 1, 2, 0 } + +attributes #0 = { nounwind willreturn memory(argmem: readwrite) } +attributes #1 = { willreturn memory(argmem: readwrite) } +attributes #2 = { sanitize_cilk }