Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Teach alloc-opt to handle atomics a bit better #57208

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/llvm-alloc-helpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,7 @@ void jl_alloc::runEscapeAnalysis(llvm::CallInst *I, EscapeAnalysisRequiredArgs r
}
if (auto call = dyn_cast<CallInst>(inst)) {
// TODO handle `memcmp`
// TODO handle `memcpy` which is used a lot more often since opaque pointers
// None of the intrinsics should care if the memory is stack or heap allocated.
auto callee = call->getCalledOperand();
if (auto II = dyn_cast<IntrinsicInst>(call)) {
Expand Down
5 changes: 4 additions & 1 deletion src/llvm-alloc-opt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -742,7 +742,9 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref, AllocF
auto replace_inst = [&] (Instruction *user) {
Instruction *orig_i = cur.orig_i;
Instruction *new_i = cur.new_i;
if (isa<LoadInst>(user) || isa<StoreInst>(user)) {
if (isa<LoadInst>(user) || isa<StoreInst>(user) ||
isa<AtomicCmpXchgInst>(user) || isa<AtomicRMWInst>(user)) {
// TODO: these atomics are likely removable if the user is the first argument
user->replaceUsesOfWith(orig_i, new_i);
}
else if (auto call = dyn_cast<CallInst>(user)) {
Expand Down Expand Up @@ -1111,6 +1113,7 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
return;
}
else if (isa<AtomicCmpXchgInst>(user) || isa<AtomicRMWInst>(user)) {
// TODO: Downgrade atomics here potentially
auto slot_idx = find_slot(offset);
auto &slot = slots[slot_idx];
assert(slot.offset <= offset && slot.offset + slot.size >= offset);
Expand Down
180 changes: 155 additions & 25 deletions test/llvmpasses/alloc-opt-pass.ll
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,11 @@ L3: ; preds = %L2, %L1, %0
}
; CHECK-LABEL: }{{$}}

declare void @external_function()

declare ptr addrspace(10) @external_function2()


; CHECK-LABEL: @legal_int_types
; CHECK: alloca [12 x i8]
; CHECK-NOT: alloca i96
Expand All @@ -89,21 +94,6 @@ define void @legal_int_types() {
}
; CHECK-LABEL: }{{$}}

declare void @external_function()

declare ptr addrspace(10) @external_function2()

declare ptr @julia.ptls_states()

declare ptr @julia.get_pgcstack()

declare noalias ptr addrspace(10) @julia.gc_alloc_obj(ptr, i64, ptr addrspace(10))

declare ptr @julia.pointer_from_objref(ptr addrspace(11))

declare token @llvm.julia.gc_preserve_begin(...)

declare void @llvm.julia.gc_preserve_end(token)

; CHECK-LABEL: @memref_collision
; OPAQUE: call ptr @julia.ptls_states()
Expand Down Expand Up @@ -171,13 +161,13 @@ define void @initializers() {
%pgcstack = call ptr @julia.get_pgcstack()
%ptls = call ptr @julia.ptls_states()
%ptls_i8 = bitcast ptr %ptls to ptr
%var1 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 1, ptr addrspace(10) @tag) #1
%var1 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 1, ptr addrspace(10) @tag) #4
%var2 = addrspacecast ptr addrspace(10) %var1 to ptr addrspace(11)
%var3 = call ptr @julia.pointer_from_objref(ptr addrspace(11) %var2)
%var4 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 2, ptr addrspace(10) @tag) #2
%var4 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 2, ptr addrspace(10) @tag) #7
%var5 = addrspacecast ptr addrspace(10) %var4 to ptr addrspace(11)
%var6 = call ptr @julia.pointer_from_objref(ptr addrspace(11) %var5)
%var7 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 3, ptr addrspace(10) @tag) #3
%var7 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 3, ptr addrspace(10) @tag) #1
%var8 = addrspacecast ptr addrspace(10) %var7 to ptr addrspace(11)
%var9 = call ptr @julia.pointer_from_objref(ptr addrspace(11) %var8)
ret void
Expand All @@ -203,14 +193,154 @@ union_move9: ; No predecessors!
}
; CHECK-LABEL: }{{$}}

@0 = private unnamed_addr constant ptr inttoptr (i64 4373799056 to ptr), !julia.constgv !0
@1 = private unnamed_addr constant i64 0, align 8

; CHECK-LABEL: @cmpxchg
; CHECK: alloca
; CHECK: alloca
; CHECK: %20 = cmpxchg ptr %2,
; Test input: an 8-byte object obtained from @julia.gc_alloc_obj whose only
; uses are local -- a memcpy that initializes it, a monotonic atomic load, and
; a cmpxchg retry loop. The FileCheck directives above expect two allocas and
; the cmpxchg operating on a plain `ptr` in the pass output.
; NOTE(review): the expected value names (%2, %20) presumably depend on how
; unnamed values are renumbered after the pass runs -- re-verify them if any
; instruction is added to or removed from this function.
define swiftcc i64 @"cmpxchg"(ptr nonnull swiftself %0) #0 {
; Pre-existing stack slot; within this function it is only referenced by the
; llvm.lifetime.start call below.
%2 = alloca i64, align 16
%3 = call ptr @julia.get_pgcstack()
; Pointer chase starting at the pgcstack that produces the argument handed to
; julia.safepoint (presumably task -> ptls -> safepoint page; verify offsets).
%4 = getelementptr inbounds i8, ptr %3, i32 -152
%5 = getelementptr inbounds i8, ptr %4, i32 168
%6 = load ptr, ptr %5, align 8, !tbaa !4
%7 = getelementptr inbounds i8, ptr %6, i32 16
%8 = load ptr, ptr %7, align 8, !tbaa !8, !invariant.load !0
fence syncscope("singlethread") seq_cst
call void @julia.safepoint(ptr %8)
fence syncscope("singlethread") seq_cst
; Load the constant pointer stored in @0 and cast it into addrspace(10); it is
; passed as the third argument of the allocation call below.
%9 = load ptr, ptr @0, align 8, !tbaa !8, !invariant.load !0, !alias.scope !10, !noalias !13, !nonnull !0, !dereferenceable !18, !align !19
%10 = ptrtoint ptr %9 to i64
%11 = inttoptr i64 %10 to ptr
%12 = getelementptr inbounds i8, ptr %3, i32 -152
%13 = addrspacecast ptr %11 to ptr addrspace(10)
call void @llvm.lifetime.start.p0(i64 8, ptr %2)
; The 8-byte allocation under test.
%14 = call noalias nonnull align 8 dereferenceable(8) ptr addrspace(10) @julia.gc_alloc_obj(ptr %12, i64 8, ptr addrspace(10) %13) #7
%15 = addrspacecast ptr addrspace(10) %14 to ptr addrspace(11)
; Initialize the object by copying 8 bytes from @1 (a constant i64 0).
call void @llvm.memcpy.p11.p0.i64(ptr addrspace(11) align 8 %15, ptr align 8 @1, i64 8, i1 false), !tbaa !20, !alias.scope !23, !noalias !24
%16 = addrspacecast ptr addrspace(10) %14 to ptr addrspace(11)
; Initial expected value for the compare-exchange loop.
%17 = load atomic i64, ptr addrspace(11) %16 monotonic, align 8, !tbaa !25, !alias.scope !23, !noalias !24
br label %19

; Exit block: return the value that was successfully stored by the cmpxchg.
18: ; preds = %19
ret i64 %21

; Retry loop: %20 is the expected value (the initial load on entry, the value
; observed by the failed cmpxchg on later iterations).
19: ; preds = %19, %1
%20 = phi i64 [ %17, %1 ], [ %23, %19 ]
; Compute the replacement value from %20 and the constant 1 (presumably an
; integer add, judging by the callee name -- its body is not visible here).
%21 = call swiftcc i64 @"jlsys_+_47"(ptr nonnull swiftself %3, i64 signext %20, i64 signext 1)
; The instruction under test: atomic compare-exchange on the allocation.
%22 = cmpxchg ptr addrspace(11) %16, i64 %20, i64 %21 seq_cst monotonic, align 8, !tbaa !25, !alias.scope !23, !noalias !24
; Element 0 is the value found in memory, element 1 is the success flag.
%23 = extractvalue { i64, i1 } %22, 0
%24 = extractvalue { i64, i1 } %22, 1
br i1 %24, label %18, label %19
}

; CHECK-LABEL: }{{$}}
; CHECK-LABEL: @atomicrmw
; CHECK: alloca
; CHECK: alloca
; CHECK: atomicrmw xchg ptr %2,
; Test input: an 8-byte object obtained from @julia.gc_alloc_obj whose only
; uses are local -- a memcpy that initializes it, a monotonic atomic load, and
; a single atomicrmw xchg. The FileCheck directives above expect two allocas
; and the atomicrmw operating on a plain `ptr` in the pass output.
; NOTE(review): the expected pointer name (%2) presumably depends on how
; unnamed values are renumbered after the pass runs -- re-verify it if any
; instruction is added to or removed from this function.
define swiftcc i64 @"atomicrmw"(ptr nonnull swiftself %0) #0 {
; Pre-existing stack slot; within this function it is only referenced by the
; llvm.lifetime.start call below.
%2 = alloca i64, align 16
%3 = call ptr @julia.get_pgcstack()
; Pointer chase starting at the pgcstack that produces the argument handed to
; julia.safepoint (presumably task -> ptls -> safepoint page; verify offsets).
%4 = getelementptr inbounds i8, ptr %3, i32 -152
%5 = getelementptr inbounds i8, ptr %4, i32 168
%6 = load ptr, ptr %5, align 8, !tbaa !4
%7 = getelementptr inbounds i8, ptr %6, i32 16
%8 = load ptr, ptr %7, align 8, !tbaa !8, !invariant.load !0
fence syncscope("singlethread") seq_cst
call void @julia.safepoint(ptr %8)
fence syncscope("singlethread") seq_cst
; Load the constant pointer stored in @0 and cast it into addrspace(10); it is
; passed as the third argument of the allocation call below.
%9 = load ptr, ptr @0, align 8, !tbaa !8, !invariant.load !0, !alias.scope !10, !noalias !13, !nonnull !0, !dereferenceable !18, !align !19
%10 = ptrtoint ptr %9 to i64
%11 = inttoptr i64 %10 to ptr
%12 = getelementptr inbounds i8, ptr %3, i32 -152
%13 = addrspacecast ptr %11 to ptr addrspace(10)
call void @llvm.lifetime.start.p0(i64 8, ptr %2)
; The 8-byte allocation under test.
%14 = call noalias nonnull align 8 dereferenceable(8) ptr addrspace(10) @julia.gc_alloc_obj(ptr %12, i64 8, ptr addrspace(10) %13) #7
%15 = addrspacecast ptr addrspace(10) %14 to ptr addrspace(11)
; Initialize the object by copying 8 bytes from @1 (a constant i64 0).
call void @llvm.memcpy.p11.p0.i64(ptr addrspace(11) align 8 %15, ptr align 8 @1, i64 8, i1 false), !tbaa !20, !alias.scope !23, !noalias !24
%16 = addrspacecast ptr addrspace(10) %14 to ptr addrspace(11)
%17 = load atomic i64, ptr addrspace(11) %16 monotonic, align 8, !tbaa !25, !alias.scope !23, !noalias !24
; Compute the replacement value from %17 and the constant 1 (presumably an
; integer add, judging by the callee name -- its body is not visible here).
%18 = call swiftcc i64 @"jlsys_+_47"(ptr nonnull swiftself %3, i64 signext %17, i64 signext 1)
%19 = atomicrmw xchg ptr addrspace(11) %16, i64 %18 seq_cst, align 8, !tbaa !25, !alias.scope !23, !noalias !24 ; instruction under test: swaps in %18, %19 receives the previous contents
; Return the value that was in the object before the exchange.
ret i64 %19
}

declare ptr @julia.ptls_states()

declare ptr @julia.pointer_from_objref(ptr addrspace(11))

declare token @llvm.julia.gc_preserve_begin(...)

declare void @llvm.julia.gc_preserve_end(token)

declare ptr @julia.get_pgcstack()

; Function Attrs: mustprogress nounwind willreturn memory(inaccessiblemem: readwrite)
declare nonnull align 8 dereferenceable(8) ptr addrspace(10) @ijl_box_int64(i64 signext) #2

; Function Attrs: memory(argmem: readwrite, inaccessiblemem: readwrite)
declare void @julia.safepoint(ptr) #3

; Function Attrs: mustprogress nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite)
declare noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr, i64, ptr addrspace(10)) #4

; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
declare void @llvm.memcpy.p11.p0.i64(ptr addrspace(11) noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #0
declare void @llvm.memcpy.p11.p0.i64(ptr addrspace(11) noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #5

; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
declare void @llvm.memcpy.p0.p11.i64(ptr noalias nocapture writeonly, ptr addrspace(11) noalias nocapture readonly, i64, i1 immarg) #0
declare void @llvm.memcpy.p0.p11.i64(ptr noalias nocapture writeonly, ptr addrspace(11) noalias nocapture readonly, i64, i1 immarg) #5

; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #0
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #5

declare swiftcc i64 @"jlsys_+_47"(ptr nonnull swiftself, i64 signext, i64 signext) #0

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #6

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #6

attributes #0 = { "probe-stack"="inline-asm" }
attributes #1 = { nounwind willreturn allockind("alloc,zeroed") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite) }
attributes #2 = { mustprogress nounwind willreturn memory(inaccessiblemem: readwrite) }
attributes #3 = { memory(argmem: readwrite, inaccessiblemem: readwrite) }
attributes #4 = { mustprogress nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite) }
attributes #5 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
attributes #7 = { nounwind willreturn allockind("alloc,uninitialized") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite) }
attributes #8 = { nounwind willreturn memory(inaccessiblemem: readwrite) }

!llvm.module.flags = !{!1, !2, !3}

!0 = !{}
!1 = !{i32 2, !"Dwarf Version", i32 4}
!2 = !{i32 2, !"Debug Info Version", i32 3}
!3 = !{i32 2, !"julia.optlevel", i32 2}
!4 = !{!5, !5, i64 0}
!5 = !{!"jtbaa_gcframe", !6, i64 0}
!6 = !{!"jtbaa", !7, i64 0}
!7 = !{!"jtbaa"}
!8 = !{!9, !9, i64 0, i64 1}
!9 = !{!"jtbaa_const", !6, i64 0}
!10 = !{!11}
!11 = !{!"jnoalias_const", !12}
!12 = !{!"jnoalias"}
!13 = !{!14, !15, !16, !17}
!14 = !{!"jnoalias_gcframe", !12}
!15 = !{!"jnoalias_stack", !12}
!16 = !{!"jnoalias_data", !12}
!17 = !{!"jnoalias_typemd", !12}
!18 = !{i64 56}
!19 = !{i64 16}
!20 = !{!21, !21, i64 0}
!21 = !{!"jtbaa_value", !22, i64 0}
!22 = !{!"jtbaa_data", !6, i64 0}
!23 = !{!16}
!24 = !{!14, !15, !17, !11}
!25 = !{!26, !26, i64 0}
!26 = !{!"jtbaa_mutab", !21, i64 0}

attributes #0 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
attributes #1 = { allockind("alloc") }
attributes #2 = { allockind("alloc,uninitialized") }
attributes #3 = { allockind("alloc,zeroed") }