diff --git a/src/llvm-alloc-helpers.cpp b/src/llvm-alloc-helpers.cpp index 194c6837860ca..a1ed66a190190 100644 --- a/src/llvm-alloc-helpers.cpp +++ b/src/llvm-alloc-helpers.cpp @@ -214,6 +214,7 @@ void jl_alloc::runEscapeAnalysis(llvm::CallInst *I, EscapeAnalysisRequiredArgs r } if (auto call = dyn_cast(inst)) { // TODO handle `memcmp` + // TODO handle `memcpy` which is used a lot more often since opaque pointers // None of the intrinsics should care if the memory is stack or heap allocated. auto callee = call->getCalledOperand(); if (auto II = dyn_cast(call)) { diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp index 7dd794a4d8847..ce1d22f42d0ae 100644 --- a/src/llvm-alloc-opt.cpp +++ b/src/llvm-alloc-opt.cpp @@ -742,7 +742,9 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref, AllocF auto replace_inst = [&] (Instruction *user) { Instruction *orig_i = cur.orig_i; Instruction *new_i = cur.new_i; - if (isa(user) || isa(user)) { + if (isa(user) || isa(user) || + isa(user) || isa(user)) { + // TODO: these atomics are likely removable if the user is the first argument user->replaceUsesOfWith(orig_i, new_i); } else if (auto call = dyn_cast(user)) { @@ -1111,6 +1113,7 @@ void Optimizer::splitOnStack(CallInst *orig_inst) return; } else if (isa(user) || isa(user)) { + // TODO: Downgrade atomics here potentially auto slot_idx = find_slot(offset); auto &slot = slots[slot_idx]; assert(slot.offset <= offset && slot.offset + slot.size >= offset); diff --git a/test/atomics.jl b/test/atomics.jl index 7e9f29c23ca10..2d4a713b1d30d 100644 --- a/test/atomics.jl +++ b/test/atomics.jl @@ -1099,3 +1099,14 @@ test_once_undef(Any) test_once_undef(Union{Nothing,Integer}) test_once_undef(UndefComplex{Any}) test_once_undef(UndefComplex{UndefComplex{Any}}) + +mutable struct Atomic57190 + @atomic x::Int +end + + +function add_one57190!() + @atomic (Atomic57190(0).x) += 1 +end + +@test add_one57190!() == 1 diff --git a/test/llvmpasses/alloc-opt-pass.ll 
b/test/llvmpasses/alloc-opt-pass.ll index 665687e86835d..83f2118412cc1 100644 --- a/test/llvmpasses/alloc-opt-pass.ll +++ b/test/llvmpasses/alloc-opt-pass.ll @@ -73,6 +73,11 @@ L3: ; preds = %L2, %L1, %0 } ; CHECK-LABEL: }{{$}} +declare void @external_function() + +declare ptr addrspace(10) @external_function2() + + ; CHECK-LABEL: @legal_int_types ; CHECK: alloca [12 x i8] ; CHECK-NOT: alloca i96 @@ -89,21 +94,6 @@ define void @legal_int_types() { } ; CHECK-LABEL: }{{$}} -declare void @external_function() - -declare ptr addrspace(10) @external_function2() - -declare ptr @julia.ptls_states() - -declare ptr @julia.get_pgcstack() - -declare noalias ptr addrspace(10) @julia.gc_alloc_obj(ptr, i64, ptr addrspace(10)) - -declare ptr @julia.pointer_from_objref(ptr addrspace(11)) - -declare token @llvm.julia.gc_preserve_begin(...) - -declare void @llvm.julia.gc_preserve_end(token) ; CHECK-LABEL: @memref_collision ; OPAQUE: call ptr @julia.ptls_states() @@ -171,13 +161,13 @@ define void @initializers() { %pgcstack = call ptr @julia.get_pgcstack() %ptls = call ptr @julia.ptls_states() %ptls_i8 = bitcast ptr %ptls to ptr - %var1 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 1, ptr addrspace(10) @tag) #1 + %var1 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 1, ptr addrspace(10) @tag) #4 %var2 = addrspacecast ptr addrspace(10) %var1 to ptr addrspace(11) %var3 = call ptr @julia.pointer_from_objref(ptr addrspace(11) %var2) - %var4 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 2, ptr addrspace(10) @tag) #2 + %var4 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 2, ptr addrspace(10) @tag) #7 %var5 = addrspacecast ptr addrspace(10) %var4 to ptr addrspace(11) %var6 = call ptr @julia.pointer_from_objref(ptr addrspace(11) %var5) - %var7 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 3, ptr addrspace(10) @tag) #3 + %var7 = call ptr addrspace(10) @julia.gc_alloc_obj(ptr %ptls_i8, i64 3, ptr addrspace(10) 
@tag) #1 %var8 = addrspacecast ptr addrspace(10) %var7 to ptr addrspace(11) %var9 = call ptr @julia.pointer_from_objref(ptr addrspace(11) %var8) ret void @@ -203,14 +193,154 @@ union_move9: ; No predecessors! } ; CHECK-LABEL: }{{$}} +@0 = private unnamed_addr constant ptr inttoptr (i64 4373799056 to ptr), !julia.constgv !0 +@1 = private unnamed_addr constant i64 0, align 8 + +; CHECK-LABEL: @cmpxchg +; CHECK: alloca +; CHECK: alloca +; CHECK: %20 = cmpxchg ptr %2, +define swiftcc i64 @"cmpxchg"(ptr nonnull swiftself %0) #0 { + %2 = alloca i64, align 16 + %3 = call ptr @julia.get_pgcstack() + %4 = getelementptr inbounds i8, ptr %3, i32 -152 + %5 = getelementptr inbounds i8, ptr %4, i32 168 + %6 = load ptr, ptr %5, align 8, !tbaa !4 + %7 = getelementptr inbounds i8, ptr %6, i32 16 + %8 = load ptr, ptr %7, align 8, !tbaa !8, !invariant.load !0 + fence syncscope("singlethread") seq_cst + call void @julia.safepoint(ptr %8) + fence syncscope("singlethread") seq_cst + %9 = load ptr, ptr @0, align 8, !tbaa !8, !invariant.load !0, !alias.scope !10, !noalias !13, !nonnull !0, !dereferenceable !18, !align !19 + %10 = ptrtoint ptr %9 to i64 + %11 = inttoptr i64 %10 to ptr + %12 = getelementptr inbounds i8, ptr %3, i32 -152 + %13 = addrspacecast ptr %11 to ptr addrspace(10) + call void @llvm.lifetime.start.p0(i64 8, ptr %2) + %14 = call noalias nonnull align 8 dereferenceable(8) ptr addrspace(10) @julia.gc_alloc_obj(ptr %12, i64 8, ptr addrspace(10) %13) #7 + %15 = addrspacecast ptr addrspace(10) %14 to ptr addrspace(11) + call void @llvm.memcpy.p11.p0.i64(ptr addrspace(11) align 8 %15, ptr align 8 @1, i64 8, i1 false), !tbaa !20, !alias.scope !23, !noalias !24 + %16 = addrspacecast ptr addrspace(10) %14 to ptr addrspace(11) + %17 = load atomic i64, ptr addrspace(11) %16 monotonic, align 8, !tbaa !25, !alias.scope !23, !noalias !24 + br label %19 + +18: ; preds = %19 + ret i64 %21 + +19: ; preds = %19, %1 + %20 = phi i64 [ %17, %1 ], [ %23, %19 ] + %21 = call swiftcc i64 
@"jlsys_+_47"(ptr nonnull swiftself %3, i64 signext %20, i64 signext 1) + %22 = cmpxchg ptr addrspace(11) %16, i64 %20, i64 %21 seq_cst monotonic, align 8, !tbaa !25, !alias.scope !23, !noalias !24 + %23 = extractvalue { i64, i1 } %22, 0 + %24 = extractvalue { i64, i1 } %22, 1 + br i1 %24, label %18, label %19 +} + +; CHECK-LABEL: }{{$}} +; CHECK-LABEL: @atomicrmw +; CHECK: alloca +; CHECK: alloca +; CHECK: atomicrmw xchg ptr %2, +define swiftcc i64 @"atomicrmw"(ptr nonnull swiftself %0) #0 { + %2 = alloca i64, align 16 + %3 = call ptr @julia.get_pgcstack() + %4 = getelementptr inbounds i8, ptr %3, i32 -152 + %5 = getelementptr inbounds i8, ptr %4, i32 168 + %6 = load ptr, ptr %5, align 8, !tbaa !4 + %7 = getelementptr inbounds i8, ptr %6, i32 16 + %8 = load ptr, ptr %7, align 8, !tbaa !8, !invariant.load !0 + fence syncscope("singlethread") seq_cst + call void @julia.safepoint(ptr %8) + fence syncscope("singlethread") seq_cst + %9 = load ptr, ptr @0, align 8, !tbaa !8, !invariant.load !0, !alias.scope !10, !noalias !13, !nonnull !0, !dereferenceable !18, !align !19 + %10 = ptrtoint ptr %9 to i64 + %11 = inttoptr i64 %10 to ptr + %12 = getelementptr inbounds i8, ptr %3, i32 -152 + %13 = addrspacecast ptr %11 to ptr addrspace(10) + call void @llvm.lifetime.start.p0(i64 8, ptr %2) + %14 = call noalias nonnull align 8 dereferenceable(8) ptr addrspace(10) @julia.gc_alloc_obj(ptr %12, i64 8, ptr addrspace(10) %13) #7 + %15 = addrspacecast ptr addrspace(10) %14 to ptr addrspace(11) + call void @llvm.memcpy.p11.p0.i64(ptr addrspace(11) align 8 %15, ptr align 8 @1, i64 8, i1 false), !tbaa !20, !alias.scope !23, !noalias !24 + %16 = addrspacecast ptr addrspace(10) %14 to ptr addrspace(11) + %17 = load atomic i64, ptr addrspace(11) %16 monotonic, align 8, !tbaa !25, !alias.scope !23, !noalias !24 + %18 = call swiftcc i64 @"jlsys_+_47"(ptr nonnull swiftself %3, i64 signext %17, i64 signext 1) + %19 = atomicrmw xchg ptr addrspace(11) %16, i64 %18 seq_cst, align 8, !tbaa !25, 
!alias.scope !23, !noalias !24 + ret i64 %19 +} + +declare ptr @julia.ptls_states() + +declare ptr @julia.pointer_from_objref(ptr addrspace(11)) + +declare token @llvm.julia.gc_preserve_begin(...) + +declare void @llvm.julia.gc_preserve_end(token) + +declare ptr @julia.get_pgcstack() + +; Function Attrs: mustprogress nounwind willreturn memory(inaccessiblemem: readwrite) +declare nonnull align 8 dereferenceable(8) ptr addrspace(10) @ijl_box_int64(i64 signext) #2 + +; Function Attrs: memory(argmem: readwrite, inaccessiblemem: readwrite) +declare void @julia.safepoint(ptr) #3 + +; Function Attrs: mustprogress nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite) +declare noalias nonnull ptr addrspace(10) @julia.gc_alloc_obj(ptr, i64, ptr addrspace(10)) #4 + ; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite) -declare void @llvm.memcpy.p11.p0.i64(ptr addrspace(11) noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #0 +declare void @llvm.memcpy.p11.p0.i64(ptr addrspace(11) noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #5 + ; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite) -declare void @llvm.memcpy.p0.p11.i64(ptr noalias nocapture writeonly, ptr addrspace(11) noalias nocapture readonly, i64, i1 immarg) #0 +declare void @llvm.memcpy.p0.p11.i64(ptr noalias nocapture writeonly, ptr addrspace(11) noalias nocapture readonly, i64, i1 immarg) #5 + ; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite) -declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #0 +declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #5 + +declare swiftcc i64 @"jlsys_+_47"(ptr nonnull swiftself, i64 signext, i64 signext) #0 + +; Function Attrs: nocallback nofree nosync 
nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #6 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #6 + +attributes #0 = { "probe-stack"="inline-asm" } +attributes #1 = { nounwind willreturn allockind("alloc,zeroed") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite) } +attributes #2 = { mustprogress nounwind willreturn memory(inaccessiblemem: readwrite) } +attributes #3 = { memory(argmem: readwrite, inaccessiblemem: readwrite) } +attributes #4 = { mustprogress nounwind willreturn allockind("alloc") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite) } +attributes #5 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +attributes #6 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #7 = { nounwind willreturn allockind("alloc,uninitialized") allocsize(1) memory(argmem: read, inaccessiblemem: readwrite) } +attributes #8 = { nounwind willreturn memory(inaccessiblemem: readwrite) } + +!llvm.module.flags = !{!1, !2, !3} + +!0 = !{} +!1 = !{i32 2, !"Dwarf Version", i32 4} +!2 = !{i32 2, !"Debug Info Version", i32 3} +!3 = !{i32 2, !"julia.optlevel", i32 2} +!4 = !{!5, !5, i64 0} +!5 = !{!"jtbaa_gcframe", !6, i64 0} +!6 = !{!"jtbaa", !7, i64 0} +!7 = !{!"jtbaa"} +!8 = !{!9, !9, i64 0, i64 1} +!9 = !{!"jtbaa_const", !6, i64 0} +!10 = !{!11} +!11 = !{!"jnoalias_const", !12} +!12 = !{!"jnoalias"} +!13 = !{!14, !15, !16, !17} +!14 = !{!"jnoalias_gcframe", !12} +!15 = !{!"jnoalias_stack", !12} +!16 = !{!"jnoalias_data", !12} +!17 = !{!"jnoalias_typemd", !12} +!18 = !{i64 56} +!19 = !{i64 16} +!20 = !{!21, !21, i64 0} +!21 = !{!"jtbaa_value", !22, i64 0} +!22 = !{!"jtbaa_data", !6, i64 0} +!23 = !{!16} +!24 = !{!14, !15, !17, !11} +!25 = !{!26, !26, i64 0} +!26 = !{!"jtbaa_mutab", !21, i64 0} -attributes #0 = { nocallback nofree 
nounwind willreturn memory(argmem: readwrite) } -attributes #1 = { allockind("alloc") } -attributes #2 = { allockind("alloc,uninitialized") } -attributes #3 = { allockind("alloc,zeroed") }