From b07ca0efed062a0297789db7417cc115b56658a2 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 26 Aug 2024 00:05:40 -0700 Subject: [PATCH 1/4] arch/x86: Fix segfault after mcount() on temporary registers It seems some aggresively optimizing compilers use temporary registers like r10 across function calls. The -fipa-ra might enable this but I didn't think it'll do that over function calls it cannot see the definition. Anyway it happens and we should be able to deal with it like we did in dynamic.S. Signed-off-by: Namhyung Kim --- arch/x86_64/mcount.S | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/arch/x86_64/mcount.S b/arch/x86_64/mcount.S index dd32b0a6a..2a89c4fab 100644 --- a/arch/x86_64/mcount.S +++ b/arch/x86_64/mcount.S @@ -58,8 +58,15 @@ GLOBAL(mcount) /* save rax (implicit argument for variadic functions) */ push %rax + /* save scratch registers due to -fipa-ra */ + push %r10 + push %r11 + call mcount_entry + pop %r11 + pop %r10 + pop %rax /* restore original stack pointer */ @@ -88,10 +95,19 @@ END(mcount) */ ENTRY(mcount_return) .cfi_startproc - sub $48, %rsp - .cfi_def_cfa_offset 48 - - movq %rdi, 32(%rsp) + sub $96, %rsp + .cfi_def_cfa_offset 96 + + /* save all caller-saved registers due to -fipa-ra */ + movq %r11, 80(%rsp) + movq %r10, 72(%rsp) + movq %r9, 64(%rsp) + movq %r8, 56(%rsp) + movq %rdi, 48(%rsp) + movq %rsi, 40(%rsp) + movq %rcx, 32(%rsp) + + /* below are used to carry return value */ movdqu %xmm0, 16(%rsp) movq %rdx, 8(%rsp) movq %rax, 0(%rsp) @@ -113,14 +129,21 @@ ENTRY(mcount_return) movq 0(%rsp), %rsp /* restore original return address in parent */ - movq %rax, 40(%rsp) + movq %rax, 88(%rsp) movq 0(%rsp), %rax movq 8(%rsp), %rdx movdqu 16(%rsp), %xmm0 - movq 32(%rsp), %rdi - add $40, %rsp + movq 32(%rsp), %rcx + movq 40(%rsp), %rsi + movq 48(%rsp), %rdi + movq 56(%rsp), %r8 + movq 64(%rsp), %r9 + movq 72(%rsp), %r10 + movq 80(%rsp), %r11 + + add $88, %rsp .cfi_def_cfa_offset 8 retq .cfi_endproc From f1436f934c902af91135bc8f2ef85da336e12ec9 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 26 Aug 2024 00:15:47 -0700 Subject: [PATCH 2/4] mcount: Get rid of tid from retstack I don't remember why it has tid for each return stack entry. It's not used so let's get rid of it. Also move dyn_idx field to remove the padding. Signed-off-by: Namhyung Kim --- libmcount/mcount.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/libmcount/mcount.h b/libmcount/mcount.h index 78d85358d..2ade24e1d 100644 --- a/libmcount/mcount.h +++ b/libmcount/mcount.h @@ -49,11 +49,10 @@ struct mcount_ret_stack { unsigned long parent_ip; unsigned long child_ip; enum mcount_rstack_flag flags; + unsigned dyn_idx; /* time in nsec (CLOCK_MONOTONIC) */ uint64_t start_time; uint64_t end_time; - int tid; - unsigned dyn_idx; uint64_t filter_time; unsigned filter_size; unsigned short depth; From 915749b3ff2acceff3ccefccbb939b093b2ad2f1 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 26 Aug 2024 00:18:02 -0700 Subject: [PATCH 3/4] mcount: Handle recover trigger only if -e is not used The -e/--estimate-return doesn't hook the return addresses so no need to recover the original address. Signed-off-by: Namhyung Kim --- libmcount/mcount.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libmcount/mcount.c b/libmcount/mcount.c index 92045822b..ed1e1ed90 100644 --- a/libmcount/mcount.c +++ b/libmcount/mcount.c @@ -1201,7 +1201,7 @@ void mcount_entry_filter_record(struct mcount_thread_data *mtdp, struct mcount_r #define FLAGS_TO_CHECK (TRIGGER_FL_RECOVER | TRIGGER_FL_TRACE_ON | TRIGGER_FL_TRACE_OFF) if (tr->flags & FLAGS_TO_CHECK) { - if (tr->flags & TRIGGER_FL_RECOVER) { + if ((tr->flags & TRIGGER_FL_RECOVER) && !mcount_estimate_return) { mcount_rstack_restore(mtdp); *rstack->parent_loc = mcount_return_fn; rstack->flags |= MCOUNT_FL_RECOVER; From c454d1e11022cf4b22cb7a535cc9b8552a2fce4c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 26 Aug 2024 00:20:06 -0700 Subject: [PATCH 4/4] mcount: Use mcount_memcpy1() instead of memcpy() We don't want to use external memcpy() function in the mcount fast path because it might use some SSE/AVX registers internally. We want to preserve the original register values and to avoid saving and restoring all the registers unnecessarily. Signed-off-by: Namhyung Kim --- libmcount/record.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libmcount/record.c b/libmcount/record.c index 2ff9f4b89..b480b6fd5 100644 --- a/libmcount/record.c +++ b/libmcount/record.c @@ -905,7 +905,7 @@ static int record_event(struct mcount_thread_data *mtdp, struct mcount_event *ev rec->data += 4; /* set 'more' bit in uftrace_record */ *(uint16_t *)ptr = data_size; - memcpy(ptr + 2, event->data, data_size); + mcount_memcpy1(ptr + 2, event->data, data_size); } curr_buf->size += size;