From 8b68f8c4bf36b401f8fd4acefa23ed93fc423948 Mon Sep 17 00:00:00 2001 From: vladaindjic Date: Mon, 3 Aug 2020 16:55:31 -0500 Subject: [PATCH 01/10] Temporary solution for two bugs: 1. parallel_data at level 0 present, but not at outer levels (1, 2, etc.), even though there are few levels of nested regions. 2. The same region descriptor may be passed multiple times to ompt_callback_parallel_end. --- runtime/src/kmp_runtime.cpp | 46 ++++++++++++++++++++++++++++++++++--- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/runtime/src/kmp_runtime.cpp b/runtime/src/kmp_runtime.cpp index e66f37529..3ed63ab03 100644 --- a/runtime/src/kmp_runtime.cpp +++ b/runtime/src/kmp_runtime.cpp @@ -2350,6 +2350,12 @@ void __kmp_join_call(ident_t *loc, int gtid #if OMPT_SUPPORT ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data); void *codeptr = team->t.ompt_team_info.master_return_address; + + // FIXME vi3: Store old value of parallel_data before freeing team. + // Since, ompt_callback_parallel_end is called after team has been free, + // it is possible that pointer to parallel_data points to a parallel_data + // of a new (recycled) team. + ompt_data_t old_parallel_data = *parallel_data; #endif #if USE_ITT_BUILD @@ -2421,8 +2427,12 @@ void __kmp_join_call(ident_t *loc, int gtid #if OMPT_SUPPORT if (ompt_enabled.enabled) { - __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context, + // __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context, + // codeptr); + // FIXME vi3: I think this is not necessary since this happens before freeing the team. + __kmp_join_ompt(gtid, master_th, parent_team, &old_parallel_data, fork_context, codeptr); + } #endif @@ -2535,8 +2545,29 @@ void __kmp_join_call(ident_t *loc, int gtid #if OMPT_SUPPORT if (ompt_enabled.enabled) { - __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context, + // __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context, + // codeptr); + // FIXME vi3: parallel_data represents a pointer to ompt_data_t structure stored + // inside team descriptor (datastructure). + // Data race may happened when sending parallel_data to ompt_callback_parallel_end + // after team has been freed. + // Consider the following example: + // 1. join has been called for parallel region A. + // parallel_data points to a field in A's descriptor. + // 2. fork has been called for region B. + // 3. free A's team and its descriptor. parallel_data points to a field + // contained in descriptor present in freelist. + // 4. allocate new team descriptor for B region by reusing previously freed descriptor. + // parallel_data now points to a field stored in B's descriptor. + // 5. Calls ompt_callback_parallel_end by sending parallel_data. + // Aforementioned callbacks thinks that region B is ending. + // In order to avoid this data rade, follow next steps: + // 1. Before freeing A's team, access to field to which parallel_data points and + // save its (filed's) content to local variable old_parallel_data. + // 2. Send address of old_parallel_data to ompt_callback_parallel_end. + __kmp_join_ompt(gtid, master_th, parent_team, &old_parallel_data, fork_context, codeptr); + } #endif @@ -5413,8 +5444,17 @@ void __kmp_free_team(kmp_root_t *root, } } + // FIXME vi3: This team is still active. If someone asks for parallel_data + // at level 0 (while this function is still active), parallel_data stored + // in this team's descriptor will be returned. + // On the other hand, if someone tries to access parent's parallel_data, + // (while this function is still active), NULL will be returned, + // because there's no way to access parent's descriptor + // (Team's connection to parent team has been disconnected + // by invalidating t_parent pointer). + // Reset pointer to parent team only for non-hot teams. - team->t.t_parent = NULL; + // team->t.t_parent = NULL; team->t.t_level = 0; team->t.t_active_level = 0; From 5a2a572ef3eafb5e51fc8f2de40b15bf8f688acf Mon Sep 17 00:00:00 2001 From: vladaindjic Date: Thu, 27 Aug 2020 05:34:06 -0500 Subject: [PATCH 02/10] Modifications: 1. ompt_state_wait_barrier_implicit_parallel has been used instead of deprecated ompt_state_wait_barrier_implicit 2. In function __ompt_get_task_info_internal, check if prev_team exists before getting master_id. --- runtime/src/kmp_barrier.cpp | 11 +++++++++-- runtime/src/kmp_runtime.cpp | 6 +++++- runtime/src/kmp_wait_release.h | 10 ++++++++-- runtime/src/ompt-specific.cpp | 5 +++-- 4 files changed, 25 insertions(+), 7 deletions(-) diff --git a/runtime/src/kmp_barrier.cpp b/runtime/src/kmp_barrier.cpp index 8fd4e8cfa..07d051faf 100644 --- a/runtime/src/kmp_barrier.cpp +++ b/runtime/src/kmp_barrier.cpp @@ -1726,7 +1726,10 @@ void __kmp_join_barrier(int gtid) { if (!KMP_MASTER_TID(ds_tid)) this_thr->th.ompt_thread_info.task_data = *OMPT_CUR_TASK_DATA(this_thr); #endif - this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier_implicit; + // vi3-ompt_state_wait_barrier_implicit_parallel: + // old + // this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier_implicit; + this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier_implicit_parallel; } #endif @@ -1983,8 +1986,12 @@ void __kmp_fork_barrier(int gtid, int tid) { } #if OMPT_SUPPORT + // vi3-ompt_state_wait_barrier_implicit_parallel + // old + // if (ompt_enabled.enabled && + // this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) { if (ompt_enabled.enabled && - this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) { + this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit_parallel) { int ds_tid = this_thr->th.th_info.ds.ds_tid; ompt_data_t *task_data = (team) ? OMPT_CUR_TASK_DATA(this_thr) diff --git a/runtime/src/kmp_runtime.cpp b/runtime/src/kmp_runtime.cpp index 3ed63ab03..b4b7afd60 100644 --- a/runtime/src/kmp_runtime.cpp +++ b/runtime/src/kmp_runtime.cpp @@ -7300,8 +7300,12 @@ void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) { __kmp_join_barrier(gtid); /* wait for everyone */ #if OMPT_SUPPORT + // vi3-ompt_state_wait_barrier_implicit_parallel + // old + // if (ompt_enabled.enabled && + // this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) { if (ompt_enabled.enabled && - this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) { + this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit_parallel) { int ds_tid = this_thr->th.th_info.ds.ds_tid; ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr); this_thr->th.ompt_thread_info.state = ompt_state_overhead; diff --git a/runtime/src/kmp_wait_release.h b/runtime/src/kmp_wait_release.h index df4a9de8e..0f3c1f481 100644 --- a/runtime/src/kmp_wait_release.h +++ b/runtime/src/kmp_wait_release.h @@ -123,7 +123,10 @@ static void __ompt_implicit_task_end(kmp_info_t *this_thr, ompt_state_t ompt_state, ompt_data_t *tId) { int ds_tid = this_thr->th.th_info.ds.ds_tid; - if (ompt_state == ompt_state_wait_barrier_implicit) { + // vi3-ompt_state_wait_barrier_implicit_parallel + // old + // if (ompt_state == ompt_state_wait_barrier_implicit) { + if (ompt_state == ompt_state_wait_barrier_implicit_parallel) { this_thr->th.ompt_thread_info.state = ompt_state_overhead; #if OMPT_OPTIONAL void *codeptr = NULL; @@ -267,7 +270,10 @@ final_spin=FALSE) ompt_data_t *tId; if (ompt_enabled.enabled) { ompt_entry_state = this_thr->th.ompt_thread_info.state; - if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit || + // vi3-ompt_state_wait_barrier_implicit_parallel + // old + // if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit || + if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit_parallel || KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) { ompt_lw_taskteam_t *team = this_thr->th.th_team->t.ompt_serialized_team_info; diff --git a/runtime/src/ompt-specific.cpp b/runtime/src/ompt-specific.cpp index e3fa9166c..56f0a00bf 100644 --- a/runtime/src/ompt-specific.cpp +++ b/runtime/src/ompt-specific.cpp @@ -430,9 +430,10 @@ int __ompt_get_task_info_internal(int ancestor_level, int *type, *thread_num = __kmp_get_tid(); else if (prev_lwt) *thread_num = 0; - else + else if (prev_team){ *thread_num = prev_team->t.t_master_tid; - // *thread_num = team->t.t_master_tid; + // *thread_num = team->t.t_master_tid; + } } return info ? 2 : 0; } From 63b01eb1fa5b86965af832d7b61505319e8dff09 Mon Sep 17 00:00:00 2001 From: vladaindjic Date: Tue, 24 Nov 2020 05:28:42 -0600 Subject: [PATCH 03/10] ompt_get_task_info fix - Prevent the edge case when the team of the currently active task doesn't match the team of the currently active region. --- runtime/src/ompt-specific.cpp | 38 +++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/runtime/src/ompt-specific.cpp b/runtime/src/ompt-specific.cpp index 56f0a00bf..b914ac847 100644 --- a/runtime/src/ompt-specific.cpp +++ b/runtime/src/ompt-specific.cpp @@ -424,6 +424,44 @@ int __ompt_get_task_info_internal(int ancestor_level, int *type, } if (parallel_data) { *parallel_data = team_info ? &(team_info->parallel_data) : NULL; + // vi3 fix: + // The following edge case can happen: + // - Assume that a thread is part of the region at depth 0 (reg0) + // and that it's executing the corresponding implicit task + // - Currently active team of the thread is the team of reg0, + // and the currently active task is the implicit task of reg0 + // - Thread encounters at parallel construct so it's + // going to form a new team of the nested region (reg1) + // - After that, thread sets that its currently active + // team is the reg1's team. + // - Note that still active task is the reg0's implicit task. + // - If someone calls ompt_get_task_info(0) at this moment, the function + // should return currently active task (reg0's implicit task) + // and the parallel_data that belongs to region in which + // the mentioned task has been executed. + // (reg0's parallel_data). + // - Unfortunately, the current implementation will return + // the parallel_data that belongs to the currently active team + // (reg1's team) + // In order to prevent this, check if the team of the currently active + // task matches the team of the currently active region. + // If this is not the true, then the previously described scenario + // occurred, so try to access to the parent region team and use its + // parallel_data word. + if (taskdata && team && taskdata->td_team != team) { + // The team of the currently active task does not match the team of + // the currently active region. + if (team->t.t_parent && team->t.t_parent == taskdata->td_team) { + // Check whether the team of the parent region matches the + // current task team. + ompt_team_info_t *parent_team_info = + &team->t.t_parent->t.ompt_team_info; + if (parent_team_info) { + // use parent team parallel_data + *parallel_data = &parent_team_info->parallel_data; + } + } + } } if (thread_num) { if (level == 0) From 189811255d8ad127f71b890848552831ef5a7ffc Mon Sep 17 00:00:00 2001 From: vladaindjic Date: Sun, 10 Jan 2021 12:34:14 -0600 Subject: [PATCH 04/10] __ompt_get_task_info_internal bug: Standard isn't clear about what should be the value of the thread_num argument when thread tries get the information about the outer region in which it is not involved. --- runtime/src/ompt-specific.cpp | 58 ++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/runtime/src/ompt-specific.cpp b/runtime/src/ompt-specific.cpp index b914ac847..25d452d74 100644 --- a/runtime/src/ompt-specific.cpp +++ b/runtime/src/ompt-specific.cpp @@ -459,6 +459,18 @@ int __ompt_get_task_info_internal(int ancestor_level, int *type, if (parent_team_info) { // use parent team parallel_data *parallel_data = &parent_team_info->parallel_data; + // update values of team, prev_team and level + // in order to find valid information about thread_num + prev_team = team; + team = team->t.t_parent; + // If level is 0, then thread_num will be set to __kmp_get_tid() + // (see if-branch below). + // If however forming the new region team hasn't been finished, + // then return value of the __kmp_get_tid may not be valid. + // Incrementing level will lead to executing else-if-branch + // that will return the id of the thread in parent team + // (if thread is the part of it). + level++; } } } @@ -469,8 +481,52 @@ int __ompt_get_task_info_internal(int ancestor_level, int *type, else if (prev_lwt) *thread_num = 0; else if (prev_team){ - *thread_num = prev_team->t.t_master_tid; + if (team) { + // Parallel region at the "ancestor_level" exists and is represented + // by the team stored in "team" variable. + // One thread of this "team" is the master of the "prev_team" + // (team of the nested region). + // That is the thread which "thread_num" inside the "team" + // matches prev_team->t.t_master_tid. + // The previous implementation didn't consider the case when + // the "thr" is not part of the "team". It ("thr") may be the + // worker inside prev_team's parallel region, or in one of + // its descendants. e.g. Worker thread of the innermost + // region calls this function with ancestor_level > 0. + + // First check whether "thr" belongs to the "team" + if (thr == team->t.t_threads[prev_team->t.t_master_tid]) { + // "thr" is the master of the prev_team, so return t_master_tid + *thread_num = prev_team->t.t_master_tid; + } else { + // NOTE: "thr" doesn't belong to the "team" + // FIXME: + // Two possible options: + // - return 0 (according to standard) + // - set invalid value of thread_num (not sure if this + // satisfies standard) + *thread_num = -1; + } + } else { + // NOTE: No parallel region at the ancestor_level. + // FIXME: + // Two possible options: + // - return 0 (according to standard) + // - set invalid value of thread_num (not sure if this + // satisfies standard) + *thread_num = -1; + } + // old implementation // *thread_num = team->t.t_master_tid; + } else { + // NOTE: prev_team == NULL, so there's no parallel region at + // this ancestor_level. + // FIXME vi3: + // Two possible options: + // - return 0 (according to standard) + // - set invalid value of thread_num (not sure if this + // satisfies standard) + *thread_num = -1; } } return info ? 2 : 0; From 0eee7a03f12f0562be6779a41cb6ca6c9573e0aa Mon Sep 17 00:00:00 2001 From: vladaindjic Date: Fri, 22 Jan 2021 06:56:54 -0600 Subject: [PATCH 05/10] ompt_get_task_info bugfix - If thread is the master of the innermost region, then assert thread_num == 0. --- runtime/src/ompt-specific.cpp | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/runtime/src/ompt-specific.cpp b/runtime/src/ompt-specific.cpp index 25d452d74..0acf68403 100644 --- a/runtime/src/ompt-specific.cpp +++ b/runtime/src/ompt-specific.cpp @@ -476,8 +476,21 @@ int __ompt_get_task_info_internal(int ancestor_level, int *type, } } if (thread_num) { - if (level == 0) + if (level == 0) { *thread_num = __kmp_get_tid(); +#if 1 + if (team->t.t_threads[0] == thr) { + // FIXME: If this function is called by the master thread of + // the innermost region inside ompt_callback_implicit_task + // (scope=end), this function may return thead_num different + // then zero, which should be wrong. + // This check should prevent this. + // thr is the master of the team + *thread_num = 0; + assert(*thread_num == 0); + } +#endif + } else if (prev_lwt) *thread_num = 0; else if (prev_team){ From ff0963fdeb9ee79ac5dc1f44024a0c836bdba6af Mon Sep 17 00:00:00 2001 From: vladaindjic Date: Sat, 23 Jan 2021 05:27:34 -0600 Subject: [PATCH 06/10] __kmp_task_start bugfix: Initialize scheduling_parent to new taskadata, before proclaiming the taskdata as th_current_task. --- runtime/src/kmp_tasking.cpp | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/runtime/src/kmp_tasking.cpp b/runtime/src/kmp_tasking.cpp index 1fd68f003..1d5dfeafe 100644 --- a/runtime/src/kmp_tasking.cpp +++ b/runtime/src/kmp_tasking.cpp @@ -512,6 +512,14 @@ static void __kmp_task_start(kmp_int32 gtid, kmp_task_t *task, } #endif /* BUILD_TIED_TASK_STACK */ +#if OMPT_SUPPORT + // FIXME: Set scheduling parent first. + // Otherwise, if someone calls ompt_get_task_info(1) immediately after + // current_task is set as the th_current_task, then the function will use + // th_current_task->ompt_task_info.scheduling_parent which may be outdated + // if th_current_task (here only current) is recycled. + taskdata->ompt_task_info.scheduling_parent = current_task; +#endif // mark starting task as executing and as current task thread->th.th_current_task = taskdata; @@ -568,7 +576,12 @@ static inline void __ompt_task_start(kmp_task_t *task, &(current_task->ompt_task_info.task_data), status, &(taskdata->ompt_task_info.task_data)); } - taskdata->ompt_task_info.scheduling_parent = current_task; + // FIXME vi3: If taskdata is set to thr->th.th_current_task before + // current_task is set as taskdata's scheduling_parent, and if + // someone calls ompt_get_task_info(1), then the function will use + // potentially outdated taskdata's scheduling_parent. + // This happens if taskdata has been recycled. + //taskdata->ompt_task_info.scheduling_parent = current_task; } // __ompt_task_finish: From 3eb0fa0f42b57b95f4b7f9673062af97b653b76b Mon Sep 17 00:00:00 2001 From: vladaindjic Date: Sat, 23 Jan 2021 05:30:49 -0600 Subject: [PATCH 07/10] ompt_get_task_info bug: The logic for thread_num determination is invalid. This is the hack that will make hpcrun work, since that tool is not interested to know the specific thread_num, but whether the thread is the master of the region at specified level. --- runtime/src/ompt-specific.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/runtime/src/ompt-specific.cpp b/runtime/src/ompt-specific.cpp index 0acf68403..57e9e8a0a 100644 --- a/runtime/src/ompt-specific.cpp +++ b/runtime/src/ompt-specific.cpp @@ -476,6 +476,17 @@ int __ompt_get_task_info_internal(int ancestor_level, int *type, } } if (thread_num) { + // FIXME vi3: This is wrong. However, hpcrun only cares about whether + // thread is master of the region or not. + if (team) { + if (thr == team->t.t_threads[0]) { + *thread_num = 0; + } else { + *thread_num = -1; + } + } + +#if 0 if (level == 0) { *thread_num = __kmp_get_tid(); #if 1 @@ -541,6 +552,8 @@ int __ompt_get_task_info_internal(int ancestor_level, int *type, // satisfies standard) *thread_num = -1; } + +#endif } return info ? 2 : 0; } From ba81fb61f012674e7f3e1355eb62c23e8f9adbd4 Mon Sep 17 00:00:00 2001 From: vladaindjic Date: Tue, 26 Jan 2021 10:29:03 -0600 Subject: [PATCH 08/10] Determine the return_addres only if the tool register for the ompt_callback_sync_region. --- runtime/src/kmp_tasking.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/runtime/src/kmp_tasking.cpp b/runtime/src/kmp_tasking.cpp index 1d5dfeafe..8d956fa33 100644 --- a/runtime/src/kmp_tasking.cpp +++ b/runtime/src/kmp_tasking.cpp @@ -1831,6 +1831,8 @@ template static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid, void *frame_address, void *return_address) { + // TODO: consider removing the return_address parameter + return_address = NULL; kmp_taskdata_t *taskdata; kmp_info_t *thread; int thread_finished = FALSE; @@ -1853,12 +1855,18 @@ static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid, taskdata->ompt_task_info.frame.enter_frame.ptr = frame_address; if (ompt_enabled.ompt_callback_sync_region) { + if (!return_address) + return_address = OMPT_LOAD_RETURN_ADDRESS(gtid); + ompt_callbacks.ompt_callback(ompt_callback_sync_region)( ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data, my_task_data, return_address); } if (ompt_enabled.ompt_callback_sync_region_wait) { + if (!return_address) + return_address = OMPT_LOAD_RETURN_ADDRESS(gtid); + ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)( ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data, my_task_data, return_address); @@ -1948,7 +1956,7 @@ kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) { if (UNLIKELY(ompt_enabled.enabled)) { OMPT_STORE_RETURN_ADDRESS(gtid); return __kmpc_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(0), - OMPT_LOAD_RETURN_ADDRESS(gtid)); + NULL); } #endif return __kmpc_omp_taskwait_template(loc_ref, gtid, NULL, NULL); From ca1b7c90ce7b2f6f645386222baf69d58aba9676 Mon Sep 17 00:00:00 2001 From: vladaindjic Date: Tue, 9 Feb 2021 17:01:37 -0600 Subject: [PATCH 09/10] Use td_team stored inside th_current_task instead of the team stored inside thr. TODO: Avoid using for loop in order to find thread_num in case when worker thread is creating the parallel region. --- runtime/src/ompt-specific.cpp | 218 ++++++++++++++-------------------- 1 file changed, 92 insertions(+), 126 deletions(-) diff --git a/runtime/src/ompt-specific.cpp b/runtime/src/ompt-specific.cpp index 57e9e8a0a..dccb6752f 100644 --- a/runtime/src/ompt-specific.cpp +++ b/runtime/src/ompt-specific.cpp @@ -358,16 +358,29 @@ int __ompt_get_task_info_internal(int ancestor_level, int *type, kmp_taskdata_t *taskdata = thr->th.th_current_task; if (taskdata == NULL) return 0; - kmp_team *team = thr->th.th_team, *prev_team = NULL; + kmp_team *team = taskdata->td_team, *prev_team = NULL; + if (team == NULL) return 0; ompt_lw_taskteam_t *lwt = NULL, *next_lwt = LWT_FROM_TEAM(taskdata->td_team), *prev_lwt = NULL; + // NOTE: It is possible that taskdata->td_team and team doesn't match + // in one of the following cases: + // - team if formed, but the implicit task execution still hasn't start + // so the taskdata points to the outer task instead of the implicit task + // if newly formed team + // - the implicit task of the innermost region has been finished, and + // is revoked as th_current_task, however team is still active team. + // Since the tool is interested to know the information about the tasks, + // shouldn't we just used the information stored inside taskdatas? + + + while (ancestor_level > 0) { // needed for thread_num - prev_team = team; + // FIXME Test prev_lwt? prev_lwt = lwt; // next lightweight team (if any) if (lwt) @@ -387,6 +400,9 @@ int __ompt_get_task_info_internal(int ancestor_level, int *type, taskdata = taskdata->td_parent; if (team == NULL) return 0; + // Store current team as previous team only when encounter + // on an implicit task. + prev_team = team; team = team->t.t_parent; if (taskdata) { next_lwt = LWT_FROM_TEAM(taskdata->td_team); @@ -415,145 +431,95 @@ int __ompt_get_task_info_internal(int ancestor_level, int *type, } } } + // If the info doesn't exists, then there's no task at the + // specified ancestor level. Return 0, and don't try to provide + // any of the passed arguments. Do the same if team doesn't exist + // at this level. + if (!info || !team) { + return 0; + } + + // FIXME VI3: Even if it is determined that the info exists, I don't + // this it is guaranteed it's still valid, and not reclaimed memory. + // Consider the case when the thread is waiting on the last implicit + // barrier. if (task_data) { - *task_data = info ? &info->task_data : NULL; + *task_data = &info->task_data; } if (task_frame) { // OpenMP spec asks for the scheduling task to be returned. - *task_frame = info ? &info->frame : NULL; + *task_frame = &info->frame; } if (parallel_data) { *parallel_data = team_info ? &(team_info->parallel_data) : NULL; - // vi3 fix: - // The following edge case can happen: - // - Assume that a thread is part of the region at depth 0 (reg0) - // and that it's executing the corresponding implicit task - // - Currently active team of the thread is the team of reg0, - // and the currently active task is the implicit task of reg0 - // - Thread encounters at parallel construct so it's - // going to form a new team of the nested region (reg1) - // - After that, thread sets that its currently active - // team is the reg1's team. - // - Note that still active task is the reg0's implicit task. - // - If someone calls ompt_get_task_info(0) at this moment, the function - // should return currently active task (reg0's implicit task) - // and the parallel_data that belongs to region in which - // the mentioned task has been executed. - // (reg0's parallel_data). - // - Unfortunately, the current implementation will return - // the parallel_data that belongs to the currently active team - // (reg1's team) - // In order to prevent this, check if the team of the currently active - // task matches the team of the currently active region. - // If this is not the true, then the previously described scenario - // occurred, so try to access to the parent region team and use its - // parallel_data word. - if (taskdata && team && taskdata->td_team != team) { - // The team of the currently active task does not match the team of - // the currently active region. - if (team->t.t_parent && team->t.t_parent == taskdata->td_team) { - // Check whether the team of the parent region matches the - // current task team. - ompt_team_info_t *parent_team_info = - &team->t.t_parent->t.ompt_team_info; - if (parent_team_info) { - // use parent team parallel_data - *parallel_data = &parent_team_info->parallel_data; - // update values of team, prev_team and level - // in order to find valid information about thread_num - prev_team = team; - team = team->t.t_parent; - // If level is 0, then thread_num will be set to __kmp_get_tid() - // (see if-branch below). - // If however forming the new region team hasn't been finished, - // then return value of the __kmp_get_tid may not be valid. - // Incrementing level will lead to executing else-if-branch - // that will return the id of the thread in parent team - // (if thread is the part of it). - level++; - } - } - } } if (thread_num) { - // FIXME vi3: This is wrong. However, hpcrun only cares about whether - // thread is master of the region or not. - if (team) { - if (thr == team->t.t_threads[0]) { - *thread_num = 0; - } else { - *thread_num = -1; - } - } + if (level == 0 || !prev_team) { + // prev_team == NULL if at ancestor_level is explicit task that + // belongs to the innermost region or the corresponding implicit task. + int tnum = __kmp_get_tid(); + // NOTE: It is possible that master of the outer region + // is in the middle of creating/destroying the inner region. + // Even though it finished updated/invalidated current implicit task, + // tid remains 0. + if (team->t.t_threads[tnum] != thr) { + // Information stored inside th.th_info.ds.ds_tid doesn't match the + // current task. Either thread finished the implicit task and changes + // the ds_tied before invalidating th_current_task, or thread set + // ds_tid to be zero, but hasn't started working in nested team it + // has just formed. + kmp_team_t *parent_team = team->t.t_parent; + // I don't think that this can happen in the case when there is not + // nested regions; + assert(parent_team); + if (parent_team->t.t_threads[tnum] == thr) { + // Master thread of the innermost region is in the middle of finishing + // corresponding implicit task. + // It changed the tid to match the thread_num in parent's team. + // However, innermost implicit task isn't invalidated. + assert(team->t.t_threads[0] == thr); + // thread is the master of the team. + tnum = 0; + } else if (tnum == 0) { + // Since the worker thread is creating a new region, it updates tid + // to match 0, however the innermost implicit task is not updated. + // Need a mechanism to find the thread_num in outer region. + // Try to avoid iterating over team->t.t_threads. + int i; + for (i = 1; i < team->t.t_nproc; i++) { + if (team->t.t_threads[i] == thr) { + tnum = i; + } + } + assert(tnum != 0); + } else { + // I don't think this can happen. + assert(false); + } -#if 0 - if (level == 0) { - *thread_num = __kmp_get_tid(); -#if 1 - if (team->t.t_threads[0] == thr) { - // FIXME: If this function is called by the master thread of - // the innermost region inside ompt_callback_implicit_task - // (scope=end), this function may return thead_num different - // then zero, which should be wrong. - // This check should prevent this. - // thr is the master of the team - *thread_num = 0; - assert(*thread_num == 0); } -#endif + // store thread_num + *thread_num = tnum; + assert(team->t.t_threads[*thread_num] == thr); + // FIXME VI3 ASSERT THIS. } else if (prev_lwt) *thread_num = 0; - else if (prev_team){ - if (team) { - // Parallel region at the "ancestor_level" exists and is represented - // by the team stored in "team" variable. - // One thread of this "team" is the master of the "prev_team" - // (team of the nested region). - // That is the thread which "thread_num" inside the "team" - // matches prev_team->t.t_master_tid. - // The previous implementation didn't consider the case when - // the "thr" is not part of the "team". It ("thr") may be the - // worker inside prev_team's parallel region, or in one of - // its descendants. e.g. Worker thread of the innermost - // region calls this function with ancestor_level > 0. - - // First check whether "thr" belongs to the "team" - if (thr == team->t.t_threads[prev_team->t.t_master_tid]) { - // "thr" is the master of the prev_team, so return t_master_tid - *thread_num = prev_team->t.t_master_tid; - } else { - // NOTE: "thr" doesn't belong to the "team" - // FIXME: - // Two possible options: - // - return 0 (according to standard) - // - set invalid value of thread_num (not sure if this - // satisfies standard) - *thread_num = -1; - } - } else { - // NOTE: No parallel region at the ancestor_level. - // FIXME: - // Two possible options: - // - return 0 (according to standard) - // - set invalid value of thread_num (not sure if this - // satisfies standard) - *thread_num = -1; - } - // old implementation - // *thread_num = team->t.t_master_tid; - } else { - // NOTE: prev_team == NULL, so there's no parallel region at - // this ancestor_level. - // FIXME vi3: - // Two possible options: - // - return 0 (according to standard) - // - set invalid value of thread_num (not sure if this - // satisfies standard) - *thread_num = -1; + else { + // FIXME VI3 ASSERT THIS. + // Need to be careful in this case. It is possible tha thread is not + // part of the team, but some of the nested teams instead. + // Consider the case when the worker of the regions at level 2 + // calls this function with ancestor_level 1. + // If thread is part of the team, then it is the master of prev_team, + // so use prev_team->t.t_master_tid. + // Otherwise, I think some special value should be return as thread_num. + // This case is not clarified in the OMPT 5.0 specification + int prev_team_master_id = prev_team->t.t_master_tid; + *thread_num = (team->t.t_threads[prev_team_master_id] == thr) + ? prev_team->t.t_master_tid : -1; } - -#endif + // *thread_num = team->t.t_master_tid; } return info ? 2 : 0; } From d4baa93c7b4999638ae824b85a880f3034a0bbea Mon Sep 17 00:00:00 2001 From: vladaindjic Date: Thu, 11 Feb 2021 09:23:26 -0600 Subject: [PATCH 10/10] Update th_current_task before ds_tid when creating the nested region. --- runtime/src/kmp_runtime.cpp | 34 +++++++++++++++++++-- runtime/src/ompt-specific.cpp | 56 ++++++++++++----------------------- 2 files changed, 50 insertions(+), 40 deletions(-) diff --git a/runtime/src/kmp_runtime.cpp b/runtime/src/kmp_runtime.cpp index b4b7afd60..f99c3e844 100644 --- a/runtime/src/kmp_runtime.cpp +++ b/runtime/src/kmp_runtime.cpp @@ -967,8 +967,23 @@ static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team, KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid()); KMP_MB(); + // The ds_tid is set again inside __kmp_initialize_info + // The ds_tid was set to zero, the team of the innermost region was formed + // and set as current team of the thread. However, the corresponding implicit + // task hasn't been set as th_current_task yet. At this moment, + // the th_current_task points to the innermost task being executed inside + // the outer region. + // Since __ompt_get_task_info_internal uses th_current_task, previous "early" + // update of the ds_tid leads to losing the information about the thread_num + // in th_current_task->team. + // Thread needs to iterate over the th_current_task->team->t_threads array + // in order to find the right thread_num. + // This can be avoided if ds_tid is updated after the implicit task + // (of the newly formed innermost region) is pushed to the thread + // (set as new the th_current_task). + // FIXME VI3: check if this is safe to do for hot teams! /* first, let's setup the master thread */ - master_th->th.th_info.ds.ds_tid = 0; + //master_th->th.th_info.ds.ds_tid = 0; master_th->th.th_team = team; master_th->th.th_team_nproc = team->t.t_nproc; master_th->th.th_team_master = master_th; @@ -1052,6 +1067,9 @@ static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team, #endif } + // Update ds_tid after updating the th_current_task. + master_th->th.th_info.ds.ds_tid = 0; + if (__kmp_display_affinity && team->t.t_display_affinity != 1) { for (i = 0; i < team->t.t_nproc; i++) { kmp_info_t *thr = team->t.t_threads[i]; @@ -4045,8 +4063,15 @@ static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team, KMP_MB(); TCW_SYNC_PTR(this_thr->th.th_team, team); - - this_thr->th.th_info.ds.ds_tid = tid; + // FIXME VI3: Hasn't this already done inside __kmp_fork_team_threads for the + // thread with tid == 0. + // The problem that may appear for the master thread of the + // corresponding region (which is soon to be fully formed) if the ds_tid + // is updated before the th_current_task is the following: + // - ds_tid is set to zero + // - th_current_task corresponds to the outer region so the + // ds_tid doesn't match the thread_num in that team. + // this_thr->th.th_info.ds.ds_tid = tid; this_thr->th.th_set_nproc = 0; if (__kmp_tasking_mode != tskm_immediate_exec) // When tasking is possible, threads are not safe to reap until they are @@ -4074,6 +4099,9 @@ static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team, __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr, team, tid, TRUE); + // Update ds_tid now. + this_thr->th.th_info.ds.ds_tid = tid; + KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n", tid, gtid, this_thr, this_thr->th.th_current_task)); // TODO: Initialize ICVs from parent; GEH - isn't that already done in diff --git a/runtime/src/ompt-specific.cpp b/runtime/src/ompt-specific.cpp index dccb6752f..3d01f8da9 100644 --- a/runtime/src/ompt-specific.cpp +++ b/runtime/src/ompt-specific.cpp @@ -455,48 +455,30 @@ int __ompt_get_task_info_internal(int ancestor_level, int *type, } if (thread_num) { if (level == 0 || !prev_team) { - // prev_team == NULL if at ancestor_level is explicit task that - // belongs to the innermost region or the corresponding implicit task. + // prev_team == NULL if at ancestor_level is an implicit task of the + // innermost region or an explicit task that belongs to the region. int tnum = __kmp_get_tid(); // NOTE: It is possible that master of the outer region - // is in the middle of creating/destroying the inner region. - // Even though it finished updated/invalidated current implicit task, - // tid remains 0. + // is in the middle of process of creating/destroying the inner region. + // Even though thread finished updating/invalidating th_current_task + // (implicit task that corresponds to the innermost region), the ds_tid + // may not be updated yet. Since it remains zero for both inner and + // outer region, it is safe to return zero as thread_num. + // However, this is not case for the worker of outer regions. + // Handle this carefully. if (team->t.t_threads[tnum] != thr) { // Information stored inside th.th_info.ds.ds_tid doesn't match the - // current task. Either thread finished the implicit task and changes - // the ds_tied before invalidating th_current_task, or thread set - // ds_tid to be zero, but hasn't started working in nested team it - // has just formed. + // thread_num inside the th_current_task->team. + // Either thread changed the ds_tid before invalidating + // th_current_task, or thread set + // newly formed implicit task as th_current_task, but hasn't updated + // ds_tid to be zero yet. + // team variable corresponds to the just finished/created implicit task. + // ds_tid matches thread_num inside team->t.t_parent. + // 0 is the thread_num of the thread inside the team. kmp_team_t *parent_team = team->t.t_parent; - // I don't think that this can happen in the case when there is not - // nested regions; - assert(parent_team); - if (parent_team->t.t_threads[tnum] == thr) { - // Master thread of the innermost region is in the middle of finishing - // corresponding implicit task. - // It changed the tid to match the thread_num in parent's team. - // However, innermost implicit task isn't invalidated. - assert(team->t.t_threads[0] == thr); - // thread is the master of the team. - tnum = 0; - } else if (tnum == 0) { - // Since the worker thread is creating a new region, it updates tid - // to match 0, however the innermost implicit task is not updated. - // Need a mechanism to find the thread_num in outer region. - // Try to avoid iterating over team->t.t_threads. - int i; - for (i = 1; i < team->t.t_nproc; i++) { - if (team->t.t_threads[i] == thr) { - tnum = i; - } - } - assert(tnum != 0); - } else { - // I don't think this can happen. - assert(false); - } - + assert(parent_team && parent_team->t.t_threads[tnum] == thr); + tnum = 0; } // store thread_num *thread_num = tnum;