ompt_get_task_info - bug fix #10

Open: wants to merge 10 commits into base: ompt-final-barrier
11 changes: 9 additions & 2 deletions runtime/src/kmp_barrier.cpp
@@ -1726,7 +1726,10 @@ void __kmp_join_barrier(int gtid) {
if (!KMP_MASTER_TID(ds_tid))
this_thr->th.ompt_thread_info.task_data = *OMPT_CUR_TASK_DATA(this_thr);
#endif
this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier_implicit;
// vi3-ompt_state_wait_barrier_implicit_parallel:
// old
// this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier_implicit;
this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier_implicit_parallel;
}
#endif

@@ -1983,8 +1986,12 @@ void __kmp_fork_barrier(int gtid, int tid) {
}

#if OMPT_SUPPORT
// vi3-ompt_state_wait_barrier_implicit_parallel
// old
// if (ompt_enabled.enabled &&
// this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
if (ompt_enabled.enabled &&
this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit_parallel) {
int ds_tid = this_thr->th.th_info.ds.ds_tid;
ompt_data_t *task_data = (team)
? OMPT_CUR_TASK_DATA(this_thr)
86 changes: 79 additions & 7 deletions runtime/src/kmp_runtime.cpp
@@ -967,8 +967,23 @@ static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
KMP_MB();

// The ds_tid is set again inside __kmp_initialize_info.
// Previously, ds_tid was set to zero here, after the team of the innermost
// region had been formed and set as the thread's current team, but before
// the corresponding implicit task was installed as th_current_task. At that
// moment, th_current_task still points to the innermost task being executed
// inside the outer region.
// Since __ompt_get_task_info_internal uses th_current_task, this "early"
// update of ds_tid loses the information about the thread_num in
// th_current_task->team: the thread would have to iterate over the
// th_current_task->team->t_threads array to find the right thread_num.
// This can be avoided by updating ds_tid only after the implicit task
// (of the newly formed innermost region) has been pushed to the thread
// (i.e., set as the new th_current_task).
// FIXME VI3: check whether this is safe to do for hot teams!
/* first, let's setup the master thread */
master_th->th.th_info.ds.ds_tid = 0;
//master_th->th.th_info.ds.ds_tid = 0;
master_th->th.th_team = team;
master_th->th.th_team_nproc = team->t.t_nproc;
master_th->th.th_team_master = master_th;
@@ -1052,6 +1067,9 @@ static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
#endif
}

// Update ds_tid after updating the th_current_task.
master_th->th.th_info.ds.ds_tid = 0;

if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
for (i = 0; i < team->t.t_nproc; i++) {
kmp_info_t *thr = team->t.t_threads[i];
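To make the ordering argument above concrete, here is a minimal, self-contained C++ sketch of the hazard: a tool-side query that pairs ds_tid with the currently published task sees an inconsistent snapshot if ds_tid is zeroed before the new implicit task is installed. The names (task_desc, thread_shadow, query) are illustrative stand-ins, not the actual kmp structures or OMPT entry points.

#include <cstdio>

// Illustrative stand-ins for the runtime structures (not the real kmp types).
struct task_desc { int team_level; };          // an implicit task
struct thread_shadow {
  int ds_tid;               // thread_num the thread believes it has
  task_desc *current_task;  // what the task-info query consults
};

// What a tool-facing query effectively does: pair ds_tid with current_task.
static void query(const thread_shadow &t) {
  std::printf("observer sees tid=%d for task at level %d\n",
              t.ds_tid, t.current_task->team_level);
}

int main() {
  task_desc outer_task{0};  // implicit task of the outer region
  task_desc inner_task{1};  // implicit task of the region being forked
  thread_shadow master{3, &outer_task};  // thread_num 3 in the outer team

  // Unsafe order (what the removed line did): ds_tid becomes 0 while
  // current_task still refers to the outer region, so a query in this
  // window pairs tid 0 with the outer team.
  master.ds_tid = 0;
  query(master);  // inconsistent snapshot

  // Safe order (what the patch does): install the implicit task first,
  // then update ds_tid, so every snapshot stays self-consistent.
  master = {3, &outer_task};
  master.current_task = &inner_task;
  master.ds_tid = 0;
  query(master);  // consistent snapshot
  return 0;
}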
@@ -2350,6 +2368,12 @@ void __kmp_join_call(ident_t *loc, int gtid
#if OMPT_SUPPORT
ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
void *codeptr = team->t.ompt_team_info.master_return_address;

// FIXME vi3: Store the old value of parallel_data before freeing the team.
// Since ompt_callback_parallel_end is called after the team has been freed,
// the parallel_data pointer may already point to the parallel_data of a
// new (recycled) team.
ompt_data_t old_parallel_data = *parallel_data;
#endif

#if USE_ITT_BUILD
@@ -2421,8 +2445,12 @@ void __kmp_join_call(ident_t *loc, int gtid

#if OMPT_SUPPORT
if (ompt_enabled.enabled) {
__kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
// __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
// codeptr);
// FIXME vi3: I think this is not necessary here, since this call happens before the team is freed.
__kmp_join_ompt(gtid, master_th, parent_team, &old_parallel_data, fork_context,
codeptr);

}
#endif

@@ -2535,8 +2563,29 @@ void __kmp_join_call(ident_t *loc, int gtid

#if OMPT_SUPPORT
if (ompt_enabled.enabled) {
__kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
// __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
// codeptr);
// FIXME vi3: parallel_data is a pointer to the ompt_data_t structure stored
// inside the team descriptor (data structure).
// A data race may happen when parallel_data is passed to
// ompt_callback_parallel_end after the team has been freed.
// Consider the following example:
// 1. join is called for parallel region A;
//    parallel_data points to a field in A's descriptor.
// 2. fork is called for region B.
// 3. A's team and its descriptor are freed; parallel_data now points to a
//    field of a descriptor sitting on the freelist.
// 4. A new team descriptor for region B is allocated by reusing the
//    previously freed descriptor; parallel_data now points to a field
//    stored in B's descriptor.
// 5. ompt_callback_parallel_end is invoked with parallel_data, so the
//    callback thinks that region B is ending.
// To avoid this data race:
// 1. Before freeing A's team, read the field that parallel_data points to
//    and save its content to the local variable old_parallel_data.
// 2. Pass the address of old_parallel_data to ompt_callback_parallel_end.
__kmp_join_ompt(gtid, master_th, parent_team, &old_parallel_data, fork_context,
codeptr);

}
#endif
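The copy-before-free pattern described in the comment can be illustrated with a small, self-contained sketch; team_desc, freelist, and parallel_end_cb below are hypothetical stand-ins for the team descriptor, the team freelist, and ompt_callback_parallel_end, not the runtime's actual API.

#include <cstdint>
#include <cstdio>
#include <vector>

struct ompt_data { std::uint64_t value; };
struct team_desc { ompt_data parallel_data; };

static std::vector<team_desc *> freelist;   // recycled descriptors

static void parallel_end_cb(ompt_data *parallel_data) {
  std::printf("parallel_end for region %llu\n",
              (unsigned long long)parallel_data->value);
}

int main() {
  team_desc *team_a = new team_desc{{42}};   // region A
  ompt_data *parallel_data = &team_a->parallel_data;

  // 1. Copy the payload while the descriptor is still owned by region A.
  ompt_data old_parallel_data = *parallel_data;

  // 2. Free A's descriptor (it goes to the freelist) ...
  freelist.push_back(team_a);
  // 3. ... and let a fork of region B recycle the same memory.
  team_desc *team_b = freelist.back();
  freelist.pop_back();
  team_b->parallel_data = {77};              // region B

  // Passing the stale pointer would now report region B ending;
  // passing the saved copy still reports region A.
  parallel_end_cb(parallel_data);        // wrong: prints 77
  parallel_end_cb(&old_parallel_data);   // right: prints 42

  delete team_b;
  return 0;
}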

@@ -4014,8 +4063,15 @@ static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
KMP_MB();

TCW_SYNC_PTR(this_thr->th.th_team, team);

this_thr->th.th_info.ds.ds_tid = tid;
// FIXME VI3: Hasn't this already been done inside __kmp_fork_team_threads
// for the thread with tid == 0?
// If ds_tid is updated before th_current_task, the following problem may
// appear for the master thread of the corresponding region (which is about
// to be fully formed):
// - ds_tid is set to zero, but
// - th_current_task still corresponds to the outer region, so ds_tid
//   doesn't match the thread_num in that team.
// this_thr->th.th_info.ds.ds_tid = tid;
this_thr->th.th_set_nproc = 0;
if (__kmp_tasking_mode != tskm_immediate_exec)
// When tasking is possible, threads are not safe to reap until they are
@@ -4043,6 +4099,9 @@ static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
__kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
team, tid, TRUE);

// Update ds_tid now.
this_thr->th.th_info.ds.ds_tid = tid;

KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
tid, gtid, this_thr, this_thr->th.th_current_task));
// TODO: Initialize ICVs from parent; GEH - isn't that already done in
@@ -5413,8 +5472,17 @@ void __kmp_free_team(kmp_root_t *root,
}
}

// FIXME vi3: This team is still active. If someone asks for parallel_data
// at level 0 (while this function is still executing), the parallel_data
// stored in this team's descriptor will be returned.
// On the other hand, if someone tries to access the parent's parallel_data
// (while this function is still executing), NULL will be returned, because
// there is no way to reach the parent's descriptor once the team's
// connection to its parent has been severed by invalidating the t_parent
// pointer.

// Reset pointer to parent team only for non-hot teams.
team->t.t_parent = NULL;
// team->t.t_parent = NULL;
team->t.t_level = 0;
team->t.t_active_level = 0;
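A minimal sketch of why severing t_parent breaks ancestor lookups, assuming a simplified team structure; the team type and ancestor helper below are illustrative, not the real kmp_team_t or __ompt_get_task_info_internal.

#include <cstdio>

// Illustrative team descriptor: only the fields the ancestor walk needs.
struct team { team *parent; int level; };

// Simplified ancestor lookup in the spirit of the task-info query: walk
// 'ancestor_level' links toward the root and return the team found, or
// nullptr if a link has already been severed.
static team *ancestor(team *t, int ancestor_level) {
  while (t && ancestor_level-- > 0)
    t = t->parent;
  return t;
}

int main() {
  team outer{nullptr, 0};
  team inner{&outer, 1};

  std::printf("level 1 ancestor: %d\n", ancestor(&inner, 1)->level);  // 0

  // If free_team clears the parent pointer while the inner team is still
  // visible to a tool, the same query now fails.
  inner.parent = nullptr;
  std::printf("after reset: %s\n", ancestor(&inner, 1) ? "found" : "lost");
  return 0;
}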

@@ -7260,8 +7328,12 @@ void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {

__kmp_join_barrier(gtid); /* wait for everyone */
#if OMPT_SUPPORT
// vi3-ompt_state_wait_barrier_implicit_parallel
// old
// if (ompt_enabled.enabled &&
// this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
if (ompt_enabled.enabled &&
this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit_parallel) {
int ds_tid = this_thr->th.th_info.ds.ds_tid;
ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
this_thr->th.ompt_thread_info.state = ompt_state_overhead;
25 changes: 23 additions & 2 deletions runtime/src/kmp_tasking.cpp
@@ -512,6 +512,14 @@ static void __kmp_task_start(kmp_int32 gtid, kmp_task_t *task,
}
#endif /* BUILD_TIED_TASK_STACK */

#if OMPT_SUPPORT
// FIXME: Set the scheduling parent first.
// Otherwise, if someone calls ompt_get_task_info(1) immediately after
// taskdata is installed as th_current_task, the function will use
// th_current_task->ompt_task_info.scheduling_parent, which may be stale
// if taskdata is a recycled task descriptor that still carries its old value.
taskdata->ompt_task_info.scheduling_parent = current_task;
#endif
// mark starting task as executing and as current task
thread->th.th_current_task = taskdata;
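The publish-order argument can be shown with a small sketch: if a recycled task descriptor is installed as the current task before its scheduling_parent is rewritten, a concurrent ompt_get_task_info(1) observes the stale parent. task_info and thread_shadow are illustrative stand-ins, not the real kmp_taskdata_t or kmp_info_t.

#include <cstdio>

// Illustrative task descriptor (not the real kmp_taskdata_t).
struct task_info { task_info *scheduling_parent; int id; };
struct thread_shadow { task_info *current_task; };

// What ompt_get_task_info(1) effectively does: follow scheduling_parent
// of whatever task is currently published on the thread.
static task_info *parent_of_current(const thread_shadow &t) {
  return t.current_task->scheduling_parent;
}

int main() {
  task_info old_parent{nullptr, 1};
  task_info new_parent{nullptr, 2};
  // A recycled descriptor still carries the scheduling_parent from its
  // previous use.
  task_info recycled{&old_parent, 3};
  thread_shadow thr{&new_parent};

  // Unsafe order: publish the task first, fix up scheduling_parent later.
  thr.current_task = &recycled;
  std::printf("seen parent: %d\n", parent_of_current(thr)->id);  // stale: 1
  recycled.scheduling_parent = &new_parent;

  // Safe order (what the patch does): set scheduling_parent first, then
  // publish the task, so any query sees a consistent parent.
  task_info recycled2{&old_parent, 4};
  recycled2.scheduling_parent = &new_parent;
  thr.current_task = &recycled2;
  std::printf("seen parent: %d\n", parent_of_current(thr)->id);  // 2
  return 0;
}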

@@ -568,7 +576,12 @@ static inline void __ompt_task_start(kmp_task_t *task,
&(current_task->ompt_task_info.task_data), status,
&(taskdata->ompt_task_info.task_data));
}
taskdata->ompt_task_info.scheduling_parent = current_task;
// FIXME vi3: If taskdata is installed as thr->th.th_current_task before
// current_task is set as taskdata's scheduling_parent, and someone then
// calls ompt_get_task_info(1), the function will use taskdata's potentially
// outdated scheduling_parent. This happens if taskdata has been recycled.
// taskdata->ompt_task_info.scheduling_parent = current_task;
}

// __ompt_task_finish:
@@ -1818,6 +1831,8 @@ template <bool ompt>
static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid,
void *frame_address,
void *return_address) {
// TODO: consider removing the return_address parameter
return_address = NULL;
kmp_taskdata_t *taskdata;
kmp_info_t *thread;
int thread_finished = FALSE;
@@ -1840,12 +1855,18 @@ static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid,
taskdata->ompt_task_info.frame.enter_frame.ptr = frame_address;

if (ompt_enabled.ompt_callback_sync_region) {
if (!return_address)
return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);

ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
my_task_data, return_address);
}

if (ompt_enabled.ompt_callback_sync_region_wait) {
if (!return_address)
return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);

ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
ompt_sync_region_taskwait, ompt_scope_begin, my_parallel_data,
my_task_data, return_address);
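The change above makes the return address a lazily loaded value: it is fetched at most once, and only if one of the sync-region callbacks is actually enabled. Below is a small sketch of that pattern, with load_return_address standing in for OMPT_LOAD_RETURN_ADDRESS and plain booleans standing in for the ompt_enabled flags; none of these names are the real runtime API.

#include <cstdio>

// Hypothetical stand-in for OMPT_LOAD_RETURN_ADDRESS.
static void *load_return_address() {
  std::puts("loading return address");
  return (void *)0x1234;  // placeholder value
}

static void taskwait_begin(bool cb_sync_region, bool cb_sync_region_wait) {
  void *return_address = nullptr;  // was: computed by every caller up front

  if (cb_sync_region) {
    if (!return_address)
      return_address = load_return_address();
    std::printf("sync_region begin, ra=%p\n", return_address);
  }
  if (cb_sync_region_wait) {
    if (!return_address)
      return_address = load_return_address();  // loaded at most once
    std::printf("sync_region_wait begin, ra=%p\n", return_address);
  }
}

int main() {
  taskwait_begin(true, true);    // address loaded once, then reused
  taskwait_begin(false, false);  // never loaded
  return 0;
}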
@@ -1935,7 +1956,7 @@ kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) {
if (UNLIKELY(ompt_enabled.enabled)) {
OMPT_STORE_RETURN_ADDRESS(gtid);
return __kmpc_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(0),
OMPT_LOAD_RETURN_ADDRESS(gtid));
NULL);
}
#endif
return __kmpc_omp_taskwait_template<false>(loc_ref, gtid, NULL, NULL);
10 changes: 8 additions & 2 deletions runtime/src/kmp_wait_release.h
@@ -123,7 +123,10 @@ static void __ompt_implicit_task_end(kmp_info_t *this_thr,
ompt_state_t ompt_state,
ompt_data_t *tId) {
int ds_tid = this_thr->th.th_info.ds.ds_tid;
if (ompt_state == ompt_state_wait_barrier_implicit) {
// vi3-ompt_state_wait_barrier_implicit_parallel
// old
// if (ompt_state == ompt_state_wait_barrier_implicit) {
if (ompt_state == ompt_state_wait_barrier_implicit_parallel) {
this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
void *codeptr = NULL;
@@ -267,7 +270,10 @@ final_spin=FALSE)
ompt_data_t *tId;
if (ompt_enabled.enabled) {
ompt_entry_state = this_thr->th.ompt_thread_info.state;
if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
// vi3-ompt_state_wait_barrier_implicit_parallel
// old
// if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit_parallel ||
KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
ompt_lw_taskteam_t *team =
this_thr->th.th_team->t.ompt_serialized_team_info;
85 changes: 77 additions & 8 deletions runtime/src/ompt-specific.cpp
@@ -358,16 +358,29 @@ int __ompt_get_task_info_internal(int ancestor_level, int *type,
kmp_taskdata_t *taskdata = thr->th.th_current_task;
if (taskdata == NULL)
return 0;
kmp_team *team = thr->th.th_team, *prev_team = NULL;
kmp_team *team = taskdata->td_team, *prev_team = NULL;

if (team == NULL)
return 0;
ompt_lw_taskteam_t *lwt = NULL,
*next_lwt = LWT_FROM_TEAM(taskdata->td_team),
*prev_lwt = NULL;

// NOTE: It is possible that taskdata->td_team and thr->th.th_team don't
// match in one of the following cases:
// - the team is formed, but execution of the implicit task hasn't started
//   yet, so taskdata points to the outer task instead of the implicit
//   task of the newly formed team;
// - the implicit task of the innermost region has finished and has been
//   removed as th_current_task, but th_team is still the active team.
// Since the tool wants information about the tasks, shouldn't we just use
// the information stored inside the task descriptors?

while (ancestor_level > 0) {
// needed for thread_num
prev_team = team;
// FIXME Test prev_lwt?
prev_lwt = lwt;
// next lightweight team (if any)
if (lwt)
Expand All @@ -387,6 +400,9 @@ int __ompt_get_task_info_internal(int ancestor_level, int *type,
taskdata = taskdata->td_parent;
if (team == NULL)
return 0;
// Store the current team as the previous team only when an implicit
// task is encountered.
prev_team = team;
team = team->t.t_parent;
if (taskdata) {
next_lwt = LWT_FROM_TEAM(taskdata->td_team);
@@ -415,23 +431,76 @@
}
}
}
// If info doesn't exist, then there is no task at the specified
// ancestor level. Return 0 and don't try to fill in any of the passed
// arguments. Do the same if the team doesn't exist at this level.
if (!info || !team) {
return 0;
}

// FIXME VI3: Even if info is determined to exist, I don't think it is
// guaranteed that it is still valid rather than reclaimed memory.
// Consider the case when the thread is waiting on the last implicit
// barrier.
if (task_data) {
*task_data = info ? &info->task_data : NULL;
*task_data = &info->task_data;
}
if (task_frame) {
// OpenMP spec asks for the scheduling task to be returned.
*task_frame = info ? &info->frame : NULL;
*task_frame = &info->frame;
}
if (parallel_data) {
*parallel_data = team_info ? &(team_info->parallel_data) : NULL;
}
if (thread_num) {
if (level == 0)
*thread_num = __kmp_get_tid();
if (level == 0 || !prev_team) {
// prev_team == NULL if the task at ancestor_level is the implicit task
// of the innermost region or an explicit task that belongs to that region.
int tnum = __kmp_get_tid();
// NOTE: It is possible that the master of the outer region is in the
// middle of creating/destroying the inner region. Even though the thread
// has finished updating/invalidating th_current_task (the implicit task
// that corresponds to the innermost region), ds_tid may not be updated
// yet. Since it remains zero for both the inner and the outer region, it
// is safe to return zero as thread_num.
// However, this is not the case for workers of outer regions.
// Handle this carefully.
if (team->t.t_threads[tnum] != thr) {
// The information stored inside th.th_info.ds.ds_tid doesn't match the
// thread_num inside th_current_task->team.
// Either the thread changed ds_tid before invalidating th_current_task,
// or it installed the newly formed implicit task as th_current_task but
// hasn't updated ds_tid to zero yet.
// The team variable corresponds to the just finished/created implicit
// task; ds_tid matches the thread_num inside team->t.t_parent, and 0 is
// the thread_num of this thread inside team.
kmp_team_t *parent_team = team->t.t_parent;
assert(parent_team && parent_team->t.t_threads[tnum] == thr);
tnum = 0;
}
// store thread_num
*thread_num = tnum;
assert(team->t.t_threads[*thread_num] == thr);
// FIXME VI3 ASSERT THIS.
}
else if (prev_lwt)
*thread_num = 0;
else
*thread_num = prev_team->t.t_master_tid;
else {
// FIXME VI3 ASSERT THIS.
// Need to be careful in this case. It is possible that the thread is not
// part of this team but of one of the nested teams instead.
// Consider the case when a worker of the region at level 2 calls this
// function with ancestor_level 1.
// If the thread is part of the team, then it is the master of prev_team,
// so use prev_team->t.t_master_tid.
// Otherwise, I think some special value should be returned as thread_num.
// This case is not clarified in the OMPT 5.0 specification.
int prev_team_master_id = prev_team->t.t_master_tid;
*thread_num = (team->t.t_threads[prev_team_master_id] == thr)
? prev_team->t.t_master_tid : -1;
}
// *thread_num = team->t.t_master_tid;
}
return info ? 2 : 0;
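The thread_num resolution added in this hunk can be condensed into a small stand-alone sketch; the team/thread structs and resolve_thread_num below are illustrative simplifications of the logic above, not the actual runtime types, and the lightweight-team case is reduced to a boolean flag.

#include <cstdio>

// Illustrative structures, not the real kmp types.
struct thread;
struct team { thread *threads[4]; int master_tid_in_parent; };
struct thread { int ds_tid; };

// Simplified version of the thread_num resolution described above:
// 't' is the team at the requested ancestor level, 'prev_team' the team
// one level below it (nullptr at the innermost level).
static int resolve_thread_num(thread *thr, team *t, team *prev_team,
                              bool serialized) {
  if (!prev_team) {                 // innermost level
    int tnum = thr->ds_tid;
    if (t->threads[tnum] != thr)    // ds_tid not yet updated for 't'
      tnum = 0;                     // thread must be the master of 't'
    return tnum;
  }
  if (serialized)                   // lightweight (serialized) team
    return 0;
  // The thread can only be prev_team's master inside 't'; otherwise its
  // thread_num in 't' is unknown here.
  int m = prev_team->master_tid_in_parent;
  return (t->threads[m] == thr) ? m : -1;
}

int main() {
  thread worker{2};  // ds_tid still refers to the outer team
  team outer{{nullptr, nullptr, &worker, nullptr}, 0};
  team inner{{&worker, nullptr, nullptr, nullptr}, 2};

  std::printf("%d\n", resolve_thread_num(&worker, &inner, nullptr, false));  // 0
  std::printf("%d\n", resolve_thread_num(&worker, &outer, &inner, false));   // 2
  return 0;
}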