#ifndef KMP_WAIT_RELEASE_H
#define KMP_WAIT_RELEASE_H

#include "kmp.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "ompt-specific.h"

// The flag_type describes the storage used for the flag.
enum flag_type {
  flag32, /**< 32-bit flags */
  flag64, /**< 64-bit flags */
  flag_oncore /**< special 64-bit flag for on-core (hierarchical) barrier */
};

// Base class for the wait/release volatile flag.
template <typename P> class kmp_flag {
  volatile P *loc; // flag storage, modified by another thread
  flag_type t; // "type" of the flag stored at loc
public:
  typedef P flag_t;
  kmp_flag(volatile P *p, flag_type ft) : loc(p), t(ft) {}
  /*! @result the pointer to the actual flag */
  volatile P *get() { return loc; }
  /*! @param new_loc in   set loc to point at new_loc */
  void set(volatile P *new_loc) { loc = new_loc; }
  /*! @result the flag_type */
  flag_type get_type() { return t; }
};
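/*! Derived flag classes (kmp_flag_32, kmp_flag_64 and kmp_flag_oncore below)
    supply the operations the wait/release templates rely on: done_check(),
    notdone_check(), internal_release(), set_sleeping()/unset_sleeping(),
    suspend(), resume() and execute_tasks(). A thread that blocks on a flag via
    __kmp_wait_template() must be paired with another thread calling
    __kmp_release_template() on the same flag, or it may sleep forever. */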
static inline void __ompt_implicit_task_end(kmp_info_t *this_thr,
                                            omp_state_t omp_state,
                                            ompt_data_t *tId,
                                            ompt_data_t *pId) {
  int ds_tid = this_thr->th.th_info.ds.ds_tid;
  if (omp_state == omp_state_wait_barrier_implicit) {
    this_thr->th.ompt_thread_info.state = omp_state_overhead;
    void *codeptr = NULL;
    if (ompt_enabled.ompt_callback_sync_region_wait) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
          ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr);
    }
    if (ompt_enabled.ompt_callback_sync_region) {
      ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
          ompt_sync_region_barrier, ompt_scope_end, NULL, tId, codeptr);
    }
    if (!KMP_MASTER_TID(ds_tid)) {
      if (ompt_enabled.ompt_callback_implicit_task) {
        ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
            ompt_scope_end, NULL, tId, 0, ds_tid);
      }
      if (ompt_enabled.ompt_callback_idle) {
        ompt_callbacks.ompt_callback(ompt_callback_idle)(ompt_scope_begin);
      }
      // The implicit task ends here; return this thread to the idle state.
      this_thr->th.ompt_thread_info.state = omp_state_idle;
    } else {
      this_thr->th.ompt_thread_info.state = omp_state_overhead;
    }
  }
}
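/* Spin wait loop that first pauses/yields, then eventually sleeps. A thread
   that calls __kmp_wait_template() must make certain that another thread calls
   __kmp_release_template() on the same flag to wake it back up, otherwise it
   risks deadlock. */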
template <class C>
static inline void
__kmp_wait_template(kmp_info_t *this_thr, C *flag,
                    int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
  // NOTE: We may not belong to a team at this point.
  volatile typename C::flag_t *spin = flag->get();
  kmp_uint32 spins;
  kmp_uint32 hibernate;
  int th_gtid;
  int tasks_completed = FALSE;
  int oversubscribed;
  kmp_uint64 poll_count;
  kmp_uint64 hibernate_goal;

  KMP_FSYNC_SPIN_INIT(spin, NULL);
  if (flag->done_check()) {
    KMP_FSYNC_SPIN_ACQUIRED(CCAST(typename C::flag_t *, spin));
    return;
  }
  th_gtid = this_thr->th.th_info.ds.ds_gtid;
  KA_TRACE(20,
           ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
#if KMP_STATS_ENABLED
  stats_state_e thread_state = KMP_GET_THREAD_STATE();
#endif

  omp_state_t ompt_entry_state;
  ompt_data_t *pId = NULL;
  ompt_data_t *tId;
  if (ompt_enabled.enabled) {
    ompt_entry_state = this_thr->th.ompt_thread_info.state;
    if (!final_spin || ompt_entry_state != omp_state_wait_barrier_implicit ||
        KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
      ompt_lw_taskteam_t *team =
          this_thr->th.th_team->t.ompt_serialized_team_info;
      if (team) {
        pId = &(team->ompt_team_info.parallel_data);
        tId = &(team->ompt_task_info.task_data);
      } else {
        pId = OMPT_CUR_TEAM_DATA(this_thr);
        tId = OMPT_CUR_TASK_DATA(this_thr);
      }
    } else {
      tId = &(this_thr->th.ompt_thread_info.task_data);
    }
    if (ompt_entry_state == omp_state_idle) {
      if (ompt_enabled.ompt_callback_idle) {
        ompt_callbacks.ompt_callback(ompt_callback_idle)(ompt_scope_begin);
      }
    } else if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
                              this_thr->th.th_task_team == NULL)) {
      // The implicit task is done: either there is no task queue, or the
      // task team has already finished.
      __ompt_implicit_task_end(this_thr, ompt_entry_state, tId, pId);
    }
  }
  // Setup for waiting
  KMP_INIT_YIELD(spins);

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
#if KMP_USE_MONITOR
// Workers cannot rely on the team struct existing here; use the blocktime
// intervals cached in the thread struct instead.
#ifdef KMP_ADJUST_BLOCKTIME
    if (__kmp_zero_bt && !this_thr->th.th_team_bt_set)
      hibernate = 0; // force immediate suspend
    else
      hibernate = this_thr->th.th_team_bt_intervals;
#else
    hibernate = this_thr->th.th_team_bt_intervals;
#endif /* KMP_ADJUST_BLOCKTIME */
    // Spin for the full number of intervals plus up to one more, measured
    // against the global time value.
    if (hibernate != 0)
      hibernate++;
    hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
    KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
                  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
                  hibernate - __kmp_global.g.g_time.dt.t_value));
#else
    hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
    poll_count = 0;
#endif // KMP_USE_MONITOR
  }

  oversubscribed = (TCR_4(__kmp_nth) > __kmp_avail_proc);
  // Main wait spin loop
  while (flag->notdone_check()) {
    int in_pool;
    kmp_task_team_t *task_team = NULL;
    if (__kmp_tasking_mode != tskm_immediate_exec) {
      task_team = this_thr->th.th_task_team;
      /* A NULL task team pointer means the task team has not been set up yet,
         all tasks have completed, or tasking is off for this region. */
      if (task_team != NULL) {
        if (TCR_SYNC_4(task_team->tt.tt_active)) {
          if (KMP_TASKING_ENABLED(task_team))
            flag->execute_tasks(
                this_thr, th_gtid, final_spin,
                &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
          else
            this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        } else {
          KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
          // The task team is done now; other cases are caught above.
          if (final_spin && ompt_enabled.enabled)
            __ompt_implicit_task_end(this_thr, ompt_entry_state, tId, pId);
          this_thr->th.th_task_team = NULL;
          this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
        }
      } else {
        this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
      }
    }
    KMP_FSYNC_SPIN_PREPARE(CCAST(typename C::flag_t *, spin));
    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    }
    // If oversubscribed, or we have waited a bit (KMP_LIBRARY=throughput),
    // then yield.
    if (oversubscribed) {
      KMP_YIELD(1);
    } else {
      KMP_YIELD_SPIN(spins);
    }
    // Check if this thread was transferred between the team and the thread
    // pool (or vice versa) while spinning.
    in_pool = !!TCR_4(this_thr->th.th_in_pool);
    if (in_pool != !!this_thr->th.th_active_in_pool) {
      if (in_pool) { // recently transferred from team to pool
        KMP_TEST_THEN_INC32(&__kmp_thread_pool_active_nth);
        this_thr->th.th_active_in_pool = TRUE;
      } else { // recently transferred from pool to team
        KMP_TEST_THEN_DEC32(&__kmp_thread_pool_active_nth);
        KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
        this_thr->th.th_active_in_pool = FALSE;
      }
    }
#if KMP_STATS_ENABLED
    // If signalled into the idle state, the logical fork-join barrier is done.
    if (this_thr->th.th_stats->isIdle() &&
        KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
      KMP_SET_THREAD_STATE(IDLE);
      KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
    }
#endif

    // Don't suspend if KMP_BLOCKTIME is set to "infinite".
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME)
      continue;
    // Don't suspend if there is a likelihood of new tasks being spawned.
    if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
      continue;
#if KMP_USE_MONITOR
    // If we have waited a bit more, fall asleep.
    if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
      continue;
#else
    if (KMP_BLOCKING(hibernate_goal, poll_count++))
      continue;
#endif

    KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
    flag->suspend(th_gtid);

    if (TCR_4(__kmp_global.g.g_done)) {
      if (__kmp_global.g.g_abort)
        __kmp_abort_thread();
      break;
    } else if (__kmp_tasking_mode != tskm_immediate_exec &&
               this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
      this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
    }
  } // end of main wait-spin loop
  omp_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
  if (ompt_enabled.enabled && ompt_exit_state != omp_state_undefined) {
    if (final_spin) {
      __ompt_implicit_task_end(this_thr, ompt_exit_state, tId, pId);
      ompt_exit_state = this_thr->th.ompt_thread_info.state;
    }
    if (ompt_exit_state == omp_state_idle) {
      if (ompt_enabled.ompt_callback_idle) {
        ompt_callbacks.ompt_callback(ompt_callback_idle)(ompt_scope_end);
      }
      this_thr->th.ompt_thread_info.state = omp_state_overhead;
    }
  }
#if KMP_STATS_ENABLED
  // If we were put into the idle state, pop that state off the stack.
  if (KMP_GET_THREAD_STATE() == IDLE) {
    KMP_POP_PARTITIONED_TIMER();
    KMP_SET_THREAD_STATE(thread_state);
    this_thr->th.th_stats->resetIdleFlag();
  }
#endif

  KMP_FSYNC_SPIN_ACQUIRED(CCAST(typename C::flag_t *, spin));
}
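/* Release any threads waiting on the flag: bump the flag value via
   internal_release() and, if the sleep bit is set, resume the sleeping
   waiter(s). */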
template <class C> static inline void __kmp_release_template(C *flag) {
#ifdef KMP_DEBUG
  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
#endif
  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
  KMP_DEBUG_ASSERT(flag->get());
  KMP_FSYNC_RELEASING(CCAST(typename C::flag_t *, flag->get()));

  flag->internal_release();

  KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(),
                 *(flag->get())));

  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
    // Only need to check the sleep state if infinite block time is not set.
    // Are *any* threads waiting on the flag sleeping?
    if (flag->is_any_sleeping()) {
      for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
        kmp_info_t *waiter = flag->get_waiter(i);
        if (waiter) {
          int wait_gtid = waiter->th.th_info.ds.ds_gtid;
          // Wake up the sleeping thread waiting on this flag.
          KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
                        "flag(%p) set\n",
                        gtid, wait_gtid, flag->get()));
          flag->resume(wait_gtid);
        }
      }
    }
  }
}
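/*! flag_traits maps a flag word type to the matching load and atomic
    read-modify-write primitives, so kmp_basic_flag below can be written once
    for both widths. A hedged illustration (not a call site from this file):
@code
  kmp_uint64 f = 0;
  flag_traits<kmp_uint64>::test_then_or(&f, 1ULL); // atomically set bit 0
  flag_traits<kmp_uint64>::test_then_add4(&f);     // atomically add 4
@endcode
*/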
template <typename FlagType> struct flag_traits {};

template <> struct flag_traits<kmp_uint32> {
  typedef kmp_uint32 flag_t;
  static const flag_type t = flag32;
  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR32(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND32(f, v);
  }
};

template <> struct flag_traits<kmp_uint64> {
  typedef kmp_uint64 flag_t;
  static const flag_type t = flag64;
  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
  static inline flag_t test_then_add4(volatile flag_t *f) {
    return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
  }
  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_OR64(f, v);
  }
  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
    return KMP_TEST_THEN_AND64(f, v);
  }
};
// Basic flag: the whole flag word is compared against a "checker" value.
template <typename FlagType> class kmp_basic_flag : public kmp_flag<FlagType> {
  typedef flag_traits<FlagType> traits_type;
  FlagType checker; /**< Value the flag must reach to be considered released. */
  kmp_info_t *waiting_threads[1]; /**< Threads sleeping on this flag. */
  kmp_uint32 num_waiting_threads; /**< Number of threads sleeping on this flag. */
public:
  kmp_basic_flag(volatile FlagType *p)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
  kmp_basic_flag(volatile FlagType *p, kmp_info_t *thr)
      : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
    waiting_threads[0] = thr;
  }
  kmp_basic_flag(volatile FlagType *p, FlagType c)
      : kmp_flag<FlagType>(p, traits_type::t), checker(c),
        num_waiting_threads(0) {}
  /*! @param i in   index into waiting_threads
      @result the thread that is waiting at index i */
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  /*! @result num_waiting_threads */
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  /*! @param thr in   insert thr as the waiting thread at index 0 */
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  /*! @result true if the flag object has been released */
  bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
  /*! @param old_loc in   old value of the flag
      @result true if the flag's old value indicates it was released */
  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
  /*! @result true if the flag object is not yet released */
  bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
  /*! Release waiting threads by bumping the flag toward the release value. */
  void internal_release() {
    (void)traits_type::test_then_add4((volatile FlagType *)this->get());
  }
  /*! Record that at least one thread sleeps on the flag by setting the sleep
      bit(s). @result the flag value before the sleep bit(s) were set */
  FlagType set_sleeping() {
    return traits_type::test_then_or((volatile FlagType *)this->get(),
                                     KMP_BARRIER_SLEEP_STATE);
  }
  /*! Record that no threads sleep on the flag any longer by clearing the sleep
      bit(s). @result the flag value before the sleep bit(s) were cleared */
  FlagType unset_sleeping() {
    return traits_type::test_then_and((volatile FlagType *)this->get(),
                                      ~KMP_BARRIER_SLEEP_STATE);
  }
  /*! @param old_loc in   test whether the sleep bit(s) are set in old_loc */
  bool is_sleeping_val(FlagType old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  /*! Test whether there are threads sleeping on the flag. */
  bool is_sleeping() { return is_sleeping_val(*(this->get())); }
  bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bs_last_barrier; }
};
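/*! Typical consumption pattern (an illustrative sketch, not a call site from
    this file): the wait template above polls notdone_check() until a releasing
    thread bumps the flag to the checker value.
@code
  while (flag.notdone_check()) {
    // spin/yield, execute tasks, or eventually flag.suspend(gtid)
  }
@endcode
*/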
class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
public:
  kmp_flag_32(volatile kmp_uint32 *p) : kmp_basic_flag<kmp_uint32>(p) {}
  kmp_flag_32(volatile kmp_uint32 *p, kmp_info_t *thr)
      : kmp_basic_flag<kmp_uint32>(p, thr) {}
  kmp_flag_32(volatile kmp_uint32 *p, kmp_uint32 c)
      : kmp_basic_flag<kmp_uint32>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_32(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  void wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    __kmp_wait_template(this_thr, this,
                        final_spin USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
};
class kmp_flag_64 : public kmp_basic_flag<kmp_uint64> {
public:
  kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag<kmp_uint64>(p) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
      : kmp_basic_flag<kmp_uint64>(p, thr) {}
  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
      : kmp_basic_flag<kmp_uint64>(p, c) {}
  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_64(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  void wait(kmp_info_t *this_thr,
            int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
    __kmp_wait_template(this_thr, this,
                        final_spin USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
};
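/*! A minimal usage sketch for kmp_flag_64, modeled on how the barrier code
    uses it. The exact call sites are not part of this header; this_thr,
    waiter_thr and itt_sync_obj are placeholder variables for illustration.
@code
  // Worker side: block until this thread's fork/join go-flag is bumped.
  kmp_flag_64 go_flag(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                      (kmp_uint64)KMP_BARRIER_STATE_BUMP);
  go_flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));

  // Releasing side: bump the waiter's go-flag and wake it if it is sleeping.
  kmp_flag_64 release_flag(&waiter_thr->th.th_bar[bs_forkjoin_barrier].bb.b_go,
                           waiter_thr);
  release_flag.release();
@endcode
*/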
// Hierarchical 64-bit on-core barrier instantiation: each byte of the 64-bit
// flag belongs to a different thread on the same core.
class kmp_flag_oncore : public kmp_flag<kmp_uint64> {
  kmp_uint64 checker;
  kmp_info_t *waiting_threads[1];
  kmp_uint32 num_waiting_threads;
  kmp_uint32 offset; /**< Portion of the flag of interest for an operation. */
  bool flag_switch; /**< Indicates a switch in flag location. */
  enum barrier_type bt; /**< Barrier type. */
  kmp_info_t *this_thr; /**< Thread that may be redirected to another flag. */
#if USE_ITT_BUILD
  void *itt_sync_obj; /**< ITT object passed to the new flag location. */
#endif
  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
    return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
  }

public:
  kmp_flag_oncore(volatile kmp_uint64 *p)
      : kmp_flag<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
      : kmp_flag<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
        offset(idx), flag_switch(false) {}
  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
                  enum barrier_type bar_t,
                  kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
      : kmp_flag<kmp_uint64>(p, flag_oncore), checker(c),
        num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),
        this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
  kmp_info_t *get_waiter(kmp_uint32 i) {
    KMP_DEBUG_ASSERT(i < num_waiting_threads);
    return waiting_threads[i];
  }
  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
  void set_waiter(kmp_info_t *thr) {
    waiting_threads[0] = thr;
    num_waiting_threads = 1;
  }
  bool done_check_val(kmp_uint64 old_loc) {
    return byteref(&old_loc, offset) == checker;
  }
  bool done_check() { return done_check_val(*get()); }
  bool notdone_check() {
    // Determine whether the flag location has been switched.
    if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
      flag_switch = true;
    if (byteref(get(), offset) != 1 && !flag_switch)
      return true;
    else if (flag_switch) {
      this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
      kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go,
                       (kmp_uint64)KMP_BARRIER_STATE_BUMP);
      __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
    }
    return false;
  }
  void internal_release() {
    // Other threads can write their own bytes of the flag simultaneously.
    if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
      byteref(get(), offset) = 1;
    } else {
      kmp_uint64 mask = 0;
      byteref(&mask, offset) = 1;
      KMP_TEST_THEN_OR64(get(), mask);
    }
  }
  kmp_uint64 set_sleeping() {
    return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE);
  }
  kmp_uint64 unset_sleeping() {
    return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE);
  }
  bool is_sleeping_val(kmp_uint64 old_loc) {
    return old_loc & KMP_BARRIER_SLEEP_STATE;
  }
  bool is_sleeping() { return is_sleeping_val(*get()); }
  bool is_any_sleeping() { return is_sleeping_val(*get()); }
  void wait(kmp_info_t *this_thr, int final_spin) {
    __kmp_wait_template<kmp_flag_oncore>(
        this_thr, this, final_spin USE_ITT_BUILD_ARG(itt_sync_obj));
  }
  void release() { __kmp_release_template(this); }
  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
                    int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
                    kmp_int32 is_constrained) {
    return __kmp_execute_tasks_oncore(
        this_thr, gtid, this, final_spin,
        thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
  }
  kmp_uint8 *get_stolen() { return NULL; }
  enum barrier_type get_bt() { return bt; }
};
// Used to wake up threads; the flag argument is typically the waiting thread's
// th_sleep_loc and may refer to any of the flag types above.
static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
  if (!flag)
    return;

  switch (RCAST(kmp_flag_64 *, CCAST(void *, flag))->get_type()) {
  case flag32:
    __kmp_resume_32(gtid, NULL);
    break;
  case flag64:
    __kmp_resume_64(gtid, NULL);
    break;
  case flag_oncore:
    __kmp_resume_oncore(gtid, NULL);
    break;
  }
}

#endif // KMP_WAIT_RELEASE_H