#include "kmp_affinity.h"
#include "kmp_atomic.h"
#include "kmp_environment.h"
#include "kmp_error.h"
#include "kmp_settings.h"
#include "kmp_stats.h"
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"

#include "ompt-specific.h"

#define KMP_USE_PRCTL 0

#include "tsan_annotations.h"

#if defined(KMP_GOMP_COMPAT)
char const __kmp_version_alt_comp[] =
    KMP_VERSION_PREFIX "alternative compiler support: yes";
#endif /* defined(KMP_GOMP_COMPAT) */

char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: ";

char const __kmp_version_lock[] =
    KMP_VERSION_PREFIX "lock type: run time selectable";

#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))

kmp_info_t __kmp_monitor;
/* Forward declarations */

void __kmp_cleanup(void);

static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid,
                                  int gtid);
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc);
#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
static void __kmp_partition_places(kmp_team_t *team,
                                   int update_master_only = 0);
#endif
static void __kmp_do_serial_initialize(void);
void __kmp_fork_barrier(int gtid, int tid);
void __kmp_join_barrier(int gtid);
void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc,
                          kmp_internal_control_t *new_icvs, ident_t *loc);

#ifdef USE_LOAD_BALANCE
static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc);
#endif

static int __kmp_expand_threads(int nNeed);
static int __kmp_unregister_root_other_thread(int gtid);
static void __kmp_unregister_library(void);
static void __kmp_reap_thread(kmp_info_t *thread, int is_root);
kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
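/* Note: __kmp_get_global_thread_id() below determines the calling thread's
   global thread id (gtid). When neither native TLS (__kmp_gtid_mode >= 3) nor
   keyed TLS (>= 2) is available, it falls back to scanning the registered
   threads' stack extents and returns the index whose stack contains a local
   address of the caller. */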
int __kmp_get_global_thread_id() {
  int i;
  kmp_info_t **other_threads;
  size_t stack_data;
  char *stack_addr;
  size_t stack_size;
  char *stack_base;

  KA_TRACE(
      1000,
      ("*** __kmp_get_global_thread_id: entering, nproc=%d  all_nproc=%d\n",
       __kmp_nth, __kmp_all_nth));

  if (!TCR_4(__kmp_init_gtid))
    return KMP_GTID_DNE;

#ifdef KMP_TDATA_GTID
  if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n"));
    return __kmp_gtid;
  }
#endif
  if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n"));
    return __kmp_gtid_get_specific();
  }
  KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n"));

  stack_addr = (char *)&stack_data;
  other_threads = __kmp_threads;

  for (i = 0; i < __kmp_threads_capacity; i++) {
    kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
    if (!thr)
      continue;

    stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
    stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);

    /* stack grows down -- search through all of the active threads */
    if (stack_addr <= stack_base) {
      size_t stack_diff = stack_base - stack_addr;

      if (stack_diff <= stack_size) {
        /* The only way we can be closer than the allocated stack size is if
           we are running on this thread. */
        KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i);
        return i;
      }
    }
  }

  /* get specific to try and determine our gtid */
  KA_TRACE(1000,
           ("*** __kmp_get_global_thread_id: internal alg. failed to find "
            "thread, using TLS\n"));
  i = __kmp_gtid_get_specific();

  if (i < 0)
    return i;

  /* dynamically updated stack window for uber threads to avoid get_specific
     call */
  if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
    KMP_FATAL(StackOverflow, i);
  }

  stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
  if (stack_addr > stack_base) {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
                stack_base);
  } else {
    TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
            stack_base - stack_addr);
  }

  /* Reprint stack bounds for ubermaster since they have been refined */
  if (__kmp_storage_map) {
    char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase;
    char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
    __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
                                 other_threads[i]->th.th_info.ds.ds_stacksize,
                                 "th_%d stack (refinement)", i);
  }
  return i;
}
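/* Note: the _reg variant below additionally registers the calling thread as a
   new root (via __kmp_register_root) when no gtid has been assigned yet,
   taking __kmp_initz_lock around serial initialization. */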
int __kmp_get_global_thread_id_reg() {
  int gtid;

  if (!__kmp_init_serial) {
    gtid = KMP_GTID_DNE;
  } else
#ifdef KMP_TDATA_GTID
      if (TCR_4(__kmp_gtid_mode) >= 3) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n"));
    gtid = __kmp_gtid;
  } else
#endif
      if (TCR_4(__kmp_gtid_mode) >= 2) {
    KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
    gtid = __kmp_gtid_get_specific();
  } else {
    KA_TRACE(1000,
             ("*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
    gtid = __kmp_get_global_thread_id();
  }

  /* we must be a new uber master sibling thread */
  if (gtid == KMP_GTID_DNE) {
    KA_TRACE(10,
             ("__kmp_get_global_thread_id_reg: Encountered new root thread. "
              "Registering a new gtid.\n"));
    __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
    if (!__kmp_init_serial) {
      __kmp_do_serial_initialize();
      gtid = __kmp_gtid_get_specific();
    } else {
      gtid = __kmp_register_root(FALSE);
    }
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
  }

  KMP_DEBUG_ASSERT(gtid >= 0);

  return gtid;
}
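/* Note: __kmp_check_stack_overlap() optionally prints the stack extents of
   the given thread and, when extensive checks are enabled, aborts with a
   StackOverlap fatal error if this thread's stack intersects another
   registered thread's stack. */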
void __kmp_check_stack_overlap(kmp_info_t *th) {
  int f;
  char *stack_beg = NULL;
  char *stack_end = NULL;
  int gtid;

  KA_TRACE(10, ("__kmp_check_stack_overlap: called\n"));
  if (__kmp_storage_map) {
    stack_end = (char *)th->th.th_info.ds.ds_stackbase;
    stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;

    gtid = __kmp_gtid_from_thread(th);

    if (gtid == KMP_GTID_MONITOR) {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%s stack (%s)", "mon",
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    } else {
      __kmp_print_storage_map_gtid(
          gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
          "th_%d stack (%s)", gtid,
          (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual");
    }
  }

  /* No point in checking ubermaster threads since they use refinement and
     cannot overlap */
  gtid = __kmp_gtid_from_thread(th);
  if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
    KA_TRACE(10,
             ("__kmp_check_stack_overlap: performing extensive checking\n"));
    if (stack_beg == NULL) {
      stack_end = (char *)th->th.th_info.ds.ds_stackbase;
      stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
    }

    for (f = 0; f < __kmp_threads_capacity; f++) {
      kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);

      if (f_th && f_th != th) {
        char *other_stack_end =
            (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
        char *other_stack_beg =
            other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
        if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
            (stack_end > other_stack_beg && stack_end < other_stack_end)) {

          /* Print the other stack values before the abort */
          if (__kmp_storage_map)
            __kmp_print_storage_map_gtid(
                -1, other_stack_beg, other_stack_end,
                (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
                "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));

          __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
                      __kmp_msg_null);
        }
      }
    }
  }
  KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n"));
}
void __kmp_infinite_loop(void) {
  static int done = FALSE;

  while (!done) {
    KMP_YIELD(1);
  }
}
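/* Note: each call to __kmp_print_storage_map_gtid() emits one line of the
   form "OMP storage map: <p1> <p2> <size> <description>" on kmp_err, e.g.
   (illustrative only): "OMP storage map: 0x...0 0x...f    1024 th_0 stack". */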
#define MAX_MESSAGE 512

void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size,
                                  char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  va_start(ap, format);
  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1,
               p2, (unsigned long)size, format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
#if KMP_PRINT_DATA_PLACEMENT
  int node;
  if (gtid >= 0) {
    if (p1 <= p2 && (char *)p2 - (char *)p1 == size) {
      if (__kmp_storage_map_verbose) {
        node = __kmp_get_host_node(p1);
        if (node < 0) /* doesn't work, so don't try this next time */
          __kmp_storage_map_verbose = FALSE;
        else {
          char *last;
          int lastNode;
          int localProc = __kmp_get_cpu_from_gtid(gtid);

          const int page_size = KMP_GET_PAGE_SIZE();

          p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1));
          p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1));
          if (localProc >= 0)
            __kmp_printf_no_lock("  GTID %d localNode %d\n", gtid,
                                 localProc >> 1);
          else
            __kmp_printf_no_lock("  GTID %d\n", gtid);
#if KMP_USE_PRCTL
          do {
            last = p1;
            lastNode = node;
            /* This loop collates adjacent pages with the same host node. */
            do {
              (char *)p1 += page_size;
            } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
            __kmp_printf_no_lock("    %p-%p memNode %d\n", last, (char *)p1 - 1,
                                 lastNode);
          } while (p1 <= p2);
#else
          __kmp_printf_no_lock("    %p-%p memNode %d\n", p1,
                               (char *)p1 + (page_size - 1),
                               __kmp_get_host_node(p1));
          if (p1 < p2) {
            __kmp_printf_no_lock("    %p-%p memNode %d\n", p2,
                                 (char *)p2 + (page_size - 1),
                                 __kmp_get_host_node(p2));
          }
#endif
        }
      }
    } else
      __kmp_printf_no_lock("  %s\n", KMP_I18N_STR(StorageMapWarning));
  }
#endif /* KMP_PRINT_DATA_PLACEMENT */
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
}
void __kmp_warn(char const *format, ...) {
  char buffer[MAX_MESSAGE];
  va_list ap;

  if (__kmp_generate_warnings == kmp_warnings_off) {
    return;
  }

  va_start(ap, format);

  KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format);
  __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
  __kmp_vprintf(kmp_err, buffer, ap);
  __kmp_release_bootstrap_lock(&__kmp_stdio_lock);

  va_end(ap);
}
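/* Note: __kmp_abort_process() terminates the whole process; on Windows it
   first records SIGABRT in __kmp_global.g.g_abort so that other threads can
   observe the abnormal termination. */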
void __kmp_abort_process() {
  // Later threads may stall here, but that's ok because abort() will kill them.
  __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);

  if (__kmp_debug_buf) {
    __kmp_dump_debug_buffer();
  }

  if (KMP_OS_WINDOWS) {
    // Let other threads know of abnormal termination and prevent deadlock
    // if abort happened during library initialization or shutdown.
    __kmp_global.g.g_abort = SIGABRT;
  }

  __kmp_infinite_loop();
  __kmp_release_bootstrap_lock(&__kmp_exit_lock);
} // __kmp_abort_process

void __kmp_abort_thread(void) {
  // In case of abort just call abort(); it will kill all the threads.
  __kmp_infinite_loop();
} // __kmp_abort_thread
static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) {
  __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
                               sizeof(kmp_desc_t), "th_%d.th_info", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
                               sizeof(kmp_local_t), "th_%d.th_local", gtid);

  __kmp_print_storage_map_gtid(
      gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
      sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
                               &thr->th.th_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[plain]",
                               gtid);

  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
                               &thr->th.th_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]",
                               gtid);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
                               &thr->th.th_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_t), "th_%d.th_bar[reduction]",
                               gtid);
#endif // KMP_FAST_REDUCTION_BARRIER
}

static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team,
                                         int team_id, int num_thr) {
  int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
  __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
                               &team->t.t_bar[bs_last_barrier],
                               sizeof(kmp_balign_team_t) * bs_last_barrier,
                               "%s_%d.t_bar", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
                               &team->t.t_bar[bs_plain_barrier + 1],
                               sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]",
                               header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
                               &team->t.t_bar[bs_forkjoin_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[forkjoin]", header, team_id);

#if KMP_FAST_REDUCTION_BARRIER
  __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
                               &team->t.t_bar[bs_reduction_barrier + 1],
                               sizeof(kmp_balign_team_t),
                               "%s_%d.t_bar[reduction]", header, team_id);
#endif // KMP_FAST_REDUCTION_BARRIER

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
      sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id);

  __kmp_print_storage_map_gtid(
      -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
      sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
                               &team->t.t_disp_buffer[num_disp_buff],
                               sizeof(dispatch_shared_info_t) * num_disp_buff,
                               "%s_%d.t_disp_buffer", header, team_id);

  __kmp_print_storage_map_gtid(-1, &team->t.t_taskq, &team->t.t_copypriv_data,
                               sizeof(kmp_taskq_t), "%s_%d.t_taskq", header,
                               team_id);
}

static void __kmp_init_allocator() {}
static void __kmp_fini_allocator() {}
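/* Note: the block below is only relevant for the Windows dynamic library
   build; DllMain() routes process/thread attach and detach events into the
   runtime's initialization and shutdown paths. */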
#ifdef KMP_DYNAMIC_LIB
#if KMP_OS_WINDOWS

static void __kmp_reset_lock(kmp_bootstrap_lock_t *lck) {
  // TODO: Change to __kmp_break_bootstrap_lock().
  __kmp_init_bootstrap_lock(lck); // make the lock released
}

static void __kmp_reset_locks_on_process_detach(int gtid_req) {
  int i;
  int thread_count;

  // Check that there are no other alive threads registered with the OMP lib.
  while (1) {
    thread_count = 0;
    for (i = 0; i < __kmp_threads_capacity; ++i) {
      kmp_info_t *th = __kmp_threads[i];
      if (th == NULL)
        continue;
      int gtid = th->th.th_info.ds.ds_gtid;
      if (gtid == gtid_req)
        continue;
      if (gtid < 0)
        continue;
      DWORD exit_val;
      int alive = __kmp_is_thread_alive(th, &exit_val);
      if (alive)
        ++thread_count;
    }
    if (thread_count == 0)
      break; // success
  }

  // Assume that we are alone; now it might be safe to check and reset locks.
  __kmp_reset_lock(&__kmp_forkjoin_lock);
#ifdef KMP_DEBUG
  __kmp_reset_lock(&__kmp_stdio_lock);
#endif // KMP_DEBUG
}

BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
  switch (fdwReason) {

  case DLL_PROCESS_ATTACH:
    KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n"));
    return TRUE;

  case DLL_PROCESS_DETACH:
    KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));

    if (lpReserved != NULL) {
      // lpReserved != NULL means the process is terminating; reset the locks
      // that might still be held by terminated threads.
      __kmp_reset_locks_on_process_detach(__kmp_gtid_get_specific());
    }

    __kmp_internal_end_library(__kmp_gtid_get_specific());
    return TRUE;

  case DLL_THREAD_ATTACH:
    KA_TRACE(10, ("DllMain: THREAD_ATTACH\n"));
    return TRUE;

  case DLL_THREAD_DETACH:
    KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));

    __kmp_internal_end_thread(__kmp_gtid_get_specific());
    return TRUE;
  }

  return TRUE;
}

#endif /* KMP_OS_WINDOWS */
#endif /* KMP_DYNAMIC_LIB */
/* Change the library type to "status" and return the old type. */
int __kmp_change_library(int status) {
  int old_status;

  old_status = __kmp_yield_init & 1; // even init count means throughput

  if (status) {
    __kmp_yield_init |= 1; // throughput => turnaround
  } else {
    __kmp_yield_init &= ~1; // turnaround => throughput
  }

  return old_status;
}
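/* Note: __kmp_parallel_deo()/__kmp_parallel_dxo() implement entry to and exit
   from an "ordered" region: deo waits until t_ordered.dt.t_value equals this
   thread's tid, and dxo passes the turn to (tid + 1) % t_nproc. */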
void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
#endif
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB();
    KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid),
                   KMP_EQ, NULL);
    KMP_MB();
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  int gtid = *gtid_ref;
#ifdef BUILD_PARALLEL_ORDERED
  int tid = __kmp_tid_from_gtid(gtid);
  kmp_team_t *team = __kmp_team_from_gtid(gtid);
#endif /* BUILD_PARALLEL_ORDERED */

  if (__kmp_env_consistency_check) {
    if (__kmp_threads[gtid]->th.th_root->r.r_active)
      __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
  }
#ifdef BUILD_PARALLEL_ORDERED
  if (!team->t.t_serialized) {
    KMP_MB(); /* Flush all pending memory write invalidates. */

    /* use the tid of the next thread in this team */
    team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);

    KMP_MB(); /* Flush all pending memory write invalidates. */
  }
#endif /* BUILD_PARALLEL_ORDERED */
}
int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) {
  int status;
  kmp_info_t *th;
  kmp_team_t *team;

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  th = __kmp_threads[gtid];
  team = th->th.th_team;
  status = 0;

  th->th.th_ident = id_ref;

  if (team->t.t_serialized) {
    status = 1;
  } else {
    kmp_int32 old_this = th->th.th_local.this_construct;

    ++th->th.th_local.this_construct;
    /* try to set team count to thread count--success means thread got the
       single block */
    if (team->t.t_construct == old_this) {
      status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this,
                                           th->th.th_local.this_construct);
    }
    if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
        KMP_MASTER_GTID(gtid) &&
        th->th.th_teams_microtask == NULL &&
        team->t.t_active_level ==
            1) { // Only report metadata by master of active team at level 1
      __kmp_itt_metadata_single(id_ref);
    }
  }

  if (__kmp_env_consistency_check) {
    if (status && push_ws) {
      __kmp_push_workshare(gtid, ct_psingle, id_ref);
    } else {
      __kmp_check_workshare(gtid, ct_psingle, id_ref);
    }
  }
  if (status) {
    __kmp_itt_single_start(gtid);
  }
  return status;
}

void __kmp_exit_single(int gtid) {
  __kmp_itt_single_end(gtid);
  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(gtid, ct_psingle, NULL);
}
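/* Note: __kmp_reserve_threads() decides how many threads the new team will
   really get. It applies the dynamic adjustment mode (load balance, thread
   limit, or random), then clips against KMP_DEVICE_THREAD_LIMIT,
   OMP_THREAD_LIMIT and the capacity of the __kmp_threads array, returning 1
   when the region must be serialized. */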
static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
                                 int master_tid, int set_nthreads
#if OMP_40_ENABLED
                                 ,
                                 int enter_teams
#endif /* OMP_40_ENABLED */
                                 ) {
  int capacity;
  int new_nthreads;
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  KMP_DEBUG_ASSERT(root && parent_team);

  // If dyn-var is set, dynamically adjust the number of desired threads,
  // according to the method specified by dynamic_mode.
  new_nthreads = set_nthreads;
  if (!get__dynamic_2(parent_team, master_tid)) {
    ; // no adjustment necessary
  }
#ifdef USE_LOAD_BALANCE
  else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
    new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
    if (new_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    }
  }
#endif /* USE_LOAD_BALANCE */
  else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
    new_nthreads = __kmp_avail_proc - __kmp_nth +
                   (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (new_nthreads <= 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    if (new_nthreads < set_nthreads) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced "
                    "reservation to %d threads\n",
                    master_tid, new_nthreads));
    } else {
      new_nthreads = set_nthreads;
    }
  } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
    if (set_nthreads > 2) {
      new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
      new_nthreads = (new_nthreads % set_nthreads) + 1;
      if (new_nthreads == 1) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to 1 thread\n",
                      master_tid));
        return 1;
      }
      if (new_nthreads < set_nthreads) {
        KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced "
                      "reservation to %d threads\n",
                      master_tid, new_nthreads));
      }
    }
  }

  // Respect KMP_DEVICE_THREAD_LIMIT (__kmp_max_nth).
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_max_nth) {
    int tl_nthreads = __kmp_max_nth - __kmp_nth +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Respect OMP_THREAD_LIMIT (__kmp_cg_max_nth).
  if (root->r.r_cg_nthreads + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      __kmp_cg_max_nth) {
    int tl_nthreads = __kmp_cg_max_nth - root->r.r_cg_nthreads +
                      (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
    if (tl_nthreads <= 0) {
      tl_nthreads = 1;
    }

    // If dyn-var is false, emit a 1-time warning.
    if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
      __kmp_reserve_warn = 1;
      __kmp_msg(kmp_ms_warning,
                KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
                KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
    }
    if (tl_nthreads == 1) {
      KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
                    "reduced reservation to 1 thread\n",
                    master_tid));
      return 1;
    }
    KC_TRACE(10, ("__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
                  "reservation to %d threads\n",
                  master_tid, tl_nthreads));
    new_nthreads = tl_nthreads;
  }

  // Check if the threads array is large enough, or needs expanding.
  capacity = __kmp_threads_capacity;
  if (TCR_PTR(__kmp_threads[0]) == NULL) {
    --capacity;
  }
  if (__kmp_nth + new_nthreads -
          (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
      capacity) {
    // Expand the threads array.
    int slotsRequired = __kmp_nth + new_nthreads -
                        (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
                        capacity;
    int slotsAdded = __kmp_expand_threads(slotsRequired);
    if (slotsAdded < slotsRequired) {
      // The threads array was not expanded enough.
      new_nthreads -= (slotsRequired - slotsAdded);
      KMP_ASSERT(new_nthreads >= 1);

      // If dyn-var is false, emit a 1-time warning.
      if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
        __kmp_reserve_warn = 1;
        if (__kmp_tp_cached) {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
                    KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
        } else {
          __kmp_msg(kmp_ms_warning,
                    KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
                    KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
        }
      }
    }
  }

  if (new_nthreads == 1) {
    KC_TRACE(10,
             ("__kmp_reserve_threads: T#%d serializing team after reclaiming "
              "dead roots and rechecking; requested %d threads\n",
              __kmp_get_gtid(), set_nthreads));
    return 1;
  }

  KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested"
                " %d threads\n",
                __kmp_get_gtid(), new_nthreads, set_nthreads));
  return new_nthreads;
}
/* Allocate the worker threads and set up the team for a new parallel region. */
static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
                                    kmp_info_t *master_th, int master_gtid) {
  int i;
  int use_hot_team;

  KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
  KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
  KMP_MB();

  /* first, let's setup the master thread */
  master_th->th.th_info.ds.ds_tid = 0;
  master_th->th.th_team = team;
  master_th->th.th_team_nproc = team->t.t_nproc;
  master_th->th.th_team_master = master_th;
  master_th->th.th_team_serialized = FALSE;
  master_th->th.th_dispatch = &team->t.t_dispatch[0];

/* make sure we are not the optimized hot team */
#if KMP_NESTED_HOT_TEAMS
  use_hot_team = 0;
  kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
  if (hot_teams) { // hot teams array is not allocated if
    // KMP_HOT_TEAMS_MAX_LEVEL=0
    int level = team->t.t_active_level - 1; // index in array of hot teams
    if (master_th->th.th_teams_microtask) { // are we inside the teams?
      if (master_th->th.th_teams_size.nteams > 1) {
        ++level; // level was not increased in teams construct for
        // team_of_masters
      }
      if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
          master_th->th.th_teams_level == team->t.t_level) {
        ++level; // level was not increased in teams construct for
        // team_of_workers before the parallel
      }
    }
    if (level < __kmp_hot_teams_max_level) {
      if (hot_teams[level].hot_team) {
        // hot team has already been allocated for given level
        KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
        use_hot_team = 1; // the team is ready to use
      } else {
        use_hot_team = 0; // threads are not allocated yet
        hot_teams[level].hot_team = team; // remember new hot team
        hot_teams[level].hot_team_nth = team->t.t_nproc;
      }
    }
  }
#else
  use_hot_team = team == root->r.r_hot_team;
#endif
  if (!use_hot_team) {
    /* install the master thread */
    team->t.t_threads[0] = master_th;
    __kmp_initialize_info(master_th, team, 0, master_gtid);

    /* now, install the worker threads */
    for (i = 1; i < team->t.t_nproc; i++) {
      /* fork or reallocate a new thread and install it in team */
      kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
      team->t.t_threads[i] = thr;
      KMP_DEBUG_ASSERT(thr);
      KMP_DEBUG_ASSERT(thr->th.th_team == team);
      /* align team and thread arrived states */
      KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
                    "T#%d(%d:%d) join =%llu, plain=%llu\n",
                    __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
                    __kmp_gtid_from_tid(i, team), team->t.t_id, i,
                    team->t.t_bar[bs_forkjoin_barrier].b_arrived,
                    team->t.t_bar[bs_plain_barrier].b_arrived));
      thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
      thr->th.th_teams_level = master_th->th.th_teams_level;
      thr->th.th_teams_size = master_th->th.th_teams_size;
      { // Initialize threads' barrier data.
        int b;
        kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
        }
      }
    }

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
    __kmp_partition_places(team);
#endif
  }

  KMP_MB();
}
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Propagate any changes to the floating point control registers out to the
// team. We avoid unnecessary writes to the team structure's cache line.
inline static void propagateFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control) {
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;

    // Get master values of FPU control flags (both X87 and vector)
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
    KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
  } else {
    // Don't write to this cache line in the team structure unless we have to.
    KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
  }
}

// Do the opposite: set the hardware registers to the updated values from the
// team.
inline static void updateHWFPControl(kmp_team_t *team) {
  if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
    // Only reset the fp control regs if they have been changed in the team.
    kmp_int16 x87_fpu_control_word;
    kmp_uint32 mxcsr;
    __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
    __kmp_store_mxcsr(&mxcsr);
    mxcsr &= KMP_X86_MXCSR_MASK;

    if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
    }

    if (team->t.t_mxcsr != mxcsr) {
      __kmp_load_mxcsr(&team->t.t_mxcsr);
    }
  }
}
#else
#define propagateFPControl(x) ((void)0)
#define updateHWFPControl(x) ((void)0)
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team,
                                     int realloc); // forward declaration
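/* Note: __kmp_serialized_parallel() executes a parallel region with a team of
   one. It reuses (or allocates) the thread's serial team, bumps t_serialized
   for nested serialized regions, and pushes a fresh dispatch buffer so
   enclosing loops keep their own state. */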
void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  /* utilize the serialized team held by this thread */
  KMP_DEBUG_ASSERT(serial_team);
  KMP_MB();

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(
        this_thr->th.th_task_team ==
        this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
    KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
                     NULL);
    KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / "
                  "team %p, new task_team = NULL\n",
                  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
    this_thr->th.th_task_team = NULL;
  }

#if OMP_40_ENABLED
  kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
  if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else if (proc_bind == proc_bind_default) {
    // No proc_bind clause was specified, so use the current proc-bind-var.
    proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
  }
  // Reset for next parallel region
  this_thr->th.th_set_proc_bind = proc_bind_default;
#endif /* OMP_40_ENABLED */

#if OMPT_SUPPORT
  ompt_data_t ompt_parallel_data;
  ompt_parallel_data.ptr = NULL;
  ompt_data_t *implicit_task_data;
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != omp_state_overhead) {

    ompt_task_info_t *parent_task_info;
    parent_task_info = OMPT_CUR_TASK_INFO(this_thr);

    parent_task_info->frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = 1;

      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          &(parent_task_info->task_data), &(parent_task_info->frame),
          &ompt_parallel_data, team_size, ompt_invoker_program, codeptr);
    }
  }
#endif // OMPT_SUPPORT

  if (this_thr->th.th_team != serial_team) {
    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;

    if (serial_team->t.t_serialized) {
      /* this serial team was already used; allocate a new one */
      kmp_team_t *new_team;

      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

      new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1,
#if OMPT_SUPPORT
                                     ompt_parallel_data,
#endif
#if OMP_40_ENABLED
                                     proc_bind,
#endif
                                     &this_thr->th.th_current_task->td_icvs,
                                     0 USE_NESTED_HOT_ARG(NULL));
      __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      KMP_ASSERT(new_team);

      /* setup new serialized team and install it */
      new_team->t.t_threads[0] = this_thr;
      new_team->t.t_parent = this_thr->th.th_team;
      serial_team = new_team;
      this_thr->th.th_serial_team = serial_team;

      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
           global_tid, serial_team));
    } else {
      KF_TRACE(
          10,
          ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
           global_tid, serial_team));
    }

    /* we have to initialize this serial team */
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
    serial_team->t.t_ident = loc;
    serial_team->t.t_serialized = 1;
    serial_team->t.t_nproc = 1;
    serial_team->t.t_parent = this_thr->th.th_team;
    serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
    this_thr->th.th_team = serial_team;
    serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;

    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
                  this_thr->th.th_current_task));
    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
    this_thr->th.th_current_task->td_flags.executing = 0;

    __kmp_push_current_task_to_thread(this_thr, serial_team, 0);

    copy_icvs(&this_thr->th.th_current_task->td_icvs,
              &this_thr->th.th_current_task->td_parent->td_icvs);

    // Thread value exists in the nested nthreads array for the next nested
    // level.
    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }

#if OMP_40_ENABLED
    if (__kmp_nested_proc_bind.used &&
        (level + 1 < __kmp_nested_proc_bind.used)) {
      this_thr->th.th_current_task->td_icvs.proc_bind =
          __kmp_nested_proc_bind.bind_types[level + 1];
    }
#endif /* OMP_40_ENABLED */

    serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger.
    this_thr->th.th_info.ds.ds_tid = 0;

    /* set thread cache values */
    this_thr->th.th_team_nproc = 1;
    this_thr->th.th_team_master = this_thr;
    this_thr->th.th_team_serialized = 1;

    serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
    serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;

    propagateFPControl(serial_team);

    /* check if we need to allocate dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    if (!serial_team->t.t_dispatch->th_disp_buffer) {
      serial_team->t.t_dispatch->th_disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_MB();
  } else {
    /* this serialized team is already being used; just add another nested
       level */
    KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads);
    KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
    ++serial_team->t.t_serialized;
    this_thr->th.th_team_serialized = serial_team->t.t_serialized;

    // Nested level will be an index in the nested nthreads array
    int level = this_thr->th.th_team->t.t_level;

    if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
      this_thr->th.th_current_task->td_icvs.nproc =
          __kmp_nested_nth.nth[level + 1];
    }
    serial_team->t.t_level++;
    KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level "
                  "of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_level));

    /* allocate/push dispatch buffers stack */
    KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
    {
      dispatch_private_info_t *disp_buffer =
          (dispatch_private_info_t *)__kmp_allocate(
              sizeof(dispatch_private_info_t));
      disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
      serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
    }
    this_thr->th.th_dispatch = serial_team->t.t_dispatch;

    KMP_MB();
  }
#if OMP_40_ENABLED
  KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);
#endif

  if (__kmp_env_consistency_check)
    __kmp_push_parallel(global_tid, NULL);
#if OMPT_SUPPORT
  serial_team->t.ompt_team_info.master_return_address = codeptr;
  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != omp_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = OMPT_GET_FRAME_ADDRESS(1);

    ompt_lw_taskteam_t lw_taskteam;
    __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
                            &ompt_parallel_data, codeptr);

    __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
    // don't use lw_taskteam after linking; content was swapped

    /* OMPT implicit task begin */
    implicit_task_data = OMPT_CUR_TASK_DATA(this_thr);
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
          OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid));
      OMPT_CUR_TASK_INFO(this_thr)
          ->thread_num = __kmp_tid_from_gtid(global_tid);
    }

    /* OMPT state */
    this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = OMPT_GET_FRAME_ADDRESS(1);
  }
#endif
}
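/* Note: __kmp_fork_call() is the main entry point for starting a parallel
   region. Roughly: decide the thread count via __kmp_reserve_threads(), either
   serialize (nthreads == 1), run the special teams-construct paths, or
   allocate a full team, copy the microtask arguments, release the workers via
   __kmp_internal_fork(), and invoke the microtask on the master. */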
int __kmp_fork_call(ident_t *loc, int gtid,
                    enum fork_context_e call_context, // Intel, GNU, ...
                    kmp_int32 argc, microtask_t microtask, launch_t invoker,
/* In certain cases, the va_list is passed by reference. */
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
                    va_list *ap
#else
                    va_list ap
#endif
                    ) {
  void **argv;
  int i;
  int master_tid;
  int master_this_cons;
  int nthreads;
  int master_active;
  int master_set_numthreads;
  int level;
  int active_level;
  int teams_level;
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
#if KMP_NESTED_HOT_TEAMS
  kmp_hot_team_ptr_t **p_hot_teams;
#endif

  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);

  KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid));
  if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
    /* Some systems prefer the stack for the root thread(s) to start with some
       gap from the parent stack to prevent false sharing. */
    void *dummy = KMP_ALLOCA(__kmp_stkpadding);
    /* These two lines are so this does not get optimized out. */
    if (__kmp_stkpadding > KMP_MAX_STKPADDING)
      __kmp_stkpadding += (short)((kmp_int64)dummy);
  }

  /* initialize if needed */
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  /* setup current data */
  master_th = __kmp_threads[gtid];
  parent_team = master_th->th.th_team;
  master_tid = master_th->th.th_info.ds.ds_tid;
  master_this_cons = master_th->th.th_local.this_construct;
  root = master_th->th.th_root;
  master_active = root->r.r_active;
  master_set_numthreads = master_th->th.th_set_nproc;

#if OMPT_SUPPORT
  ompt_data_t ompt_parallel_data;
  ompt_parallel_data.ptr = NULL;
  ompt_data_t *parent_task_data;
  omp_frame_t *ompt_frame;
  ompt_data_t *implicit_task_data;
  void *return_address = NULL;

  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
                                  NULL, NULL);
    return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);
  }
#endif

  // Nested level will be an index in the nested nthreads array
  level = parent_team->t.t_level;
  // used to launch non-serial teams even if nested is not allowed
  active_level = parent_team->t.t_active_level;
#if OMP_40_ENABLED
  // needed to check nesting inside the teams construct
  teams_level = master_th->th.th_teams_level;
#endif
#if KMP_NESTED_HOT_TEAMS
  p_hot_teams = &master_th->th.th_hot_teams;
  if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) {
    *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate(
        sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level);
    (*p_hot_teams)[0].hot_team = root->r.r_hot_team;
    // it is either actual or not needed (when active_level > 0)
    (*p_hot_teams)[0].hot_team_nth = 1;
  }
#endif

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_parallel_begin) {
      int team_size = master_set_numthreads
                          ? master_set_numthreads
                          : get__nproc_2(parent_team, master_tid);
      ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
          parent_task_data, ompt_frame, &ompt_parallel_data, team_size,
          OMPT_INVOKER(call_context), return_address);
    }
    master_th->th.ompt_thread_info.state = omp_state_overhead;
  }
#endif
  master_th->th.th_ident = loc;

#if OMP_40_ENABLED
  if (master_th->th.th_teams_microtask && ap &&
      microtask != (microtask_t)__kmp_teams_master && level == teams_level) {
    // This is the start of a parallel region nested inside a teams construct.
    // The team is actual (hot); all workers are ready at the fork barrier.
    parent_team->t.t_ident = loc;
    __kmp_alloc_argv_entries(argc, parent_team, TRUE);
    parent_team->t.t_argc = argc;
    argv = (void **)parent_team->t.t_argv;
    for (i = argc - 1; i >= 0; --i)
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
      *argv++ = va_arg(*ap, void *);
#else
      *argv++ = va_arg(ap, void *);
#endif
    // Increment our nested depth levels, but not the serialization count.
    if (parent_team == master_th->th.th_serial_team) {
      // We are in a serialized parallel region.
      __kmpc_serialized_parallel(loc, gtid);
      KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
      // Needed so that enquiry functions work correctly; restored at join time.
      parent_team->t.t_serialized--;
#if OMPT_SUPPORT
      void *dummy;
      void **exit_runtime_p;

      ompt_lw_taskteam_t lw_taskteam;

      if (ompt_enabled.enabled) {
        __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                &ompt_parallel_data, return_address);
        exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_frame);

        __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
        // don't use lw_taskteam after linking; content was swapped

        /* OMPT implicit task begin */
        implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
              implicit_task_data, 1, __kmp_tid_from_gtid(gtid));
          OMPT_CUR_TASK_INFO(master_th)
              ->thread_num = __kmp_tid_from_gtid(gtid);
        }

        /* OMPT state */
        master_th->th.ompt_thread_info.state = omp_state_work_parallel;
      } else {
        exit_runtime_p = &dummy;
      }
#endif

      {
        KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
        KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
        __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
                               ,
                               exit_runtime_p
#endif
                               );
      }

#if OMPT_SUPPORT
      *exit_runtime_p = NULL;
      if (ompt_enabled.enabled) {
        OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = NULL;
        if (ompt_enabled.ompt_callback_implicit_task) {
          ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
              ompt_scope_end, NULL, implicit_task_data, 1,
              OMPT_CUR_TASK_INFO(master_th)->thread_num);
        }
        __ompt_lw_taskteam_unlink(master_th);

        if (ompt_enabled.ompt_callback_parallel_end) {
          ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
              OMPT_CUR_TEAM_DATA(master_th), OMPT_CUR_TASK_DATA(master_th),
              OMPT_INVOKER(call_context), return_address);
        }
        master_th->th.ompt_thread_info.state = omp_state_overhead;
      }
#endif
      return TRUE;
    }

    parent_team->t.t_pkfn = microtask;
    parent_team->t.t_invoke = invoker;
    KMP_TEST_THEN_INC32((kmp_int32 *)&root->r.r_in_parallel);
    parent_team->t.t_active_level++;
    parent_team->t.t_level++;

    /* Change number of threads in the team if requested */
    if (master_set_numthreads) { // The parallel has num_threads clause
      if (master_set_numthreads < master_th->th.th_teams_size.nth) {
        // Only can reduce number of threads dynamically, can't increase.
        kmp_info_t **other_threads = parent_team->t.t_threads;
        parent_team->t.t_nproc = master_set_numthreads;
        for (i = 0; i < master_set_numthreads; ++i) {
          other_threads[i]->th.th_team_nproc = master_set_numthreads;
        }
        // Keep extra threads hot in the team for possible next parallels.
      }
      master_th->th.th_set_nproc = 0;
    }

#if USE_DEBUGGER
    if (__kmp_debugging) { // Let debugger override number of threads.
      int nth = __kmp_omp_num_threads(loc);
      if (nth > 0) { // 0 means debugger doesn't want to change num threads
        master_set_numthreads = nth;
      }
    }
#endif

    KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, parent_team, master_th, gtid));
    __kmp_internal_fork(loc, gtid, parent_team);
    KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, "
                  "master_th=%p, gtid=%d\n",
                  root, parent_team, master_th, gtid));

    /* Invoke microtask for MASTER thread */
    KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                  parent_team->t.t_id, parent_team->t.t_pkfn));

    {
      KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
      KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
      if (!parent_team->t.t_invoke(gtid)) {
        KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
      }
    }
    KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                  parent_team->t.t_id, parent_team->t.t_pkfn));
    KMP_MB(); /* Flush all pending memory write invalidates. */

    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

    return TRUE;
  } // Parallel closely nested in teams construct
#endif /* OMP_40_ENABLED */
  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
  }

  if (parent_team->t.t_active_level >=
      master_th->th.th_current_task->td_icvs.max_active_levels) {
    nthreads = 1;
  } else {
#if OMP_40_ENABLED
    int enter_teams = ((ap == NULL && active_level == 0) ||
                       (ap && teams_level > 0 && teams_level == level));
#endif
    nthreads = master_set_numthreads
                   ? master_set_numthreads
                   : get__nproc_2(parent_team, master_tid);
    if (nthreads > 1) {
      if ((!get__nested(master_th) && (root->r.r_in_parallel
#if OMP_40_ENABLED
                                       && !enter_teams
#endif
                                       )) ||
          (__kmp_library == library_serial)) {
        KC_TRACE(10, ("__kmp_fork_call: T#%d serializing team; requested %d"
                      " threads\n",
                      gtid, nthreads));
        nthreads = 1;
      }
    }
    if (nthreads > 1) {
      /* determine how many new threads we can use */
      __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
      nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads
#if OMP_40_ENABLED
                                       ,
                                       enter_teams
#endif /* OMP_40_ENABLED */
                                       );
      if (nthreads == 1) {
        // Free the fork/join lock for single-thread execution here; for
        // multi-thread execution it is freed later, after the team of threads
        // has been created and initialized.
        __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
      }
    }
  }
  KMP_DEBUG_ASSERT(nthreads > 0);

  // If we temporarily changed the set number of threads then restore it now.
  master_th->th.th_set_nproc = 0;

  /* create a serialized parallel region? */
  if (nthreads == 1) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    void *args[argc];
#else
    void **args = (void **)KMP_ALLOCA(argc * sizeof(void *));
#endif

    KA_TRACE(20,
             ("__kmp_fork_call: T#%d serializing parallel region\n", gtid));

    __kmpc_serialized_parallel(loc, gtid);

    if (call_context == fork_context_intel) {
      /* TODO this sucks, use the compiler itself to pass args! :) */
      master_th->th.th_serial_team->t.t_ident = loc;
#if OMP_40_ENABLED
      if (!ap) {
        // revert change made in __kmpc_serialized_parallel()
        master_th->th.th_serial_team->t.t_level--;

#if OMPT_SUPPORT
        void *dummy;
        void **exit_runtime_p;
        ompt_task_info_t *task_info;

        ompt_lw_taskteam_t lw_taskteam;

        if (ompt_enabled.enabled) {
          __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                  &ompt_parallel_data, return_address);

          __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
          // don't use lw_taskteam after linking; content was swapped

          task_info = OMPT_CUR_TASK_INFO(master_th);
          exit_runtime_p = &(task_info->frame.exit_frame);
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
                &(task_info->task_data), 1, __kmp_tid_from_gtid(gtid));
            OMPT_CUR_TASK_INFO(master_th)
                ->thread_num = __kmp_tid_from_gtid(gtid);
          }

          /* OMPT state */
          master_th->th.ompt_thread_info.state = omp_state_work_parallel;
        } else {
          exit_runtime_p = &dummy;
        }
#endif

        {
          KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
          KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
          __kmp_invoke_microtask(microtask, gtid, 0, argc,
                                 parent_team->t.t_argv
#if OMPT_SUPPORT
                                 ,
                                 exit_runtime_p
#endif
                                 );
        }

#if OMPT_SUPPORT
        if (ompt_enabled.enabled) {
          exit_runtime_p = NULL;
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_end, NULL, &(task_info->task_data), 1,
                OMPT_CUR_TASK_INFO(master_th)->thread_num);
          }

          __ompt_lw_taskteam_unlink(master_th);
          if (ompt_enabled.ompt_callback_parallel_end) {
            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                OMPT_CUR_TEAM_DATA(master_th), parent_task_data,
                OMPT_INVOKER(call_context), return_address);
          }
          master_th->th.ompt_thread_info.state = omp_state_overhead;
        }
#endif
      } else if (microtask == (microtask_t)__kmp_teams_master) {
        KMP_DEBUG_ASSERT(master_th->th.th_team ==
                         master_th->th.th_serial_team);
        team = master_th->th.th_team;
        team->t.t_invoke = invoker;
        __kmp_alloc_argv_entries(argc, team, TRUE);
        team->t.t_argc = argc;
        argv = (void **)team->t.t_argv;
        if (ap) {
          for (i = argc - 1; i >= 0; --i)
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
            *argv++ = va_arg(*ap, void *);
#else
            *argv++ = va_arg(ap, void *);
#endif
        } else {
          for (i = 0; i < argc; ++i)
            // Get args from parent team for teams construct
            argv[i] = parent_team->t.t_argv[i];
        }
        // revert change made in __kmpc_serialized_parallel() because the
        // initial code in teams should have level = 0
        team->t.t_level--;
        // call the special invoker for the outer "parallel" of teams
        {
          KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
          KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
          invoker(gtid);
        }
      } else {
#endif /* OMP_40_ENABLED */
        argv = args;
        for (i = argc - 1; i >= 0; --i)
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
          *argv++ = va_arg(*ap, void *);
#else
          *argv++ = va_arg(ap, void *);
#endif
        KMP_MB();

#if OMPT_SUPPORT
        void *dummy;
        void **exit_runtime_p;
        ompt_task_info_t *task_info;

        ompt_lw_taskteam_t lw_taskteam;

        if (ompt_enabled.enabled) {
          __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
                                  &ompt_parallel_data, return_address);
          __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
          // don't use lw_taskteam after linking; content was swapped

          task_info = OMPT_CUR_TASK_INFO(master_th);
          exit_runtime_p = &(task_info->frame.exit_frame);

          /* OMPT implicit task begin */
          implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
                implicit_task_data, 1, __kmp_tid_from_gtid(gtid));
            OMPT_CUR_TASK_INFO(master_th)
                ->thread_num = __kmp_tid_from_gtid(gtid);
          }

          /* OMPT state */
          master_th->th.ompt_thread_info.state = omp_state_work_parallel;
        } else {
          exit_runtime_p = &dummy;
        }
#endif

        {
          KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
          KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
          __kmp_invoke_microtask(microtask, gtid, 0, argc, args
#if OMPT_SUPPORT
                                 ,
                                 exit_runtime_p
#endif
                                 );
        }

#if OMPT_SUPPORT
        if (ompt_enabled.enabled) {
          *exit_runtime_p = NULL;
          if (ompt_enabled.ompt_callback_implicit_task) {
            ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
                ompt_scope_end, NULL, &(task_info->task_data), 1,
                OMPT_CUR_TASK_INFO(master_th)->thread_num);
          }

          ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
          __ompt_lw_taskteam_unlink(master_th);
          if (ompt_enabled.ompt_callback_parallel_end) {
            ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
                &ompt_parallel_data, parent_task_data,
                OMPT_INVOKER(call_context), return_address);
          }
          master_th->th.ompt_thread_info.state = omp_state_overhead;
        }
#endif
#if OMP_40_ENABLED
      }
#endif /* OMP_40_ENABLED */
    } else if (call_context == fork_context_gnu) {
#if OMPT_SUPPORT
      ompt_lw_taskteam_t lwt;
      __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data,
                              return_address);

      lwt.ompt_task_info.frame.exit_frame = NULL;
      __ompt_lw_taskteam_link(&lwt, master_th, 1);
// don't use lw_taskteam after linking; content was swapped
#endif

      // we were called from GNU native code
      KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
      return FALSE;
    } else {
      KMP_ASSERT2(call_context < fork_context_last,
                  "__kmp_fork_call: unknown fork_context parameter");
    }

    KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
    KMP_MB();
    return FALSE;
  }
  KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, "
                "curtask=%p, curtask_max_aclevel=%d\n",
                parent_team->t.t_active_level, master_th,
                master_th->th.th_current_task,
                master_th->th.th_current_task->td_icvs.max_active_levels));
  master_th->th.th_current_task->td_flags.executing = 0;

#if OMP_40_ENABLED
  if (!master_th->th.th_teams_microtask || level > teams_level)
#endif /* OMP_40_ENABLED */
  {
    /* Increment our nested depth level */
    KMP_TEST_THEN_INC32((kmp_int32 *)&root->r.r_in_parallel);
  }

  // See if we need to make a copy of the ICVs.
  int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc;
  if ((level + 1 < __kmp_nested_nth.used) &&
      (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) {
    nthreads_icv = __kmp_nested_nth.nth[level + 1];
  } else {
    nthreads_icv = 0; // don't update
  }

#if OMP_40_ENABLED
  // Figure out the proc_bind policy for the new team.
  kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
  kmp_proc_bind_t proc_bind_icv =
      proc_bind_default; // proc_bind_default means don't update
  if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
    proc_bind = proc_bind_false;
  } else {
    if (proc_bind == proc_bind_default) {
      // No proc_bind clause specified; use current proc-bind-var.
      proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
    }
    // Figure the value of proc-bind-var for the child threads.
    if ((level + 1 < __kmp_nested_proc_bind.used) &&
        (__kmp_nested_proc_bind.bind_types[level + 1] !=
         master_th->th.th_current_task->td_icvs.proc_bind)) {
      proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
    }
  }

  // Reset for next parallel region
  master_th->th.th_set_proc_bind = proc_bind_default;
#endif /* OMP_40_ENABLED */

  if ((nthreads_icv > 0)
#if OMP_40_ENABLED
      || (proc_bind_icv != proc_bind_default)
#endif /* OMP_40_ENABLED */
          ) {
    kmp_internal_control_t new_icvs;
    copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs);
    new_icvs.next = NULL;
    if (nthreads_icv > 0) {
      new_icvs.nproc = nthreads_icv;
    }

#if OMP_40_ENABLED
    if (proc_bind_icv != proc_bind_default) {
      new_icvs.proc_bind = proc_bind_icv;
    }
#endif /* OMP_40_ENABLED */

    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
                               ompt_parallel_data,
#endif
#if OMP_40_ENABLED
                               proc_bind,
#endif
                               &new_icvs, argc USE_NESTED_HOT_ARG(master_th));
  } else {
    /* allocate a new parallel team */
    KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n"));
    team = __kmp_allocate_team(root, nthreads, nthreads,
#if OMPT_SUPPORT
                               ompt_parallel_data,
#endif
#if OMP_40_ENABLED
                               proc_bind,
#endif
                               &master_th->th.th_current_task->td_icvs,
                               argc USE_NESTED_HOT_ARG(master_th));
  }
  KF_TRACE(
      10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team));

  /* setup the new team */
  KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid);
  KMP_CHECK_UPDATE(team->t.t_master_this_cons, master_this_cons);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);
  KMP_CHECK_UPDATE(team->t.t_parent, parent_team);
  KMP_CHECK_UPDATE_SYNC(team->t.t_pkfn, microtask);
#if OMPT_SUPPORT
  KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.master_return_address,
                        return_address);
#endif
  KMP_CHECK_UPDATE(team->t.t_invoke, invoker); /* TODO move to root, maybe */

#if OMP_40_ENABLED
  if (!master_th->th.th_teams_microtask || level > teams_level) {
#endif /* OMP_40_ENABLED */
    int new_level = parent_team->t.t_level + 1;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level + 1;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
#if OMP_40_ENABLED
  } else {
    // Do not increase the parallel level at the start of the teams construct.
    int new_level = parent_team->t.t_level;
    KMP_CHECK_UPDATE(team->t.t_level, new_level);
    new_level = parent_team->t.t_active_level;
    KMP_CHECK_UPDATE(team->t.t_active_level, new_level);
  }
#endif /* OMP_40_ENABLED */

  kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid);
  // set master's schedule as new run-time schedule
  KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);

#if OMP_40_ENABLED
  KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq);
#endif

  // Update the floating point rounding in the team if required.
  propagateFPControl(team);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    // Set master's task team to team's task team. Unless this is hot team, it
    // should be NULL.
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     parent_team->t.t_task_team[master_th->th.th_task_state]);
    KA_TRACE(20, ("__kmp_fork_call: Master T#%d pushing task_team %p / team "
                  "%p, new task_team %p / team %p\n",
                  __kmp_gtid_from_thread(master_th),
                  master_th->th.th_task_team, parent_team,
                  team->t.t_task_team[master_th->th.th_task_state], team));

    if (active_level || master_th->th.th_task_team) {
      // Take a memo of master's task_state
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      if (master_th->th.th_task_state_top >=
          master_th->th.th_task_state_stack_sz) { // increase size
        kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz;
        kmp_uint8 *old_stack, *new_stack;
        new_stack = (kmp_uint8 *)__kmp_allocate(new_size);
        for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) {
          new_stack[i] = master_th->th.th_task_state_memo_stack[i];
        }
        for (i = master_th->th.th_task_state_stack_sz; i < new_size;
             ++i) { // zero-init rest of stack
          new_stack[i] = 0;
        }
        old_stack = master_th->th.th_task_state_memo_stack;
        master_th->th.th_task_state_memo_stack = new_stack;
        master_th->th.th_task_state_stack_sz = new_size;
        __kmp_free(old_stack);
      }
      // Store master's task_state on stack
      master_th->th
          .th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      master_th->th.th_task_state_top++;
#if KMP_NESTED_HOT_TEAMS
      if (team == master_th->th.th_hot_teams[active_level].hot_team) {
        // Restore master's nested state if nested hot team
        master_th->th.th_task_state =
            master_th->th
                .th_task_state_memo_stack[master_th->th.th_task_state_top];
      } else {
#endif
        master_th->th.th_task_state = 0;
#if KMP_NESTED_HOT_TEAMS
      }
#endif
    }
#if !KMP_NESTED_HOT_TEAMS
    KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) ||
                     (team == root->r.r_hot_team));
#endif
  }

  KA_TRACE(
      20,
      ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n",
       gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id,
       team->t.t_nproc));
  KMP_DEBUG_ASSERT(team != root->r.r_hot_team ||
                   (team->t.t_master_tid == 0 &&
                    (team->t.t_parent == root->r.r_root_team ||
                     team->t.t_parent->t.t_serialized)));
  KMP_MB();

  /* now, setup the arguments */
  argv = (void **)team->t.t_argv;
#if OMP_40_ENABLED
  if (ap) {
#endif /* OMP_40_ENABLED */
    for (i = argc - 1; i >= 0; --i) {
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
      void *new_argv = va_arg(*ap, void *);
#else
      void *new_argv = va_arg(ap, void *);
#endif
      KMP_CHECK_UPDATE(*argv, new_argv);
      argv++;
    }
#if OMP_40_ENABLED
  } else {
    for (i = 0; i < argc; ++i) {
      // Get args from parent team for teams construct
      KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]);
    }
  }
#endif /* OMP_40_ENABLED */

  /* now actually fork the threads */
  KMP_CHECK_UPDATE(team->t.t_master_active, master_active);
  if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong
    root->r.r_active = TRUE;

  __kmp_fork_team_threads(root, team, master_th, gtid);
  __kmp_setup_icv_copy(team, nthreads,
                       &master_th->th.th_current_task->td_icvs, loc);

#if OMPT_SUPPORT
  master_th->th.ompt_thread_info.state = omp_state_work_parallel;
#endif

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

#if USE_ITT_BUILD
  if (team->t.t_active_level == 1 // only report frames at level 1
#if OMP_40_ENABLED
      && !master_th->th.th_teams_microtask // not in teams construct
#endif /* OMP_40_ENABLED */
      ) {
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        (__kmp_forkjoin_frames_mode == 3 ||
         __kmp_forkjoin_frames_mode == 1)) {
      kmp_uint64 tmp_time = 0;
      if (__itt_get_timestamp_ptr)
        tmp_time = __itt_get_timestamp();
      // Internal fork - report frame begin
      master_th->th.th_frame_time = tmp_time;
      if (__kmp_forkjoin_frames_mode == 3)
        team->t.t_region_time = tmp_time;
    } else if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) &&
               __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) {
      // Mark start of "parallel" region for Intel(R) VTune(TM) analyzer.
      __kmp_itt_region_forking(gtid, team->t.t_nproc, 0);
    }
  }
#endif /* USE_ITT_BUILD */

  /* now go on and do the work */
  KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team);
  KMP_MB();
  KF_TRACE(10,
           ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n",
            root, team, master_th, gtid));

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    team->t.t_stack_id =
        __kmp_itt_stack_caller_create(); // create new stack stitching id
    // before entering fork barrier
  }
#endif /* USE_ITT_BUILD */

  __kmp_internal_fork(loc, gtid, team);
  KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, "
                "master_th=%p, gtid=%d\n",
                root, team, master_th, gtid));

  if (call_context == fork_context_gnu) {
    KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));
    return TRUE;
  }

  /* Invoke microtask for MASTER thread */
  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));

  {
    KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
    KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
    if (!team->t.t_invoke(gtid)) {
      KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread");
    }
  }
  KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid,
                team->t.t_id, team->t.t_pkfn));
  KMP_MB(); /* Flush all pending memory write invalidates. */

  KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid));

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    master_th->th.ompt_thread_info.state = omp_state_overhead;
  }
#endif

  return TRUE;
}
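/* Note: the two helpers below are only meaningful when OMPT_SUPPORT is
   enabled; __kmp_join_ompt() fires the parallel-end callback and then restores
   the thread state via __kmp_join_restore_state(). */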
#if OMPT_SUPPORT
static inline void __kmp_join_restore_state(kmp_info_t *thread,
                                            kmp_team_t *team) {
  // restore state outside the region
  thread->th.ompt_thread_info.state =
      ((team->t.t_serialized) ? omp_state_work_serial
                              : omp_state_work_parallel);
}

static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
                                   kmp_team_t *team,
                                   ompt_data_t *parallel_data,
                                   fork_context_e fork_context,
                                   void *codeptr) {
  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
  if (ompt_enabled.ompt_callback_parallel_end) {
    ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
        parallel_data, &(task_info->task_data), OMPT_INVOKER(fork_context),
        codeptr);
  }

  task_info->frame.enter_frame = NULL;
  __kmp_join_restore_state(thread, team);
}
#endif
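/* Note: __kmp_join_call() is the counterpart of __kmp_fork_call(): it waits
   for the workers in __kmp_internal_join(), reports the region to ITT/OMPT if
   enabled, frees or shrinks the team, and restores the master thread's team,
   dispatch buffer and task-state bookkeeping to those of the parent team. */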
void __kmp_join_call(ident_t *loc, int gtid
#if OMPT_SUPPORT
                     ,
                     enum fork_context_e fork_context
#endif
#if OMP_40_ENABLED
                     ,
                     int exit_teams
#endif /* OMP_40_ENABLED */
                     ) {
  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
  kmp_team_t *team;
  kmp_team_t *parent_team;
  kmp_info_t *master_th;
  kmp_root_t *root;
  int master_active;
  int i;

  KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid));

  /* setup current data */
  master_th = __kmp_threads[gtid];
  root = master_th->th.th_root;
  team = master_th->th.th_team;
  parent_team = team->t.t_parent;

  master_th->th.th_ident = loc;

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    master_th->th.ompt_thread_info.state = omp_state_overhead;
  }
#endif

  if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) {
    KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, "
                  "th_task_team = %p\n",
                  __kmp_gtid_from_thread(master_th), team,
                  team->t.t_task_team[master_th->th.th_task_state],
                  master_th->th.th_task_team));
    KMP_DEBUG_ASSERT(master_th->th.th_task_team ==
                     team->t.t_task_team[master_th->th.th_task_state]);
  }

  if (team->t.t_serialized) {
#if OMP_40_ENABLED
    if (master_th->th.th_teams_microtask) {
      // We are in the teams construct.
      int level = team->t.t_level;
      int tlevel = master_th->th.th_teams_level;
      if (level == tlevel) {
        // We haven't incremented it earlier at the start of the teams
        // construct, so do it here, at the end of the teams construct.
        team->t.t_level++;
      } else if (level == tlevel + 1) {
        // We are exiting a parallel inside teams; increment serialization so
        // that it can be restored in __kmpc_end_serialized_parallel.
        team->t.t_serialized++;
      }
    }
#endif /* OMP_40_ENABLED */
    __kmpc_end_serialized_parallel(loc, gtid);

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      __kmp_join_restore_state(master_th, parent_team);
    }
#endif

    return;
  }

  master_active = team->t.t_master_active;

#if OMP_40_ENABLED
  if (!exit_teams)
#endif /* OMP_40_ENABLED */
  {
    // No barrier for internal teams at exit from teams construct, but there is
    // a barrier for the external team (league).
    __kmp_internal_join(loc, gtid, team);
  }
#if OMP_40_ENABLED
  else {
    master_th->th.th_task_state =
        0; // no tasking in teams (outside of any parallel)
  }
#endif /* OMP_40_ENABLED */

  KMP_MB();

#if OMPT_SUPPORT
  ompt_data_t *parallel_data = &(team->t.ompt_team_info.parallel_data);
  void *codeptr = team->t.ompt_team_info.master_return_address;
#endif

#if USE_ITT_BUILD
  if (__itt_stack_caller_create_ptr) {
    __kmp_itt_stack_caller_destroy(
        (__itt_caller)team->t.t_stack_id); // destroy the stack stitching id
  }

  // Mark end of "parallel" region for Intel(R) VTune(TM) analyzer.
  if (team->t.t_active_level == 1
#if OMP_40_ENABLED
      && !master_th->th.th_teams_microtask /* not in teams construct */
#endif /* OMP_40_ENABLED */
      ) {
    master_th->th.th_ident = loc;
    // only one notification scheme (either "submit" or "forking/joined")
    if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) &&
        __kmp_forkjoin_frames_mode == 3)
      __kmp_itt_frame_submit(gtid, team->t.t_region_time,
                             master_th->th.th_frame_time, 0, loc,
                             master_th->th.th_team_nproc, 1);
    else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) &&
             !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames)
      __kmp_itt_region_joined(gtid);
  } // active_level == 1
#endif /* USE_ITT_BUILD */

#if OMP_40_ENABLED
  if (master_th->th.th_teams_microtask && !exit_teams &&
      team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
      team->t.t_level == master_th->th.th_teams_level + 1) {
    // We need to leave the team structure intact at the end of a parallel
    // inside the teams construct, so that the same (hot) team works at the
    // next parallel; only adjust the nesting levels.
    team->t.t_level--;
    team->t.t_active_level--;
    KMP_TEST_THEN_DEC32((kmp_int32 *)&root->r.r_in_parallel);

    /* Restore number of threads in the team if needed */
    if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) {
      int old_num = master_th->th.th_team_nproc;
      int new_num = master_th->th.th_teams_size.nth;
      kmp_info_t **other_threads = team->t.t_threads;
      team->t.t_nproc = new_num;
      for (i = 0; i < old_num; ++i) {
        other_threads[i]->th.th_team_nproc = new_num;
      }
      // Adjust states of the non-used threads of the team.
      for (i = old_num; i < new_num; ++i) {
        // Re-initialize thread's barrier data.
        int b;
        kmp_balign_t *balign = other_threads[i]->th.th_bar;
        for (b = 0; b < bs_last_barrier; ++b) {
          balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
          KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
          balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
        }
        if (__kmp_tasking_mode != tskm_immediate_exec) {
          // Synchronize thread's task state
          other_threads[i]->th.th_task_state = master_th->th.th_task_state;
        }
      }
    }

#if OMPT_SUPPORT
    if (ompt_enabled.enabled) {
      __kmp_join_ompt(gtid, master_th, parent_team, parallel_data,
                      fork_context, codeptr);
    }
#endif

    return;
  }
#endif /* OMP_40_ENABLED */

  /* do cleanup and restore the parent team */
  master_th->th.th_info.ds.ds_tid = team->t.t_master_tid;
  master_th->th.th_local.this_construct = team->t.t_master_this_cons;

  master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid];

  /* The following lock has instructions with REL and ACQ semantics,
     separating the parallel user code called in this parallel region from the
     serial user code called after this function returns. */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

#if OMP_40_ENABLED
  if (!master_th->th.th_teams_microtask ||
      team->t.t_level > master_th->th.th_teams_level)
#endif /* OMP_40_ENABLED */
  {
    /* Decrement our nested depth level */
    KMP_TEST_THEN_DEC32((kmp_int32 *)&root->r.r_in_parallel);
  }
  KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    if (ompt_enabled.ompt_callback_implicit_task) {
      int ompt_team_size = team->t.t_nproc;
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, &(task_info->task_data), ompt_team_size,
          OMPT_CUR_TASK_INFO(master_th)->thread_num);
    }

    task_info->frame.exit_frame = NULL;
    task_info->task_data = ompt_data_none;
  }
#endif

  KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0,
                master_th, team));
  __kmp_pop_current_task_from_thread(master_th);

#if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
  // Restore master thread's partition.
  master_th->th.th_first_place = team->t.t_first_place;
  master_th->th.th_last_place = team->t.t_last_place;
#endif /* OMP_40_ENABLED */

  updateHWFPControl(team);

  if (root->r.r_active != master_active)
    root->r.r_active = master_active;

  __kmp_free_team(root, team USE_NESTED_HOT_ARG(
                            master_th)); // this will free worker threads

  /* This assignment must be inside the critical region; otherwise assertions
     may fail occasionally because the old team may be reallocated and the
     hierarchy would appear inconsistent. */
  master_th->th.th_team = parent_team;
  master_th->th.th_team_nproc = parent_team->t.t_nproc;
  master_th->th.th_team_master = parent_team->t.t_threads[0];
  master_th->th.th_team_serialized = parent_team->t.t_serialized;

  /* restore serialized team, if needed */
  if (parent_team->t.t_serialized &&
      parent_team != master_th->th.th_serial_team &&
      parent_team != root->r.r_root_team) {
    __kmp_free_team(root,
                    master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL));
    master_th->th.th_serial_team = parent_team;
  }

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    if (master_th->th.th_task_state_top >
        0) { // Restore task state from memo stack
      KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack);
      // Remember master's state if we re-use this nested hot team.
      master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] =
          master_th->th.th_task_state;
      --master_th->th.th_task_state_top; // pop
      // Now restore state at this level.
      master_th->th.th_task_state =
          master_th->th
              .th_task_state_memo_stack[master_th->th.th_task_state_top];
    }
    // Copy the task team from the parent team to the master thread.
    master_th->th.th_task_team =
        parent_team->t.t_task_team[master_th->th.th_task_state];
    KA_TRACE(20,
             ("__kmp_join_call: Master T#%d restoring task_team %p / team %p\n",
              __kmp_gtid_from_thread(master_th), master_th->th.th_task_team,
              parent_team));
  }

  master_th->th.th_current_task->td_flags.executing = 1;

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled) {
    __kmp_join_ompt(gtid, master_th, parent_team, parallel_data, fork_context,
                    codeptr);
  }
#endif

  KMP_MB();
  KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid));
}
/* Check whether we should push an internal control record onto the
   serial team stack. If so, do it. */
void __kmp_save_internal_controls(kmp_info_t *thread) {

  if (thread->th.th_team != thread->th.th_serial_team) {
    return;
  }
  if (thread->th.th_team->t.t_serialized > 1) {
    int push = 0;

    if (thread->th.th_team->t.t_control_stack_top == NULL) {
      push = 1;
    } else {
      if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level !=
          thread->th.th_team->t.t_serialized) {
        push = 1;
      }
    }
    if (push) { /* push a record on the serial team's stack */
      kmp_internal_control_t *control =
          (kmp_internal_control_t *)__kmp_allocate(
              sizeof(kmp_internal_control_t));

      copy_icvs(control, &thread->th.th_current_task->td_icvs);

      control->serial_nesting_level = thread->th.th_team->t.t_serialized;

      control->next = thread->th.th_team->t.t_control_stack_top;
      thread->th.th_team->t.t_control_stack_top = control;
    }
  }
}
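// Illustrative sketch (not compiled into the runtime): the routine above
// pushes at most one control record per serialized nesting level onto the
// serial team's stack. The stand-alone model below uses hypothetical names
// (ctl_record, push_if_new_level) and plain new instead of __kmp_allocate,
// purely to show the push rule.
#if 0
struct ctl_record {
  int serial_nesting_level; // nesting level this snapshot belongs to
  ctl_record *next; // older records, as in t_control_stack_top
};
static void push_if_new_level(ctl_record **top, int level) {
  // Push only if there is no record yet for this nesting level.
  if (*top == NULL || (*top)->serial_nesting_level != level) {
    *top = new ctl_record{level, *top};
  }
}
#endif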
/* Changes set_nproc */
void __kmp_set_num_threads(int new_nth, int gtid) {
  kmp_info_t *thread;
  kmp_root_t *root;

  KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (new_nth < 1)
    new_nth = 1;
  else if (new_nth > __kmp_max_nth)
    new_nth = __kmp_max_nth;

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  set__nproc(thread, new_nth);

  // If this omp_set_num_threads() call will cause the hot team size to be
  // reduced (in the absence of a num_threads clause), then reduce it now,
  // rather than waiting for the next parallel region.
  root = thread->th.th_root;
  if (__kmp_init_parallel && (!root->r.r_active) &&
      (root->r.r_hot_team->t.t_nproc > new_nth)
#if KMP_NESTED_HOT_TEAMS
      && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode
#endif
      ) {
    kmp_team_t *hot_team = root->r.r_hot_team;
    int f;

    __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);

    // Release the extra threads we don't need any more.
    for (f = new_nth; f < hot_team->t.t_nproc; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      if (__kmp_tasking_mode != tskm_immediate_exec) {
        // When decreasing the team size, threads no longer in the team should
        // unref their task team.
        hot_team->t.t_threads[f]->th.th_task_team = NULL;
      }
      __kmp_free_thread(hot_team->t.t_threads[f]);
      hot_team->t.t_threads[f] = NULL;
    }
    hot_team->t.t_nproc = new_nth;
#if KMP_NESTED_HOT_TEAMS
    if (thread->th.th_hot_teams) {
      KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team);
      thread->th.th_hot_teams[0].hot_team_nth = new_nth;
    }
#endif

    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);

    // Update the t_nproc field in the threads that are still active.
    for (f = 0; f < new_nth; f++) {
      KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL);
      hot_team->t.t_threads[f]->th.th_team_nproc = new_nth;
    }
    // Special flag in case of an omp_set_num_threads() call.
    hot_team->t.t_size_changed = -1;
  }
}
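// Illustrative sketch (not compiled into the runtime): user code reaches
// __kmp_set_num_threads above through the standard omp_set_num_threads()
// entry point; only the public <omp.h> API is assumed here.
#if 0
#include <omp.h>
#include <stdio.h>
int main() {
  omp_set_num_threads(2); // clamped to [1, max] and saved as the nproc ICV
#pragma omp parallel // the next region uses at most 2 threads
  printf("thread %d of %d\n", omp_get_thread_num(), omp_get_num_threads());
  return 0;
}
#endif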
/* Changes max_active_levels */
void __kmp_set_max_active_levels(int gtid, int max_active_levels) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread "
                "%d = (%d)\n",
                gtid, max_active_levels));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // validate max_active_levels
  if (max_active_levels < 0) {
    KMP_WARNING(ActiveLevelsNegative, max_active_levels);
    // The call is ignored: keep the last valid setting and issue a warning
    // (if warnings are allowed, as controlled by the KMP_WARNINGS env var).
    KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new "
                  "max_active_levels for thread %d = (%d)\n",
                  gtid, max_active_levels));
    return;
  }
  if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) {
    // OK: max_active_levels is within the valid range
    // [0; KMP_MAX_ACTIVE_LEVELS_LIMIT]. A zero value is allowed
    // (implementation defined behavior).
  } else {
    KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels,
                KMP_MAX_ACTIVE_LEVELS_LIMIT);
    max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT;
    // If the input exceeds the upper limit, correct it to the upper limit
    // (implementation defined behavior).
  }
  KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new "
                "max_active_levels for thread %d = (%d)\n",
                gtid, max_active_levels));

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, max_active_levels);
}
/* Gets max_active_levels */
int __kmp_get_max_active_levels(int gtid) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(thread->th.th_current_task);
  KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, "
                "curtask_maxaclevel=%d\n",
                gtid, thread->th.th_current_task,
                thread->th.th_current_task->td_icvs.max_active_levels));
  return thread->th.th_current_task->td_icvs.max_active_levels;
}
/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
  kmp_info_t *thread;

  KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
                gtid, (int)kind, chunk));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  // Check if the kind parameter is valid. Valid parameters should fit in one
  // of two intervals - standard or extended:
  //       <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
  if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
      (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
    // TODO: Hint needs attention in case we change the default schedule.
    __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind),
              KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"),
              __kmp_msg_null);
    kind = kmp_sched_default;
    chunk = 0; // ignore chunk value in case of bad kind
  }

  thread = __kmp_threads[gtid];

  __kmp_save_internal_controls(thread);

  if (kind < kmp_sched_upper_std) {
    if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) {
      // differ static chunked vs. unchunked: an invalid chunk indicates an
      // unchunked schedule (which is the default)
      thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static;
    } else {
      thread->th.th_current_task->td_icvs.sched.r_sched_type =
          __kmp_sch_map[kind - kmp_sched_lower - 1];
    }
  } else {
    thread->th.th_current_task->td_icvs.sched.r_sched_type =
        __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
                      kmp_sched_lower - 2];
  }
  if (kind == kmp_sched_auto || chunk < 1) {
    // ignore parameter chunk for schedule auto
    thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    thread->th.th_current_task->td_icvs.sched.chunk = chunk;
  }
}
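// Illustrative sketch (not compiled into the runtime): the run-time schedule
// ICV written above is what schedule(runtime) loops consult. Only the public
// <omp.h> API is assumed here.
#if 0
#include <omp.h>
int main() {
  omp_set_schedule(omp_sched_dynamic, 4); // kind and chunk, validated above
  omp_sched_t kind;
  int chunk;
  omp_get_schedule(&kind, &chunk); // reads the ICV back

#pragma omp parallel for schedule(runtime) // uses dynamic,4 at run time
  for (int i = 0; i < 100; ++i) {
  }
  return 0;
}
#endif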
/* Gets def_sched_var ICV values */
void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
  kmp_info_t *thread;
  enum sched_type th_type;

  KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thread = __kmp_threads[gtid];

  th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;

  switch (th_type) {
  case kmp_sch_static:
  case kmp_sch_static_greedy:
  case kmp_sch_static_balanced:
    *kind = kmp_sched_static;
    *chunk = 0; // chunk was not set, show this via a zero value
    return;
  case kmp_sch_static_chunked:
    *kind = kmp_sched_static;
    break;
  case kmp_sch_dynamic_chunked:
    *kind = kmp_sched_dynamic;
    break;
  case kmp_sch_guided_iterative_chunked:
  case kmp_sch_guided_analytical_chunked:
    *kind = kmp_sched_guided;
    break;
  case kmp_sch_auto:
    *kind = kmp_sched_auto;
    break;
  case kmp_sch_trapezoidal:
    *kind = kmp_sched_trapezoidal;
    break;
#if KMP_STATIC_STEAL_ENABLED
  case kmp_sch_static_steal:
    *kind = kmp_sched_static_steal;
    break;
#endif
  default:
    KMP_FATAL(UnknownSchedulingType, th_type);
  }

  *chunk = thread->th.th_current_task->td_icvs.sched.chunk;
}
// Returns the thread number of the ancestor at the given nesting level.
int __kmp_get_ancestor_thread_num(int gtid, int level) {

  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;

  if (thr->th.th_teams_microtask) {
    // We are in a teams region where multiple nested teams share the level.
    int tlevel = thr->th.th_teams_level; // level of the teams construct
    if (level <= tlevel) {
      KMP_DEBUG_ASSERT(ii >= tlevel);
    }
  }

  if (ii == level)
    return __kmp_tid_from_gtid(gtid);

  dd = team->t.t_serialized;
  level++;
  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if ((team->t.t_serialized) && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      dd = team->t.t_serialized;
      ii--;
    }
  }

  return (dd > 1) ? (0) : (team->t.t_master_tid);
}
// Returns the size of the team at the given nesting level.
int __kmp_get_team_size(int gtid, int level) {

  int ii, dd;
  kmp_team_t *team;
  kmp_info_t *thr;

  KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level));
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  thr = __kmp_threads[gtid];
  team = thr->th.th_team;
  ii = team->t.t_level;

  if (thr->th.th_teams_microtask) {
    // We are in a teams region where multiple nested teams share the level.
    int tlevel = thr->th.th_teams_level; // level of the teams construct
    if (level <= tlevel) {
      KMP_DEBUG_ASSERT(ii >= tlevel);
    }
  }

  while (ii > level) {
    for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
    }
    if (team->t.t_serialized && (!dd)) {
      team = team->t.t_parent;
      continue;
    }
    if (ii > level) {
      team = team->t.t_parent;
      ii--;
    }
  }

  return team->t.t_nproc;
}
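// Illustrative sketch (not compiled into the runtime): the two routines above
// back omp_get_ancestor_thread_num() and omp_get_team_size(). With nested
// parallelism enabled, level 0 is the initial implicit team; only the public
// <omp.h> API is assumed here.
#if 0
#include <omp.h>
#include <stdio.h>
int main() {
  omp_set_nested(1);
#pragma omp parallel num_threads(2)
#pragma omp parallel num_threads(3)
  printf("outer tid=%d of %d, inner tid=%d of %d\n",
         omp_get_ancestor_thread_num(1), omp_get_team_size(1),
         omp_get_ancestor_thread_num(2), omp_get_team_size(2));
  return 0;
}
#endif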
kmp_r_sched_t __kmp_get_schedule_global() {
  // Build the run-time schedule from the globals __kmp_sched, __kmp_chunk,
  // __kmp_static and __kmp_guided, which may be changed independently.
  kmp_r_sched_t r_sched;

  if (__kmp_sched == kmp_sch_static) {
    // replace STATIC with a more detailed schedule (balanced or greedy)
    r_sched.r_sched_type = __kmp_static;
  } else if (__kmp_sched == kmp_sch_guided_chunked) {
    // replace GUIDED with a more detailed schedule (iterative or analytical)
    r_sched.r_sched_type = __kmp_guided;
  } else { // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
    r_sched.r_sched_type = __kmp_sched;
  }

  if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
    // __kmp_chunk may be wrong here (if it was never set)
    r_sched.chunk = KMP_DEFAULT_CHUNK;
  } else {
    r_sched.chunk = __kmp_chunk;
  }

  return r_sched;
}
/* Allocate argv space for the team, reusing the inline buffer when possible */
static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) {

  KMP_DEBUG_ASSERT(team);
  if (!realloc || argc > team->t.t_max_argc) {

    KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, "
                   "current entries=%d\n",
                   team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0));
    /* if heap space was previously allocated for args, free it */
    if (realloc && team->t.t_argv != &team->t.t_inline_argv[0])
      __kmp_free((void *)team->t.t_argv);

    if (argc <= KMP_INLINE_ARGV_ENTRIES) {
      /* use unused space in the cache line for arguments */
      team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv = &team->t.t_inline_argv[0];
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(
            -1, &team->t.t_inline_argv[0],
            &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES],
            (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv",
            team->t.t_id);
      }
    } else {
      /* allocate space for arguments in the heap */
      team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1))
                               ? KMP_MIN_MALLOC_ARGV_ENTRIES
                               : 2 * argc + 1;
      KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d "
                     "argv entries\n",
                     team->t.t_id, team->t.t_max_argc));
      team->t.t_argv =
          (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc);
      if (__kmp_storage_map) {
        __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0],
                                     &team->t.t_argv[team->t.t_max_argc],
                                     sizeof(void *) * team->t.t_max_argc,
                                     "team_%d.t_argv", team->t.t_id);
      }
    }
  }
}
static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) {
  int i;
  int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2;
  team->t.t_threads =
      (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth);
  team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate(
      sizeof(dispatch_shared_info_t) * num_disp_buff);
  team->t.t_dispatch =
      (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth);
  team->t.t_implicit_task_taskdata =
      (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth);
  team->t.t_max_nproc = max_nth;

  /* setup dispatch buffers */
  for (i = 0; i < num_disp_buff; ++i) {
    team->t.t_disp_buffer[i].buffer_index = i;
    team->t.t_disp_buffer[i].doacross_buf_idx = i;
  }
}
static void __kmp_free_team_arrays(kmp_team_t *team) {
  /* Note: this does not free the threads in t_threads (__kmp_free_threads) */
  int i;
  for (i = 0; i < team->t.t_max_nproc; ++i) {
    if (team->t.t_dispatch[i].th_disp_buffer != NULL) {
      __kmp_free(team->t.t_dispatch[i].th_disp_buffer);
      team->t.t_dispatch[i].th_disp_buffer = NULL;
    }
  }
  __kmp_free(team->t.t_threads);
  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  team->t.t_threads = NULL;
  team->t.t_disp_buffer = NULL;
  team->t.t_dispatch = NULL;
  team->t.t_implicit_task_taskdata = 0;
}
static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) {
  kmp_info_t **oldThreads = team->t.t_threads;

  __kmp_free(team->t.t_disp_buffer);
  __kmp_free(team->t.t_dispatch);
  __kmp_free(team->t.t_implicit_task_taskdata);
  __kmp_allocate_team_arrays(team, max_nth);

  KMP_MEMCPY(team->t.t_threads, oldThreads,
             team->t.t_nproc * sizeof(kmp_info_t *));

  __kmp_free(oldThreads);
}
static kmp_internal_control_t __kmp_get_global_icvs(void) {

  kmp_r_sched_t r_sched =
      __kmp_get_schedule_global(); // get current state of scheduling globals

  KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0);

  kmp_internal_control_t g_icvs = {
      0, // int serial_nesting_level; corresponds to th_team_serialized
      (kmp_int8)__kmp_dflt_nested, // int nested; internal control for nested
      // parallelism (per thread)
      (kmp_int8)__kmp_global.g.g_dynamic, // internal control for dynamic
      // adjustment of threads (per thread)
      (kmp_int8)__kmp_env_blocktime, // int bt_set; whether blocktime is
      // explicitly set
      __kmp_dflt_blocktime, // int blocktime; internal control for blocktime
      __kmp_dflt_team_nth, // int nproc; number of threads for the next
      // parallel region (per thread)
      __kmp_dflt_max_active_levels, // int max_active_levels;
      r_sched, // kmp_r_sched_t sched; runtime schedule {sched,chunk} pair
      __kmp_nested_proc_bind.bind_types[0],
      __kmp_default_device,
      NULL // struct kmp_internal_control *next;
  };

  return g_icvs;
}

static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) {

  kmp_internal_control_t gx_icvs;
  gx_icvs.serial_nesting_level =
      0; // probably = team->t.t_serialized for a serialized team?
  copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs);
  gx_icvs.next = NULL;

  return gx_icvs;
}
static void __kmp_initialize_root(kmp_root_t *root) {
  int f;
  kmp_team_t *root_team;
  kmp_team_t *hot_team;
  int hot_team_max_nth;
  kmp_r_sched_t r_sched =
      __kmp_get_schedule_global(); // get current state of scheduling globals
  kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
  KMP_DEBUG_ASSERT(root);
  KMP_ASSERT(!root->r.r_begin);

  /* setup the root state structure */
  __kmp_init_lock(&root->r.r_begin_lock);
  root->r.r_begin = FALSE;
  root->r.r_active = FALSE;
  root->r.r_in_parallel = 0;
  root->r.r_blocktime = __kmp_dflt_blocktime;
  root->r.r_nested = __kmp_dflt_nested;
  root->r.r_cg_nthreads = 1;

  /* setup the root team for this task */
  KF_TRACE(10, ("__kmp_initialize_root: before root_team\n"));

  root_team =
      __kmp_allocate_team(root,
                          1, // new_nproc
                          1, // max_nproc
#if OMPT_SUPPORT
                          ompt_data_none, // root parallel id
#endif
                          __kmp_nested_proc_bind.bind_types[0],
                          &r_icvs,
                          0 // argc
                          USE_NESTED_HOT_ARG(NULL) // master thread is unknown
                          );
  // A non-NULL value makes the debugger display the root team.
  TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0));

  KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team));

  root->r.r_root_team = root_team;
  root_team->t.t_control_stack_top = NULL;

  /* initialize root team */
  root_team->t.t_threads[0] = NULL;
  root_team->t.t_nproc = 1;
  root_team->t.t_serialized = 1;
  root_team->t.t_sched.sched = r_sched.sched;
  KA_TRACE(
      20,
      ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n",
       root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));

  /* setup the hot team for this task */
  KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n"));

  hot_team =
      __kmp_allocate_team(root,
                          1, // new_nproc
                          __kmp_dflt_team_nth_ub * 2, // max_nproc
#if OMPT_SUPPORT
                          ompt_data_none, // root parallel id
#endif
                          __kmp_nested_proc_bind.bind_types[0],
                          &r_icvs,
                          0 // argc
                          USE_NESTED_HOT_ARG(NULL) // master thread is unknown
                          );
  KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team));

  root->r.r_hot_team = hot_team;
  root_team->t.t_control_stack_top = NULL;

  /* first-time initialization */
  hot_team->t.t_parent = root_team;

  /* initialize hot team */
  hot_team_max_nth = hot_team->t.t_max_nproc;
  for (f = 0; f < hot_team_max_nth; ++f) {
    hot_team->t.t_threads[f] = NULL;
  }
  hot_team->t.t_nproc = 1;
  hot_team->t.t_sched.sched = r_sched.sched;
  hot_team->t.t_size_changed = 0;
}
3246 typedef struct kmp_team_list_item {
3247 kmp_team_p
const *entry;
3248 struct kmp_team_list_item *next;
3249 } kmp_team_list_item_t;
3250 typedef kmp_team_list_item_t *kmp_team_list_t;
3252 static void __kmp_print_structure_team_accum(
3253 kmp_team_list_t list,
3254 kmp_team_p
const *team
3264 KMP_DEBUG_ASSERT(list != NULL);
3269 __kmp_print_structure_team_accum(list, team->t.t_parent);
3270 __kmp_print_structure_team_accum(list, team->t.t_next_pool);
3274 while (l->next != NULL && l->entry != team) {
3277 if (l->next != NULL) {
3283 while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) {
3289 kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
3290 sizeof(kmp_team_list_item_t));
static void __kmp_print_structure_team(char const *title,
                                       kmp_team_p const *team) {
  __kmp_printf("%s", title);
  if (team != NULL) {
    __kmp_printf("%2x %p\n", team->t.t_id, team);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}

static void __kmp_print_structure_thread(char const *title,
                                         kmp_info_p const *thread) {
  __kmp_printf("%s", title);
  if (thread != NULL) {
    __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread);
  } else {
    __kmp_printf(" - (nil)\n");
  }
}
3318 void __kmp_print_structure(
void) {
3320 kmp_team_list_t list;
3324 (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(
sizeof(kmp_team_list_item_t));
3328 __kmp_printf(
"\n------------------------------\nGlobal Thread " 3329 "Table\n------------------------------\n");
3332 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3333 __kmp_printf(
"%2d", gtid);
3334 if (__kmp_threads != NULL) {
3335 __kmp_printf(
" %p", __kmp_threads[gtid]);
3337 if (__kmp_root != NULL) {
3338 __kmp_printf(
" %p", __kmp_root[gtid]);
3345 __kmp_printf(
"\n------------------------------\nThreads\n--------------------" 3347 if (__kmp_threads != NULL) {
3349 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3350 kmp_info_t
const *thread = __kmp_threads[gtid];
3351 if (thread != NULL) {
3352 __kmp_printf(
"GTID %2d %p:\n", gtid, thread);
3353 __kmp_printf(
" Our Root: %p\n", thread->th.th_root);
3354 __kmp_print_structure_team(
" Our Team: ", thread->th.th_team);
3355 __kmp_print_structure_team(
" Serial Team: ",
3356 thread->th.th_serial_team);
3357 __kmp_printf(
" Threads: %2d\n", thread->th.th_team_nproc);
3358 __kmp_print_structure_thread(
" Master: ",
3359 thread->th.th_team_master);
3360 __kmp_printf(
" Serialized?: %2d\n", thread->th.th_team_serialized);
3361 __kmp_printf(
" Set NProc: %2d\n", thread->th.th_set_nproc);
3363 __kmp_printf(
" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind);
3365 __kmp_print_structure_thread(
" Next in pool: ",
3366 thread->th.th_next_pool);
3368 __kmp_print_structure_team_accum(list, thread->th.th_team);
3369 __kmp_print_structure_team_accum(list, thread->th.th_serial_team);
3373 __kmp_printf(
"Threads array is not allocated.\n");
3377 __kmp_printf(
"\n------------------------------\nUbers\n----------------------" 3379 if (__kmp_root != NULL) {
3381 for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) {
3382 kmp_root_t
const *root = __kmp_root[gtid];
3384 __kmp_printf(
"GTID %2d %p:\n", gtid, root);
3385 __kmp_print_structure_team(
" Root Team: ", root->r.r_root_team);
3386 __kmp_print_structure_team(
" Hot Team: ", root->r.r_hot_team);
3387 __kmp_print_structure_thread(
" Uber Thread: ",
3388 root->r.r_uber_thread);
3389 __kmp_printf(
" Active?: %2d\n", root->r.r_active);
3390 __kmp_printf(
" Nested?: %2d\n", root->r.r_nested);
3391 __kmp_printf(
" In Parallel: %2d\n", root->r.r_in_parallel);
3393 __kmp_print_structure_team_accum(list, root->r.r_root_team);
3394 __kmp_print_structure_team_accum(list, root->r.r_hot_team);
3398 __kmp_printf(
"Ubers array is not allocated.\n");
3401 __kmp_printf(
"\n------------------------------\nTeams\n----------------------" 3403 while (list->next != NULL) {
3404 kmp_team_p
const *team = list->entry;
3406 __kmp_printf(
"Team %2x %p:\n", team->t.t_id, team);
3407 __kmp_print_structure_team(
" Parent Team: ", team->t.t_parent);
3408 __kmp_printf(
" Master TID: %2d\n", team->t.t_master_tid);
3409 __kmp_printf(
" Max threads: %2d\n", team->t.t_max_nproc);
3410 __kmp_printf(
" Levels of serial: %2d\n", team->t.t_serialized);
3411 __kmp_printf(
" Number threads: %2d\n", team->t.t_nproc);
3412 for (i = 0; i < team->t.t_nproc; ++i) {
3413 __kmp_printf(
" Thread %2d: ", i);
3414 __kmp_print_structure_thread(
"", team->t.t_threads[i]);
3416 __kmp_print_structure_team(
" Next in pool: ", team->t.t_next_pool);
3422 __kmp_printf(
"\n------------------------------\nPools\n----------------------" 3424 __kmp_print_structure_thread(
"Thread pool: ",
3425 CCAST(kmp_info_t *, __kmp_thread_pool));
3426 __kmp_print_structure_team(
"Team pool: ",
3427 CCAST(kmp_team_t *, __kmp_team_pool));
3431 while (list != NULL) {
3432 kmp_team_list_item_t *item = list;
3434 KMP_INTERNAL_FREE(item);
/* Table of prime multipliers for the per-thread random number generators. */
static const unsigned __kmp_primes[] = {
    0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877,
    0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231,
    0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201,
    0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3,
    0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7,
    0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9,
    0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45,
    0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7,
    0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363,
    0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3,
    0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f};

/* __kmp_get_random: Get a random number using a linear congruential method. */
unsigned short __kmp_get_random(kmp_info_t *thread) {
  unsigned x = thread->th.th_x;
  unsigned short r = x >> 16;

  thread->th.th_x = x * thread->th.th_a + 1;

  KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n",
                thread->th.th_info.ds.ds_tid, r));

  return r;
}

/* __kmp_init_random: Initialize a random number generator */
void __kmp_init_random(kmp_info_t *thread) {
  unsigned seed = thread->th.th_info.ds.ds_tid;

  thread->th.th_a =
      __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))];
  thread->th.th_x = (seed + 1) * thread->th.th_a + 1;
  KA_TRACE(30,
           ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a));
}
/* Reclaim array entries for root threads that are already dead; returns the
   number of entries reclaimed. */
static int __kmp_reclaim_dead_roots(void) {
  int i, r = 0;

  for (i = 0; i < __kmp_threads_capacity; ++i) {
    if (KMP_UBER_GTID(i) &&
        !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) &&
        !__kmp_root[i]->r.r_active) { // only roots that died in non-active state
      r += __kmp_unregister_root_other_thread(i);
    }
  }
  return r;
}
/* Expand the threads array to make room for nNeed more entries. Returns the
   number of slots added (which may be zero). */
static int __kmp_expand_threads(int nNeed) {
  int added = 0;
  int minimumRequiredCapacity;
  int newCapacity;
  kmp_info_t **newThreads;
  kmp_root_t **newRoot;

#if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB
  /* only for Windows static library */
  added = __kmp_reclaim_dead_roots();
#endif
  if (nNeed <= 0)
    return added;

  // __kmp_threads_capacity must never exceed __kmp_sys_max_nth.
  KMP_DEBUG_ASSERT(__kmp_sys_max_nth >= __kmp_threads_capacity);

  /* compute expansion headroom to check if we can expand */
  if (__kmp_sys_max_nth - __kmp_threads_capacity < nNeed) {
    /* possible expansion too small -- give up */
    return added;
  }
  minimumRequiredCapacity = __kmp_threads_capacity + nNeed;

  newCapacity = __kmp_threads_capacity;
  do {
    newCapacity = newCapacity <= (__kmp_sys_max_nth >> 1) ? (newCapacity << 1)
                                                          : __kmp_sys_max_nth;
  } while (newCapacity < minimumRequiredCapacity);
  newThreads = (kmp_info_t **)__kmp_allocate(
      (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + CACHE_LINE);
  newRoot =
      (kmp_root_t **)((char *)newThreads + sizeof(kmp_info_t *) * newCapacity);
  KMP_MEMCPY(newThreads, __kmp_threads,
             __kmp_threads_capacity * sizeof(kmp_info_t *));
  KMP_MEMCPY(newRoot, __kmp_root,
             __kmp_threads_capacity * sizeof(kmp_root_t *));

  kmp_info_t **temp_threads = __kmp_threads;
  *(kmp_info_t * *volatile *)&__kmp_threads = newThreads;
  *(kmp_root_t * *volatile *)&__kmp_root = newRoot;
  __kmp_free(temp_threads);
  added += newCapacity - __kmp_threads_capacity;
  *(volatile int *)&__kmp_threads_capacity = newCapacity;

  if (newCapacity > __kmp_tp_capacity) {
    __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock);
    if (__kmp_tp_cached && newCapacity > __kmp_tp_capacity) {
      __kmp_threadprivate_resize_cache(newCapacity);
    } else { // increase __kmp_tp_capacity to match the kmp_threads size
      *(volatile int *)&__kmp_tp_capacity = newCapacity;
    }
    __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock);
  }

  return added;
}
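// Illustrative sketch (not compiled into the runtime): the expansion above
// grows the combined threads/root arrays by doubling until the request fits,
// saturating at the system-wide maximum. The stand-alone model below uses a
// hypothetical name (grow_capacity); the caller is assumed to have checked
// that capacity + need does not exceed sys_max, as the routine above does.
#if 0
static int grow_capacity(int capacity, int need, int sys_max) {
  int required = capacity + need;
  int new_cap = capacity > 0 ? capacity : 1; // guard against a zero start
  do { // double, but never past the system maximum
    new_cap = new_cap <= (sys_max >> 1) ? (new_cap << 1) : sys_max;
  } while (new_cap < required);
  return new_cap;
}
#endif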
3606 int __kmp_register_root(
int initial_thread) {
3607 kmp_info_t *root_thread;
3611 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
3612 KA_TRACE(20, (
"__kmp_register_root: entered\n"));
3629 capacity = __kmp_threads_capacity;
3630 if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) {
3635 if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1)) {
3636 if (__kmp_tp_cached) {
3637 __kmp_fatal(KMP_MSG(CantRegisterNewThread),
3638 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
3639 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
3641 __kmp_fatal(KMP_MSG(CantRegisterNewThread), KMP_HNT(SystemLimitOnThreads),
3649 for (gtid = (initial_thread ? 0 : 1); TCR_PTR(__kmp_threads[gtid]) != NULL;
3653 (
"__kmp_register_root: found slot in threads array: T#%d\n", gtid));
3654 KMP_ASSERT(gtid < __kmp_threads_capacity);
3658 TCW_4(__kmp_nth, __kmp_nth + 1);
3662 if (__kmp_adjust_gtid_mode) {
3663 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
3664 if (TCR_4(__kmp_gtid_mode) != 2) {
3665 TCW_4(__kmp_gtid_mode, 2);
3668 if (TCR_4(__kmp_gtid_mode) != 1) {
3669 TCW_4(__kmp_gtid_mode, 1);
3674 #ifdef KMP_ADJUST_BLOCKTIME 3677 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
3678 if (__kmp_nth > __kmp_avail_proc) {
3679 __kmp_zero_bt = TRUE;
3685 if (!(root = __kmp_root[gtid])) {
3686 root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(
sizeof(kmp_root_t));
3687 KMP_DEBUG_ASSERT(!root->r.r_root_team);
3690 #if KMP_STATS_ENABLED 3692 __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3694 KMP_SET_THREAD_STATE(SERIAL_REGION);
3697 __kmp_initialize_root(root);
3700 if (root->r.r_uber_thread) {
3701 root_thread = root->r.r_uber_thread;
3703 root_thread = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
3704 if (__kmp_storage_map) {
3705 __kmp_print_thread_storage_map(root_thread, gtid);
3707 root_thread->th.th_info.ds.ds_gtid = gtid;
3709 root_thread->th.ompt_thread_info.thread_data.ptr = NULL;
3711 root_thread->th.th_root = root;
3712 if (__kmp_env_consistency_check) {
3713 root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid);
3716 __kmp_initialize_fast_memory(root_thread);
3720 KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL);
3721 __kmp_initialize_bget(root_thread);
3723 __kmp_init_random(root_thread);
3727 if (!root_thread->th.th_serial_team) {
3728 kmp_internal_control_t r_icvs = __kmp_get_global_icvs();
3729 KF_TRACE(10, (
"__kmp_register_root: before serial_team\n"));
3730 root_thread->th.th_serial_team =
3731 __kmp_allocate_team(root, 1, 1,
3738 &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
3740 KMP_ASSERT(root_thread->th.th_serial_team);
3741 KF_TRACE(10, (
"__kmp_register_root: after serial_team = %p\n",
3742 root_thread->th.th_serial_team));
3745 TCW_SYNC_PTR(__kmp_threads[gtid], root_thread);
3747 root->r.r_root_team->t.t_threads[0] = root_thread;
3748 root->r.r_hot_team->t.t_threads[0] = root_thread;
3749 root_thread->th.th_serial_team->t.t_threads[0] = root_thread;
3751 root_thread->th.th_serial_team->t.t_serialized = 0;
3752 root->r.r_uber_thread = root_thread;
3755 __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid);
3756 TCW_4(__kmp_init_gtid, TRUE);
3759 __kmp_gtid_set_specific(gtid);
3762 __kmp_itt_thread_name(gtid);
3765 #ifdef KMP_TDATA_GTID 3768 __kmp_create_worker(gtid, root_thread, __kmp_stksize);
3769 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid);
3771 KA_TRACE(20, (
"__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, " 3773 gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team),
3774 root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE,
3775 KMP_INIT_BARRIER_STATE));
3778 for (b = 0; b < bs_last_barrier; ++b) {
3779 root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE;
3781 root_thread->th.th_bar[b].bb.b_worker_arrived = 0;
3785 KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived ==
3786 KMP_INIT_BARRIER_STATE);
3788 #if KMP_AFFINITY_SUPPORTED 3790 root_thread->th.th_current_place = KMP_PLACE_UNDEFINED;
3791 root_thread->th.th_new_place = KMP_PLACE_UNDEFINED;
3792 root_thread->th.th_first_place = KMP_PLACE_UNDEFINED;
3793 root_thread->th.th_last_place = KMP_PLACE_UNDEFINED;
3796 if (TCR_4(__kmp_init_middle)) {
3797 __kmp_affinity_set_init_mask(gtid, TRUE);
3801 __kmp_root_counter++;
3804 if (!initial_thread && ompt_enabled.enabled) {
3806 ompt_thread_t *root_thread = ompt_get_thread();
3808 ompt_set_thread_state(root_thread, omp_state_overhead);
3810 if (ompt_enabled.ompt_callback_thread_begin) {
3811 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
3812 ompt_thread_initial, __ompt_get_thread_data_internal());
3814 ompt_data_t *task_data;
3815 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
3816 if (ompt_enabled.ompt_callback_task_create) {
3817 ompt_callbacks.ompt_callback(ompt_callback_task_create)(
3818 NULL, NULL, task_data, ompt_task_initial, 0, NULL);
3822 ompt_set_thread_state(root_thread, omp_state_work_serial);
3827 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
#if KMP_NESTED_HOT_TEAMS
// Free the threads of the hot team at the given level and, recursively, any
// nested hot teams below it. Returns the number of threads released.
static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level,
                                const int max_level) {
  int i, n, nth;
  kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams;
  if (!hot_teams || !hot_teams[level].hot_team) {
    return 0;
  }
  KMP_DEBUG_ASSERT(level < max_level);
  kmp_team_t *team = hot_teams[level].hot_team;
  nth = hot_teams[level].hot_team_nth;
  n = nth - 1; // master is not freed
  if (level < max_level - 1) {
    for (i = 0; i < nth; ++i) {
      kmp_info_t *th = team->t.t_threads[i];
      n += __kmp_free_hot_teams(root, th, level + 1, max_level);
      if (i > 0 && th->th.th_hot_teams) {
        __kmp_free(th->th.th_hot_teams);
        th->th.th_hot_teams = NULL;
      }
    }
  }
  __kmp_free_team(root, team, NULL);
  return n;
}
#endif
3861 static int __kmp_reset_root(
int gtid, kmp_root_t *root) {
3862 kmp_team_t *root_team = root->r.r_root_team;
3863 kmp_team_t *hot_team = root->r.r_hot_team;
3864 int n = hot_team->t.t_nproc;
3867 KMP_DEBUG_ASSERT(!root->r.r_active);
3869 root->r.r_root_team = NULL;
3870 root->r.r_hot_team = NULL;
3873 __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL));
3874 #if KMP_NESTED_HOT_TEAMS 3875 if (__kmp_hot_teams_max_level >
3877 for (i = 0; i < hot_team->t.t_nproc; ++i) {
3878 kmp_info_t *th = hot_team->t.t_threads[i];
3879 if (__kmp_hot_teams_max_level > 1) {
3880 n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level);
3882 if (th->th.th_hot_teams) {
3883 __kmp_free(th->th.th_hot_teams);
3884 th->th.th_hot_teams = NULL;
3889 __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL));
3894 if (__kmp_tasking_mode != tskm_immediate_exec) {
3895 __kmp_wait_to_unref_task_teams();
3901 10, (
"__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC
3903 (LPVOID) & (root->r.r_uber_thread->th),
3904 root->r.r_uber_thread->th.th_info.ds.ds_thread));
3905 __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread);
3909 if (ompt_enabled.ompt_callback_thread_end) {
3910 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(
3911 &(root->r.r_uber_thread->th.ompt_thread_info.thread_data));
3917 root->r.r_cg_nthreads--;
3919 __kmp_reap_thread(root->r.r_uber_thread, 1);
3923 root->r.r_uber_thread = NULL;
3925 root->r.r_begin = FALSE;
void __kmp_unregister_root_current_thread(int gtid) {
  KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid));
  /* This lock should be OK: __kmp_unregister_root_current_thread is never
     called during an abort, only during a normal close. */
  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
  if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
    KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, "
                  "exiting T#%d\n",
                  gtid));
    __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
    return;
  }
  kmp_root_t *root = __kmp_root[gtid];

  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

  kmp_info_t *thread = __kmp_threads[gtid];
  kmp_team_t *team = thread->th.th_team;
  kmp_task_team_t *task_team = thread->th.th_task_team;

  // We need to wait for the proxy tasks before finishing the thread.
  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) {
    // The runtime is shutting down, so we won't report any events.
    thread->th.ompt_thread_info.state = omp_state_undefined;
    __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
  }

  __kmp_reset_root(gtid, root);

  /* free up this thread slot */
  __kmp_gtid_set_specific(KMP_GTID_DNE);
#ifdef KMP_TDATA_GTID
  __kmp_gtid = KMP_GTID_DNE;
#endif

  KC_TRACE(10,
           ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid));

  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
}
/* __kmp_forkjoin_lock must already be held.
   Unregisters a root thread that is not the current thread. Returns the number
   of __kmp_threads entries freed as a result. */
static int __kmp_unregister_root_other_thread(int gtid) {
  kmp_root_t *root = __kmp_root[gtid];
  int r;

  KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid));
  KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]);
  KMP_ASSERT(KMP_UBER_GTID(gtid));
  KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root);
  KMP_ASSERT(root->r.r_active == FALSE);

  r = __kmp_reset_root(gtid, root);
  KC_TRACE(10,
           ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid));
  return r;
}
void __kmp_task_info() {

  kmp_int32 gtid = __kmp_entry_gtid();
  kmp_int32 tid = __kmp_tid_from_gtid(gtid);
  kmp_info_t *this_thr = __kmp_threads[gtid];
  kmp_team_t *steam = this_thr->th.th_serial_team;
  kmp_team_t *team = this_thr->th.th_team;

  __kmp_printf("__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p "
               "ptask=%p\n",
               gtid, tid, this_thr, team, this_thr->th.th_current_task,
               team->t.t_implicit_task_taskdata[tid].td_parent);
}
4022 static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team,
4023 int tid,
int gtid) {
4027 kmp_info_t *master = team->t.t_threads[0];
4028 KMP_DEBUG_ASSERT(this_thr != NULL);
4029 KMP_DEBUG_ASSERT(this_thr->th.th_serial_team);
4030 KMP_DEBUG_ASSERT(team);
4031 KMP_DEBUG_ASSERT(team->t.t_threads);
4032 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4033 KMP_DEBUG_ASSERT(master);
4034 KMP_DEBUG_ASSERT(master->th.th_root);
4038 TCW_SYNC_PTR(this_thr->th.th_team, team);
4040 this_thr->th.th_info.ds.ds_tid = tid;
4041 this_thr->th.th_set_nproc = 0;
4042 if (__kmp_tasking_mode != tskm_immediate_exec)
4045 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
4047 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
4049 this_thr->th.th_set_proc_bind = proc_bind_default;
4050 #if KMP_AFFINITY_SUPPORTED 4051 this_thr->th.th_new_place = this_thr->th.th_current_place;
4054 this_thr->th.th_root = master->th.th_root;
4057 this_thr->th.th_team_nproc = team->t.t_nproc;
4058 this_thr->th.th_team_master = master;
4059 this_thr->th.th_team_serialized = team->t.t_serialized;
4060 TCW_PTR(this_thr->th.th_sleep_loc, NULL);
4062 KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata);
4064 KF_TRACE(10, (
"__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n",
4065 tid, gtid, this_thr, this_thr->th.th_current_task));
4067 __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr,
4070 KF_TRACE(10, (
"__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n",
4071 tid, gtid, this_thr, this_thr->th.th_current_task));
4076 this_thr->th.th_dispatch = &team->t.t_dispatch[tid];
4078 this_thr->th.th_local.this_construct = 0;
4080 if (!this_thr->th.th_pri_common) {
4081 this_thr->th.th_pri_common =
4082 (
struct common_table *)__kmp_allocate(
sizeof(
struct common_table));
4083 if (__kmp_storage_map) {
4084 __kmp_print_storage_map_gtid(
4085 gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1,
4086 sizeof(
struct common_table),
"th_%d.th_pri_common\n", gtid);
4088 this_thr->th.th_pri_head = NULL;
4093 volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch;
4096 sizeof(dispatch_private_info_t) *
4097 (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers);
4098 KD_TRACE(10, (
"__kmp_initialize_info: T#%d max_nproc: %d\n", gtid,
4099 team->t.t_max_nproc));
4100 KMP_ASSERT(dispatch);
4101 KMP_DEBUG_ASSERT(team->t.t_dispatch);
4102 KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]);
4104 dispatch->th_disp_index = 0;
4106 dispatch->th_doacross_buf_idx = 0;
4108 if (!dispatch->th_disp_buffer) {
4109 dispatch->th_disp_buffer =
4110 (dispatch_private_info_t *)__kmp_allocate(disp_size);
4112 if (__kmp_storage_map) {
4113 __kmp_print_storage_map_gtid(
4114 gtid, &dispatch->th_disp_buffer[0],
4115 &dispatch->th_disp_buffer[team->t.t_max_nproc == 1
4117 : __kmp_dispatch_num_buffers],
4118 disp_size,
"th_%d.th_dispatch.th_disp_buffer " 4119 "(team_%d.t_dispatch[%d].th_disp_buffer)",
4120 gtid, team->t.t_id, gtid);
4123 memset(&dispatch->th_disp_buffer[0],
'\0', disp_size);
4126 dispatch->th_dispatch_pr_current = 0;
4127 dispatch->th_dispatch_sh_current = 0;
4129 dispatch->th_deo_fcn = 0;
4130 dispatch->th_dxo_fcn = 0;
4133 this_thr->th.th_next_pool = NULL;
4135 if (!this_thr->th.th_task_state_memo_stack) {
4137 this_thr->th.th_task_state_memo_stack =
4138 (kmp_uint8 *)__kmp_allocate(4 *
sizeof(kmp_uint8));
4139 this_thr->th.th_task_state_top = 0;
4140 this_thr->th.th_task_state_stack_sz = 4;
4141 for (i = 0; i < this_thr->th.th_task_state_stack_sz;
4143 this_thr->th.th_task_state_memo_stack[i] = 0;
4146 KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
4147 KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);
4157 kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
4159 kmp_team_t *serial_team;
4160 kmp_info_t *new_thr;
4163 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
4164 KMP_DEBUG_ASSERT(root && team);
4165 #if !KMP_NESTED_HOT_TEAMS 4166 KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
4171 if (__kmp_thread_pool) {
4173 new_thr = CCAST(kmp_info_t *, __kmp_thread_pool);
4174 __kmp_thread_pool = (
volatile kmp_info_t *)new_thr->th.th_next_pool;
4175 if (new_thr == __kmp_thread_pool_insert_pt) {
4176 __kmp_thread_pool_insert_pt = NULL;
4178 TCW_4(new_thr->th.th_in_pool, FALSE);
4181 __kmp_thread_pool_nth--;
4183 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d using thread T#%d\n",
4184 __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid));
4185 KMP_ASSERT(!new_thr->th.th_team);
4186 KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity);
4187 KMP_DEBUG_ASSERT(__kmp_thread_pool_nth >= 0);
4190 __kmp_initialize_info(new_thr, team, new_tid,
4191 new_thr->th.th_info.ds.ds_gtid);
4192 KMP_DEBUG_ASSERT(new_thr->th.th_serial_team);
4194 TCW_4(__kmp_nth, __kmp_nth + 1);
4195 root->r.r_cg_nthreads++;
4197 new_thr->th.th_task_state = 0;
4198 new_thr->th.th_task_state_top = 0;
4199 new_thr->th.th_task_state_stack_sz = 4;
4201 #ifdef KMP_ADJUST_BLOCKTIME 4204 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4205 if (__kmp_nth > __kmp_avail_proc) {
4206 __kmp_zero_bt = TRUE;
4215 kmp_balign_t *balign = new_thr->th.th_bar;
4216 for (b = 0; b < bs_last_barrier; ++b)
4217 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
4220 KF_TRACE(10, (
"__kmp_allocate_thread: T#%d using thread %p T#%d\n",
4221 __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid));
4228 KMP_ASSERT(__kmp_nth == __kmp_all_nth);
4229 KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity);
4234 if (!TCR_4(__kmp_init_monitor)) {
4235 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
4236 if (!TCR_4(__kmp_init_monitor)) {
4237 KF_TRACE(10, (
"before __kmp_create_monitor\n"));
4238 TCW_4(__kmp_init_monitor, 1);
4239 __kmp_create_monitor(&__kmp_monitor);
4240 KF_TRACE(10, (
"after __kmp_create_monitor\n"));
4251 while (TCR_4(__kmp_init_monitor) < 2) {
4254 KF_TRACE(10, (
"after monitor thread has started\n"));
4257 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
4262 for (new_gtid = 1; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid) {
4263 KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity);
4267 new_thr = (kmp_info_t *)__kmp_allocate(
sizeof(kmp_info_t));
4269 TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
4271 if (__kmp_storage_map) {
4272 __kmp_print_thread_storage_map(new_thr, new_gtid);
4277 kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
4278 KF_TRACE(10, (
"__kmp_allocate_thread: before th_serial/serial_team\n"));
4279 new_thr->th.th_serial_team = serial_team =
4280 (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
4287 &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
4289 KMP_ASSERT(serial_team);
4290 serial_team->t.t_serialized = 0;
4292 serial_team->t.t_threads[0] = new_thr;
4294 (
"__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
4298 __kmp_initialize_info(new_thr, team, new_tid, new_gtid);
4301 __kmp_initialize_fast_memory(new_thr);
4305 KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
4306 __kmp_initialize_bget(new_thr);
4309 __kmp_init_random(new_thr);
4313 (
"__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
4314 __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
4317 kmp_balign_t *balign = new_thr->th.th_bar;
4318 for (b = 0; b < bs_last_barrier; ++b) {
4319 balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
4320 balign[b].bb.team = NULL;
4321 balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
4322 balign[b].bb.use_oncore_barrier = 0;
4325 new_thr->th.th_spin_here = FALSE;
4326 new_thr->th.th_next_waiting = 0;
4328 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED 4329 new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
4330 new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
4331 new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
4332 new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
4335 TCW_4(new_thr->th.th_in_pool, FALSE);
4336 new_thr->th.th_active_in_pool = FALSE;
4337 TCW_4(new_thr->th.th_active, TRUE);
4343 root->r.r_cg_nthreads++;
4347 if (__kmp_adjust_gtid_mode) {
4348 if (__kmp_all_nth >= __kmp_tls_gtid_min) {
4349 if (TCR_4(__kmp_gtid_mode) != 2) {
4350 TCW_4(__kmp_gtid_mode, 2);
4353 if (TCR_4(__kmp_gtid_mode) != 1) {
4354 TCW_4(__kmp_gtid_mode, 1);
4359 #ifdef KMP_ADJUST_BLOCKTIME 4362 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
4363 if (__kmp_nth > __kmp_avail_proc) {
4364 __kmp_zero_bt = TRUE;
4371 10, (
"__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
4372 __kmp_create_worker(new_gtid, new_thr, __kmp_stksize);
4374 (
"__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr));
4376 KA_TRACE(20, (
"__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(),
/* Reinitialize team */
static void __kmp_reinitialize_team(kmp_team_t *team,
                                    kmp_internal_control_t *new_icvs,
                                    ident_t *loc) {
  KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
  KMP_DEBUG_ASSERT(team && new_icvs);
  KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
  KMP_CHECK_UPDATE(team->t.t_ident, loc);

  KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID());
  // Copy ICVs to the master thread's implicit taskdata.
  __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE);
  copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs);

  KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n",
                team->t.t_threads[0], team));
}

/* Initialize the team data structure. This assumes that t_threads and
   t_max_nproc are already set. The arguments are not touched. */
static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
                                  kmp_internal_control_t *new_icvs,
                                  ident_t *loc) {
  KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team));

  /* verify */
  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc);
  KMP_DEBUG_ASSERT(team->t.t_threads);

  team->t.t_master_tid = 0; /* not needed */
  team->t.t_serialized = new_nproc > 1 ? 0 : 1;
  team->t.t_nproc = new_nproc;

  /* team->t.t_parent = NULL; would mess up the hot team */
  team->t.t_next_pool = NULL;

  TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */
  team->t.t_invoke = NULL; /* not needed */

  team->t.t_sched.sched = new_icvs->sched.sched;

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
  team->t.t_fp_control_saved = FALSE; /* not needed */
  team->t.t_x87_fpu_control_word = 0; /* not needed */
  team->t.t_mxcsr = 0; /* not needed */
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

  team->t.t_construct = 0;

  team->t.t_ordered.dt.t_value = 0;
  team->t.t_master_active = FALSE;

  memset(&team->t.t_taskq, '\0', sizeof(kmp_taskq_t));

  team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */

  team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */

  team->t.t_control_stack_top = NULL;

  __kmp_reinitialize_team(team, new_icvs, loc);

  KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team));
}

#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
/* Sets the full mask for the thread and returns the old mask; no changes to
   runtime structures. */
static void
__kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) {
  if (KMP_AFFINITY_CAPABLE()) {
    int status;
    if (old_mask != NULL) {
      status = __kmp_get_system_affinity(old_mask, TRUE);
      int error = errno;
      if (status != 0) {
        __kmp_fatal(KMP_MSG(ChangeThreadAffMaskError), KMP_ERR(error),
                    __kmp_msg_null);
      }
    }
    __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE);
  }
}
#endif
4480 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED 4486 static void __kmp_partition_places(kmp_team_t *team,
int update_master_only) {
4488 kmp_info_t *master_th = team->t.t_threads[0];
4489 KMP_DEBUG_ASSERT(master_th != NULL);
4490 kmp_proc_bind_t proc_bind = team->t.t_proc_bind;
4491 int first_place = master_th->th.th_first_place;
4492 int last_place = master_th->th.th_last_place;
4493 int masters_place = master_th->th.th_current_place;
4494 team->t.t_first_place = first_place;
4495 team->t.t_last_place = last_place;
4497 KA_TRACE(20, (
"__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) " 4498 "bound to place %d partition = [%d,%d]\n",
4499 proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]),
4500 team->t.t_id, masters_place, first_place, last_place));
4502 switch (proc_bind) {
4504 case proc_bind_default:
4507 KMP_DEBUG_ASSERT(team->t.t_nproc == 1);
4510 case proc_bind_master: {
4512 int n_th = team->t.t_nproc;
4513 for (f = 1; f < n_th; f++) {
4514 kmp_info_t *th = team->t.t_threads[f];
4515 KMP_DEBUG_ASSERT(th != NULL);
4516 th->th.th_first_place = first_place;
4517 th->th.th_last_place = last_place;
4518 th->th.th_new_place = masters_place;
4520 KA_TRACE(100, (
"__kmp_partition_places: master: T#%d(%d:%d) place %d " 4521 "partition = [%d,%d]\n",
4522 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4523 f, masters_place, first_place, last_place));
4527 case proc_bind_close: {
4529 int n_th = team->t.t_nproc;
4531 if (first_place <= last_place) {
4532 n_places = last_place - first_place + 1;
4534 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4536 if (n_th <= n_places) {
4537 int place = masters_place;
4538 for (f = 1; f < n_th; f++) {
4539 kmp_info_t *th = team->t.t_threads[f];
4540 KMP_DEBUG_ASSERT(th != NULL);
4542 if (place == last_place) {
4543 place = first_place;
4544 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4549 th->th.th_first_place = first_place;
4550 th->th.th_last_place = last_place;
4551 th->th.th_new_place = place;
4553 KA_TRACE(100, (
"__kmp_partition_places: close: T#%d(%d:%d) place %d " 4554 "partition = [%d,%d]\n",
4555 __kmp_gtid_from_thread(team->t.t_threads[f]),
4556 team->t.t_id, f, place, first_place, last_place));
4559 int S, rem, gap, s_count;
4560 S = n_th / n_places;
4562 rem = n_th - (S * n_places);
4563 gap = rem > 0 ? n_places / rem : n_places;
4564 int place = masters_place;
4566 for (f = 0; f < n_th; f++) {
4567 kmp_info_t *th = team->t.t_threads[f];
4568 KMP_DEBUG_ASSERT(th != NULL);
4570 th->th.th_first_place = first_place;
4571 th->th.th_last_place = last_place;
4572 th->th.th_new_place = place;
4575 if ((s_count == S) && rem && (gap_ct == gap)) {
4577 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4579 if (place == last_place) {
4580 place = first_place;
4581 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4589 }
else if (s_count == S) {
4590 if (place == last_place) {
4591 place = first_place;
4592 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4602 (
"__kmp_partition_places: close: T#%d(%d:%d) place %d " 4603 "partition = [%d,%d]\n",
4604 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f,
4605 th->th.th_new_place, first_place, last_place));
4607 KMP_DEBUG_ASSERT(place == masters_place);
4611 case proc_bind_spread: {
4613 int n_th = team->t.t_nproc;
4616 if (first_place <= last_place) {
4617 n_places = last_place - first_place + 1;
4619 n_places = __kmp_affinity_num_masks - first_place + last_place + 1;
4621 if (n_th <= n_places) {
4624 if (n_places != static_cast<int>(__kmp_affinity_num_masks)) {
4625 int S = n_places / n_th;
4626 int s_count, rem, gap, gap_ct;
4628 place = masters_place;
4629 rem = n_places - n_th * S;
4630 gap = rem ? n_th / rem : 1;
4633 if (update_master_only == 1)
4635 for (f = 0; f < thidx; f++) {
4636 kmp_info_t *th = team->t.t_threads[f];
4637 KMP_DEBUG_ASSERT(th != NULL);
4639 th->th.th_first_place = place;
4640 th->th.th_new_place = place;
4642 while (s_count < S) {
4643 if (place == last_place) {
4644 place = first_place;
4645 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4652 if (rem && (gap_ct == gap)) {
4653 if (place == last_place) {
4654 place = first_place;
4655 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4663 th->th.th_last_place = place;
4666 if (place == last_place) {
4667 place = first_place;
4668 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4675 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d " 4676 "partition = [%d,%d], __kmp_affinity_num_masks: %u\n",
4677 __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id,
4678 f, th->th.th_new_place, th->th.th_first_place,
4679 th->th.th_last_place, __kmp_affinity_num_masks));
4685 double current =
static_cast<double>(masters_place);
4687 (
static_cast<double>(n_places + 1) / static_cast<double>(n_th));
4692 if (update_master_only == 1)
4694 for (f = 0; f < thidx; f++) {
4695 first =
static_cast<int>(current);
4696 last =
static_cast<int>(current + spacing) - 1;
4697 KMP_DEBUG_ASSERT(last >= first);
4698 if (first >= n_places) {
4699 if (masters_place) {
4702 if (first == (masters_place + 1)) {
4703 KMP_DEBUG_ASSERT(f == n_th);
4706 if (last == masters_place) {
4707 KMP_DEBUG_ASSERT(f == (n_th - 1));
4711 KMP_DEBUG_ASSERT(f == n_th);
4716 if (last >= n_places) {
4717 last = (n_places - 1);
4722 KMP_DEBUG_ASSERT(0 <= first);
4723 KMP_DEBUG_ASSERT(n_places > first);
4724 KMP_DEBUG_ASSERT(0 <= last);
4725 KMP_DEBUG_ASSERT(n_places > last);
4726 KMP_DEBUG_ASSERT(last_place >= first_place);
4727 th = team->t.t_threads[f];
4728 KMP_DEBUG_ASSERT(th);
4729 th->th.th_first_place = first;
4730 th->th.th_new_place = place;
4731 th->th.th_last_place = last;
4734 (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d " 4735 "partition = [%d,%d], spacing = %.4f\n",
4736 __kmp_gtid_from_thread(team->t.t_threads[f]),
4737 team->t.t_id, f, th->th.th_new_place,
4738 th->th.th_first_place, th->th.th_last_place, spacing));
4742 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4744 int S, rem, gap, s_count;
4745 S = n_th / n_places;
4747 rem = n_th - (S * n_places);
4748 gap = rem > 0 ? n_places / rem : n_places;
4749 int place = masters_place;
4752 if (update_master_only == 1)
4754 for (f = 0; f < thidx; f++) {
4755 kmp_info_t *th = team->t.t_threads[f];
4756 KMP_DEBUG_ASSERT(th != NULL);
4758 th->th.th_first_place = place;
4759 th->th.th_last_place = place;
4760 th->th.th_new_place = place;
4763 if ((s_count == S) && rem && (gap_ct == gap)) {
4765 }
else if ((s_count == S + 1) && rem && (gap_ct == gap)) {
4767 if (place == last_place) {
4768 place = first_place;
4769 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4777 }
else if (s_count == S) {
4778 if (place == last_place) {
4779 place = first_place;
4780 }
else if (place == (
int)(__kmp_affinity_num_masks - 1)) {
4789 KA_TRACE(100, (
"__kmp_partition_places: spread: T#%d(%d:%d) place %d " 4790 "partition = [%d,%d]\n",
4791 __kmp_gtid_from_thread(team->t.t_threads[f]),
4792 team->t.t_id, f, th->th.th_new_place,
4793 th->th.th_first_place, th->th.th_last_place));
4795 KMP_DEBUG_ASSERT(update_master_only || place == masters_place);
4803 KA_TRACE(20, (
"__kmp_partition_places: exit T#%d\n", team->t.t_id));
4811 __kmp_allocate_team(kmp_root_t *root,
int new_nproc,
int max_nproc,
4813 ompt_data_t ompt_parallel_data,
4816 kmp_proc_bind_t new_proc_bind,
4818 kmp_internal_control_t *new_icvs,
4819 int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) {
4820 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
4823 int use_hot_team = !root->r.r_active;
4826 KA_TRACE(20, (
"__kmp_allocate_team: called\n"));
4827 KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0);
4828 KMP_DEBUG_ASSERT(max_nproc >= new_nproc);
4831 #if KMP_NESTED_HOT_TEAMS 4832 kmp_hot_team_ptr_t *hot_teams;
4834 team = master->th.th_team;
4835 level = team->t.t_active_level;
4836 if (master->th.th_teams_microtask) {
4837 if (master->th.th_teams_size.nteams > 1 &&
4840 (microtask_t)__kmp_teams_master ||
4841 master->th.th_teams_level <
4847 hot_teams = master->th.th_hot_teams;
4848 if (level < __kmp_hot_teams_max_level && hot_teams &&
4858 if (use_hot_team && new_nproc > 1) {
4859 KMP_DEBUG_ASSERT(new_nproc == max_nproc);
4860 #if KMP_NESTED_HOT_TEAMS 4861 team = hot_teams[level].hot_team;
4863 team = root->r.r_hot_team;
4866 if (__kmp_tasking_mode != tskm_immediate_exec) {
4867 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p " 4868 "task_team[1] = %p before reinit\n",
4869 team->t.t_task_team[0], team->t.t_task_team[1]));
4876 if (team->t.t_nproc == new_nproc) {
4877 KA_TRACE(20, (
"__kmp_allocate_team: reusing hot team\n"));
4880 if (team->t.t_size_changed == -1) {
4881 team->t.t_size_changed = 1;
4883 KMP_CHECK_UPDATE(team->t.t_size_changed, 0);
4887 kmp_r_sched_t new_sched = new_icvs->sched;
4889 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_sched.sched);
4891 __kmp_reinitialize_team(team, new_icvs,
4892 root->r.r_uber_thread->th.th_ident);
4894 KF_TRACE(10, (
"__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0,
4895 team->t.t_threads[0], team));
4896 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
4899 #if KMP_AFFINITY_SUPPORTED 4900 if ((team->t.t_size_changed == 0) &&
4901 (team->t.t_proc_bind == new_proc_bind)) {
4902 if (new_proc_bind == proc_bind_spread) {
4903 __kmp_partition_places(
4906 KA_TRACE(200, (
"__kmp_allocate_team: reusing hot team #%d bindings: " 4907 "proc_bind = %d, partition = [%d,%d]\n",
4908 team->t.t_id, new_proc_bind, team->t.t_first_place,
4909 team->t.t_last_place));
4911 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4912 __kmp_partition_places(team);
4915 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4918 }
else if (team->t.t_nproc > new_nproc) {
4920 (
"__kmp_allocate_team: decreasing hot team thread count to %d\n",
4923 team->t.t_size_changed = 1;
4924 #if KMP_NESTED_HOT_TEAMS 4925 if (__kmp_hot_teams_mode == 0) {
4928 KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc);
4929 hot_teams[level].hot_team_nth = new_nproc;
4930 #endif // KMP_NESTED_HOT_TEAMS 4932 for (f = new_nproc; f < team->t.t_nproc; f++) {
4933 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
4934 if (__kmp_tasking_mode != tskm_immediate_exec) {
4937 team->t.t_threads[f]->th.th_task_team = NULL;
4939 __kmp_free_thread(team->t.t_threads[f]);
4940 team->t.t_threads[f] = NULL;
4942 #if KMP_NESTED_HOT_TEAMS 4947 for (f = new_nproc; f < team->t.t_nproc; ++f) {
4948 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
4949 kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar;
4950 for (
int b = 0; b < bs_last_barrier; ++b) {
4951 if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) {
4952 balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
4954 KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0);
4958 #endif // KMP_NESTED_HOT_TEAMS 4959 team->t.t_nproc = new_nproc;
4961 KMP_CHECK_UPDATE(team->t.t_sched.sched, new_icvs->sched.sched);
4962 __kmp_reinitialize_team(team, new_icvs,
4963 root->r.r_uber_thread->th.th_ident);
4966 for (f = 0; f < new_nproc; ++f) {
4967 team->t.t_threads[f]->th.th_team_nproc = new_nproc;
4971 KF_TRACE(10, (
"__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0,
4972 team->t.t_threads[0], team));
4974 __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0);
4977 for (f = 0; f < team->t.t_nproc; f++) {
4978 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
4979 team->t.t_threads[f]->th.th_team_nproc ==
4985 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
4986 #if KMP_AFFINITY_SUPPORTED 4987 __kmp_partition_places(team);
4991 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED 4992 kmp_affin_mask_t *old_mask;
4993 if (KMP_AFFINITY_CAPABLE()) {
4994 KMP_CPU_ALLOC(old_mask);
4999 (
"__kmp_allocate_team: increasing hot team thread count to %d\n",
5002 team->t.t_size_changed = 1;
5004 #if KMP_NESTED_HOT_TEAMS 5005 int avail_threads = hot_teams[level].hot_team_nth;
5006 if (new_nproc < avail_threads)
5007 avail_threads = new_nproc;
5008 kmp_info_t **other_threads = team->t.t_threads;
5009 for (f = team->t.t_nproc; f < avail_threads; ++f) {
5013 kmp_balign_t *balign = other_threads[f]->th.th_bar;
5014 for (b = 0; b < bs_last_barrier; ++b) {
5015 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5016 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5018 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5022 if (hot_teams[level].hot_team_nth >= new_nproc) {
5025 KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1);
5026 team->t.t_nproc = new_nproc;
5032 hot_teams[level].hot_team_nth = new_nproc;
5033 #endif // KMP_NESTED_HOT_TEAMS 5034 if (team->t.t_max_nproc < new_nproc) {
5036 __kmp_reallocate_team_arrays(team, new_nproc);
5037 __kmp_reinitialize_team(team, new_icvs, NULL);
5040 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED 5045 __kmp_set_thread_affinity_mask_full_tmp(old_mask);
5049 for (f = team->t.t_nproc; f < new_nproc; f++) {
5050 kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f);
5051 KMP_DEBUG_ASSERT(new_worker);
5052 team->t.t_threads[f] = new_worker;
5055 (
"__kmp_allocate_team: team %d init T#%d arrived: " 5056 "join=%llu, plain=%llu\n",
5057 team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f,
5058 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
5059 team->t.t_bar[bs_plain_barrier].b_arrived));
5063 kmp_balign_t *balign = new_worker->th.th_bar;
5064 for (b = 0; b < bs_last_barrier; ++b) {
5065 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5066 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag !=
5067 KMP_BARRIER_PARENT_FLAG);
5069 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5075 #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED 5076 if (KMP_AFFINITY_CAPABLE()) {
5078 __kmp_set_system_affinity(old_mask, TRUE);
5079 KMP_CPU_FREE(old_mask);
5082 #if KMP_NESTED_HOT_TEAMS 5084 #endif // KMP_NESTED_HOT_TEAMS 5086 int old_nproc = team->t.t_nproc;
5088 __kmp_initialize_team(team, new_nproc, new_icvs,
5089 root->r.r_uber_thread->th.th_ident);
5092 KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc);
5093 for (f = 0; f < team->t.t_nproc; ++f)
5094 __kmp_initialize_info(team->t.t_threads[f], team, f,
5095 __kmp_gtid_from_tid(f, team));
5102 for (f = old_nproc; f < team->t.t_nproc; ++f)
5103 team->t.t_threads[f]->th.th_task_state =
5104 team->t.t_threads[0]->th.th_task_state_memo_stack[level];
5107 team->t.t_threads[0]->th.th_task_state;
5108 for (f = old_nproc; f < team->t.t_nproc; ++f)
5109 team->t.t_threads[f]->th.th_task_state = old_state;
5113 for (f = 0; f < team->t.t_nproc; ++f) {
5114 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
5115 team->t.t_threads[f]->th.th_team_nproc ==
5121 KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind);
5122 #if KMP_AFFINITY_SUPPORTED 5123 __kmp_partition_places(team);
5129 kmp_info_t *master = team->t.t_threads[0];
5130 if (master->th.th_teams_microtask) {
5131 for (f = 1; f < new_nproc; ++f) {
5133 kmp_info_t *thr = team->t.t_threads[f];
5134 thr->th.th_teams_microtask = master->th.th_teams_microtask;
5135 thr->th.th_teams_level = master->th.th_teams_level;
5136 thr->th.th_teams_size = master->th.th_teams_size;
5140 #if KMP_NESTED_HOT_TEAMS 5144 for (f = 1; f < new_nproc; ++f) {
5145 kmp_info_t *thr = team->t.t_threads[f];
5147 kmp_balign_t *balign = thr->th.th_bar;
5148 for (b = 0; b < bs_last_barrier; ++b) {
5149 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
5150 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
5152 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
5157 #endif // KMP_NESTED_HOT_TEAMS 5160 __kmp_alloc_argv_entries(argc, team, TRUE);
5161 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5165 KF_TRACE(10, (
" hot_team = %p\n", team));
5168 if (__kmp_tasking_mode != tskm_immediate_exec) {
5169 KA_TRACE(20, (
"__kmp_allocate_team: hot team task_team[0] = %p " 5170 "task_team[1] = %p after reinit\n",
5171 team->t.t_task_team[0], team->t.t_task_team[1]));
5176 __ompt_team_assign_id(team, ompt_parallel_data);
5186 for (team = CCAST(kmp_team_t *, __kmp_team_pool); (team);) {
5189 if (team->t.t_max_nproc >= max_nproc) {
5191 __kmp_team_pool = team->t.t_next_pool;
5194 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5196 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team[0] %p and " 5197 "task_team[1] %p to NULL\n",
5198 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5199 team->t.t_task_team[0] = NULL;
5200 team->t.t_task_team[1] = NULL;
5203 __kmp_alloc_argv_entries(argc, team, TRUE);
5204 KMP_CHECK_UPDATE(team->t.t_argc, argc);
5207 20, (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5208 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5211 for (b = 0; b < bs_last_barrier; ++b) {
5212 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5214 team->t.t_bar[b].b_master_arrived = 0;
5215 team->t.t_bar[b].b_team_arrived = 0;
5221 team->t.t_proc_bind = new_proc_bind;
5224 KA_TRACE(20, (
"__kmp_allocate_team: using team from pool %d.\n",
5228 __ompt_team_assign_id(team, ompt_parallel_data);
5240 team = __kmp_reap_team(team);
5241 __kmp_team_pool = team;
5246 team = (kmp_team_t *)__kmp_allocate(
sizeof(kmp_team_t));
5249 team->t.t_max_nproc = max_nproc;
5252 __kmp_allocate_team_arrays(team, max_nproc);
5254 KA_TRACE(20, (
"__kmp_allocate_team: making a new team\n"));
5255 __kmp_initialize_team(team, new_nproc, new_icvs, NULL);
5257 KA_TRACE(20, (
"__kmp_allocate_team: setting task_team[0] %p and task_team[1] " 5259 &team->t.t_task_team[0], &team->t.t_task_team[1]));
5260 team->t.t_task_team[0] = NULL;
5262 team->t.t_task_team[1] = NULL;
5265 if (__kmp_storage_map) {
5266 __kmp_print_team_storage_map(
"team", team, team->t.t_id, new_nproc);
5270 __kmp_alloc_argv_entries(argc, team, FALSE);
5271 team->t.t_argc = argc;
5274 (
"__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n",
5275 team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));
5278 for (b = 0; b < bs_last_barrier; ++b) {
5279 team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE;
5281 team->t.t_bar[b].b_master_arrived = 0;
5282 team->t.t_bar[b].b_team_arrived = 0;
5288 team->t.t_proc_bind = new_proc_bind;
5292 __ompt_team_assign_id(team, ompt_parallel_data);
5293 team->t.ompt_serialized_team_info = NULL;
5298 KA_TRACE(20, (
"__kmp_allocate_team: done creating a new team %d.\n",
5309 void __kmp_free_team(kmp_root_t *root,
5310 kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) {
5312 KA_TRACE(20, (
"__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(),
5316 KMP_DEBUG_ASSERT(root);
5317 KMP_DEBUG_ASSERT(team);
5318 KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc);
5319 KMP_DEBUG_ASSERT(team->t.t_threads);
5321 int use_hot_team = team == root->r.r_hot_team;
5322 #if KMP_NESTED_HOT_TEAMS 5324 kmp_hot_team_ptr_t *hot_teams;
5326 level = team->t.t_active_level - 1;
5327 if (master->th.th_teams_microtask) {
5328 if (master->th.th_teams_size.nteams > 1) {
5332 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
5333 master->th.th_teams_level == team->t.t_level) {
5338 hot_teams = master->th.th_hot_teams;
5339 if (level < __kmp_hot_teams_max_level) {
5340 KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team);
5344 #endif // KMP_NESTED_HOT_TEAMS 5347 TCW_SYNC_PTR(team->t.t_pkfn,
5349 team->t.t_copyin_counter = 0;
5353 if (!use_hot_team) {
5354 if (__kmp_tasking_mode != tskm_immediate_exec) {
5356 for (f = 1; f < team->t.t_nproc; ++f) {
5357 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5358 kmp_info_t *th = team->t.t_threads[f];
5359 volatile kmp_uint32 *state = &th->th.th_reap_state;
5360 while (*state != KMP_SAFE_TO_REAP) {
5364 if (!__kmp_is_thread_alive(th, &ecode)) {
5365 *state = KMP_SAFE_TO_REAP;
5370 kmp_flag_64 fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th);
5371 if (fl.is_sleeping())
5372 fl.resume(__kmp_gtid_from_thread(th));
5379 for (tt_idx = 0; tt_idx < 2; ++tt_idx) {
5380 kmp_task_team_t *task_team = team->t.t_task_team[tt_idx];
5381 if (task_team != NULL) {
5382 for (f = 0; f < team->t.t_nproc;
5384 team->t.t_threads[f]->th.th_task_team = NULL;
5388 (
"__kmp_free_team: T#%d deactivating task_team %p on team %d\n",
5389 __kmp_get_gtid(), task_team, team->t.t_id));
5390 #if KMP_NESTED_HOT_TEAMS 5391 __kmp_free_task_team(master, task_team);
5393 team->t.t_task_team[tt_idx] = NULL;
5399 team->t.t_parent = NULL;
5400 team->t.t_level = 0;
5401 team->t.t_active_level = 0;
5404 for (f = 1; f < team->t.t_nproc; ++f) {
5405 KMP_DEBUG_ASSERT(team->t.t_threads[f]);
5406 __kmp_free_thread(team->t.t_threads[f]);
5407 team->t.t_threads[f] = NULL;
5412 team->t.t_next_pool = CCAST(kmp_team_t *, __kmp_team_pool);
5413 __kmp_team_pool = (
volatile kmp_team_t *)team;
kmp_team_t *__kmp_reap_team(kmp_team_t *team) {
  kmp_team_t *next_pool = team->t.t_next_pool;

  KMP_DEBUG_ASSERT(team);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);
  KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
  KMP_DEBUG_ASSERT(team->t.t_threads);
  KMP_DEBUG_ASSERT(team->t.t_argv);

  /* TODO clean the threads that are a part of this? */

  /* free stuff; argv may live inline in the team descriptor */
  __kmp_free_team_arrays(team);
  if (team->t.t_argv != &team->t.t_inline_argv[0])
    __kmp_free((void *)team->t.t_argv);
  __kmp_free(team);

  KMP_MB();
  return next_pool;
}
void __kmp_free_thread(kmp_info_t *this_th) {
  // Release the worker thread back onto the global free pool, which is kept
  // sorted by gtid.
  int gtid;
  kmp_info_t **scan;
  kmp_root_t *root = this_th->th.th_root;

  KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n",
                __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid));

  KMP_DEBUG_ASSERT(this_th);

  // When moving a thread to the pool, switch its barrier flags back to the
  // thread's own b_go flag and detach it from the team.
  int b;
  kmp_balign_t *balign = this_th->th.th_bar;
  for (b = 0; b < bs_last_barrier; ++b) {
    if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG)
      balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG;
    balign[b].bb.team = NULL;
    balign[b].bb.leaf_kids = 0;
  }
  this_th->th.th_task_state = 0;
  this_th->th.th_reap_state = KMP_SAFE_TO_REAP;

  /* put thread back on the free pool */
  TCW_PTR(this_th->th.th_team, NULL);
  TCW_PTR(this_th->th.th_root, NULL);
  TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */

  // Drop the implicit task so the pooled thread does not keep stale task data.
  __kmp_free_implicit_task(this_th);
  this_th->th.th_current_task = NULL;

  // If the cached insertion point is already past the new insert point, the
  // scan has to restart from the head of the pool.
  gtid = this_th->th.th_info.ds.ds_gtid;
  if (__kmp_thread_pool_insert_pt != NULL) {
    KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL);
    if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) {
      __kmp_thread_pool_insert_pt = NULL;
    }
  }

  // Scan down the list to find the place to insert the thread.
  // scan is the address of a link in the list, possibly the address of
  // __kmp_thread_pool itself.
  if (__kmp_thread_pool_insert_pt != NULL) {
    scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool);
  } else {
    scan = CCAST(kmp_info_t **, &__kmp_thread_pool);
  }

  for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid);
       scan = &((*scan)->th.th_next_pool))
    ;

  // Insert the new element on the list, and set __kmp_thread_pool_insert_pt
  // to its address.
  TCW_PTR(this_th->th.th_next_pool, *scan);
  __kmp_thread_pool_insert_pt = *scan = this_th;
  KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) ||
                   (this_th->th.th_info.ds.ds_gtid <
                    this_th->th.th_next_pool->th.th_info.ds.ds_gtid));
  TCW_4(this_th->th.th_in_pool, TRUE);
  __kmp_thread_pool_nth++;

  TCW_4(__kmp_nth, __kmp_nth - 1);
  root->r.r_cg_nthreads--;

#ifdef KMP_ADJUST_BLOCKTIME
  /* Adjust blocktime back to user setting or default if necessary */
  /* Middle initialization might never have occurred */
  if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
    KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
    if (__kmp_nth <= __kmp_avail_proc) {
      __kmp_zero_bt = FALSE;
    }
  }
#endif /* KMP_ADJUST_BLOCKTIME */

  KMP_MB();
}
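// The pool insertion above keeps __kmp_thread_pool sorted by gtid and caches
// the last insertion point (__kmp_thread_pool_insert_pt) so that threads
// freed in ascending gtid order are linked in without rescanning the list.
// Below is a minimal standalone sketch of that technique; the types and
// names are illustrative stand-ins, not the runtime's own.
#include <cassert>
#include <cstdio>

struct PoolNode {
  int gtid;            // sort key, plays the role of th.th_info.ds.ds_gtid
  PoolNode *next_pool; // plays the role of th.th_next_pool
};

static PoolNode *pool = nullptr;      // stand-in for __kmp_thread_pool
static PoolNode *insert_pt = nullptr; // stand-in for __kmp_thread_pool_insert_pt

static void pool_insert(PoolNode *node) {
  // If the cached insertion point is already past the new node, restart the
  // scan from the head of the list.
  if (insert_pt != nullptr && insert_pt->gtid > node->gtid)
    insert_pt = nullptr;

  PoolNode **scan = (insert_pt != nullptr) ? &insert_pt->next_pool : &pool;
  // Walk forward until the next element's gtid would be >= the new node's.
  for (; *scan != nullptr && (*scan)->gtid < node->gtid;
       scan = &(*scan)->next_pool)
    ;
  node->next_pool = *scan;
  insert_pt = *scan = node; // link in and remember the new insertion point
  assert(node->next_pool == nullptr || node->gtid < node->next_pool->gtid);
}

int main() {
  PoolNode a{3, nullptr}, b{1, nullptr}, c{2, nullptr};
  pool_insert(&a);
  pool_insert(&b); // smaller gtid: cache is dropped, head scan is used
  pool_insert(&c); // lands between b and a via the cached insertion point
  for (PoolNode *p = pool; p != nullptr; p = p->next_pool)
    std::printf("gtid %d\n", p->gtid); // prints 1, 2, 3
  return 0;
}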
5555 void *__kmp_launch_thread(kmp_info_t *this_thr) {
5556 int gtid = this_thr->th.th_info.ds.ds_gtid;
5558 kmp_team_t *(*
volatile pteam);
5561 KA_TRACE(10, (
"__kmp_launch_thread: T#%d start\n", gtid));
5563 if (__kmp_env_consistency_check) {
5564 this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid);
5568 ompt_data_t *thread_data;
5569 if (ompt_enabled.enabled) {
5570 thread_data = &(this_thr->th.ompt_thread_info.thread_data);
5571 thread_data->ptr = NULL;
5573 this_thr->th.ompt_thread_info.state = omp_state_overhead;
5574 this_thr->th.ompt_thread_info.wait_id = 0;
5575 this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
5576 if (ompt_enabled.ompt_callback_thread_begin) {
5577 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
5578 ompt_thread_worker, thread_data);
5584 if (ompt_enabled.enabled) {
5585 this_thr->th.ompt_thread_info.state = omp_state_idle;
5589 while (!TCR_4(__kmp_global.g.g_done)) {
5590 KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]);
5594 KA_TRACE(20, (
"__kmp_launch_thread: T#%d waiting for work\n", gtid));
5597 __kmp_fork_barrier(gtid, KMP_GTID_DNE);
5600 if (ompt_enabled.enabled) {
5601 this_thr->th.ompt_thread_info.state = omp_state_overhead;
5605 pteam = (kmp_team_t * (*))(&this_thr->th.th_team);
5608 if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) {
5610 if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) {
5613 (
"__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n",
5614 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
5615 (*pteam)->t.t_pkfn));
5617 updateHWFPControl(*pteam);
5620 if (ompt_enabled.enabled) {
5621 this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
5626 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
5627 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
5628 rc = (*pteam)->t.t_invoke(gtid);
5633 KA_TRACE(20, (
"__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n",
5634 gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid),
5635 (*pteam)->t.t_pkfn));
5638 if (ompt_enabled.enabled) {
5640 __ompt_get_task_info_object(0)->frame.exit_frame = NULL;
5642 this_thr->th.ompt_thread_info.state = omp_state_overhead;
5646 __kmp_join_barrier(gtid);
5649 TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done);
5652 if (ompt_enabled.ompt_callback_thread_end) {
5653 ompt_callbacks.ompt_callback(ompt_callback_thread_end)(thread_data);
5657 this_thr->th.th_task_team = NULL;
5659 __kmp_common_destroy_gtid(gtid);
5661 KA_TRACE(10, (
"__kmp_launch_thread: T#%d done\n", gtid));
5668 void __kmp_internal_end_dest(
void *specific_gtid) {
5669 #if KMP_COMPILER_ICC 5670 #pragma warning(push) 5671 #pragma warning(disable : 810) // conversion from "void *" to "int" may lose 5675 int gtid = (kmp_intptr_t)specific_gtid - 1;
5676 #if KMP_COMPILER_ICC 5677 #pragma warning(pop) 5680 KA_TRACE(30, (
"__kmp_internal_end_dest: T#%d\n", gtid));
5693 if (gtid >= 0 && KMP_UBER_GTID(gtid))
5694 __kmp_gtid_set_specific(gtid);
5695 #ifdef KMP_TDATA_GTID 5698 __kmp_internal_end_thread(gtid);
5701 #if KMP_OS_UNIX && KMP_DYNAMIC_LIB 5707 __attribute__((destructor))
void __kmp_internal_end_dtor(
void) {
5708 __kmp_internal_end_atexit();
5711 void __kmp_internal_end_fini(
void) { __kmp_internal_end_atexit(); }
5717 void __kmp_internal_end_atexit(
void) {
5718 KA_TRACE(30, (
"__kmp_internal_end_atexit\n"));
5742 __kmp_internal_end_library(-1);
5744 __kmp_close_console();
5748 static void __kmp_reap_thread(kmp_info_t *thread,
int is_root) {
5753 KMP_DEBUG_ASSERT(thread != NULL);
5755 gtid = thread->th.th_info.ds.ds_gtid;
5759 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
5762 20, (
"__kmp_reap_thread: releasing T#%d from fork barrier for reap\n",
5766 ANNOTATE_HAPPENS_BEFORE(thread);
5767 kmp_flag_64 flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread);
5768 __kmp_release_64(&flag);
5772 __kmp_reap_worker(thread);
5784 if (thread->th.th_active_in_pool) {
5785 thread->th.th_active_in_pool = FALSE;
5786 KMP_TEST_THEN_DEC32(&__kmp_thread_pool_active_nth);
5787 KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
5791 KMP_DEBUG_ASSERT(__kmp_thread_pool_nth > 0);
5792 --__kmp_thread_pool_nth;
5795 __kmp_free_implicit_task(thread);
5799 __kmp_free_fast_memory(thread);
5802 __kmp_suspend_uninitialize_thread(thread);
5804 KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread);
5805 TCW_SYNC_PTR(__kmp_threads[gtid], NULL);
5810 #ifdef KMP_ADJUST_BLOCKTIME 5813 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
5814 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
5815 if (__kmp_nth <= __kmp_avail_proc) {
5816 __kmp_zero_bt = FALSE;
5822 if (__kmp_env_consistency_check) {
5823 if (thread->th.th_cons) {
5824 __kmp_free_cons_stack(thread->th.th_cons);
5825 thread->th.th_cons = NULL;
5829 if (thread->th.th_pri_common != NULL) {
5830 __kmp_free(thread->th.th_pri_common);
5831 thread->th.th_pri_common = NULL;
5834 if (thread->th.th_task_state_memo_stack != NULL) {
5835 __kmp_free(thread->th.th_task_state_memo_stack);
5836 thread->th.th_task_state_memo_stack = NULL;
5840 if (thread->th.th_local.bget_data != NULL) {
5841 __kmp_finalize_bget(thread);
5845 #if KMP_AFFINITY_SUPPORTED 5846 if (thread->th.th_affin_mask != NULL) {
5847 KMP_CPU_FREE(thread->th.th_affin_mask);
5848 thread->th.th_affin_mask = NULL;
5852 __kmp_reap_team(thread->th.th_serial_team);
5853 thread->th.th_serial_team = NULL;
5860 static void __kmp_internal_end(
void) {
5864 __kmp_unregister_library();
5871 __kmp_reclaim_dead_roots();
5875 for (i = 0; i < __kmp_threads_capacity; i++)
5877 if (__kmp_root[i]->r.r_active)
5880 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
5882 if (i < __kmp_threads_capacity) {
5894 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
5895 if (TCR_4(__kmp_init_monitor)) {
5896 __kmp_reap_monitor(&__kmp_monitor);
5897 TCW_4(__kmp_init_monitor, 0);
5899 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
5900 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
5901 #endif // KMP_USE_MONITOR 5906 for (i = 0; i < __kmp_threads_capacity; i++) {
5907 if (__kmp_root[i]) {
5910 KMP_ASSERT(!__kmp_root[i]->r.r_active);
5919 while (__kmp_thread_pool != NULL) {
5921 kmp_info_t *thread = CCAST(kmp_info_t *, __kmp_thread_pool);
5922 __kmp_thread_pool = thread->th.th_next_pool;
5924 KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP);
5925 thread->th.th_next_pool = NULL;
5926 thread->th.th_in_pool = FALSE;
5927 __kmp_reap_thread(thread, 0);
5929 __kmp_thread_pool_insert_pt = NULL;
5932 while (__kmp_team_pool != NULL) {
5934 kmp_team_t *team = CCAST(kmp_team_t *, __kmp_team_pool);
5935 __kmp_team_pool = team->t.t_next_pool;
5937 team->t.t_next_pool = NULL;
5938 __kmp_reap_team(team);
5941 __kmp_reap_task_teams();
5943 for (i = 0; i < __kmp_threads_capacity; ++i) {
5950 TCW_SYNC_4(__kmp_init_common, FALSE);
5952 KA_TRACE(10, (
"__kmp_internal_end: all workers reaped\n"));
5960 __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock);
5961 if (TCR_4(__kmp_init_monitor)) {
5962 __kmp_reap_monitor(&__kmp_monitor);
5963 TCW_4(__kmp_init_monitor, 0);
5965 __kmp_release_bootstrap_lock(&__kmp_monitor_lock);
5966 KA_TRACE(10, (
"__kmp_internal_end: monitor reaped\n"));
5969 TCW_4(__kmp_init_gtid, FALSE);
5978 void __kmp_internal_end_library(
int gtid_req) {
5985 if (__kmp_global.g.g_abort) {
5986 KA_TRACE(11, (
"__kmp_internal_end_library: abort, exiting\n"));
5990 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
5991 KA_TRACE(10, (
"__kmp_internal_end_library: already finished\n"));
5999 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6001 10, (
"__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req));
6002 if (gtid == KMP_GTID_SHUTDOWN) {
6003 KA_TRACE(10, (
"__kmp_internal_end_library: !__kmp_init_runtime, system " 6004 "already shutdown\n"));
6006 }
else if (gtid == KMP_GTID_MONITOR) {
6007 KA_TRACE(10, (
"__kmp_internal_end_library: monitor thread, gtid not " 6008 "registered, or system shutdown\n"));
6010 }
else if (gtid == KMP_GTID_DNE) {
6011 KA_TRACE(10, (
"__kmp_internal_end_library: gtid not registered or system " 6014 }
else if (KMP_UBER_GTID(gtid)) {
6016 if (__kmp_root[gtid]->r.r_active) {
6017 __kmp_global.g.g_abort = -1;
6018 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6020 (
"__kmp_internal_end_library: root still active, abort T#%d\n",
6026 (
"__kmp_internal_end_library: unregistering sibling T#%d\n", gtid));
6027 __kmp_unregister_root_current_thread(gtid);
6034 #ifdef DUMP_DEBUG_ON_EXIT 6035 if (__kmp_debug_buf)
6036 __kmp_dump_debug_buffer();
6042 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6045 if (__kmp_global.g.g_abort) {
6046 KA_TRACE(10, (
"__kmp_internal_end_library: abort, exiting\n"));
6048 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6051 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6052 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6061 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6064 __kmp_internal_end();
6066 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6067 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6069 KA_TRACE(10, (
"__kmp_internal_end_library: exit\n"));
6071 #ifdef DUMP_DEBUG_ON_EXIT 6072 if (__kmp_debug_buf)
6073 __kmp_dump_debug_buffer();
6077 __kmp_close_console();
6080 __kmp_fini_allocator();
6084 void __kmp_internal_end_thread(
int gtid_req) {
6093 if (__kmp_global.g.g_abort) {
6094 KA_TRACE(11, (
"__kmp_internal_end_thread: abort, exiting\n"));
6098 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6099 KA_TRACE(10, (
"__kmp_internal_end_thread: already finished\n"));
6107 int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific();
6109 (
"__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req));
6110 if (gtid == KMP_GTID_SHUTDOWN) {
6111 KA_TRACE(10, (
"__kmp_internal_end_thread: !__kmp_init_runtime, system " 6112 "already shutdown\n"));
6114 }
else if (gtid == KMP_GTID_MONITOR) {
6115 KA_TRACE(10, (
"__kmp_internal_end_thread: monitor thread, gtid not " 6116 "registered, or system shutdown\n"));
6118 }
else if (gtid == KMP_GTID_DNE) {
6119 KA_TRACE(10, (
"__kmp_internal_end_thread: gtid not registered or system " 6123 }
else if (KMP_UBER_GTID(gtid)) {
6125 if (__kmp_root[gtid]->r.r_active) {
6126 __kmp_global.g.g_abort = -1;
6127 TCW_SYNC_4(__kmp_global.g.g_done, TRUE);
6129 (
"__kmp_internal_end_thread: root still active, abort T#%d\n",
6133 KA_TRACE(10, (
"__kmp_internal_end_thread: unregistering sibling T#%d\n",
6135 __kmp_unregister_root_current_thread(gtid);
6139 KA_TRACE(10, (
"__kmp_internal_end_thread: worker thread T#%d\n", gtid));
6142 __kmp_threads[gtid]->th.th_task_team = NULL;
6146 (
"__kmp_internal_end_thread: worker thread done, exiting T#%d\n",
6151 #if defined KMP_DYNAMIC_LIB 6160 KA_TRACE(10, (
"__kmp_internal_end_thread: exiting T#%d\n", gtid_req));
6164 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6167 if (__kmp_global.g.g_abort) {
6168 KA_TRACE(10, (
"__kmp_internal_end_thread: abort, exiting\n"));
6170 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6173 if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) {
6174 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6185 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
6187 for (i = 0; i < __kmp_threads_capacity; ++i) {
6188 if (KMP_UBER_GTID(i)) {
6191 (
"__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i));
6192 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6193 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6200 __kmp_internal_end();
6202 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
6203 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6205 KA_TRACE(10, (
"__kmp_internal_end_thread: exit T#%d\n", gtid_req));
6207 #ifdef DUMP_DEBUG_ON_EXIT 6208 if (__kmp_debug_buf)
6209 __kmp_dump_debug_buffer();
6216 static long __kmp_registration_flag = 0;
6218 static char *__kmp_registration_str = NULL;
6221 static inline char *__kmp_reg_status_name() {
6226 return __kmp_str_format(
"__KMP_REGISTERED_LIB_%d", (
int)getpid());
6229 void __kmp_register_library_startup(
void) {
6231 char *name = __kmp_reg_status_name();
6237 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 6238 __kmp_initialize_system_tick();
6240 __kmp_read_system_time(&time.dtime);
6241 __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL);
6242 __kmp_registration_str =
6243 __kmp_str_format(
"%p-%lx-%s", &__kmp_registration_flag,
6244 __kmp_registration_flag, KMP_LIBRARY_FILE);
6246 KA_TRACE(50, (
"__kmp_register_library_startup: %s=\"%s\"\n", name,
6247 __kmp_registration_str));
6254 __kmp_env_set(name, __kmp_registration_str, 0);
6256 value = __kmp_env_get(name);
6257 if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
6267 char *flag_addr_str = NULL;
6268 char *flag_val_str = NULL;
6269 char const *file_name = NULL;
6270 __kmp_str_split(tail,
'-', &flag_addr_str, &tail);
6271 __kmp_str_split(tail,
'-', &flag_val_str, &tail);
6274 long *flag_addr = 0;
6276 KMP_SSCANF(flag_addr_str,
"%p", &flag_addr);
6277 KMP_SSCANF(flag_val_str,
"%lx", &flag_val);
6278 if (flag_addr != 0 && flag_val != 0 && strcmp(file_name,
"") != 0) {
6282 if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) {
6296 file_name =
"unknown library";
6300 char *duplicate_ok = __kmp_env_get(
"KMP_DUPLICATE_LIB_OK");
6301 if (!__kmp_str_match_true(duplicate_ok)) {
6303 __kmp_fatal(KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name),
6304 KMP_HNT(DuplicateLibrary), __kmp_msg_null);
6306 KMP_INTERNAL_FREE(duplicate_ok);
6307 __kmp_duplicate_library_ok = 1;
6312 __kmp_env_unset(name);
6314 default: { KMP_DEBUG_ASSERT(0); }
break;
6317 KMP_INTERNAL_FREE((
void *)value);
6319 KMP_INTERNAL_FREE((
void *)name);
void __kmp_unregister_library(void) {

  char *name = __kmp_reg_status_name();
  char *value = __kmp_env_get(name);

  KMP_DEBUG_ASSERT(__kmp_registration_flag != 0);
  KMP_DEBUG_ASSERT(__kmp_registration_str != NULL);
  if (value != NULL && strcmp(value, __kmp_registration_str) == 0) {
    // Ok, this is our variable. Delete it.
    __kmp_env_unset(name);
  }

  KMP_INTERNAL_FREE(__kmp_registration_str);
  KMP_INTERNAL_FREE(value);
  KMP_INTERNAL_FREE(name);

  __kmp_registration_flag = 0;
  __kmp_registration_str = NULL;
}
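// The registration handshake above advertises each loaded copy of the runtime
// through an environment variable named __KMP_REGISTERED_LIB_<pid> whose
// value is formatted as "%p-%lx-%s" (address of the registration flag, the
// flag's value, and the library file name). A second runtime that finds a
// live registration reports DuplicateLibrary unless KMP_DUPLICATE_LIB_OK is
// set. Below is a hedged, standalone sketch of the encode/decode round trip;
// the helper names are hypothetical, and the real code additionally checks
// that the parsed address is mapped in this process and still holds the
// flag value before declaring a duplicate.
#include <cstdio>
#include <string>

static long registration_flag = 0; // stand-in for __kmp_registration_flag

static std::string make_registration_value(const char *library_file) {
  // The low 16 bits come from a time source in the real code; a constant is
  // enough for the sketch.
  registration_flag = 0xCAFE0000L | 0x1234L;
  char buf[256];
  std::snprintf(buf, sizeof(buf), "%p-%lx-%s", (void *)&registration_flag,
                registration_flag, library_file);
  return buf;
}

static bool looks_like_live_registration(const std::string &value) {
  // Parse "<flag address>-<flag value>-<file>" back out of the variable.
  void *flag_addr = nullptr;
  long flag_val = 0;
  char file[128] = "";
  if (std::sscanf(value.c_str(), "%p-%lx-%127s", &flag_addr, &flag_val,
                  file) != 3)
    return false;
  return flag_addr != nullptr && flag_val != 0 && file[0] != '\0';
}

int main() {
  std::string value = make_registration_value("libomp.so");
  std::printf("registration value: %s\n", value.c_str());
  std::printf("looks live: %d\n", (int)looks_like_live_registration(value));
  return 0;
}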
#if KMP_MIC_SUPPORTED
// Check whether we are running on an Intel(R) Xeon Phi(TM) coprocessor.
static void __kmp_check_mic_type() {
  kmp_cpuid_t cpuid_state = {0};
  kmp_cpuid_t *cs_p = &cpuid_state;
  __kmp_x86_cpuid(1, 0, cs_p);
  // We don't support mic1 at the moment
  if ((cs_p->eax & 0xff0) == 0xB10) {
    __kmp_mic_type = mic2;
  } else if ((cs_p->eax & 0xf0ff0) == 0x50670) {
    __kmp_mic_type = mic3;
  } else {
    __kmp_mic_type = non_mic;
  }
}
#endif /* KMP_MIC_SUPPORTED */
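// The masks in __kmp_check_mic_type test the family/model fields of CPUID
// leaf 1 EAX: (eax & 0xff0) == 0xB10 matches family 0x0B, model 1 (Knights
// Corner era), and (eax & 0xf0ff0) == 0x50670 matches family 6, model 0x57
// (Knights Landing). A standalone sketch of decoding those fields follows;
// the sample eax value is illustrative, not read from real hardware here.
#include <cstdio>

struct CpuSignature {
  unsigned family;
  unsigned model;
};

static CpuSignature decode_leaf1_eax(unsigned eax) {
  unsigned family = (eax >> 8) & 0xf;
  unsigned model = (eax >> 4) & 0xf;
  unsigned ext_model = (eax >> 16) & 0xf;
  unsigned ext_family = (eax >> 20) & 0xff;
  if (family == 0xf)
    family += ext_family; // extended family only applies to family 0xF
  if (family == 0x6 || family == 0xf)
    model += ext_model << 4; // extended model applies to families 6 and 0xF
  return {family, model};
}

int main() {
  CpuSignature sig = decode_leaf1_eax(0x00050670); // matches the mic3 check
  std::printf("family=0x%x model=0x%x\n", sig.family, sig.model); // 0x6 / 0x57
  return 0;
}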
6365 static void __kmp_do_serial_initialize(
void) {
6369 KA_TRACE(10, (
"__kmp_do_serial_initialize: enter\n"));
6371 KMP_DEBUG_ASSERT(
sizeof(kmp_int32) == 4);
6372 KMP_DEBUG_ASSERT(
sizeof(kmp_uint32) == 4);
6373 KMP_DEBUG_ASSERT(
sizeof(kmp_int64) == 8);
6374 KMP_DEBUG_ASSERT(
sizeof(kmp_uint64) == 8);
6375 KMP_DEBUG_ASSERT(
sizeof(kmp_intptr_t) ==
sizeof(
void *));
6381 __kmp_validate_locks();
6384 __kmp_init_allocator();
6389 __kmp_register_library_startup();
6392 if (TCR_4(__kmp_global.g.g_done)) {
6393 KA_TRACE(10, (
"__kmp_do_serial_initialize: reinitialization of library\n"));
6396 __kmp_global.g.g_abort = 0;
6397 TCW_SYNC_4(__kmp_global.g.g_done, FALSE);
6400 #if KMP_USE_ADAPTIVE_LOCKS 6401 #if KMP_DEBUG_ADAPTIVE_LOCKS 6402 __kmp_init_speculative_stats();
6405 #if KMP_STATS_ENABLED 6408 __kmp_init_lock(&__kmp_global_lock);
6409 __kmp_init_queuing_lock(&__kmp_dispatch_lock);
6410 __kmp_init_lock(&__kmp_debug_lock);
6411 __kmp_init_atomic_lock(&__kmp_atomic_lock);
6412 __kmp_init_atomic_lock(&__kmp_atomic_lock_1i);
6413 __kmp_init_atomic_lock(&__kmp_atomic_lock_2i);
6414 __kmp_init_atomic_lock(&__kmp_atomic_lock_4i);
6415 __kmp_init_atomic_lock(&__kmp_atomic_lock_4r);
6416 __kmp_init_atomic_lock(&__kmp_atomic_lock_8i);
6417 __kmp_init_atomic_lock(&__kmp_atomic_lock_8r);
6418 __kmp_init_atomic_lock(&__kmp_atomic_lock_8c);
6419 __kmp_init_atomic_lock(&__kmp_atomic_lock_10r);
6420 __kmp_init_atomic_lock(&__kmp_atomic_lock_16r);
6421 __kmp_init_atomic_lock(&__kmp_atomic_lock_16c);
6422 __kmp_init_atomic_lock(&__kmp_atomic_lock_20c);
6423 __kmp_init_atomic_lock(&__kmp_atomic_lock_32c);
6424 __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock);
6425 __kmp_init_bootstrap_lock(&__kmp_exit_lock);
6427 __kmp_init_bootstrap_lock(&__kmp_monitor_lock);
6429 __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock);
6433 __kmp_runtime_initialize();
6435 #if KMP_MIC_SUPPORTED 6436 __kmp_check_mic_type();
6443 __kmp_abort_delay = 0;
6447 __kmp_dflt_team_nth_ub = __kmp_xproc;
6448 if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) {
6449 __kmp_dflt_team_nth_ub = KMP_MIN_NTH;
6451 if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) {
6452 __kmp_dflt_team_nth_ub = __kmp_sys_max_nth;
6454 __kmp_max_nth = __kmp_sys_max_nth;
6455 __kmp_cg_max_nth = __kmp_sys_max_nth;
6456 __kmp_teams_max_nth = __kmp_xproc;
6457 if (__kmp_teams_max_nth > __kmp_sys_max_nth) {
6458 __kmp_teams_max_nth = __kmp_sys_max_nth;
6463 __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME;
6465 __kmp_monitor_wakeups =
6466 KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
6467 __kmp_bt_intervals =
6468 KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups);
6471 __kmp_library = library_throughput;
6473 __kmp_static = kmp_sch_static_balanced;
6480 #if KMP_FAST_REDUCTION_BARRIER 6481 #define kmp_reduction_barrier_gather_bb ((int)1) 6482 #define kmp_reduction_barrier_release_bb ((int)1) 6483 #define kmp_reduction_barrier_gather_pat bp_hyper_bar 6484 #define kmp_reduction_barrier_release_pat bp_hyper_bar 6485 #endif // KMP_FAST_REDUCTION_BARRIER 6486 for (i = bs_plain_barrier; i < bs_last_barrier; i++) {
6487 __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt;
6488 __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt;
6489 __kmp_barrier_gather_pattern[i] = __kmp_barrier_gather_pat_dflt;
6490 __kmp_barrier_release_pattern[i] = __kmp_barrier_release_pat_dflt;
6491 #if KMP_FAST_REDUCTION_BARRIER 6492 if (i == bs_reduction_barrier) {
6494 __kmp_barrier_gather_branch_bits[i] = kmp_reduction_barrier_gather_bb;
6495 __kmp_barrier_release_branch_bits[i] = kmp_reduction_barrier_release_bb;
6496 __kmp_barrier_gather_pattern[i] = kmp_reduction_barrier_gather_pat;
6497 __kmp_barrier_release_pattern[i] = kmp_reduction_barrier_release_pat;
6499 #endif // KMP_FAST_REDUCTION_BARRIER 6501 #if KMP_FAST_REDUCTION_BARRIER 6502 #undef kmp_reduction_barrier_release_pat 6503 #undef kmp_reduction_barrier_gather_pat 6504 #undef kmp_reduction_barrier_release_bb 6505 #undef kmp_reduction_barrier_gather_bb 6506 #endif // KMP_FAST_REDUCTION_BARRIER 6507 #if KMP_MIC_SUPPORTED 6508 if (__kmp_mic_type == mic2) {
6510 __kmp_barrier_gather_branch_bits[bs_plain_barrier] = 3;
6511 __kmp_barrier_release_branch_bits[bs_forkjoin_barrier] =
6513 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6514 __kmp_barrier_release_pattern[bs_forkjoin_barrier] = bp_hierarchical_bar;
6516 #if KMP_FAST_REDUCTION_BARRIER 6517 if (__kmp_mic_type == mic2) {
6518 __kmp_barrier_gather_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6519 __kmp_barrier_release_pattern[bs_reduction_barrier] = bp_hierarchical_bar;
6521 #endif // KMP_FAST_REDUCTION_BARRIER 6522 #endif // KMP_MIC_SUPPORTED 6526 __kmp_env_checks = TRUE;
6528 __kmp_env_checks = FALSE;
6532 __kmp_foreign_tp = TRUE;
6534 __kmp_global.g.g_dynamic = FALSE;
6535 __kmp_global.g.g_dynamic_mode = dynamic_default;
6537 __kmp_env_initialize(NULL);
6541 char const *val = __kmp_env_get(
"KMP_DUMP_CATALOG");
6542 if (__kmp_str_match_true(val)) {
6543 kmp_str_buf_t buffer;
6544 __kmp_str_buf_init(&buffer);
6545 __kmp_i18n_dump_catalog(&buffer);
6546 __kmp_printf(
"%s", buffer.str);
6547 __kmp_str_buf_free(&buffer);
6549 __kmp_env_free(&val);
6552 __kmp_threads_capacity =
6553 __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub);
6555 __kmp_tp_capacity = __kmp_default_tp_capacity(
6556 __kmp_dflt_team_nth_ub, __kmp_max_nth, __kmp_allThreadsSpecified);
6561 KMP_DEBUG_ASSERT(__kmp_thread_pool == NULL);
6562 KMP_DEBUG_ASSERT(__kmp_thread_pool_insert_pt == NULL);
6563 KMP_DEBUG_ASSERT(__kmp_team_pool == NULL);
6564 __kmp_thread_pool = NULL;
6565 __kmp_thread_pool_insert_pt = NULL;
6566 __kmp_team_pool = NULL;
6573 (
sizeof(kmp_info_t *) +
sizeof(kmp_root_t *)) * __kmp_threads_capacity +
6575 __kmp_threads = (kmp_info_t **)__kmp_allocate(size);
6576 __kmp_root = (kmp_root_t **)((
char *)__kmp_threads +
6577 sizeof(kmp_info_t *) * __kmp_threads_capacity);
6580 KMP_DEBUG_ASSERT(__kmp_all_nth ==
6582 KMP_DEBUG_ASSERT(__kmp_nth == 0);
6587 gtid = __kmp_register_root(TRUE);
6588 KA_TRACE(10, (
"__kmp_do_serial_initialize T#%d\n", gtid));
6589 KMP_ASSERT(KMP_UBER_GTID(gtid));
6590 KMP_ASSERT(KMP_INITIAL_GTID(gtid));
6594 __kmp_common_initialize();
6598 __kmp_register_atfork();
6601 #if !defined KMP_DYNAMIC_LIB 6605 int rc = atexit(__kmp_internal_end_atexit);
6607 __kmp_fatal(KMP_MSG(FunctionError,
"atexit()"), KMP_ERR(rc),
6613 #if KMP_HANDLE_SIGNALS 6619 __kmp_install_signals(FALSE);
6622 __kmp_install_signals(TRUE);
6627 __kmp_init_counter++;
6629 __kmp_init_serial = TRUE;
6631 if (__kmp_settings) {
6636 if (__kmp_display_env || __kmp_display_env_verbose) {
6637 __kmp_env_print_2();
6639 #endif // OMP_40_ENABLED 6647 KA_TRACE(10, (
"__kmp_do_serial_initialize: exit\n"));
void __kmp_serial_initialize(void) {
  if (__kmp_init_serial) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_serial) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_serial_initialize();
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
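// __kmp_serial_initialize and __kmp_middle_initialize share one shape: an
// unsynchronized fast-path check of the "already initialized" flag, then a
// re-check under the bootstrap lock before doing the one-time work. The
// standalone sketch below shows the same double-checked pattern using plain
// C++ primitives instead of the runtime's bootstrap locks.
#include <atomic>
#include <cstdio>
#include <mutex>

static std::atomic<bool> init_done{false};
static std::mutex init_lock;

static void do_initialize_once() {
  std::printf("expensive one-time initialization\n");
}

void ensure_initialized() {
  if (init_done.load(std::memory_order_acquire)) // fast path, no lock taken
    return;
  std::lock_guard<std::mutex> guard(init_lock);
  if (init_done.load(std::memory_order_relaxed)) // lost the race: already done
    return;
  do_initialize_once();
  init_done.store(true, std::memory_order_release);
}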
6663 static void __kmp_do_middle_initialize(
void) {
6665 int prev_dflt_team_nth;
6667 if (!__kmp_init_serial) {
6668 __kmp_do_serial_initialize();
6671 KA_TRACE(10, (
"__kmp_middle_initialize: enter\n"));
6675 prev_dflt_team_nth = __kmp_dflt_team_nth;
6677 #if KMP_AFFINITY_SUPPORTED 6680 __kmp_affinity_initialize();
6684 for (i = 0; i < __kmp_threads_capacity; i++) {
6685 if (TCR_PTR(__kmp_threads[i]) != NULL) {
6686 __kmp_affinity_set_init_mask(i, TRUE);
6691 KMP_ASSERT(__kmp_xproc > 0);
6692 if (__kmp_avail_proc == 0) {
6693 __kmp_avail_proc = __kmp_xproc;
6699 while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) {
6700 __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub =
6705 if (__kmp_dflt_team_nth == 0) {
6706 #ifdef KMP_DFLT_NTH_CORES 6708 __kmp_dflt_team_nth = __kmp_ncores;
6709 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = " 6710 "__kmp_ncores (%d)\n",
6711 __kmp_dflt_team_nth));
6714 __kmp_dflt_team_nth = __kmp_avail_proc;
6715 KA_TRACE(20, (
"__kmp_middle_initialize: setting __kmp_dflt_team_nth = " 6716 "__kmp_avail_proc(%d)\n",
6717 __kmp_dflt_team_nth));
6721 if (__kmp_dflt_team_nth < KMP_MIN_NTH) {
6722 __kmp_dflt_team_nth = KMP_MIN_NTH;
6724 if (__kmp_dflt_team_nth > __kmp_sys_max_nth) {
6725 __kmp_dflt_team_nth = __kmp_sys_max_nth;
6730 KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub);
6732 if (__kmp_dflt_team_nth != prev_dflt_team_nth) {
6737 for (i = 0; i < __kmp_threads_capacity; i++) {
6738 kmp_info_t *thread = __kmp_threads[i];
6741 if (thread->th.th_current_task->td_icvs.nproc != 0)
6744 set__nproc(__kmp_threads[i], __kmp_dflt_team_nth);
6749 (
"__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n",
6750 __kmp_dflt_team_nth));
6752 #ifdef KMP_ADJUST_BLOCKTIME 6754 if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) {
6755 KMP_DEBUG_ASSERT(__kmp_avail_proc > 0);
6756 if (__kmp_nth > __kmp_avail_proc) {
6757 __kmp_zero_bt = TRUE;
6763 TCW_SYNC_4(__kmp_init_middle, TRUE);
6765 KA_TRACE(10, (
"__kmp_do_middle_initialize: exit\n"));
void __kmp_middle_initialize(void) {
  if (__kmp_init_middle) {
    return;
  }
  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
  if (__kmp_init_middle) {
    __kmp_release_bootstrap_lock(&__kmp_initz_lock);
    return;
  }
  __kmp_do_middle_initialize();
  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
}
6781 void __kmp_parallel_initialize(
void) {
6782 int gtid = __kmp_entry_gtid();
6785 if (TCR_4(__kmp_init_parallel))
6787 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
6788 if (TCR_4(__kmp_init_parallel)) {
6789 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
6794 if (TCR_4(__kmp_global.g.g_done)) {
6797 (
"__kmp_parallel_initialize: attempt to init while shutting down\n"));
6798 __kmp_infinite_loop();
6804 if (!__kmp_init_middle) {
6805 __kmp_do_middle_initialize();
6809 KA_TRACE(10, (
"__kmp_parallel_initialize: enter\n"));
6810 KMP_ASSERT(KMP_UBER_GTID(gtid));
6812 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 6815 __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
6816 __kmp_store_mxcsr(&__kmp_init_mxcsr);
6817 __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK;
6821 #if KMP_HANDLE_SIGNALS 6823 __kmp_install_signals(TRUE);
6827 __kmp_suspend_initialize();
6829 #if defined(USE_LOAD_BALANCE) 6830 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
6831 __kmp_global.g.g_dynamic_mode = dynamic_load_balance;
6834 if (__kmp_global.g.g_dynamic_mode == dynamic_default) {
6835 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
6839 if (__kmp_version) {
6840 __kmp_print_version_2();
6844 TCW_SYNC_4(__kmp_init_parallel, TRUE);
6847 KA_TRACE(10, (
"__kmp_parallel_initialize: exit\n"));
6849 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                   kmp_team_t *team) {
  kmp_disp_t *dispatch;

  /* none of the threads have encountered any constructs, yet. */
  this_thr->th.th_local.this_construct = 0;
#if KMP_CACHE_MANAGE
  KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived);
#endif /* KMP_CACHE_MANAGE */
  dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch);
  KMP_DEBUG_ASSERT(dispatch);
  KMP_DEBUG_ASSERT(team->t.t_dispatch);

  dispatch->th_disp_index = 0;
  dispatch->th_doacross_buf_idx = 0; // reset doacross dispatch buffer counter

  if (__kmp_env_consistency_check)
    __kmp_push_parallel(gtid, team->t.t_ident);

  KMP_MB(); /* Flush all pending memory write invalidates. */
}

void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr,
                                  kmp_team_t *team) {
  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(gtid, team->t.t_ident);

  __kmp_finish_implicit_task(this_thr);
}
6890 int __kmp_invoke_task_func(
int gtid) {
6892 int tid = __kmp_tid_from_gtid(gtid);
6893 kmp_info_t *this_thr = __kmp_threads[gtid];
6894 kmp_team_t *team = this_thr->th.th_team;
6896 __kmp_run_before_invoked_task(gtid, tid, this_thr, team);
6898 if (__itt_stack_caller_create_ptr) {
6899 __kmp_itt_stack_callee_enter(
6901 team->t.t_stack_id);
6904 #if INCLUDE_SSC_MARKS 6905 SSC_MARK_INVOKING();
6910 void **exit_runtime_p;
6911 ompt_data_t *my_task_data;
6912 ompt_data_t *my_parallel_data;
6915 if (ompt_enabled.enabled) {
6917 team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_frame);
6919 exit_runtime_p = &dummy;
6923 &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data);
6924 my_parallel_data = &(team->t.ompt_team_info.parallel_data);
6925 if (ompt_enabled.ompt_callback_implicit_task) {
6926 ompt_team_size = team->t.t_nproc;
6927 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
6928 ompt_scope_begin, my_parallel_data, my_task_data, ompt_team_size,
6929 __kmp_tid_from_gtid(gtid));
6930 OMPT_CUR_TASK_INFO(this_thr)->thread_num = __kmp_tid_from_gtid(gtid);
6935 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
6936 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
6938 __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid,
6939 tid, (
int)team->t.t_argc, (
void **)team->t.t_argv
6946 *exit_runtime_p = NULL;
6951 if (__itt_stack_caller_create_ptr) {
6952 __kmp_itt_stack_callee_leave(
6954 team->t.t_stack_id);
6957 __kmp_run_after_invoked_task(gtid, tid, this_thr, team);
6963 void __kmp_teams_master(
int gtid) {
6965 kmp_info_t *thr = __kmp_threads[gtid];
6966 kmp_team_t *team = thr->th.th_team;
6967 ident_t *loc = team->t.t_ident;
6968 thr->th.th_set_nproc = thr->th.th_teams_size.nth;
6969 KMP_DEBUG_ASSERT(thr->th.th_teams_microtask);
6970 KMP_DEBUG_ASSERT(thr->th.th_set_nproc);
6971 KA_TRACE(20, (
"__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid,
6972 __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask));
6975 #if INCLUDE_SSC_MARKS 6978 __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc,
6979 (microtask_t)thr->th.th_teams_microtask,
6980 VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL);
6981 #if INCLUDE_SSC_MARKS 6987 __kmp_join_call(loc, gtid
6996 int __kmp_invoke_teams_master(
int gtid) {
6997 kmp_info_t *this_thr = __kmp_threads[gtid];
6998 kmp_team_t *team = this_thr->th.th_team;
7000 if (!__kmp_threads[gtid]->th.th_team->t.t_serialized)
7001 KMP_DEBUG_ASSERT((
void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn ==
7002 (
void *)__kmp_teams_master);
7004 __kmp_run_before_invoked_task(gtid, 0, this_thr, team);
7005 __kmp_teams_master(gtid);
7006 __kmp_run_after_invoked_task(gtid, 0, this_thr, team);
void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) {
  kmp_info_t *thr = __kmp_threads[gtid];

  if (num_threads > 0)
    thr->th.th_set_nproc = num_threads;
}
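// __kmp_push_num_threads is the internal half of the num_threads clause: the
// compiler-facing entry point records the request on the encountering thread
// just before the fork, and the value applies only to the next parallel
// region. A small user-level example of that behavior (standard OpenMP, not
// runtime-internal code):
#include <cstdio>
#include <omp.h>

int main() {
  #pragma omp parallel num_threads(3) // request is pushed before the fork
  {
    #pragma omp single
    std::printf("first region: %d threads\n", omp_get_num_threads());
  }
  #pragma omp parallel // request was consumed; back to the default team size
  {
    #pragma omp single
    std::printf("second region: %d threads\n", omp_get_num_threads());
  }
  return 0;
}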
7027 void __kmp_push_num_teams(
ident_t *
id,
int gtid,
int num_teams,
7029 kmp_info_t *thr = __kmp_threads[gtid];
7030 KMP_DEBUG_ASSERT(num_teams >= 0);
7031 KMP_DEBUG_ASSERT(num_threads >= 0);
7035 if (num_teams > __kmp_teams_max_nth) {
7036 if (!__kmp_reserve_warn) {
7037 __kmp_reserve_warn = 1;
7038 __kmp_msg(kmp_ms_warning,
7039 KMP_MSG(CantFormThrTeam, num_teams, __kmp_teams_max_nth),
7040 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7042 num_teams = __kmp_teams_max_nth;
7046 thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams;
7049 if (num_threads == 0) {
7050 if (!TCR_4(__kmp_init_middle))
7051 __kmp_middle_initialize();
7052 num_threads = __kmp_avail_proc / num_teams;
7053 if (num_teams * num_threads > __kmp_teams_max_nth) {
7055 num_threads = __kmp_teams_max_nth / num_teams;
7058 if (num_teams * num_threads > __kmp_teams_max_nth) {
7059 int new_threads = __kmp_teams_max_nth / num_teams;
7060 if (!__kmp_reserve_warn) {
7061 __kmp_reserve_warn = 1;
7062 __kmp_msg(kmp_ms_warning,
7063 KMP_MSG(CantFormThrTeam, num_threads, new_threads),
7064 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
7066 num_threads = new_threads;
7069 thr->th.th_teams_size.nth = num_threads;
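// When the teams construct specifies a team count but no thread count, the
// code above derives one from the available processors and then re-clamps the
// product against __kmp_teams_max_nth. The standalone sketch below mirrors
// that arithmetic with hypothetical inputs; it is an illustration of the
// sizing rule, not the runtime's own routine.
#include <cstdio>

static int threads_per_team(int num_teams, int avail_proc, int teams_max_nth) {
  int num_threads = avail_proc / num_teams; // spread processors over teams
  if (num_teams * num_threads > teams_max_nth)
    num_threads = teams_max_nth / num_teams; // shrink teams so the total fits
  if (num_threads < 1)
    num_threads = 1;
  return num_threads;
}

int main() {
  std::printf("%d\n", threads_per_team(4, 16, 64)); // 4 threads per team
  std::printf("%d\n", threads_per_team(8, 64, 32)); // clamped down to 4
  return 0;
}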
7073 void __kmp_push_proc_bind(
ident_t *
id,
int gtid, kmp_proc_bind_t proc_bind) {
7074 kmp_info_t *thr = __kmp_threads[gtid];
7075 thr->th.th_set_proc_bind = proc_bind;
7082 void __kmp_internal_fork(
ident_t *
id,
int gtid, kmp_team_t *team) {
7083 kmp_info_t *this_thr = __kmp_threads[gtid];
7089 KMP_DEBUG_ASSERT(team);
7090 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
7091 KMP_ASSERT(KMP_MASTER_GTID(gtid));
7094 team->t.t_construct = 0;
7095 team->t.t_ordered.dt.t_value =
7099 KMP_DEBUG_ASSERT(team->t.t_disp_buffer);
7100 if (team->t.t_max_nproc > 1) {
7102 for (i = 0; i < __kmp_dispatch_num_buffers; ++i) {
7103 team->t.t_disp_buffer[i].buffer_index = i;
7105 team->t.t_disp_buffer[i].doacross_buf_idx = i;
7109 team->t.t_disp_buffer[0].buffer_index = 0;
7111 team->t.t_disp_buffer[0].doacross_buf_idx = 0;
7116 KMP_ASSERT(this_thr->th.th_team == team);
7119 for (f = 0; f < team->t.t_nproc; f++) {
7120 KMP_DEBUG_ASSERT(team->t.t_threads[f] &&
7121 team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc);
7126 __kmp_fork_barrier(gtid, 0);
7129 void __kmp_internal_join(
ident_t *
id,
int gtid, kmp_team_t *team) {
7130 kmp_info_t *this_thr = __kmp_threads[gtid];
7132 KMP_DEBUG_ASSERT(team);
7133 KMP_DEBUG_ASSERT(this_thr->th.th_team == team);
7134 KMP_ASSERT(KMP_MASTER_GTID(gtid));
7140 if (__kmp_threads[gtid] &&
7141 __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) {
7142 __kmp_printf(
"GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid,
7143 __kmp_threads[gtid]);
7144 __kmp_printf(
"__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, " 7145 "team->t.t_nproc=%d\n",
7146 gtid, __kmp_threads[gtid]->th.th_team_nproc, team,
7148 __kmp_print_structure();
7150 KMP_DEBUG_ASSERT(__kmp_threads[gtid] &&
7151 __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc);
7154 __kmp_join_barrier(gtid);
7156 if (ompt_enabled.enabled &&
7157 this_thr->th.ompt_thread_info.state == omp_state_wait_barrier_implicit) {
7158 int ds_tid = this_thr->th.th_info.ds.ds_tid;
7159 ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
7160 this_thr->th.ompt_thread_info.state = omp_state_overhead;
7162 void *codeptr = NULL;
7163 if (KMP_MASTER_TID(ds_tid) &&
7164 (ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait) ||
7165 ompt_callbacks.ompt_callback(ompt_callback_sync_region)))
7166 codeptr = OMPT_CUR_TEAM_INFO(this_thr)->master_return_address;
7168 if (ompt_enabled.ompt_callback_sync_region_wait) {
7169 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
7170 ompt_sync_region_barrier, ompt_scope_end, NULL, task_data, codeptr);
7172 if (ompt_enabled.ompt_callback_sync_region) {
7173 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
7174 ompt_sync_region_barrier, ompt_scope_end, NULL, task_data, codeptr);
7177 if (!KMP_MASTER_TID(ds_tid) && ompt_enabled.ompt_callback_implicit_task) {
7178 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
7179 ompt_scope_end, NULL, task_data, 0, ds_tid);
7185 KMP_ASSERT(this_thr->th.th_team == team);
7190 #ifdef USE_LOAD_BALANCE 7194 static int __kmp_active_hot_team_nproc(kmp_root_t *root) {
7197 kmp_team_t *hot_team;
7199 if (root->r.r_active) {
7202 hot_team = root->r.r_hot_team;
7203 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
7204 return hot_team->t.t_nproc - 1;
7209 for (i = 1; i < hot_team->t.t_nproc; i++) {
7210 if (hot_team->t.t_threads[i]->th.th_active) {
7219 static int __kmp_load_balance_nproc(kmp_root_t *root,
int set_nproc) {
7222 int hot_team_active;
7223 int team_curr_active;
7226 KB_TRACE(20, (
"__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root,
7228 KMP_DEBUG_ASSERT(root);
7229 KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0]
7230 ->th.th_current_task->td_icvs.dynamic == TRUE);
7231 KMP_DEBUG_ASSERT(set_nproc > 1);
7233 if (set_nproc == 1) {
7234 KB_TRACE(20, (
"__kmp_load_balance_nproc: serial execution.\n"));
7243 pool_active = TCR_4(__kmp_thread_pool_active_nth);
7244 hot_team_active = __kmp_active_hot_team_nproc(root);
7245 team_curr_active = pool_active + hot_team_active + 1;
7248 system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active);
7249 KB_TRACE(30, (
"__kmp_load_balance_nproc: system active = %d pool active = %d " 7250 "hot team active = %d\n",
7251 system_active, pool_active, hot_team_active));
7253 if (system_active < 0) {
7257 __kmp_global.g.g_dynamic_mode = dynamic_thread_limit;
7258 KMP_WARNING(CantLoadBalUsing,
"KMP_DYNAMIC_MODE=thread limit");
7261 retval = __kmp_avail_proc - __kmp_nth +
7262 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
7263 if (retval > set_nproc) {
7266 if (retval < KMP_MIN_NTH) {
7267 retval = KMP_MIN_NTH;
7270 KB_TRACE(20, (
"__kmp_load_balance_nproc: thread limit exit. retval:%d\n",
7278 if (system_active < team_curr_active) {
7279 system_active = team_curr_active;
7281 retval = __kmp_avail_proc - system_active + team_curr_active;
7282 if (retval > set_nproc) {
7285 if (retval < KMP_MIN_NTH) {
7286 retval = KMP_MIN_NTH;
7289 KB_TRACE(20, (
"__kmp_load_balance_nproc: exit. retval:%d\n", retval));
7298 void __kmp_cleanup(
void) {
7301 KA_TRACE(10, (
"__kmp_cleanup: enter\n"));
7303 if (TCR_4(__kmp_init_parallel)) {
7304 #if KMP_HANDLE_SIGNALS 7305 __kmp_remove_signals();
7307 TCW_4(__kmp_init_parallel, FALSE);
7310 if (TCR_4(__kmp_init_middle)) {
7311 #if KMP_AFFINITY_SUPPORTED 7312 __kmp_affinity_uninitialize();
7314 __kmp_cleanup_hierarchy();
7315 TCW_4(__kmp_init_middle, FALSE);
7318 KA_TRACE(10, (
"__kmp_cleanup: go serial cleanup\n"));
7320 if (__kmp_init_serial) {
7321 __kmp_runtime_destroy();
7322 __kmp_init_serial = FALSE;
7325 __kmp_cleanup_threadprivate_caches();
7327 for (f = 0; f < __kmp_threads_capacity; f++) {
7328 if (__kmp_root[f] != NULL) {
7329 __kmp_free(__kmp_root[f]);
7330 __kmp_root[f] = NULL;
7333 __kmp_free(__kmp_threads);
7336 __kmp_threads = NULL;
7338 __kmp_threads_capacity = 0;
7340 #if KMP_USE_DYNAMIC_LOCK 7341 __kmp_cleanup_indirect_user_locks();
7343 __kmp_cleanup_user_locks();
7346 #if KMP_AFFINITY_SUPPORTED 7347 KMP_INTERNAL_FREE(CCAST(
char *, __kmp_cpuinfo_file));
7348 __kmp_cpuinfo_file = NULL;
7351 #if KMP_USE_ADAPTIVE_LOCKS 7352 #if KMP_DEBUG_ADAPTIVE_LOCKS 7353 __kmp_print_speculative_stats();
7356 KMP_INTERNAL_FREE(__kmp_nested_nth.nth);
7357 __kmp_nested_nth.nth = NULL;
7358 __kmp_nested_nth.size = 0;
7359 __kmp_nested_nth.used = 0;
7360 KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types);
7361 __kmp_nested_proc_bind.bind_types = NULL;
7362 __kmp_nested_proc_bind.size = 0;
7363 __kmp_nested_proc_bind.used = 0;
7365 __kmp_i18n_catclose();
7367 #if KMP_STATS_ENABLED 7371 KA_TRACE(10, (
"__kmp_cleanup: exit\n"));
int __kmp_ignore_mppbeg(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_begin() is no-op.
  return TRUE;
}

int __kmp_ignore_mppend(void) {
  char *env;

  if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) {
    if (__kmp_str_match_false(env))
      return FALSE;
  }
  // By default __kmpc_end() is no-op.
  return TRUE;
}
void __kmp_internal_begin(void) {
  int gtid;
  kmp_root_t *root;

  /* this is a very important step as it will register new sibling threads
     and assign these new uber threads a new gtid */
  gtid = __kmp_entry_gtid();
  root = __kmp_threads[gtid]->th.th_root;
  KMP_ASSERT(KMP_UBER_GTID(gtid));

  if (root->r.r_begin)
    return;
  __kmp_acquire_lock(&root->r.r_begin_lock, gtid);
  if (root->r.r_begin) {
    __kmp_release_lock(&root->r.r_begin_lock, gtid);
    return;
  }

  root->r.r_begin = TRUE;

  __kmp_release_lock(&root->r.r_begin_lock, gtid);
}
void __kmp_user_set_library(enum library_type arg) {
  int gtid;
  kmp_root_t *root;
  kmp_info_t *thread;

  /* first, make sure we are initialized so we can get our gtid */

  gtid = __kmp_entry_gtid();
  thread = __kmp_threads[gtid];

  root = thread->th.th_root;

  KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg,
                library_serial));
  if (root->r.r_in_parallel) { /* Must be called in serial section of top-level
                                  thread */
    KMP_WARNING(SetLibraryIncorrectCall);
    return;
  }

  switch (arg) {
  case library_serial:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, 1);
    break;
  case library_turnaround:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  case library_throughput:
    thread->th.th_set_nproc = 0;
    set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth
                                           : __kmp_dflt_team_nth_ub);
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }

  __kmp_aux_set_library(arg);
}
7465 void __kmp_aux_set_stacksize(
size_t arg) {
7466 if (!__kmp_init_serial)
7467 __kmp_serial_initialize();
7470 if (arg & (0x1000 - 1)) {
7471 arg &= ~(0x1000 - 1);
7476 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
7479 if (!TCR_4(__kmp_init_parallel)) {
7482 if (value < __kmp_sys_min_stksize)
7483 value = __kmp_sys_min_stksize;
7484 else if (value > KMP_MAX_STKSIZE)
7485 value = KMP_MAX_STKSIZE;
7487 __kmp_stksize = value;
7489 __kmp_env_stksize = TRUE;
7492 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
void __kmp_aux_set_library(enum library_type arg) {
  __kmp_library = arg;

  switch (__kmp_library) {
  case library_serial: {
    KMP_INFORM(LibraryIsSerial);
    (void)__kmp_change_library(TRUE);
  } break;
  case library_turnaround:
    (void)__kmp_change_library(TRUE);
    break;
  case library_throughput:
    (void)__kmp_change_library(FALSE);
    break;
  default:
    KMP_FATAL(UnknownLibraryType, arg);
  }
}
void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
  int blocktime = arg; /* argument is in milliseconds */
#if KMP_USE_MONITOR
  int bt_intervals;
#endif
  int bt_set;

  __kmp_save_internal_controls(thread);

  /* Normalize and set blocktime for the teams */
  if (blocktime < KMP_MIN_BLOCKTIME)
    blocktime = KMP_MIN_BLOCKTIME;
  else if (blocktime > KMP_MAX_BLOCKTIME)
    blocktime = KMP_MAX_BLOCKTIME;

  set__blocktime_team(thread->th.th_team, tid, blocktime);
  set__blocktime_team(thread->th.th_serial_team, 0, blocktime);

#if KMP_USE_MONITOR
  /* Calculate and set blocktime intervals for the teams */
  bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups);

  set__bt_intervals_team(thread->th.th_team, tid, bt_intervals);
  set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals);
#endif

  /* Record that blocktime was explicitly set */
  bt_set = TRUE;

  set__bt_set_team(thread->th.th_team, tid, bt_set);
  set__bt_set_team(thread->th.th_serial_team, 0, bt_set);
#if KMP_USE_MONITOR
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, "
                "bt_intervals=%d, monitor_updates=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime, bt_intervals,
                __kmp_monitor_wakeups));
#else
  KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n",
                __kmp_gtid_from_tid(tid, thread->th.th_team),
                thread->th.th_team->t.t_id, tid, blocktime));
#endif
}
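// The blocktime value is clamped to [KMP_MIN_BLOCKTIME, KMP_MAX_BLOCKTIME]
// and, in monitor builds, converted into a number of monitor wakeup intervals
// via KMP_INTERVALS_FROM_BLOCKTIME. The standalone sketch below shows the
// same clamp-then-convert idea; the limits and the round-up choice are
// illustrative assumptions, not the build's real macros.
#include <climits>
#include <cstdio>

static int clamp_blocktime(int ms, int min_ms = 0, int max_ms = INT_MAX) {
  if (ms < min_ms)
    return min_ms;
  if (ms > max_ms)
    return max_ms;
  return ms;
}

// Convert a blocktime in milliseconds into monitor wakeup intervals, rounding
// up so a short nonzero blocktime still yields at least one interval.
static int intervals_from_blocktime(int blocktime_ms, int wakeups_per_sec) {
  int interval_ms = 1000 / wakeups_per_sec;
  return (blocktime_ms + interval_ms - 1) / interval_ms;
}

int main() {
  int bt = clamp_blocktime(200); // e.g. kmp_set_blocktime(200)
  std::printf("blocktime=%dms -> %d intervals at 10 wakeups/s\n", bt,
              intervals_from_blocktime(bt, 10));
  return 0;
}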
7562 void __kmp_aux_set_defaults(
char const *str,
int len) {
7563 if (!__kmp_init_serial) {
7564 __kmp_serial_initialize();
7566 __kmp_env_initialize(str);
7570 || __kmp_display_env || __kmp_display_env_verbose
7580 PACKED_REDUCTION_METHOD_T
7581 __kmp_determine_reduction_method(
7582 ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
size_t reduce_size,
7583 void *reduce_data,
void (*reduce_func)(
void *lhs_data,
void *rhs_data),
7584 kmp_critical_name *lck) {
7595 PACKED_REDUCTION_METHOD_T retval;
7599 KMP_DEBUG_ASSERT(loc);
7600 KMP_DEBUG_ASSERT(lck);
7602 #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED \ 7603 ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)) 7604 #define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func)) 7606 retval = critical_reduce_block;
7609 team_size = __kmp_get_team_num_threads(global_tid);
7610 if (team_size == 1) {
7612 retval = empty_reduce_block;
7616 int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7617 int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7619 #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 7621 #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || \ 7624 int teamsize_cutoff = 4;
7626 #if KMP_MIC_SUPPORTED 7627 if (__kmp_mic_type != non_mic) {
7628 teamsize_cutoff = 8;
7631 if (tree_available) {
7632 if (team_size <= teamsize_cutoff) {
7633 if (atomic_available) {
7634 retval = atomic_reduce_block;
7637 retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7639 }
else if (atomic_available) {
7640 retval = atomic_reduce_block;
7643 #error "Unknown or unsupported OS" 7644 #endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || 7647 #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS 7649 #if KMP_OS_LINUX || KMP_OS_WINDOWS 7653 if (atomic_available) {
7654 if (num_vars <= 2) {
7655 retval = atomic_reduce_block;
7661 if (atomic_available && (num_vars <= 3)) {
7662 retval = atomic_reduce_block;
7663 }
else if (tree_available) {
7664 if ((reduce_size > (9 *
sizeof(kmp_real64))) &&
7665 (reduce_size < (2000 *
sizeof(kmp_real64)))) {
7666 retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER;
7671 #error "Unknown or unsupported OS" 7675 #error "Unknown or unsupported architecture" 7683 if (__kmp_force_reduction_method != reduction_method_not_defined &&
7686 PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block;
7688 int atomic_available, tree_available;
7690 switch ((forced_retval = __kmp_force_reduction_method)) {
7691 case critical_reduce_block:
7695 case atomic_reduce_block:
7696 atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;
7697 if (!atomic_available) {
7698 KMP_WARNING(RedMethodNotSupported,
"atomic");
7699 forced_retval = critical_reduce_block;
7703 case tree_reduce_block:
7704 tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;
7705 if (!tree_available) {
7706 KMP_WARNING(RedMethodNotSupported,
"tree");
7707 forced_retval = critical_reduce_block;
7709 #if KMP_FAST_REDUCTION_BARRIER 7710 forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER;
7719 retval = forced_retval;
7722 KA_TRACE(10, (
"reduction method selected=%08x\n", retval));
#undef FAST_REDUCTION_TREE_METHOD_GENERATED
#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED

  return (retval);
}

// this function is for testing set/get/determine reduce method
kmp_int32 __kmp_get_reduce_method(void) {
  return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8);
}
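// __kmp_get_reduce_method returns the packed reduction method shifted right
// by 8, i.e. the chosen method lives in the upper bits of
// th_local.packed_reduction_method with flavor information in the low byte.
// The exact field layout below is an assumption for illustration; only the
// ">> 8" retrieval is taken from the code above.
#include <cstdint>
#include <cstdio>

static std::uint32_t pack_reduction_method(std::uint32_t method,
                                           std::uint32_t flavor) {
  return (method << 8) | (flavor & 0xffu); // assumed packing for the sketch
}

static std::uint32_t unpack_method(std::uint32_t packed) {
  return packed >> 8; // same shift as __kmp_get_reduce_method()
}

int main() {
  std::uint32_t packed = pack_reduction_method(/*method=*/3, /*flavor=*/1);
  std::printf("packed=0x%08x method=%u\n", packed, unpack_method(packed));
  return 0;
}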